1use open_kioku_core::{
2 AnalysisFact, CodeChunk, Confidence, EvidenceSourceType, File, FileId, FileProvenance,
3 GitCochangeEdge, GitCommitId, GitCommitRecord, GitFileTouch, GitSymbolTouch, GraphEdge,
4 GraphEdgeType, GraphNode, GraphNodeType, HistoryRecordId, HistorySnapshot, HistorySummary,
5 Import, IndexManifest, ProvenanceTouch, Symbol, SymbolId, SymbolOccurrence, SymbolProvenance,
6 TestTarget, HISTORY_SCHEMA_VERSION,
7};
8use open_kioku_errors::{OkError, Result};
9use open_kioku_storage::{
10 GraphCounts, GraphSchemaCounts, GraphStore, HistoryStore, IndexData, MetadataStore,
11 PartialIndexUpdate,
12};
13use rusqlite::{params, Connection, OptionalExtension, Transaction};
14use std::collections::BTreeSet;
15use std::path::{Path, PathBuf};
16use std::sync::Mutex;
17
/// Version number of the git-history table layout defined by `HISTORY_SCHEMA_V1`.
// NOTE(review): presumably consumed by `migrate_history_schema`; its use is outside this view.
const SQLITE_HISTORY_SCHEMA_VERSION: i64 = 1;
/// Index schema version this SQLite backend supports; public so other crates can read it.
pub const SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION: i64 = 2;
/// Graph schema version; defined to be the same value as the supported index schema version.
const SQLITE_GRAPH_SCHEMA_VERSION: i64 = SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION;
/// Overall supported schema version; defined to be the same value as the supported index
/// schema version.
const SQLITE_SUPPORTED_SCHEMA_VERSION: i64 = SQLITE_SUPPORTED_INDEX_SCHEMA_VERSION;
22
/// DDL for version 1 of the git-history tables.
///
/// Creates, only if they do not already exist, five tables and their lookup indexes:
/// `git_commits`, `git_file_touches`, `git_symbol_touches`, `git_cochange_edges` and
/// `git_review_events`. The two touch tables reference `git_commits(id)` with
/// `ON DELETE CASCADE`. Each table has a `json` text column next to the columns that are
/// indexed for lookup.
// NOTE(review): presumably applied by `migrate_history_schema`; that function is outside this view.
const HISTORY_SCHEMA_V1: &str = r#"
CREATE TABLE IF NOT EXISTS git_commits (
    id TEXT PRIMARY KEY,
    authored_at TEXT NOT NULL,
    committed_at TEXT NOT NULL,
    author_email TEXT,
    json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_git_commits_committed_at
    ON git_commits(committed_at DESC, id);
CREATE INDEX IF NOT EXISTS idx_git_commits_author_email
    ON git_commits(author_email);

CREATE TABLE IF NOT EXISTS git_file_touches (
    id TEXT PRIMARY KEY,
    commit_id TEXT NOT NULL,
    path TEXT NOT NULL,
    previous_path TEXT,
    touched_at TEXT NOT NULL,
    json TEXT NOT NULL,
    FOREIGN KEY(commit_id) REFERENCES git_commits(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_path
    ON git_file_touches(path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_previous_path
    ON git_file_touches(previous_path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_file_touches_commit
    ON git_file_touches(commit_id);

CREATE TABLE IF NOT EXISTS git_symbol_touches (
    id TEXT PRIMARY KEY,
    commit_id TEXT NOT NULL,
    symbol_id TEXT,
    qualified_name TEXT NOT NULL,
    file_path TEXT NOT NULL,
    touched_at TEXT NOT NULL,
    json TEXT NOT NULL,
    FOREIGN KEY(commit_id) REFERENCES git_commits(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_file
    ON git_symbol_touches(file_path, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_symbol
    ON git_symbol_touches(symbol_id, touched_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_symbol_touches_commit
    ON git_symbol_touches(commit_id);

CREATE TABLE IF NOT EXISTS git_cochange_edges (
    id TEXT PRIMARY KEY,
    path TEXT NOT NULL,
    cochanged_path TEXT NOT NULL,
    commit_count INTEGER NOT NULL,
    recency_weight REAL NOT NULL,
    last_changed_at TEXT,
    json TEXT NOT NULL,
    UNIQUE(path, cochanged_path)
);
CREATE INDEX IF NOT EXISTS idx_git_cochange_edges_path
    ON git_cochange_edges(path, recency_weight DESC, commit_count DESC);

CREATE TABLE IF NOT EXISTS git_review_events (
    id TEXT PRIMARY KEY,
    commit_id TEXT,
    path TEXT,
    reviewer_identity TEXT NOT NULL,
    observed_at TEXT NOT NULL,
    json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_git_review_events_path
    ON git_review_events(path, observed_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_review_events_commit
    ON git_review_events(commit_id, observed_at DESC);
CREATE INDEX IF NOT EXISTS idx_git_review_events_reviewer
    ON git_review_events(reviewer_identity, observed_at DESC);
"#;
97
/// SQLite-backed store; this file implements `MetadataStore` and `HistoryStore` for it.
pub struct SqliteStore {
    /// Path the database was opened from; returned by `SqliteStore::path`.
    path: PathBuf,
    /// The one connection used for all queries; store methods lock this mutex before using it.
    connection: Mutex<Connection>,
}
102
103impl SqliteStore {
104 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
105 let path = path.as_ref().to_path_buf();
106 if let Some(parent) = path.parent() {
107 std::fs::create_dir_all(parent)?;
108 }
109 let connection = Connection::open_with_flags(
110 &path,
111 rusqlite::OpenFlags::SQLITE_OPEN_READ_WRITE
112 | rusqlite::OpenFlags::SQLITE_OPEN_CREATE
113 | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
114 )
115 .map_err(storage_err)?;
116 let store = Self {
117 path,
118 connection: Mutex::new(connection),
119 };
120 store.initialize()?;
121 Ok(store)
122 }
123
124 pub fn path(&self) -> &Path {
125 &self.path
126 }
127}
128
129impl MetadataStore for SqliteStore {
    /// Prepares the database for use: checks the stored schema, creates tables, runs migrations.
    ///
    /// Steps, in order:
    /// 1. `ensure_supported_sqlite_schema` runs before any DDL, so its error is returned before
    ///    this method changes anything.
    /// 2. One batch switches the journal to WAL, turns on foreign-key enforcement, and creates
    ///    every index, semantic and graph table (plus indexes) with `IF NOT EXISTS`, so calling
    ///    this again on an existing database does not recreate them.
    /// 3. `migrate_history_schema` and `migrate_graph_schema` run afterwards on the same
    ///    connection.
    ///
    /// # Errors
    /// Returns a storage error if the mutex is poisoned or any of the steps above fails.
    fn initialize(&self) -> Result<()> {
        let mut conn = self
            .connection
            .lock()
            .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
        ensure_supported_sqlite_schema(&conn)?;
        conn.execute_batch(
            r#"
            PRAGMA journal_mode = WAL;
            PRAGMA foreign_keys = ON;
            CREATE TABLE IF NOT EXISTS manifests (
                id INTEGER PRIMARY KEY CHECK (id = 1),
                json TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS files (
                id TEXT PRIMARY KEY,
                path TEXT NOT NULL UNIQUE,
                json TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS symbols (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                qualified_name TEXT NOT NULL,
                file_id TEXT NOT NULL,
                json TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
            CREATE TABLE IF NOT EXISTS chunks (
                id TEXT PRIMARY KEY,
                file_id TEXT NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                text TEXT NOT NULL,
                json TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);
            CREATE TABLE IF NOT EXISTS tests (
                id TEXT PRIMARY KEY,
                file_id TEXT NOT NULL,
                json TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_tests_file ON tests(file_id);
            CREATE TABLE IF NOT EXISTS imports (
                id TEXT PRIMARY KEY,
                file_id TEXT NOT NULL,
                imported TEXT NOT NULL,
                json TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_imports_file ON imports(file_id);
            CREATE TABLE IF NOT EXISTS occurrences (
                id TEXT PRIMARY KEY,
                symbol_id TEXT NOT NULL,
                file_id TEXT NOT NULL,
                is_definition INTEGER NOT NULL,
                json TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_occurrences_symbol ON occurrences(symbol_id);
            CREATE INDEX IF NOT EXISTS idx_occurrences_file ON occurrences(file_id);
            CREATE TABLE IF NOT EXISTS analysis_facts (
                id TEXT PRIMARY KEY,
                file_id TEXT NOT NULL,
                source_type TEXT NOT NULL,
                target TEXT NOT NULL,
                json TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_analysis_facts_file ON analysis_facts(file_id);
            CREATE INDEX IF NOT EXISTS idx_analysis_facts_source ON analysis_facts(source_type);
            CREATE TABLE IF NOT EXISTS vector_targets (
                id TEXT PRIMARY KEY,
                file_id TEXT NOT NULL,
                target_kind TEXT NOT NULL,
                content_hash TEXT NOT NULL,
                vector_id INTEGER NOT NULL,
                model TEXT NOT NULL,
                dimensions INTEGER NOT NULL,
                json TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_vector_targets_file ON vector_targets(file_id);
            CREATE TABLE IF NOT EXISTS embedding_cache (
                cache_key TEXT PRIMARY KEY,
                target_id TEXT NOT NULL,
                content_hash TEXT NOT NULL,
                model TEXT NOT NULL,
                dimensions INTEGER NOT NULL,
                json TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS semantic_index_runs (
                id TEXT PRIMARY KEY,
                status TEXT NOT NULL,
                model TEXT NOT NULL,
                dimensions INTEGER NOT NULL,
                vector_count INTEGER NOT NULL,
                created_at TEXT NOT NULL,
                json TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS semantic_coverage (
                id TEXT PRIMARY KEY,
                target_kind TEXT NOT NULL,
                indexed_count INTEGER NOT NULL,
                stale_count INTEGER NOT NULL,
                failed_count INTEGER NOT NULL,
                json TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS graph_nodes (
                id TEXT PRIMARY KEY,
                label TEXT NOT NULL,
                node_type TEXT DEFAULT '',
                file_id TEXT DEFAULT '',
                symbol_id TEXT DEFAULT '',
                json TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS graph_edges (
                id TEXT PRIMARY KEY,
                from_id TEXT NOT NULL,
                to_id TEXT NOT NULL,
                edge_type TEXT NOT NULL,
                confidence TEXT DEFAULT '',
                source_type TEXT DEFAULT '',
                source_file TEXT DEFAULT '',
                json TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_graph_edges_from ON graph_edges(from_id);
            CREATE INDEX IF NOT EXISTS idx_graph_edges_to ON graph_edges(to_id);
            "#,
        )
        .map_err(storage_err)?;
        // Migrations run after the base tables exist and take the connection mutably.
        migrate_history_schema(&mut conn)?;
        migrate_graph_schema(&mut conn)?;
        Ok(())
    }
260
261 fn put_manifest(&self, manifest: &IndexManifest) -> Result<()> {
262 let conn = self
263 .connection
264 .lock()
265 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
266 let json = serde_json::to_string(manifest)?;
267 conn.execute(
268 "INSERT INTO manifests(id, json) VALUES(1, ?1) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
269 params![json],
270 )
271 .map_err(storage_err)?;
272 Ok(())
273 }
274
275 fn manifest(&self) -> Result<Option<IndexManifest>> {
276 let conn = self
277 .connection
278 .lock()
279 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
280 let raw: Option<String> = conn
281 .query_row("SELECT json FROM manifests WHERE id = 1", [], |row| {
282 row.get(0)
283 })
284 .optional()
285 .map_err(storage_err)?;
286 raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
287 .transpose()
288 }
289
290 fn replace_index(&self, data: IndexData<'_>) -> Result<()> {
291 let mut conn = self
292 .connection
293 .lock()
294 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
295 let tx = conn.transaction().map_err(storage_err)?;
296 tx.execute("DELETE FROM occurrences", [])
297 .map_err(storage_err)?;
298 tx.execute("DELETE FROM analysis_facts", [])
299 .map_err(storage_err)?;
300 tx.execute("DELETE FROM imports", []).map_err(storage_err)?;
301 tx.execute("DELETE FROM tests", []).map_err(storage_err)?;
302 tx.execute("DELETE FROM chunks", []).map_err(storage_err)?;
303 tx.execute("DELETE FROM symbols", []).map_err(storage_err)?;
304 tx.execute("DELETE FROM files", []).map_err(storage_err)?;
305 tx.execute("DELETE FROM manifests", [])
306 .map_err(storage_err)?;
307 tx.execute(
308 "INSERT INTO manifests(id, json) VALUES(1, ?1)",
309 params![serde_json::to_string(data.manifest)?],
310 )
311 .map_err(storage_err)?;
312 insert_index_rows(
313 &tx,
314 IndexRows {
315 files: data.files,
316 symbols: data.symbols,
317 chunks: data.chunks,
318 tests: data.tests,
319 imports: data.imports,
320 occurrences: data.occurrences,
321 analysis_facts: data.analysis_facts,
322 },
323 )?;
324 tx.commit().map_err(storage_err)?;
325 Ok(())
326 }
327
328 fn replace_files_index(&self, update: PartialIndexUpdate<'_>) -> Result<()> {
329 let mut conn = self
330 .connection
331 .lock()
332 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
333 let tx = conn.transaction().map_err(storage_err)?;
334 let affected_file_ids = update
335 .changed_files
336 .iter()
337 .map(|file| file.id.clone())
338 .chain(update.deleted_file_ids.iter().cloned())
339 .collect::<BTreeSet<_>>();
340 let mut affected_file_paths = update
341 .changed_files
342 .iter()
343 .map(|file| file.path.to_string_lossy().to_string())
344 .collect::<BTreeSet<_>>();
345 for file_id in &affected_file_ids {
346 let path: Option<String> = tx
347 .query_row(
348 "SELECT path FROM files WHERE id = ?1",
349 params![&file_id.0],
350 |row| row.get(0),
351 )
352 .optional()
353 .map_err(storage_err)?;
354 if let Some(path) = path {
355 affected_file_paths.insert(path);
356 }
357 }
358
359 let mut affected_symbol_ids = update
360 .symbols
361 .iter()
362 .map(|symbol| symbol.id.clone())
363 .collect::<BTreeSet<_>>();
364 for file_id in &affected_file_ids {
365 let mut stmt = tx
366 .prepare("SELECT id FROM symbols WHERE file_id = ?1")
367 .map_err(storage_err)?;
368 let rows = stmt
369 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
370 .map_err(storage_err)?;
371 for row in rows {
372 affected_symbol_ids.insert(SymbolId::new(row.map_err(storage_err)?));
373 }
374 }
375
376 let mut affected_node_ids = update
377 .graph_nodes
378 .iter()
379 .map(|node| node.id.0.clone())
380 .collect::<BTreeSet<_>>();
381 for file_id in &affected_file_ids {
382 let mut stmt = tx
383 .prepare("SELECT id FROM graph_nodes WHERE file_id = ?1")
384 .map_err(storage_err)?;
385 let rows = stmt
386 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
387 .map_err(storage_err)?;
388 for row in rows {
389 affected_node_ids.insert(row.map_err(storage_err)?);
390 }
391 }
392 for symbol_id in &affected_symbol_ids {
393 let mut stmt = tx
394 .prepare("SELECT id FROM graph_nodes WHERE symbol_id = ?1")
395 .map_err(storage_err)?;
396 let rows = stmt
397 .query_map(params![&symbol_id.0], |row| row.get::<_, String>(0))
398 .map_err(storage_err)?;
399 for row in rows {
400 affected_node_ids.insert(row.map_err(storage_err)?);
401 }
402 }
403
404 tx.execute(
405 "INSERT INTO manifests(id, json) VALUES(1, ?1)
406 ON CONFLICT(id) DO UPDATE SET json = excluded.json",
407 params![serde_json::to_string(update.manifest)?],
408 )
409 .map_err(storage_err)?;
410
411 for node_id in &affected_node_ids {
412 tx.execute(
413 "DELETE FROM graph_edges WHERE from_id = ?1 OR to_id = ?1",
414 params![node_id],
415 )
416 .map_err(storage_err)?;
417 }
418 for path in &affected_file_paths {
419 tx.execute(
420 "DELETE FROM graph_edges WHERE source_file = ?1",
421 params![path],
422 )
423 .map_err(storage_err)?;
424 }
425 for node_id in &affected_node_ids {
426 tx.execute("DELETE FROM graph_nodes WHERE id = ?1", params![node_id])
427 .map_err(storage_err)?;
428 }
429 for file_id in &affected_file_ids {
430 tx.execute(
431 "DELETE FROM graph_nodes WHERE file_id = ?1",
432 params![&file_id.0],
433 )
434 .map_err(storage_err)?;
435 }
436 for symbol_id in &affected_symbol_ids {
437 tx.execute(
438 "DELETE FROM graph_nodes WHERE symbol_id = ?1",
439 params![&symbol_id.0],
440 )
441 .map_err(storage_err)?;
442 }
443
444 for symbol_id in &affected_symbol_ids {
445 tx.execute(
446 "DELETE FROM occurrences WHERE symbol_id = ?1",
447 params![&symbol_id.0],
448 )
449 .map_err(storage_err)?;
450 }
451 for file_id in &affected_file_ids {
452 tx.execute(
453 "DELETE FROM occurrences WHERE file_id = ?1",
454 params![&file_id.0],
455 )
456 .map_err(storage_err)?;
457 tx.execute(
458 "DELETE FROM analysis_facts WHERE file_id = ?1",
459 params![&file_id.0],
460 )
461 .map_err(storage_err)?;
462 tx.execute(
463 "DELETE FROM imports WHERE file_id = ?1",
464 params![&file_id.0],
465 )
466 .map_err(storage_err)?;
467 tx.execute("DELETE FROM tests WHERE file_id = ?1", params![&file_id.0])
468 .map_err(storage_err)?;
469 tx.execute("DELETE FROM chunks WHERE file_id = ?1", params![&file_id.0])
470 .map_err(storage_err)?;
471 tx.execute(
472 "DELETE FROM symbols WHERE file_id = ?1",
473 params![&file_id.0],
474 )
475 .map_err(storage_err)?;
476 tx.execute("DELETE FROM files WHERE id = ?1", params![&file_id.0])
477 .map_err(storage_err)?;
478 }
479
480 insert_index_rows(
481 &tx,
482 IndexRows {
483 files: update.changed_files,
484 symbols: update.symbols,
485 chunks: update.chunks,
486 tests: update.tests,
487 imports: update.imports,
488 occurrences: update.occurrences,
489 analysis_facts: update.analysis_facts,
490 },
491 )?;
492 insert_graph_rows(&tx, update.graph_nodes, update.graph_edges)?;
493 tx.commit().map_err(storage_err)?;
494 Ok(())
495 }
496
497 fn list_files(&self, limit: usize, offset: usize) -> Result<Vec<File>> {
498 let conn = self
499 .connection
500 .lock()
501 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
502 let mut stmt = conn
503 .prepare("SELECT json FROM files ORDER BY path LIMIT ?1 OFFSET ?2")
504 .map_err(storage_err)?;
505 let rows = stmt
506 .query_map(params![limit as i64, offset as i64], |row| {
507 row.get::<_, String>(0)
508 })
509 .map_err(storage_err)?;
510 collect_json(rows)
511 }
512
513 fn get_file_by_path(&self, path: &Path) -> Result<Option<File>> {
514 let conn = self
515 .connection
516 .lock()
517 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
518 let raw: Option<String> = conn
519 .query_row(
520 "SELECT json FROM files WHERE path = ?1",
521 params![path.to_string_lossy().as_ref()],
522 |row| row.get(0),
523 )
524 .optional()
525 .map_err(storage_err)?;
526 raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
527 .transpose()
528 }
529
530 fn list_symbols(
531 &self,
532 query: Option<&str>,
533 limit: usize,
534 offset: usize,
535 ) -> Result<Vec<Symbol>> {
536 let conn = self
537 .connection
538 .lock()
539 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
540 let pattern = format!("%{}%", query.unwrap_or_default());
541 let mut stmt = conn
542 .prepare(
543 "SELECT json FROM symbols WHERE (?1 = '%%' OR name LIKE ?1 COLLATE NOCASE OR qualified_name LIKE ?1 COLLATE NOCASE) ORDER BY qualified_name LIMIT ?2 OFFSET ?3",
544 )
545 .map_err(storage_err)?;
546 let rows = stmt
547 .query_map(params![pattern, limit as i64, offset as i64], |row| {
548 row.get::<_, String>(0)
549 })
550 .map_err(storage_err)?;
551 collect_json(rows)
552 }
553
554 fn symbol_by_id(&self, id: &SymbolId) -> Result<Option<Symbol>> {
555 let conn = self
556 .connection
557 .lock()
558 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
559 let raw: Option<String> = conn
560 .query_row(
561 "SELECT json FROM symbols WHERE id = ?1",
562 params![&id.0],
563 |row| row.get(0),
564 )
565 .optional()
566 .map_err(storage_err)?;
567 raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
568 .transpose()
569 }
570
571 fn chunks_for_file(&self, file_id: &FileId) -> Result<Vec<CodeChunk>> {
572 let conn = self
573 .connection
574 .lock()
575 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
576 let mut stmt = conn
577 .prepare("SELECT json FROM chunks WHERE file_id = ?1 ORDER BY start_line")
578 .map_err(storage_err)?;
579 let rows = stmt
580 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
581 .map_err(storage_err)?;
582 collect_json(rows)
583 }
584
585 fn all_chunks(&self) -> Result<Vec<CodeChunk>> {
586 let conn = self
587 .connection
588 .lock()
589 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
590 let mut stmt = conn
591 .prepare("SELECT json FROM chunks ORDER BY file_id, start_line")
592 .map_err(storage_err)?;
593 let rows = stmt
594 .query_map([], |row| row.get::<_, String>(0))
595 .map_err(storage_err)?;
596 collect_json(rows)
597 }
598
599 fn tests(&self) -> Result<Vec<TestTarget>> {
600 let conn = self
601 .connection
602 .lock()
603 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
604 let mut stmt = conn
605 .prepare("SELECT json FROM tests ORDER BY file_id")
606 .map_err(storage_err)?;
607 let rows = stmt
608 .query_map([], |row| row.get::<_, String>(0))
609 .map_err(storage_err)?;
610 collect_json(rows)
611 }
612
613 fn imports(&self) -> Result<Vec<Import>> {
614 let conn = self
615 .connection
616 .lock()
617 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
618 let mut stmt = conn
619 .prepare("SELECT json FROM imports ORDER BY file_id")
620 .map_err(storage_err)?;
621 let rows = stmt
622 .query_map([], |row| row.get::<_, String>(0))
623 .map_err(storage_err)?;
624 collect_json(rows)
625 }
626
627 fn analysis_facts(
628 &self,
629 source_type: Option<EvidenceSourceType>,
630 limit: usize,
631 ) -> Result<Vec<AnalysisFact>> {
632 let conn = self
633 .connection
634 .lock()
635 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
636 let limit = limit.min(i64::MAX as usize) as i64;
637 let rows = if let Some(source_type) = source_type {
638 let mut stmt = conn
639 .prepare(
640 "SELECT json FROM analysis_facts WHERE source_type = ?1 ORDER BY file_id, target LIMIT ?2",
641 )
642 .map_err(storage_err)?;
643 let rows = stmt
644 .query_map(params![source_type_name(&source_type), limit], |row| {
645 row.get::<_, String>(0)
646 })
647 .map_err(storage_err)?;
648 collect_json(rows)?
649 } else {
650 let mut stmt = conn
651 .prepare("SELECT json FROM analysis_facts ORDER BY file_id, target LIMIT ?1")
652 .map_err(storage_err)?;
653 let rows = stmt
654 .query_map(params![limit], |row| row.get::<_, String>(0))
655 .map_err(storage_err)?;
656 collect_json(rows)?
657 };
658 Ok(rows)
659 }
660
661 fn references_for_symbol(&self, id: &SymbolId, limit: usize) -> Result<Vec<SymbolOccurrence>> {
662 let conn = self
663 .connection
664 .lock()
665 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
666 let mut stmt = conn
667 .prepare(
668 "SELECT json FROM occurrences WHERE symbol_id = ?1 AND is_definition = 0 ORDER BY file_id LIMIT ?2",
669 )
670 .map_err(storage_err)?;
671 let rows = stmt
672 .query_map(params![&id.0, limit as i64], |row| row.get::<_, String>(0))
673 .map_err(storage_err)?;
674 collect_json(rows)
675 }
676
677 fn occurrences_for_file(&self, file_id: &FileId) -> Result<Vec<SymbolOccurrence>> {
678 let conn = self
679 .connection
680 .lock()
681 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
682 let mut stmt = conn
683 .prepare("SELECT json FROM occurrences WHERE file_id = ?1 ORDER BY symbol_id")
684 .map_err(storage_err)?;
685 let rows = stmt
686 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
687 .map_err(storage_err)?;
688 collect_json(rows)
689 }
690
691 fn symbols_for_file(&self, file_id: &FileId) -> Result<Vec<Symbol>> {
692 let conn = self
693 .connection
694 .lock()
695 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
696 let mut stmt = conn
697 .prepare("SELECT json FROM symbols WHERE file_id = ?1 ORDER BY name")
698 .map_err(storage_err)?;
699 let rows = stmt
700 .query_map(params![&file_id.0], |row| row.get::<_, String>(0))
701 .map_err(storage_err)?;
702 collect_json(rows)
703 }
704
705 fn find_chunks_containing(&self, query: &str, limit: usize) -> Result<Vec<CodeChunk>> {
706 let conn = self
707 .connection
708 .lock()
709 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
710 let pattern = format!("%{}%", query);
711 let mut stmt = conn
712 .prepare("SELECT json FROM chunks WHERE text LIKE ?1 LIMIT ?2")
713 .map_err(storage_err)?;
714 let rows = stmt
715 .query_map(params![pattern, limit as i64], |row| {
716 row.get::<_, String>(0)
717 })
718 .map_err(storage_err)?;
719 collect_json(rows)
720 }
721
722 fn find_files_by_path_pattern(&self, pattern: &str) -> Result<Vec<File>> {
723 let conn = self
724 .connection
725 .lock()
726 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
727 let match_pat = format!("%{}%", pattern);
728 let mut stmt = conn
729 .prepare("SELECT json FROM files WHERE path LIKE ?1 COLLATE NOCASE")
730 .map_err(storage_err)?;
731 let rows = stmt
732 .query_map(params![match_pat], |row| row.get::<_, String>(0))
733 .map_err(storage_err)?;
734 collect_json(rows)
735 }
736
737 fn tests_for_files(&self, file_ids: &[FileId]) -> Result<Vec<TestTarget>> {
738 if file_ids.is_empty() {
739 return Ok(Vec::new());
740 }
741 let conn = self
742 .connection
743 .lock()
744 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
745
746 let placeholders = file_ids.iter().map(|_| "?").collect::<Vec<_>>().join(",");
747 let sql = format!("SELECT json FROM tests WHERE file_id IN ({})", placeholders);
748 let mut stmt = conn.prepare(&sql).map_err(storage_err)?;
749
750 let params = rusqlite::params_from_iter(file_ids.iter().map(|id| &id.0));
751 let rows = stmt
752 .query_map(params, |row| row.get::<_, String>(0))
753 .map_err(storage_err)?;
754 collect_json(rows)
755 }
756}
757
758impl HistoryStore for SqliteStore {
759 fn put_history_snapshot(&self, snapshot: &HistorySnapshot) -> Result<()> {
760 validate_history_snapshot(snapshot)?;
761 let mut conn = self
762 .connection
763 .lock()
764 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
765 let tx = conn.transaction().map_err(storage_err)?;
766
767 tx.execute("DELETE FROM git_review_events", [])
768 .map_err(storage_err)?;
769 tx.execute("DELETE FROM git_cochange_edges", [])
770 .map_err(storage_err)?;
771 tx.execute("DELETE FROM git_symbol_touches", [])
772 .map_err(storage_err)?;
773 tx.execute("DELETE FROM git_file_touches", [])
774 .map_err(storage_err)?;
775 tx.execute("DELETE FROM git_commits", [])
776 .map_err(storage_err)?;
777
778 for commit in &snapshot.commits {
779 tx.execute(
780 "INSERT INTO git_commits(id, authored_at, committed_at, author_email, json) VALUES(?1, ?2, ?3, ?4, ?5)",
781 params![
782 &commit.id.0,
783 commit.authored_at.to_rfc3339(),
784 commit.committed_at.to_rfc3339(),
785 commit.author.email.as_deref(),
786 serde_json::to_string(commit)?,
787 ],
788 )
789 .map_err(storage_err)?;
790 }
791 for touch in &snapshot.file_touches {
792 tx.execute(
793 "INSERT INTO git_file_touches(id, commit_id, path, previous_path, touched_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
794 params![
795 &touch.id.0,
796 &touch.commit_id.0,
797 history_path(&touch.path)?,
798 touch
799 .previous_path
800 .as_deref()
801 .map(history_path)
802 .transpose()?,
803 touch.touched_at.to_rfc3339(),
804 serde_json::to_string(touch)?,
805 ],
806 )
807 .map_err(storage_err)?;
808 }
809 for touch in &snapshot.symbol_touches {
810 tx.execute(
811 "INSERT INTO git_symbol_touches(id, commit_id, symbol_id, qualified_name, file_path, touched_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
812 params![
813 &touch.id.0,
814 &touch.commit_id.0,
815 touch.symbol_id.as_ref().map(|id| id.0.as_str()),
816 &touch.qualified_name,
817 history_path(&touch.file_path)?,
818 touch.touched_at.to_rfc3339(),
819 serde_json::to_string(touch)?,
820 ],
821 )
822 .map_err(storage_err)?;
823 }
824 for edge in &snapshot.cochange_edges {
825 tx.execute(
826 "INSERT INTO git_cochange_edges(id, path, cochanged_path, commit_count, recency_weight, last_changed_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7)",
827 params![
828 &edge.id.0,
829 history_path(&edge.path)?,
830 history_path(&edge.cochanged_path)?,
831 usize_to_i64(edge.commit_count, "co-change commit count")?,
832 edge.recency_weight,
833 edge.last_changed_at.map(|value| value.to_rfc3339()),
834 serde_json::to_string(edge)?,
835 ],
836 )
837 .map_err(storage_err)?;
838 }
839 for evidence in &snapshot.reviewer_evidence {
840 let reviewer_identity = evidence
841 .reviewer
842 .email
843 .as_deref()
844 .unwrap_or(&evidence.reviewer.name);
845 tx.execute(
846 "INSERT INTO git_review_events(id, commit_id, path, reviewer_identity, observed_at, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
847 params![
848 &evidence.id.0,
849 evidence.commit_id.as_ref().map(|id| id.0.as_str()),
850 evidence.path.as_deref().map(history_path).transpose()?,
851 reviewer_identity,
852 evidence.observed_at.to_rfc3339(),
853 serde_json::to_string(evidence)?,
854 ],
855 )
856 .map_err(storage_err)?;
857 }
858
859 tx.commit().map_err(storage_err)?;
860 Ok(())
861 }
862
863 fn history_for_file(&self, path: &Path, limit: usize) -> Result<HistorySummary> {
864 let normalized_path = history_path(path)?;
865 if limit == 0 {
866 return Ok(HistorySummary {
867 path: path.to_path_buf(),
868 recent_commits: Vec::new(),
869 file_touches: Vec::new(),
870 symbol_touches: Vec::new(),
871 cochange_neighbors: Vec::new(),
872 reviewer_evidence: Vec::new(),
873 truncated: false,
874 uncertainty: vec!["history query limit is zero".into()],
875 });
876 }
877
878 let conn = self
879 .connection
880 .lock()
881 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
882 let query_limit = history_query_limit(limit);
883
884 let mut commit_stmt = conn
885 .prepare(
886 "SELECT c.json FROM git_commits c
887 WHERE EXISTS (
888 SELECT 1 FROM git_file_touches t
889 WHERE t.commit_id = c.id AND (t.path = ?1 OR t.previous_path = ?1)
890 )
891 ORDER BY c.committed_at DESC, c.id
892 LIMIT ?2",
893 )
894 .map_err(storage_err)?;
895 let commit_rows = commit_stmt
896 .query_map(params![&normalized_path, query_limit], |row| {
897 row.get::<_, String>(0)
898 })
899 .map_err(storage_err)?;
900 let (recent_commits, commits_truncated) = collect_limited_json(commit_rows, limit)?;
901
902 let mut file_touch_stmt = conn
903 .prepare(
904 "SELECT json FROM git_file_touches
905 WHERE path = ?1 OR previous_path = ?1
906 ORDER BY touched_at DESC, id
907 LIMIT ?2",
908 )
909 .map_err(storage_err)?;
910 let file_touch_rows = file_touch_stmt
911 .query_map(params![&normalized_path, query_limit], |row| {
912 row.get::<_, String>(0)
913 })
914 .map_err(storage_err)?;
915 let (file_touches, file_touches_truncated) = collect_limited_json(file_touch_rows, limit)?;
916
917 let mut symbol_touch_stmt = conn
918 .prepare(
919 "SELECT json FROM git_symbol_touches
920 WHERE file_path = ?1
921 ORDER BY touched_at DESC, id
922 LIMIT ?2",
923 )
924 .map_err(storage_err)?;
925 let symbol_touch_rows = symbol_touch_stmt
926 .query_map(params![&normalized_path, query_limit], |row| {
927 row.get::<_, String>(0)
928 })
929 .map_err(storage_err)?;
930 let (symbol_touches, symbol_touches_truncated) =
931 collect_limited_json(symbol_touch_rows, limit)?;
932
933 let mut cochange_stmt = conn
934 .prepare(
935 "SELECT json FROM git_cochange_edges
936 WHERE path = ?1
937 ORDER BY recency_weight DESC, commit_count DESC, cochanged_path
938 LIMIT ?2",
939 )
940 .map_err(storage_err)?;
941 let cochange_rows = cochange_stmt
942 .query_map(params![&normalized_path, query_limit], |row| {
943 row.get::<_, String>(0)
944 })
945 .map_err(storage_err)?;
946 let (cochange_neighbors, cochange_truncated) = collect_limited_json(cochange_rows, limit)?;
947
948 let mut reviewer_stmt = conn
949 .prepare(
950 "SELECT e.json FROM git_review_events e
951 WHERE e.path = ?1
952 OR (
953 e.path IS NULL
954 AND e.commit_id IN (
955 SELECT t.commit_id FROM git_file_touches t
956 WHERE t.path = ?1 OR t.previous_path = ?1
957 )
958 )
959 ORDER BY e.observed_at DESC, e.id
960 LIMIT ?2",
961 )
962 .map_err(storage_err)?;
963 let reviewer_rows = reviewer_stmt
964 .query_map(params![&normalized_path, query_limit], |row| {
965 row.get::<_, String>(0)
966 })
967 .map_err(storage_err)?;
968 let (reviewer_evidence, reviewers_truncated) = collect_limited_json(reviewer_rows, limit)?;
969
970 let truncated = commits_truncated
971 || file_touches_truncated
972 || symbol_touches_truncated
973 || cochange_truncated
974 || reviewers_truncated;
975 let mut uncertainty = Vec::new();
976 if recent_commits.is_empty()
977 && file_touches.is_empty()
978 && symbol_touches.is_empty()
979 && cochange_neighbors.is_empty()
980 && reviewer_evidence.is_empty()
981 {
982 uncertainty.push("no persisted history evidence is available for this path".into());
983 } else {
984 if symbol_touches.is_empty() {
985 uncertainty.push("no symbol-level history is stored for this path".into());
986 }
987 if reviewer_evidence.is_empty() {
988 uncertainty.push("no reviewer or owner evidence is stored for this path".into());
989 }
990 }
991 if truncated {
992 uncertainty.push(format!(
993 "history results are truncated to {limit} records per category"
994 ));
995 }
996
997 Ok(HistorySummary {
998 path: path.to_path_buf(),
999 recent_commits,
1000 file_touches,
1001 symbol_touches,
1002 cochange_neighbors,
1003 reviewer_evidence,
1004 truncated,
1005 uncertainty,
1006 })
1007 }
1008
1009 fn provenance_for_path(&self, path: &Path, limit: usize) -> Result<FileProvenance> {
1010 let normalized_path = history_path(path)?;
1011 if limit == 0 {
1012 return Ok(FileProvenance {
1013 path: path.to_path_buf(),
1014 first_seen: None,
1015 last_touched: None,
1016 recent_touches: Vec::new(),
1017 confidence: Confidence::Low,
1018 truncated: false,
1019 uncertainty: vec!["provenance query limit is zero".into()],
1020 });
1021 }
1022
1023 let conn = self
1024 .connection
1025 .lock()
1026 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1027 let query_limit = history_query_limit(limit);
1028 let aliases = "
1029 WITH RECURSIVE aliases(path) AS (
1030 SELECT ?1
1031 UNION
1032 SELECT t.previous_path
1033 FROM git_file_touches t JOIN aliases a ON t.path = a.path
1034 WHERE t.previous_path IS NOT NULL
1035 UNION
1036 SELECT t.path
1037 FROM git_file_touches t JOIN aliases a ON t.previous_path = a.path
1038 )";
1039 let recent_sql = format!(
1040 "{aliases}
1041 SELECT DISTINCT t.json, c.json
1042 FROM git_file_touches t
1043 JOIN git_commits c ON c.id = t.commit_id
1044 WHERE t.path IN aliases OR t.previous_path IN aliases
1045 ORDER BY t.touched_at DESC, t.id
1046 LIMIT ?2"
1047 );
1048 let mut recent_stmt = conn.prepare(&recent_sql).map_err(storage_err)?;
1049 let rows = recent_stmt
1050 .query_map(params![&normalized_path, query_limit], |row| {
1051 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1052 })
1053 .map_err(storage_err)?;
1054 let mut recent_touches = collect_provenance_rows(rows, file_provenance_touch)?;
1055 let truncated = recent_touches.len() > limit;
1056 recent_touches.truncate(limit);
1057
1058 let first_sql = format!(
1059 "{aliases}
1060 SELECT DISTINCT t.json, c.json
1061 FROM git_file_touches t
1062 JOIN git_commits c ON c.id = t.commit_id
1063 WHERE t.path IN aliases OR t.previous_path IN aliases
1064 ORDER BY t.touched_at ASC, t.id
1065 LIMIT 1"
1066 );
1067 let first_seen = conn
1068 .query_row(&first_sql, params![&normalized_path], |row| {
1069 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1070 })
1071 .optional()
1072 .map_err(storage_err)?
1073 .map(|(touch, commit)| file_provenance_touch(&touch, &commit))
1074 .transpose()?;
1075 let last_touched = recent_touches.first().cloned();
1076 let mut uncertainty = Vec::new();
1077 if first_seen.is_none() {
1078 uncertainty.push("no persisted commit provenance is available for this path".into());
1079 } else if first_seen
1080 .as_ref()
1081 .is_some_and(|touch| touch.change_kind != open_kioku_core::GitChangeKind::Added)
1082 {
1083 uncertainty.push(
1084 "first_seen is the earliest persisted touch in the configured local history window, not a proven file-creation commit"
1085 .into(),
1086 );
1087 }
1088 if truncated {
1089 uncertainty.push(format!(
1090 "recent provenance is truncated to {limit} touch records"
1091 ));
1092 }
1093
1094 let confidence = if uncertainty.is_empty() {
1095 Confidence::Exact
1096 } else if last_touched.is_some() {
1097 Confidence::High
1098 } else {
1099 Confidence::Low
1100 };
1101 Ok(FileProvenance {
1102 path: path.to_path_buf(),
1103 first_seen,
1104 last_touched,
1105 recent_touches,
1106 confidence,
1107 truncated,
1108 uncertainty,
1109 })
1110 }
1111
1112 fn provenance_for_symbol(
1113 &self,
1114 symbol_id: &SymbolId,
1115 limit: usize,
1116 ) -> Result<SymbolProvenance> {
1117 let conn = self
1118 .connection
1119 .lock()
1120 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1121 let symbol_json: Option<String> = conn
1122 .query_row(
1123 "SELECT json FROM symbols WHERE id = ?1",
1124 params![&symbol_id.0],
1125 |row| row.get(0),
1126 )
1127 .optional()
1128 .map_err(storage_err)?;
1129 let Some(symbol_json) = symbol_json else {
1130 return Err(OkError::SymbolNotFound(symbol_id.0.clone()));
1131 };
1132 let symbol: Symbol = serde_json::from_str(&symbol_json)?;
1133 let file_path: String = conn
1134 .query_row(
1135 "SELECT path FROM files WHERE id = ?1",
1136 params![&symbol.file_id.0],
1137 |row| row.get(0),
1138 )
1139 .map_err(storage_err)?;
1140 if limit == 0 {
1141 return Ok(SymbolProvenance {
1142 symbol_id: symbol.id,
1143 qualified_name: symbol.qualified_name,
1144 file_path: PathBuf::from(file_path),
1145 range: symbol.range,
1146 first_seen: None,
1147 last_touched: None,
1148 recent_touches: Vec::new(),
1149 confidence: Confidence::Low,
1150 truncated: false,
1151 uncertainty: vec!["provenance query limit is zero".into()],
1152 });
1153 }
1154
1155 let query_limit = history_query_limit(limit);
1156 let mut recent_stmt = conn
1157 .prepare(
1158 "SELECT t.json, c.json
1159 FROM git_symbol_touches t
1160 JOIN git_commits c ON c.id = t.commit_id
1161 WHERE t.symbol_id = ?1
1162 ORDER BY t.touched_at DESC, t.id
1163 LIMIT ?2",
1164 )
1165 .map_err(storage_err)?;
1166 let rows = recent_stmt
1167 .query_map(params![&symbol_id.0, query_limit], |row| {
1168 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1169 })
1170 .map_err(storage_err)?;
1171 let mut recent_touches = collect_provenance_rows(rows, symbol_provenance_touch)?;
1172 let truncated = recent_touches.len() > limit;
1173 recent_touches.truncate(limit);
1174 let first_seen = conn
1175 .query_row(
1176 "SELECT t.json, c.json
1177 FROM git_symbol_touches t
1178 JOIN git_commits c ON c.id = t.commit_id
1179 WHERE t.symbol_id = ?1
1180 ORDER BY t.touched_at ASC, t.id
1181 LIMIT 1",
1182 params![&symbol_id.0],
1183 |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)),
1184 )
1185 .optional()
1186 .map_err(storage_err)?
1187 .map(|(touch, commit)| symbol_provenance_touch(&touch, &commit))
1188 .transpose()?;
1189 let last_touched = recent_touches.first().cloned();
1190 let mut uncertainty = recent_touches
1191 .iter()
1192 .flat_map(|touch| touch.uncertainty.clone())
1193 .collect::<Vec<_>>();
1194 if let Some(first_seen) = &first_seen {
1195 uncertainty.extend(first_seen.uncertainty.clone());
1196 uncertainty.push(
1197 "first_seen is the earliest line-mapped touch in the configured local history window; it may not be the symbol-introduction commit"
1198 .into(),
1199 );
1200 } else {
1201 uncertainty
1202 .push("no persisted line-level commit mapping is available for this symbol".into());
1203 }
1204 if symbol.range.is_none() {
1205 uncertainty.push(
1206 "the indexed symbol has no line range, so commit hunks cannot be mapped".into(),
1207 );
1208 }
1209 if truncated {
1210 uncertainty.push(format!(
1211 "recent provenance is truncated to {limit} touch records"
1212 ));
1213 }
1214 uncertainty.sort();
1215 uncertainty.dedup();
1216 let confidence = recent_touches
1217 .iter()
1218 .map(|touch| touch.confidence)
1219 .chain(first_seen.iter().map(|touch| touch.confidence))
1220 .reduce(lower_history_confidence)
1221 .unwrap_or(Confidence::Low);
1222
1223 Ok(SymbolProvenance {
1224 symbol_id: symbol.id,
1225 qualified_name: symbol.qualified_name,
1226 file_path: PathBuf::from(file_path),
1227 range: symbol.range,
1228 first_seen,
1229 last_touched,
1230 recent_touches,
1231 confidence,
1232 truncated,
1233 uncertainty,
1234 })
1235 }
1236
1237 fn cochange_neighbors(&self, path: &Path, limit: usize) -> Result<Vec<GitCochangeEdge>> {
1238 if limit == 0 {
1239 return Ok(Vec::new());
1240 }
1241 let normalized_path = history_path(path)?;
1242 let conn = self
1243 .connection
1244 .lock()
1245 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1246 let mut stmt = conn
1247 .prepare(
1248 "SELECT json FROM git_cochange_edges
1249 WHERE path = ?1
1250 ORDER BY recency_weight DESC, commit_count DESC, cochanged_path
1251 LIMIT ?2",
1252 )
1253 .map_err(storage_err)?;
1254 let rows = stmt
1255 .query_map(
1256 params![normalized_path, limit.min(i64::MAX as usize) as i64],
1257 |row| row.get::<_, String>(0),
1258 )
1259 .map_err(storage_err)?;
1260 collect_json(rows)
1261 }
1262
1263 fn recent_commits(&self, limit: usize) -> Result<Vec<GitCommitRecord>> {
1264 if limit == 0 {
1265 return Ok(Vec::new());
1266 }
1267 let conn = self
1268 .connection
1269 .lock()
1270 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1271 let mut stmt = conn
1272 .prepare("SELECT json FROM git_commits ORDER BY committed_at DESC, id LIMIT ?1")
1273 .map_err(storage_err)?;
1274 let rows = stmt
1275 .query_map(params![limit.min(i64::MAX as usize) as i64], |row| {
1276 row.get::<_, String>(0)
1277 })
1278 .map_err(storage_err)?;
1279 collect_json(rows)
1280 }
1281}
1282
1283fn collect_provenance_rows<F>(
1284 rows: rusqlite::MappedRows<'_, F>,
1285 decode: fn(&str, &str) -> Result<ProvenanceTouch>,
1286) -> Result<Vec<ProvenanceTouch>>
1287where
1288 F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<(String, String)>,
1289{
1290 let mut touches = Vec::new();
1291 for row in rows {
1292 let (touch, commit) = row.map_err(storage_err)?;
1293 touches.push(decode(&touch, &commit)?);
1294 }
1295 Ok(touches)
1296}
1297
1298fn file_provenance_touch(touch: &str, commit: &str) -> Result<ProvenanceTouch> {
1299 let touch: GitFileTouch = serde_json::from_str(touch)?;
1300 let commit: GitCommitRecord = serde_json::from_str(commit)?;
1301 Ok(ProvenanceTouch {
1302 commit,
1303 path: touch.path,
1304 previous_path: touch.previous_path,
1305 symbol_id: None,
1306 qualified_name: None,
1307 change_kind: touch.change_kind,
1308 line_ranges: Vec::new(),
1309 confidence: Confidence::Exact,
1310 uncertainty: Vec::new(),
1311 })
1312}
1313
1314fn symbol_provenance_touch(touch: &str, commit: &str) -> Result<ProvenanceTouch> {
1315 let touch: GitSymbolTouch = serde_json::from_str(touch)?;
1316 let commit: GitCommitRecord = serde_json::from_str(commit)?;
1317 Ok(ProvenanceTouch {
1318 commit,
1319 path: touch.file_path,
1320 previous_path: None,
1321 symbol_id: touch.symbol_id,
1322 qualified_name: Some(touch.qualified_name),
1323 change_kind: touch.change_kind,
1324 line_ranges: touch.line_ranges,
1325 confidence: touch.confidence,
1326 uncertainty: touch.uncertainty,
1327 })
1328}
1329
1330fn lower_history_confidence(left: Confidence, right: Confidence) -> Confidence {
1331 if history_confidence_rank(left) <= history_confidence_rank(right) {
1332 left
1333 } else {
1334 right
1335 }
1336}
1337
1338fn history_confidence_rank(confidence: Confidence) -> u8 {
1339 match confidence {
1340 Confidence::Low => 0,
1341 Confidence::Medium => 1,
1342 Confidence::High => 2,
1343 Confidence::Exact => 3,
1344 }
1345}
/// Row limit `clamp_limit` substitutes when a caller asks for `0` rows.
const DEFAULT_GRAPH_QUERY_LIMIT: usize = 100;
/// Upper bound `clamp_limit` enforces on any requested row limit.
const MAX_GRAPH_QUERY_LIMIT: usize = 1_000;
1348
1349struct IndexRows<'a> {
1350 files: &'a [File],
1351 symbols: &'a [Symbol],
1352 chunks: &'a [CodeChunk],
1353 tests: &'a [TestTarget],
1354 imports: &'a [Import],
1355 occurrences: &'a [SymbolOccurrence],
1356 analysis_facts: &'a [AnalysisFact],
1357}
1358
1359fn insert_index_rows(tx: &Transaction<'_>, rows: IndexRows<'_>) -> Result<()> {
1360 for file in rows.files {
1361 tx.execute(
1362 "INSERT INTO files(id, path, json) VALUES(?1, ?2, ?3)",
1363 params![
1364 &file.id.0,
1365 file.path.to_string_lossy().as_ref(),
1366 serde_json::to_string(file)?
1367 ],
1368 )
1369 .map_err(storage_err)?;
1370 }
1371 for symbol in rows.symbols {
1372 tx.execute(
1373 "INSERT INTO symbols(id, name, qualified_name, file_id, json) VALUES(?1, ?2, ?3, ?4, ?5)",
1374 params![
1375 &symbol.id.0,
1376 &symbol.name,
1377 &symbol.qualified_name,
1378 &symbol.file_id.0,
1379 serde_json::to_string(symbol)?
1380 ],
1381 )
1382 .map_err(storage_err)?;
1383 }
1384 for chunk in rows.chunks {
1385 tx.execute(
1386 "INSERT INTO chunks(id, file_id, start_line, end_line, text, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
1387 params![
1388 &chunk.id,
1389 &chunk.file_id.0,
1390 chunk.range.start,
1391 chunk.range.end,
1392 &chunk.text,
1393 serde_json::to_string(chunk)?
1394 ],
1395 )
1396 .map_err(storage_err)?;
1397 }
1398 for test in rows.tests {
1399 tx.execute(
1400 "INSERT INTO tests(id, file_id, json) VALUES(?1, ?2, ?3) ON CONFLICT(id) DO UPDATE SET json = excluded.json",
1401 params![&test.id, &test.file_id.0, serde_json::to_string(test)?],
1402 )
1403 .map_err(storage_err)?;
1404 }
1405 for import in rows.imports {
1406 tx.execute(
1407 "INSERT INTO imports(id, file_id, imported, json) VALUES(?1, ?2, ?3, ?4)",
1408 params![
1409 occurrence_id(
1410 &import.file_id.0,
1411 &import.imported,
1412 import.range.as_ref().map(|range| range.start),
1413 true
1414 ),
1415 &import.file_id.0,
1416 &import.imported,
1417 serde_json::to_string(import)?
1418 ],
1419 )
1420 .map_err(storage_err)?;
1421 }
1422 for occurrence in rows.occurrences {
1423 tx.execute(
1424 "INSERT INTO occurrences(id, symbol_id, file_id, is_definition, json) VALUES(?1, ?2, ?3, ?4, ?5)",
1425 params![
1426 occurrence_id(
1427 &occurrence.file_id.0,
1428 &occurrence.symbol_id.0,
1429 occurrence.range.as_ref().map(|range| range.start),
1430 occurrence.is_definition,
1431 ),
1432 &occurrence.symbol_id.0,
1433 &occurrence.file_id.0,
1434 if occurrence.is_definition { 1 } else { 0 },
1435 serde_json::to_string(occurrence)?
1436 ],
1437 )
1438 .map_err(storage_err)?;
1439 }
1440 for fact in rows.analysis_facts {
1441 tx.execute(
1442 "INSERT INTO analysis_facts(id, file_id, source_type, target, json) VALUES(?1, ?2, ?3, ?4, ?5)",
1443 params![
1444 &fact.id,
1445 &fact.file_id.0,
1446 source_type_name(&fact.source_type),
1447 &fact.target,
1448 serde_json::to_string(fact)?
1449 ],
1450 )
1451 .map_err(storage_err)?;
1452 }
1453 Ok(())
1454}
1455
1456fn insert_graph_rows(tx: &Transaction<'_>, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
1457 for node in nodes {
1458 let evidence_available = node.file_id.is_some() || node.symbol_id.is_some();
1459 tx.execute(
1460 "INSERT INTO graph_nodes(id, label, node_type, file_id, symbol_id, evidence_available, freshness, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
1461 params![
1462 &node.id.0,
1463 &node.label,
1464 format!("{:?}", node.node_type),
1465 node.file_id.as_ref().map(|f| &f.0),
1466 node.symbol_id.as_ref().map(|s| &s.0),
1467 evidence_available,
1468 0_i64,
1469 serde_json::to_string(node)?
1470 ],
1471 )
1472 .map_err(storage_err)?;
1473 }
1474 for edge in edges {
1475 let freshness = edge.evidence.indexed_at.timestamp();
1476 tx.execute(
1477 "INSERT INTO graph_edges(id, from_id, to_id, edge_type, confidence, source_type, source_file, evidence_available, freshness, json) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10)",
1478 params![
1479 &edge.id.0,
1480 &edge.from.0,
1481 &edge.to.0,
1482 format!("{:?}", edge.edge_type),
1483 format!("{:?}", edge.evidence.confidence),
1484 format!("{:?}", edge.evidence.source_type),
1485 &edge.evidence.source,
1486 true,
1487 freshness,
1488 serde_json::to_string(edge)?
1489 ],
1490 )
1491 .map_err(storage_err)?;
1492 }
1493 Ok(())
1494}
1495
1496fn clamp_limit(limit: usize) -> usize {
1497 if limit == 0 {
1498 DEFAULT_GRAPH_QUERY_LIMIT
1499 } else {
1500 limit.min(MAX_GRAPH_QUERY_LIMIT)
1501 }
1502}
1503
1504impl GraphStore for SqliteStore {
1505 fn replace_graph(&self, nodes: &[GraphNode], edges: &[GraphEdge]) -> Result<()> {
1506 let mut conn = self
1507 .connection
1508 .lock()
1509 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1510 let tx = conn.transaction().map_err(storage_err)?;
1511 tx.execute("DELETE FROM graph_edges", [])
1512 .map_err(storage_err)?;
1513 tx.execute("DELETE FROM graph_nodes", [])
1514 .map_err(storage_err)?;
1515 insert_graph_rows(&tx, nodes, edges)?;
1516 tx.commit().map_err(storage_err)?;
1517 Ok(())
1518 }
1519
1520 fn node_type_stats(
1521 &self,
1522 ) -> Result<std::collections::HashMap<String, open_kioku_storage::TypeStats>> {
1523 let conn = self
1524 .connection
1525 .lock()
1526 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1527 let mut stmt = conn
1528 .prepare("SELECT node_type, COUNT(*), MAX(evidence_available), MAX(freshness) FROM graph_nodes GROUP BY node_type")
1529 .map_err(storage_err)?;
1530 let mut rows = stmt.query([]).map_err(storage_err)?;
1531 let mut map = std::collections::HashMap::new();
1532 while let Some(row) = rows.next().map_err(storage_err)? {
1533 let t: String = row.get(0).map_err(storage_err)?;
1534 let c: i64 = row.get(1).map_err(storage_err)?;
1535 let ev: bool = row.get(2).unwrap_or(false);
1536 let fr: Option<i64> = row.get(3).unwrap_or(None);
1537 map.insert(
1538 t,
1539 open_kioku_storage::TypeStats {
1540 count: c as usize,
1541 evidence_available: ev,
1542 freshness: fr.map(|v| v as u64),
1543 },
1544 );
1545 }
1546 Ok(map)
1547 }
1548
1549 fn edge_type_stats(
1550 &self,
1551 ) -> Result<std::collections::HashMap<String, open_kioku_storage::TypeStats>> {
1552 let conn = self
1553 .connection
1554 .lock()
1555 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1556 let mut stmt = conn
1557 .prepare("SELECT edge_type, COUNT(*), MAX(evidence_available), MAX(freshness) FROM graph_edges GROUP BY edge_type")
1558 .map_err(storage_err)?;
1559 let mut rows = stmt.query([]).map_err(storage_err)?;
1560 let mut map = std::collections::HashMap::new();
1561 while let Some(row) = rows.next().map_err(storage_err)? {
1562 let t: String = row.get(0).map_err(storage_err)?;
1563 let c: i64 = row.get(1).map_err(storage_err)?;
1564 let ev: bool = row.get(2).unwrap_or(false);
1565 let fr: Option<i64> = row.get(3).unwrap_or(None);
1566 map.insert(
1567 t,
1568 open_kioku_storage::TypeStats {
1569 count: c as usize,
1570 evidence_available: ev,
1571 freshness: fr.map(|v| v as u64),
1572 },
1573 );
1574 }
1575 Ok(map)
1576 }
1577
1578 fn node_by_id(&self, id: &str) -> Result<Option<GraphNode>> {
1579 let conn = self
1580 .connection
1581 .lock()
1582 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1583 graph_node_by_id(&conn, id)
1584 }
1585
1586 fn neighbors(&self, node: &str, limit: usize) -> Result<(Vec<GraphNode>, Vec<GraphEdge>)> {
1587 let conn = self
1588 .connection
1589 .lock()
1590 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1591 let mut stmt = conn
1592 .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 OR to_id = ?1 LIMIT ?2")
1593 .map_err(storage_err)?;
1594 let rows = stmt
1595 .query_map(params![node, limit as i64], |row| row.get::<_, String>(0))
1596 .map_err(storage_err)?;
1597 let edges: Vec<GraphEdge> = collect_json(rows)?;
1598 let mut ids = edges
1599 .iter()
1600 .flat_map(|edge| [edge.from.0.clone(), edge.to.0.clone()])
1601 .collect::<Vec<_>>();
1602 ids.sort();
1603 ids.dedup();
1604 let mut nodes = Vec::new();
1605 for id in ids {
1606 if let Some(node) = graph_node_by_id(&conn, &id)? {
1607 nodes.push(node);
1608 }
1609 }
1610 Ok((nodes, edges))
1611 }
1612
1613 fn shortest_path(&self, from: &str, to: &str, max_depth: usize) -> Result<Vec<GraphEdge>> {
1614 use std::collections::{HashSet, VecDeque};
1615
1616 let conn = self
1617 .connection
1618 .lock()
1619 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1620
1621 let mut edge_stmt = conn
1624 .prepare("SELECT json FROM graph_edges WHERE from_id = ?1")
1625 .map_err(storage_err)?;
1626
1627 let mut queue = VecDeque::from([(from.to_string(), Vec::<GraphEdge>::new())]);
1628 let mut seen = HashSet::new();
1629 while let Some((node, path)) = queue.pop_front() {
1630 if node == to {
1631 return Ok(path);
1632 }
1633 if path.len() >= max_depth || !seen.insert(node.clone()) {
1634 continue;
1635 }
1636 let rows = edge_stmt
1637 .query_map(params![&node], |row| row.get::<_, String>(0))
1638 .map_err(storage_err)?;
1639 let edges: Vec<GraphEdge> = collect_json(rows)?;
1640 for edge in edges {
1641 let mut next_path = path.clone();
1642 next_path.push(edge.clone());
1643 queue.push_back((edge.to.0.clone(), next_path));
1644 }
1645 }
1646 Ok(Vec::new())
1647 }
1648 fn nodes_by_type(
1649 &self,
1650 node_type: GraphNodeType,
1651 limit: usize,
1652 offset: usize,
1653 ) -> Result<Vec<GraphNode>> {
1654 let conn = self
1655 .connection
1656 .lock()
1657 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1658 let limit = clamp_limit(limit) as i64;
1659 let offset = offset as i64;
1660 let type_str = format!("{:?}", node_type);
1661 let mut stmt = conn
1662 .prepare(
1663 "SELECT json FROM graph_nodes WHERE node_type = ?1 ORDER BY id LIMIT ?2 OFFSET ?3",
1664 )
1665 .map_err(storage_err)?;
1666 let rows = stmt
1667 .query_map(params![type_str, limit, offset], |row| {
1668 row.get::<_, String>(0)
1669 })
1670 .map_err(storage_err)?;
1671 collect_json(rows)
1672 }
1673
1674 fn all_graph_nodes(&self) -> Result<Vec<GraphNode>> {
1675 let conn = self
1676 .connection
1677 .lock()
1678 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1679 let mut stmt = conn
1680 .prepare("SELECT json FROM graph_nodes ORDER BY id")
1681 .map_err(storage_err)?;
1682 let rows = stmt
1683 .query_map([], |row| row.get::<_, String>(0))
1684 .map_err(storage_err)?;
1685 collect_json(rows)
1686 }
1687
1688 fn edges_by_type(
1689 &self,
1690 edge_type: GraphEdgeType,
1691 limit: usize,
1692 offset: usize,
1693 ) -> Result<Vec<GraphEdge>> {
1694 let conn = self
1695 .connection
1696 .lock()
1697 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1698 let limit = clamp_limit(limit) as i64;
1699 let offset = offset as i64;
1700 let type_str = format!("{:?}", edge_type);
1701 let mut stmt = conn
1702 .prepare(
1703 "SELECT json FROM graph_edges WHERE edge_type = ?1 ORDER BY id LIMIT ?2 OFFSET ?3",
1704 )
1705 .map_err(storage_err)?;
1706 let rows = stmt
1707 .query_map(params![type_str, limit, offset], |row| {
1708 row.get::<_, String>(0)
1709 })
1710 .map_err(storage_err)?;
1711 collect_json(rows)
1712 }
1713
1714 fn graph_counts(&self) -> Result<GraphCounts> {
1715 let conn = self
1716 .connection
1717 .lock()
1718 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1719 let nodes: usize = conn
1720 .query_row("SELECT COUNT(*) FROM graph_nodes", [], |row| row.get(0))
1721 .map_err(storage_err)?;
1722 let edges: usize = conn
1723 .query_row("SELECT COUNT(*) FROM graph_edges", [], |row| row.get(0))
1724 .map_err(storage_err)?;
1725 Ok(GraphCounts { nodes, edges })
1726 }
1727
1728 fn graph_schema_counts(&self) -> Result<GraphSchemaCounts> {
1729 let conn = self
1730 .connection
1731 .lock()
1732 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1733
1734 let mut node_types = std::collections::BTreeMap::new();
1735 let mut stmt = conn
1736 .prepare("SELECT node_type, COUNT(*) FROM graph_nodes GROUP BY node_type")
1737 .map_err(storage_err)?;
1738 let mut rows = stmt.query([]).map_err(storage_err)?;
1739 while let Some(row) = rows.next().map_err(storage_err)? {
1740 let ntype: String = row.get(0).map_err(storage_err)?;
1741 let count: usize = row.get(1).map_err(storage_err)?;
1742 if !ntype.is_empty() {
1743 node_types.insert(ntype, count);
1744 }
1745 }
1746
1747 let mut edge_types = std::collections::BTreeMap::new();
1748 let mut stmt = conn
1749 .prepare("SELECT edge_type, COUNT(*) FROM graph_edges GROUP BY edge_type")
1750 .map_err(storage_err)?;
1751 let mut rows = stmt.query([]).map_err(storage_err)?;
1752 while let Some(row) = rows.next().map_err(storage_err)? {
1753 let etype: String = row.get(0).map_err(storage_err)?;
1754 let count: usize = row.get(1).map_err(storage_err)?;
1755 if !etype.is_empty() {
1756 edge_types.insert(etype, count);
1757 }
1758 }
1759
1760 Ok(GraphSchemaCounts {
1761 node_types,
1762 edge_types,
1763 })
1764 }
1765
1766 fn graph_edges_between(&self, from: &str, to: &str, limit: usize) -> Result<Vec<GraphEdge>> {
1767 let conn = self
1768 .connection
1769 .lock()
1770 .map_err(|_| OkError::Storage("sqlite mutex poisoned".into()))?;
1771 let limit = clamp_limit(limit) as i64;
1772 let mut stmt = conn
1773 .prepare("SELECT json FROM graph_edges WHERE from_id = ?1 AND to_id = ?2 ORDER BY id LIMIT ?3")
1774 .map_err(storage_err)?;
1775 let rows = stmt
1776 .query_map(params![from, to, limit], |row| row.get::<_, String>(0))
1777 .map_err(storage_err)?;
1778 collect_json(rows)
1779 }
1780}
1781
1782fn is_duplicate_column(err: &rusqlite::Error) -> bool {
1783 if let rusqlite::Error::SqliteFailure(_, Some(msg)) = err {
1784 msg.contains("duplicate column name")
1785 } else {
1786 false
1787 }
1788}
1789
1790fn add_column_if_not_exists(conn: &mut Connection, stmt: &str) -> Result<()> {
1791 match conn.execute(stmt, []) {
1792 Ok(_) => Ok(()),
1793 Err(err) if is_duplicate_column(&err) => Ok(()),
1794 Err(err) => Err(storage_err(err)),
1795 }
1796}
1797
1798fn migrate_graph_schema(conn: &mut Connection) -> Result<()> {
1799 add_column_if_not_exists(
1801 conn,
1802 "ALTER TABLE graph_nodes ADD COLUMN node_type TEXT DEFAULT ''",
1803 )?;
1804 add_column_if_not_exists(
1805 conn,
1806 "ALTER TABLE graph_nodes ADD COLUMN file_id TEXT DEFAULT ''",
1807 )?;
1808 add_column_if_not_exists(
1809 conn,
1810 "ALTER TABLE graph_nodes ADD COLUMN symbol_id TEXT DEFAULT ''",
1811 )?;
1812 add_column_if_not_exists(
1813 conn,
1814 "ALTER TABLE graph_nodes ADD COLUMN evidence_available BOOLEAN DEFAULT 0",
1815 )?;
1816 add_column_if_not_exists(
1817 conn,
1818 "ALTER TABLE graph_nodes ADD COLUMN freshness INTEGER DEFAULT 0",
1819 )?;
1820
1821 add_column_if_not_exists(
1823 conn,
1824 "ALTER TABLE graph_edges ADD COLUMN confidence TEXT DEFAULT ''",
1825 )?;
1826 add_column_if_not_exists(
1827 conn,
1828 "ALTER TABLE graph_edges ADD COLUMN source_type TEXT DEFAULT ''",
1829 )?;
1830 add_column_if_not_exists(
1831 conn,
1832 "ALTER TABLE graph_edges ADD COLUMN source_file TEXT DEFAULT ''",
1833 )?;
1834 add_column_if_not_exists(
1835 conn,
1836 "ALTER TABLE graph_edges ADD COLUMN evidence_available BOOLEAN DEFAULT 0",
1837 )?;
1838 add_column_if_not_exists(
1839 conn,
1840 "ALTER TABLE graph_edges ADD COLUMN freshness INTEGER DEFAULT 0",
1841 )?;
1842
1843 backfill_graph_query_columns(conn)?;
1844
1845 conn.execute(
1847 "CREATE INDEX IF NOT EXISTS idx_graph_nodes_type ON graph_nodes(node_type)",
1848 [],
1849 )
1850 .map_err(storage_err)?;
1851 conn.execute(
1852 "CREATE INDEX IF NOT EXISTS idx_graph_nodes_file ON graph_nodes(file_id)",
1853 [],
1854 )
1855 .map_err(storage_err)?;
1856 conn.execute(
1857 "CREATE INDEX IF NOT EXISTS idx_graph_nodes_symbol ON graph_nodes(symbol_id)",
1858 [],
1859 )
1860 .map_err(storage_err)?;
1861 conn.execute(
1862 "CREATE INDEX IF NOT EXISTS idx_graph_edges_type ON graph_edges(edge_type)",
1863 [],
1864 )
1865 .map_err(storage_err)?;
1866 conn.execute(
1867 "CREATE INDEX IF NOT EXISTS idx_graph_edges_from_type ON graph_edges(from_id, edge_type)",
1868 [],
1869 )
1870 .map_err(storage_err)?;
1871 conn.execute(
1872 "CREATE INDEX IF NOT EXISTS idx_graph_edges_to_type ON graph_edges(to_id, edge_type)",
1873 [],
1874 )
1875 .map_err(storage_err)?;
1876 conn.execute(
1877 "CREATE INDEX IF NOT EXISTS idx_graph_edges_source_type ON graph_edges(source_type)",
1878 [],
1879 )
1880 .map_err(storage_err)?;
1881
1882 let version: i64 = conn
1883 .pragma_query_value(None, "user_version", |row| row.get(0))
1884 .map_err(storage_err)?;
1885 if version < SQLITE_GRAPH_SCHEMA_VERSION {
1886 conn.pragma_update(None, "user_version", SQLITE_GRAPH_SCHEMA_VERSION)
1887 .map_err(storage_err)?;
1888 }
1889
1890 Ok(())
1891}
1892
1893fn backfill_graph_query_columns(conn: &mut Connection) -> Result<()> {
1894 let node_rows = {
1895 let mut stmt = conn
1896 .prepare(
1897 "SELECT id, json FROM graph_nodes
1898 WHERE COALESCE(node_type, '') = ''
1899 OR COALESCE(file_id, '') = ''
1900 OR COALESCE(symbol_id, '') = ''",
1901 )
1902 .map_err(storage_err)?;
1903 let rows = stmt
1904 .query_map([], |row| {
1905 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1906 })
1907 .map_err(storage_err)?;
1908 let mut rows_out = Vec::new();
1909 for row in rows {
1910 rows_out.push(row.map_err(storage_err)?);
1911 }
1912 rows_out
1913 };
1914 for (id, json) in node_rows {
1915 let Ok(node) = serde_json::from_str::<GraphNode>(&json) else {
1916 continue;
1917 };
1918 conn.execute(
1919 "UPDATE graph_nodes
1920 SET node_type = ?1,
1921 file_id = ?2,
1922 symbol_id = ?3,
1923 evidence_available = ?4
1924 WHERE id = ?5",
1925 params![
1926 format!("{:?}", node.node_type),
1927 node.file_id.as_ref().map(|file_id| file_id.0.as_str()),
1928 node.symbol_id
1929 .as_ref()
1930 .map(|symbol_id| symbol_id.0.as_str()),
1931 node.file_id.is_some() || node.symbol_id.is_some(),
1932 id,
1933 ],
1934 )
1935 .map_err(storage_err)?;
1936 }
1937
1938 let edge_rows = {
1939 let mut stmt = conn
1940 .prepare(
1941 "SELECT id, json FROM graph_edges
1942 WHERE COALESCE(edge_type, '') = ''
1943 OR COALESCE(confidence, '') = ''
1944 OR COALESCE(source_type, '') = ''
1945 OR COALESCE(source_file, '') = ''",
1946 )
1947 .map_err(storage_err)?;
1948 let rows = stmt
1949 .query_map([], |row| {
1950 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
1951 })
1952 .map_err(storage_err)?;
1953 let mut rows_out = Vec::new();
1954 for row in rows {
1955 rows_out.push(row.map_err(storage_err)?);
1956 }
1957 rows_out
1958 };
1959 for (id, json) in edge_rows {
1960 let Ok(edge) = serde_json::from_str::<GraphEdge>(&json) else {
1961 continue;
1962 };
1963 conn.execute(
1964 "UPDATE graph_edges
1965 SET from_id = ?1,
1966 to_id = ?2,
1967 edge_type = ?3,
1968 confidence = ?4,
1969 source_type = ?5,
1970 source_file = ?6,
1971 evidence_available = ?7,
1972 freshness = ?8
1973 WHERE id = ?9",
1974 params![
1975 edge.from.0.as_str(),
1976 edge.to.0.as_str(),
1977 format!("{:?}", edge.edge_type),
1978 format!("{:?}", edge.evidence.confidence),
1979 format!("{:?}", edge.evidence.source_type),
1980 edge.evidence.source.as_str(),
1981 true,
1982 edge.evidence.indexed_at.timestamp(),
1983 id,
1984 ],
1985 )
1986 .map_err(storage_err)?;
1987 }
1988
1989 Ok(())
1990}
1991
1992fn migrate_history_schema(conn: &mut Connection) -> Result<()> {
1993 ensure_supported_sqlite_schema(conn)?;
1994 let version: i64 = conn
1995 .pragma_query_value(None, "user_version", |row| row.get(0))
1996 .map_err(storage_err)?;
1997 let tx = conn.transaction().map_err(storage_err)?;
1998 tx.execute_batch(HISTORY_SCHEMA_V1).map_err(storage_err)?;
1999 if version < SQLITE_HISTORY_SCHEMA_VERSION {
2000 tx.pragma_update(None, "user_version", SQLITE_HISTORY_SCHEMA_VERSION)
2001 .map_err(storage_err)?;
2002 }
2003 tx.commit().map_err(storage_err)?;
2004 Ok(())
2005}
2006
2007fn ensure_supported_sqlite_schema(conn: &Connection) -> Result<()> {
2008 let version: i64 = conn
2009 .pragma_query_value(None, "user_version", |row| row.get(0))
2010 .map_err(storage_err)?;
2011 if version > SQLITE_SUPPORTED_SCHEMA_VERSION {
2012 return Err(OkError::Storage(format!(
2013 "sqlite schema version {version} is newer than supported version {SQLITE_SUPPORTED_SCHEMA_VERSION}"
2014 )));
2015 }
2016 Ok(())
2017}
2018
2019fn validate_history_snapshot(snapshot: &HistorySnapshot) -> Result<()> {
2020 if snapshot.schema_version != HISTORY_SCHEMA_VERSION {
2021 return Err(OkError::Storage(format!(
2022 "unsupported history snapshot schema version {}; expected {}",
2023 snapshot.schema_version, HISTORY_SCHEMA_VERSION
2024 )));
2025 }
2026
2027 let mut commit_ids = BTreeSet::new();
2028 for commit in &snapshot.commits {
2029 validate_text("commit id", &commit.id.0)?;
2030 if !commit_ids.insert(commit.id.0.clone()) {
2031 return Err(OkError::Storage(format!(
2032 "duplicate history commit id `{}`",
2033 commit.id
2034 )));
2035 }
2036 validate_text("commit author name", &commit.author.name)?;
2037 if let Some(committer) = &commit.committer {
2038 validate_text("commit committer name", &committer.name)?;
2039 }
2040 let mut parent_ids = BTreeSet::new();
2041 for parent_id in &commit.parent_ids {
2042 validate_text("parent commit id", &parent_id.0)?;
2043 if !parent_ids.insert(parent_id.0.as_str()) {
2044 return Err(OkError::Storage(format!(
2045 "commit `{}` contains duplicate parent `{parent_id}`",
2046 commit.id
2047 )));
2048 }
2049 }
2050 }
2051
2052 let mut file_touch_ids = BTreeSet::new();
2053 for touch in &snapshot.file_touches {
2054 validate_history_record_id(&touch.id, "file touch", &mut file_touch_ids)?;
2055 validate_commit_reference(&touch.commit_id, &commit_ids, "file touch")?;
2056 history_path(&touch.path)?;
2057 if let Some(previous_path) = &touch.previous_path {
2058 history_path(previous_path)?;
2059 }
2060 }
2061
2062 let mut symbol_touch_ids = BTreeSet::new();
2063 for touch in &snapshot.symbol_touches {
2064 validate_history_record_id(&touch.id, "symbol touch", &mut symbol_touch_ids)?;
2065 validate_commit_reference(&touch.commit_id, &commit_ids, "symbol touch")?;
2066 validate_text("symbol qualified name", &touch.qualified_name)?;
2067 history_path(&touch.file_path)?;
2068 }
2069
2070 let mut cochange_ids = BTreeSet::new();
2071 let mut cochange_pairs = BTreeSet::new();
2072 for edge in &snapshot.cochange_edges {
2073 validate_history_record_id(&edge.id, "co-change edge", &mut cochange_ids)?;
2074 let path = history_path(&edge.path)?;
2075 let cochanged_path = history_path(&edge.cochanged_path)?;
2076 if path == cochanged_path {
2077 return Err(OkError::Storage(format!(
2078 "co-change edge `{}` must connect two different paths",
2079 edge.id
2080 )));
2081 }
2082 if !cochange_pairs.insert((path.clone(), cochanged_path.clone())) {
2083 return Err(OkError::Storage(format!(
2084 "duplicate co-change edge `{path}` -> `{cochanged_path}`"
2085 )));
2086 }
2087 if edge.commit_count == 0 {
2088 return Err(OkError::Storage(format!(
2089 "co-change edge `{}` must have a positive commit count",
2090 edge.id
2091 )));
2092 }
2093 if !edge.recency_weight.is_finite() || edge.recency_weight < 0.0 {
2094 return Err(OkError::Storage(format!(
2095 "co-change edge `{}` has invalid recency weight {}",
2096 edge.id, edge.recency_weight
2097 )));
2098 }
2099 let mut sample_commits = BTreeSet::new();
2100 for commit_id in &edge.sample_commits {
2101 validate_text("sample commit id", &commit_id.0)?;
2102 if !sample_commits.insert(commit_id.0.as_str()) {
2103 return Err(OkError::Storage(format!(
2104 "co-change edge `{}` contains duplicate sample commit `{commit_id}`",
2105 edge.id
2106 )));
2107 }
2108 }
2109 }
2110
2111 let mut reviewer_ids = BTreeSet::new();
2112 for evidence in &snapshot.reviewer_evidence {
2113 validate_history_record_id(&evidence.id, "review event", &mut reviewer_ids)?;
2114 validate_text("reviewer name", &evidence.reviewer.name)?;
2115 validate_text("review evidence source", &evidence.source)?;
2116 if let Some(commit_id) = &evidence.commit_id {
2117 validate_text("review commit id", &commit_id.0)?;
2118 }
2119 if let Some(path) = &evidence.path {
2120 history_path(path)?;
2121 }
2122 }
2123
2124 Ok(())
2125}
2126
2127fn validate_history_record_id(
2128 id: &HistoryRecordId,
2129 kind: &str,
2130 ids: &mut BTreeSet<String>,
2131) -> Result<()> {
2132 validate_text(&format!("{kind} id"), &id.0)?;
2133 if !ids.insert(id.0.clone()) {
2134 return Err(OkError::Storage(format!("duplicate {kind} id `{id}`")));
2135 }
2136 Ok(())
2137}
2138
2139fn validate_commit_reference(
2140 commit_id: &GitCommitId,
2141 commit_ids: &BTreeSet<String>,
2142 kind: &str,
2143) -> Result<()> {
2144 validate_text("commit id", &commit_id.0)?;
2145 if !commit_ids.contains(&commit_id.0) {
2146 return Err(OkError::Storage(format!(
2147 "{kind} references missing commit `{commit_id}`"
2148 )));
2149 }
2150 Ok(())
2151}
2152
2153fn validate_text(field: &str, value: &str) -> Result<()> {
2154 if value.trim().is_empty() {
2155 return Err(OkError::Storage(format!("{field} must not be empty")));
2156 }
2157 Ok(())
2158}
2159
2160fn history_path(path: &Path) -> Result<String> {
2161 if path.as_os_str().is_empty()
2162 || path.is_absolute()
2163 || path
2164 .components()
2165 .any(|component| !matches!(component, std::path::Component::Normal(_)))
2166 {
2167 return Err(OkError::Storage(format!(
2168 "history path must be a normalized repository-relative path: {}",
2169 path.display()
2170 )));
2171 }
2172 let value = path.to_str().ok_or_else(|| {
2173 OkError::Storage(format!(
2174 "history path must be valid UTF-8: {}",
2175 path.display()
2176 ))
2177 })?;
2178 if value.contains('\\') {
2179 return Err(OkError::Storage(format!(
2180 "history path must use `/` separators: {}",
2181 path.display()
2182 )));
2183 }
2184 Ok(value.to_string())
2185}
2186
2187fn usize_to_i64(value: usize, field: &str) -> Result<i64> {
2188 i64::try_from(value)
2189 .map_err(|_| OkError::Storage(format!("{field} exceeds SQLite integer range")))
2190}
2191
/// Returns `limit + 1` as an `i64`, clamped so it never exceeds `i64::MAX`.
///
/// The addition saturates, so `usize::MAX` does not overflow.
fn history_query_limit(limit: usize) -> i64 {
    let one_past_limit = limit.saturating_add(1);
    i64::try_from(one_past_limit).unwrap_or(i64::MAX)
}
2195
2196fn collect_limited_json<T, F>(
2197 rows: rusqlite::MappedRows<'_, F>,
2198 limit: usize,
2199) -> Result<(Vec<T>, bool)>
2200where
2201 F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
2202 T: serde::de::DeserializeOwned,
2203{
2204 let mut values = collect_json(rows)?;
2205 let truncated = values.len() > limit;
2206 values.truncate(limit);
2207 Ok((values, truncated))
2208}
2209
2210fn collect_json<T, F>(rows: rusqlite::MappedRows<'_, F>) -> Result<Vec<T>>
2211where
2212 F: FnMut(&rusqlite::Row<'_>) -> rusqlite::Result<String>,
2213 T: serde::de::DeserializeOwned,
2214{
2215 let mut out = Vec::new();
2216 for row in rows {
2217 let raw = row.map_err(storage_err)?;
2218 out.push(serde_json::from_str(&raw)?);
2219 }
2220 Ok(out)
2221}
2222
2223fn graph_node_by_id(conn: &Connection, id: &str) -> Result<Option<GraphNode>> {
2224 let raw: Option<String> = conn
2225 .query_row(
2226 "SELECT json FROM graph_nodes WHERE id = ?1",
2227 params![id],
2228 |row| row.get(0),
2229 )
2230 .optional()
2231 .map_err(storage_err)?;
2232 raw.map(|json| serde_json::from_str(&json).map_err(Into::into))
2233 .transpose()
2234}
2235
2236fn storage_err(err: rusqlite::Error) -> OkError {
2237 OkError::Storage(err.to_string())
2238}
2239
2240fn occurrence_id(file_id: &str, value: &str, line: Option<u32>, flag: bool) -> String {
2241 use sha2::{Digest, Sha256};
2242 let mut hasher = Sha256::new();
2243 hasher.update(file_id.as_bytes());
2244 hasher.update(b":");
2245 hasher.update(value.as_bytes());
2246 hasher.update(b":");
2247 hasher.update(line.unwrap_or_default().to_string().as_bytes());
2248 hasher.update(b":");
2249 hasher.update(if flag { b"1" } else { b"0" });
2250 format!("{:x}", hasher.finalize())
2251}
2252
2253fn source_type_name(source_type: &EvidenceSourceType) -> &'static str {
2254 match source_type {
2255 EvidenceSourceType::TreeSitter => "tree_sitter",
2256 EvidenceSourceType::Scip => "scip",
2257 EvidenceSourceType::Lsp => "lsp",
2258 EvidenceSourceType::Regex => "regex",
2259 EvidenceSourceType::Lexical => "lexical",
2260 EvidenceSourceType::Semantic => "semantic",
2261 EvidenceSourceType::Runtime => "runtime",
2262 EvidenceSourceType::GitHistory => "git_history",
2263 EvidenceSourceType::StaticAnalysis => "static_analysis",
2264 EvidenceSourceType::ExternalIntegration => "external_integration",
2265 EvidenceSourceType::Heuristic => "heuristic",
2266 }
2267}
2268
2269#[cfg(test)]
2270mod tests {
2271 use super::{SqliteStore, SQLITE_GRAPH_SCHEMA_VERSION};
2272 use chrono::{TimeZone, Utc};
2273 use open_kioku_core::{
2274 AnalysisFact, CodeChunk, Confidence, EdgeId, Evidence, EvidenceId, EvidenceSourceType,
2275 File, FileId, GitChangeKind, GitCochangeEdge, GitCommitId, GitCommitRecord, GitFileTouch,
2276 GitSymbolTouch, GraphEdge, GraphEdgeType, GraphNode, GraphNodeType, HistoryRecordId,
2277 HistorySnapshot, IndexManifest, IndexQuality, Language, LineRange, NodeId, Owner,
2278 Repository, RepositoryId, ReviewerEvidence, ReviewerRole, Symbol, SymbolId, SymbolKind,
2279 SymbolOccurrence, HISTORY_SCHEMA_VERSION,
2280 };
2281 use open_kioku_storage::{
2282 GraphStore, HistoryStore, IndexData, MetadataStore, PartialIndexUpdate,
2283 };
2284 use rusqlite::{params, Connection};
2285 use std::collections::BTreeMap;
2286
2287 fn make_store() -> SqliteStore {
2288 SqliteStore::open(":memory:").expect("in-memory store")
2289 }
2290
2291 fn make_file(id: &str, path: &str) -> File {
2292 File {
2293 id: FileId::new(id),
2294 repository_id: RepositoryId::new("repo"),
2295 path: path.into(),
2296 language: Language::Rust,
2297 size_bytes: 100,
2298 content_hash: format!("hash-{id}"),
2299 is_generated: false,
2300 is_vendor: false,
2301 }
2302 }
2303
2304 fn make_symbol(id: &str, name: &str, file_id: &str) -> Symbol {
2305 Symbol {
2306 id: SymbolId::new(id),
2307 name: name.into(),
2308 qualified_name: format!("module::{name}"),
2309 kind: SymbolKind::Function,
2310 file_id: FileId::new(file_id),
2311 range: Some(LineRange::single(1)),
2312 language: Language::Rust,
2313 confidence: Confidence::High,
2314 provenance: EvidenceSourceType::TreeSitter,
2315 }
2316 }
2317
2318 fn evidence() -> Evidence {
2319 Evidence {
2320 id: EvidenceId::new("ev-1"),
2321 source: "test".into(),
2322 source_type: EvidenceSourceType::Lexical,
2323 file_range: None,
2324 symbol_id: None,
2325 confidence: Confidence::Medium,
2326 message: "test evidence".into(),
2327 indexed_at: Utc::now(),
2328 ..Default::default()
2329 }
2330 }
2331
2332 fn make_manifest() -> IndexManifest {
2333 IndexManifest {
2334 repository: Repository {
2335 id: RepositoryId::new("repo"),
2336 name: "repo".into(),
2337 root: std::path::PathBuf::from("."),
2338 branch: None,
2339 commit: None,
2340 indexed_at: None,
2341 },
2342 file_count: 2,
2343 symbol_count: 2,
2344 chunk_count: 0,
2345 indexed_at: Utc::now(),
2346 schema_version: 1,
2347 index_mode: Default::default(),
2348 phase_reports: Vec::new(),
2349 quality: IndexQuality::default(),
2350 }
2351 }
2352
2353 fn history_snapshot() -> HistorySnapshot {
2354 let older_at = Utc.with_ymd_and_hms(2026, 5, 1, 12, 0, 0).unwrap();
2355 let newer_at = Utc.with_ymd_and_hms(2026, 6, 1, 12, 0, 0).unwrap();
2356 let older_id = GitCommitId::new("older");
2357 let newer_id = GitCommitId::new("newer");
2358 HistorySnapshot {
2359 schema_version: HISTORY_SCHEMA_VERSION,
2360 commits: vec![
2361 GitCommitRecord {
2362 id: older_id.clone(),
2363 parent_ids: Vec::new(),
2364 author: Owner {
2365 name: "Older Author".into(),
2366 email: Some("older@example.com".into()),
2367 },
2368 committer: None,
2369 authored_at: older_at,
2370 committed_at: older_at,
2371 summary: "Introduce library".into(),
2372 message: "Introduce library".into(),
2373 file_count: 2,
2374 },
2375 GitCommitRecord {
2376 id: newer_id.clone(),
2377 parent_ids: vec![older_id.clone()],
2378 author: Owner {
2379 name: "Newer Author".into(),
2380 email: Some("newer@example.com".into()),
2381 },
2382 committer: None,
2383 authored_at: newer_at,
2384 committed_at: newer_at,
2385 summary: "Refine library".into(),
2386 message: "Refine library and tests".into(),
2387 file_count: 3,
2388 },
2389 ],
2390 file_touches: vec![
2391 GitFileTouch {
2392 id: HistoryRecordId::new("file-touch-older"),
2393 commit_id: older_id.clone(),
2394 path: "src/lib.rs".into(),
2395 previous_path: None,
2396 change_kind: GitChangeKind::Added,
2397 additions: Some(20),
2398 deletions: Some(0),
2399 touched_at: older_at,
2400 },
2401 GitFileTouch {
2402 id: HistoryRecordId::new("file-touch-newer"),
2403 commit_id: newer_id.clone(),
2404 path: "src/lib.rs".into(),
2405 previous_path: None,
2406 change_kind: GitChangeKind::Modified,
2407 additions: Some(5),
2408 deletions: Some(2),
2409 touched_at: newer_at,
2410 },
2411 ],
2412 symbol_touches: vec![GitSymbolTouch {
2413 id: HistoryRecordId::new("symbol-touch-newer"),
2414 commit_id: newer_id.clone(),
2415 symbol_id: Some(SymbolId::new("symbol-1")),
2416 qualified_name: "crate::history_for_file".into(),
2417 file_path: "src/lib.rs".into(),
2418 change_kind: GitChangeKind::Modified,
2419 line_ranges: vec![LineRange { start: 4, end: 8 }],
2420 confidence: Confidence::Medium,
2421 uncertainty: vec!["historical coordinates may have shifted".into()],
2422 touched_at: newer_at,
2423 }],
2424 cochange_edges: vec![
2425 GitCochangeEdge {
2426 id: HistoryRecordId::new("cochange-test"),
2427 path: "src/lib.rs".into(),
2428 cochanged_path: "tests/lib_test.rs".into(),
2429 commit_count: 2,
2430 recency_weight: 1.8,
2431 last_changed_at: Some(newer_at),
2432 sample_commits: vec![newer_id.clone(), older_id.clone()],
2433 test_corun: true,
2434 },
2435 GitCochangeEdge {
2436 id: HistoryRecordId::new("cochange-docs"),
2437 path: "src/lib.rs".into(),
2438 cochanged_path: "docs/library.md".into(),
2439 commit_count: 1,
2440 recency_weight: 0.5,
2441 last_changed_at: Some(older_at),
2442 sample_commits: vec![older_id],
2443 test_corun: false,
2444 },
2445 ],
2446 reviewer_evidence: vec![ReviewerEvidence {
2447 id: HistoryRecordId::new("review-newer"),
2448 commit_id: Some(newer_id),
2449 path: None,
2450 reviewer: Owner {
2451 name: "Reviewer".into(),
2452 email: Some("reviewer@example.com".into()),
2453 },
2454 role: ReviewerRole::Reviewer,
2455 observed_at: newer_at,
2456 source: "git-trailer:reviewed-by".into(),
2457 confidence: Confidence::High,
2458 }],
2459 }
2460 }
2461
// A version-0 database holding only a legacy `analysis_facts` table must be
// upgraded on open: the schema version is bumped, the five history tables
// exist, and the legacy row survives. `initialize` is called after `open` to
// check that running the setup again is harmless.
#[test]
fn history_migration_upgrades_legacy_database_idempotently() {
    const LEGACY_SCHEMA: &str = r#"
        PRAGMA user_version = 0;
        CREATE TABLE analysis_facts (
            id TEXT PRIMARY KEY,
            file_id TEXT NOT NULL,
            source_type TEXT NOT NULL,
            target TEXT NOT NULL,
            json TEXT NOT NULL
        );
        INSERT INTO analysis_facts(id, file_id, source_type, target, json)
        VALUES('legacy-git', 'f1', 'git_history', 'tests/lib_test.rs', '{}');
        "#;
    const HISTORY_TABLE_COUNT_SQL: &str = "SELECT COUNT(*) FROM sqlite_master
                 WHERE type = 'table'
                   AND name IN (
                     'git_commits',
                     'git_file_touches',
                     'git_symbol_touches',
                     'git_cochange_edges',
                     'git_review_events'
                   )";

    let dir = tempfile::tempdir().unwrap();
    let db_path = dir.path().join("index.sqlite");
    {
        // Scope the raw connection so it is closed before the store opens.
        let legacy = Connection::open(&db_path).unwrap();
        legacy.execute_batch(LEGACY_SCHEMA).unwrap();
    }

    let store = SqliteStore::open(&db_path).unwrap();
    store.initialize().unwrap();

    let conn = store.connection.lock().unwrap();
    let count_rows =
        |sql: &str| -> i64 { conn.query_row(sql, [], |row| row.get(0)).unwrap() };

    let user_version: i64 = conn
        .pragma_query_value(None, "user_version", |row| row.get(0))
        .unwrap();
    assert_eq!(user_version, SQLITE_GRAPH_SCHEMA_VERSION);

    assert_eq!(count_rows(HISTORY_TABLE_COUNT_SQL), 5);
    assert_eq!(count_rows("SELECT COUNT(*) FROM analysis_facts"), 1);
}
2514
// Opening a database whose `user_version` is ahead of the supported one must
// fail, and the failed open must leave the file untouched: no `manifests`
// table is created and the pre-existing marker table is still there.
#[test]
fn newer_sqlite_schema_is_rejected_without_mutation() {
    const FUTURE_SCHEMA: &str = r#"
        PRAGMA user_version = 3;
        CREATE TABLE future_history_marker (id INTEGER PRIMARY KEY);
        "#;

    let dir = tempfile::tempdir().unwrap();
    let db_path = dir.path().join("future.sqlite");
    {
        let future = Connection::open(&db_path).unwrap();
        future.execute_batch(FUTURE_SCHEMA).unwrap();
    }

    let message = match SqliteStore::open(&db_path) {
        Err(error) => error.to_string(),
        Ok(_) => panic!("newer schema should be rejected"),
    };
    assert!(message.contains("newer than supported version 2"));

    let conn = Connection::open(&db_path).unwrap();
    let count_rows =
        |sql: &str| -> i64 { conn.query_row(sql, [], |row| row.get(0)).unwrap() };

    let manifests_tables = count_rows(
        "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'manifests'",
    );
    assert_eq!(manifests_tables, 0);

    let marker_tables = count_rows(
        "SELECT COUNT(*) FROM sqlite_master WHERE type = 'table' AND name = 'future_history_marker'",
    );
    assert_eq!(marker_tables, 1);
}
2554
// After storing the fixture snapshot, the typed history queries return the
// expected records, and a limit of 1 marks the per-file summary as truncated.
#[test]
fn history_snapshot_queries_return_typed_evidence() {
    use std::path::Path;

    let lib_rs = Path::new("src/lib.rs");
    let store = make_store();
    store.put_history_snapshot(&history_snapshot()).unwrap();

    // The first commit returned is the newer one.
    let commits = store.recent_commits(10).unwrap();
    assert_eq!(commits.len(), 2);
    assert_eq!(commits[0].id.0, "newer");

    let neighbors = store.cochange_neighbors(lib_rs, 10).unwrap();
    assert_eq!(neighbors.len(), 2);
    assert_eq!(neighbors[0].cochanged_path, Path::new("tests/lib_test.rs"));

    // A generous limit returns everything without truncation notes.
    let full = store.history_for_file(lib_rs, 10).unwrap();
    assert_eq!(full.recent_commits.len(), 2);
    assert_eq!(full.file_touches.len(), 2);
    assert_eq!(full.symbol_touches.len(), 1);
    assert_eq!(full.cochange_neighbors.len(), 2);
    assert_eq!(full.reviewer_evidence.len(), 1);
    assert!(!full.truncated);
    assert!(full.uncertainty.is_empty());

    // A limit of 1 cannot hold both touches, so truncation must be reported.
    let capped = store.history_for_file(lib_rs, 1).unwrap();
    assert!(capped.truncated);
    let mentions_truncation = capped
        .uncertainty
        .iter()
        .any(|note| note.contains("truncated"));
    assert!(mentions_truncation);
}
2593
// Path provenance reports the first and last touching commit. Symbol
// provenance reports its line-mapped touch, and a symbol with no range and no
// recorded touches comes back empty with explicit uncertainty notes.
#[test]
fn provenance_queries_return_first_last_and_explicit_symbol_uncertainty() {
    use std::path::Path;

    let store = make_store();
    let file = make_file("file-1", "src/lib.rs");
    let mapped = make_symbol("symbol-1", "history_for_file", "file-1");
    let unmapped = {
        let mut symbol = make_symbol("symbol-2", "unmapped", "file-1");
        symbol.range = None;
        symbol
    };
    let manifest = make_manifest();
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: std::slice::from_ref(&file),
            symbols: &[mapped.clone(), unmapped.clone()],
            chunks: &[],
            tests: &[],
            imports: &[],
            occurrences: &[],
            analysis_facts: &[],
        })
        .unwrap();
    store.put_history_snapshot(&history_snapshot()).unwrap();

    // --- file-level provenance ---------------------------------------------
    let by_path = store
        .provenance_for_path(Path::new("src/lib.rs"), 10)
        .unwrap();
    let first_commit = by_path
        .first_seen
        .as_ref()
        .map(|touch| touch.commit.id.0.as_str());
    assert_eq!(first_commit, Some("older"));
    let last_commit = by_path
        .last_touched
        .as_ref()
        .map(|touch| touch.commit.id.0.as_str());
    assert_eq!(last_commit, Some("newer"));
    assert_eq!(by_path.recent_touches.len(), 2);
    assert_eq!(by_path.confidence, Confidence::Exact);

    // --- symbol with a recorded touch --------------------------------------
    let by_symbol = store.provenance_for_symbol(&mapped.id, 10).unwrap();
    assert_eq!(by_symbol.recent_touches.len(), 1);
    assert_eq!(by_symbol.confidence, Confidence::Medium);
    assert_eq!(
        by_symbol.recent_touches[0].commit.author.name,
        "Newer Author"
    );
    assert_eq!(
        by_symbol.recent_touches[0].line_ranges,
        vec![LineRange { start: 4, end: 8 }]
    );
    let notes_earliest_touch = by_symbol
        .uncertainty
        .iter()
        .any(|note| note.contains("earliest line-mapped touch"));
    assert!(notes_earliest_touch);

    // --- symbol without range or touches -----------------------------------
    let empty = store.provenance_for_symbol(&unmapped.id, 10).unwrap();
    assert!(empty.first_seen.is_none());
    assert!(empty.last_touched.is_none());
    assert!(empty.recent_touches.is_empty());
    assert_eq!(empty.confidence, Confidence::Low);
    let notes_missing_mapping = empty
        .uncertainty
        .iter()
        .any(|note| note.contains("no persisted line-level commit mapping"));
    assert!(notes_missing_mapping);
    let notes_missing_range = empty
        .uncertainty
        .iter()
        .any(|note| note.contains("has no line range"));
    assert!(notes_missing_range);
}
2668
// The fixture is rewritten so the file was added as `src/old.rs` and later
// renamed to `src/lib.rs`. Querying either name must return both touches, and
// the first sighting for the current name is reported under the old path.
#[test]
fn path_provenance_follows_rename_aliases_in_both_directions() {
    use std::path::Path;

    let old_name = Path::new("src/old.rs");
    let new_name = Path::new("src/lib.rs");

    let mut renamed_history = history_snapshot();
    renamed_history.file_touches[0].path = "src/old.rs".into();
    {
        let rename_touch = &mut renamed_history.file_touches[1];
        rename_touch.previous_path = Some("src/old.rs".into());
        rename_touch.change_kind = GitChangeKind::Renamed;
    }

    let store = make_store();
    store.put_history_snapshot(&renamed_history).unwrap();

    let current = store.provenance_for_path(new_name, 10).unwrap();
    let historical = store.provenance_for_path(old_name, 10).unwrap();

    assert_eq!(current.recent_touches.len(), 2);
    assert_eq!(historical.recent_touches.len(), 2);
    let first_seen_path = current
        .first_seen
        .as_ref()
        .map(|touch| touch.path.as_path());
    assert_eq!(first_seen_path, Some(Path::new("src/old.rs")));
}
2695
// A snapshot that fails validation must leave the stored history unchanged,
// while a valid empty snapshot replaces it with nothing.
#[test]
fn invalid_snapshot_does_not_replace_existing_history() {
    let store = make_store();
    let valid = history_snapshot();
    store.put_history_snapshot(&valid).unwrap();

    // Point one file touch at a commit that is not part of the snapshot.
    let mut broken = valid;
    broken.file_touches[0].commit_id = GitCommitId::new("missing");
    let rejection = store.put_history_snapshot(&broken).unwrap_err();
    let message = rejection.to_string();
    assert!(message.contains("references missing commit `missing`"));

    // The earlier snapshot is still what the store serves.
    let commits = store.recent_commits(10).unwrap();
    assert_eq!(commits.len(), 2);
    assert_eq!(commits[0].id.0, "newer");

    let empty = HistorySnapshot::empty();
    store.put_history_snapshot(&empty).unwrap();
    assert!(store.recent_commits(10).unwrap().is_empty());
}
2719
// A full index replacement makes both files listable and lets a file be
// looked up by its path.
#[test]
fn replace_index_and_list_files() {
    let store = make_store();
    let main_rs = make_file("f1", "src/main.rs");
    let lib_rs = make_file("f2", "src/lib.rs");
    let main_fn = make_symbol("s1", "main_fn", "f1");
    let manifest = make_manifest();

    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &[main_rs.clone(), lib_rs.clone()],
            symbols: &[main_fn.clone()],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();

    let listed = store.list_files(100, 0).unwrap();
    assert_eq!(listed.len(), 2);

    let lookup_path = std::path::PathBuf::from("src/main.rs");
    let found = store.get_file_by_path(&lookup_path).unwrap();
    assert!(found.is_some());
    assert_eq!(found.unwrap().id, main_rs.id);
}
2752
// Seeds an index with two files plus a four-node, two-edge graph, then applies
// a partial update that deletes `f1` and re-indexes `f2` with a new symbol.
// Afterwards the deleted file's rows are gone, `f2` carries the new data, and
// no graph edge remains.
#[test]
fn partial_replace_updates_changed_files_and_cleans_deleted_graph_edges() {
    use std::path::Path;

    // Function node tied to a file and a symbol.
    let function_node = |id: NodeId, label: &str, file: &File, symbol: &Symbol| GraphNode {
        id,
        node_type: GraphNodeType::Function,
        label: label.into(),
        file_id: Some(file.id.clone()),
        symbol_id: Some(symbol.id.clone()),
        ..Default::default()
    };
    // Module node with no file or symbol attached.
    let module_node = |id: NodeId, label: &str| GraphNode {
        id,
        node_type: GraphNodeType::Module,
        label: label.into(),
        ..Default::default()
    };

    let store = make_store();
    let manifest = make_manifest();
    let file1 = make_file("f1", "src/main.rs");
    let file2 = make_file("f2", "src/lib.rs");
    let sym1 = make_symbol("s1", "main_fn", "f1");
    let sym2 = make_symbol("s2", "lib_fn", "f2");

    // --- initial index -----------------------------------------------------
    let initial_chunk = CodeChunk {
        id: "c1".into(),
        file_id: file1.id.clone(),
        range: LineRange { start: 1, end: 1 },
        language: Language::Rust,
        text: "fn main_fn() {}".into(),
        symbol_id: Some(sym1.id.clone()),
    };
    let initial_occurrence = SymbolOccurrence {
        symbol_id: sym1.id.clone(),
        file_id: file1.id.clone(),
        range: Some(LineRange::single(1)),
        is_definition: true,
        confidence: Confidence::Exact,
        provenance: EvidenceSourceType::StaticAnalysis,
    };
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &[file1.clone(), file2.clone()],
            symbols: &[sym1.clone(), sym2.clone()],
            chunks: std::slice::from_ref(&initial_chunk),
            tests: &[],
            imports: &[],
            occurrences: &[initial_occurrence],
            analysis_facts: &[],
        })
        .unwrap();

    // --- initial graph -----------------------------------------------------
    let main_node = function_node(NodeId::new("symbol:s1"), "main_fn", &file1, &sym1);
    let lib_node = function_node(NodeId::new("symbol:s2"), "lib_fn", &file2, &sym2);
    let symbol_edge = GraphEdge {
        id: EdgeId::new("edge:s1-s2"),
        from: main_node.id.clone(),
        to: lib_node.id.clone(),
        edge_type: GraphEdgeType::References,
        evidence: evidence(),
        ..Default::default()
    };
    let external_a = module_node(NodeId::new("external:a"), "external a");
    let external_b = module_node(NodeId::new("external:b"), "external b");
    // This edge links two file-less nodes; only its evidence source names the
    // file that is about to be deleted.
    let evidence_from_main = {
        let mut from_main = evidence();
        from_main.source = "src/main.rs".into();
        from_main
    };
    let evidence_edge = GraphEdge {
        id: EdgeId::new("edge:source-file"),
        from: external_a.id.clone(),
        to: external_b.id.clone(),
        edge_type: GraphEdgeType::RelatedToTicket,
        evidence: evidence_from_main,
        ..Default::default()
    };
    store
        .replace_graph(
            &[main_node, lib_node, external_a, external_b],
            &[symbol_edge, evidence_edge],
        )
        .unwrap();

    // --- partial update: delete f1, re-index f2 ------------------------------
    let reindexed_file2 = {
        let mut changed = file2.clone();
        changed.content_hash = "new-hash".into();
        changed
    };
    let new_sym2 = make_symbol("s2b", "lib_fn_new", "f2");
    let new_chunk = CodeChunk {
        id: "c2".into(),
        file_id: reindexed_file2.id.clone(),
        range: LineRange { start: 2, end: 2 },
        language: Language::Rust,
        text: "fn lib_fn_new() {}".into(),
        symbol_id: Some(new_sym2.id.clone()),
    };
    let new_lib_node = function_node(
        NodeId::new("symbol:s2b"),
        "lib_fn_new",
        &reindexed_file2,
        &new_sym2,
    );
    store
        .replace_files_index(PartialIndexUpdate {
            manifest: &manifest,
            changed_files: std::slice::from_ref(&reindexed_file2),
            deleted_file_ids: std::slice::from_ref(&file1.id),
            symbols: std::slice::from_ref(&new_sym2),
            chunks: std::slice::from_ref(&new_chunk),
            tests: &[],
            imports: &[],
            occurrences: &[],
            analysis_facts: &[],
            graph_nodes: std::slice::from_ref(&new_lib_node),
            graph_edges: &[],
        })
        .unwrap();

    // --- file, symbol and chunk rows -----------------------------------------
    let deleted_lookup = store.get_file_by_path(Path::new("src/main.rs")).unwrap();
    assert!(deleted_lookup.is_none());
    let kept_file = store
        .get_file_by_path(Path::new("src/lib.rs"))
        .unwrap()
        .unwrap();
    assert_eq!(kept_file.content_hash, "new-hash");
    assert!(store.symbol_by_id(&sym1.id).unwrap().is_none());
    assert!(store.symbol_by_id(&new_sym2.id).unwrap().is_some());
    assert!(store.chunks_for_file(&file1.id).unwrap().is_empty());
    assert_eq!(store.chunks_for_file(&file2.id).unwrap()[0].id, "c2");

    // --- graph rows ----------------------------------------------------------
    let remaining_edges: i64 = {
        let conn = store.connection.lock().unwrap();
        conn.query_row("SELECT COUNT(*) FROM graph_edges", [], |row| row.get(0))
            .unwrap()
    };
    assert_eq!(remaining_edges, 0);
    assert!(store.node_by_id("symbol:s1").unwrap().is_none());
    assert!(store.node_by_id("symbol:s2b").unwrap().is_some());
}
2902
// A partial update carrying two files with the same path must fail, and the
// failure must undo the whole update: the file scheduled for deletion is
// still present and neither duplicate is stored.
#[test]
fn partial_replace_rolls_back_on_insert_failure() {
    use std::path::Path;

    let store = make_store();
    let manifest = make_manifest();
    let existing = make_file("f1", "src/lib.rs");
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: std::slice::from_ref(&existing),
            symbols: &[],
            chunks: &[],
            tests: &[],
            imports: &[],
            occurrences: &[],
            analysis_facts: &[],
        })
        .unwrap();

    // Two different file ids that claim the same path.
    let first_dup = make_file("f2", "src/dup.rs");
    let second_dup = {
        let mut file = make_file("f3", "src/dup.rs");
        file.content_hash = "other".into();
        file
    };
    let failure = store
        .replace_files_index(PartialIndexUpdate {
            manifest: &manifest,
            changed_files: &[first_dup, second_dup],
            deleted_file_ids: std::slice::from_ref(&existing.id),
            symbols: &[],
            chunks: &[],
            tests: &[],
            imports: &[],
            occurrences: &[],
            analysis_facts: &[],
            graph_nodes: &[],
            graph_edges: &[],
        })
        .unwrap_err();
    let message = failure.to_string();
    assert!(message.contains("UNIQUE") || message.contains("constraint"));

    let still_there = store.get_file_by_path(Path::new("src/lib.rs")).unwrap();
    assert!(still_there.is_some());
    let never_stored = store.get_file_by_path(Path::new("src/dup.rs")).unwrap();
    assert!(never_stored.is_none());
}
2950
// Analysis facts passed to `replace_index` are stored and can be read back,
// either filtered by evidence source type or all together.
#[test]
fn replace_index_persists_analysis_facts() {
    let store = make_store();
    let handler = make_file("f1", "src/handler.rs");
    let manifest = make_manifest();

    let from_runtime = AnalysisFact {
        id: "runtime-1".into(),
        file_id: handler.id.clone(),
        symbol_id: None,
        target: "GET /api/orders".into(),
        target_kind: GraphNodeType::Endpoint,
        edge_type: GraphEdgeType::ExposesEndpoint,
        range: Some(LineRange::single(12)),
        confidence: Confidence::High,
        source: "open-kioku-runtime:.ok/runtime/spans.jsonl".into(),
        source_type: EvidenceSourceType::Runtime,
        message: "runtime endpoint observed in local trace artifact".into(),
    };
    let from_static = AnalysisFact {
        id: "static-1".into(),
        file_id: handler.id.clone(),
        symbol_id: None,
        target: "orders".into(),
        target_kind: GraphNodeType::DatabaseTable,
        edge_type: GraphEdgeType::ReadsTable,
        range: None,
        confidence: Confidence::Medium,
        source: "open-kioku-static".into(),
        source_type: EvidenceSourceType::StaticAnalysis,
        message: "static fact".into(),
    };
    let from_git = AnalysisFact {
        id: "git-1".into(),
        file_id: handler.id.clone(),
        symbol_id: None,
        target: "tests/handler_test.rs".into(),
        target_kind: GraphNodeType::Test,
        edge_type: GraphEdgeType::ChangedBy,
        range: None,
        confidence: Confidence::High,
        source: "git-history:abc123".into(),
        source_type: EvidenceSourceType::GitHistory,
        message: "git co-change observed in 1 commit(s), recency weight 1.00".into(),
    };

    let facts = [from_runtime.clone(), from_static, from_git.clone()];
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &[handler],
            symbols: &[],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &facts,
        })
        .unwrap();

    let runtime_only = store
        .analysis_facts(Some(EvidenceSourceType::Runtime), 10)
        .unwrap();
    assert_eq!(runtime_only.len(), 1);
    assert_eq!(runtime_only[0].id, from_runtime.id);
    assert_eq!(runtime_only[0].target, from_runtime.target);

    let git_only = store
        .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
        .unwrap();
    assert_eq!(git_only.len(), 1);
    assert_eq!(git_only[0].id, from_git.id);
    assert_eq!(git_only[0].target, from_git.target);

    let unfiltered = store.analysis_facts(None, 10).unwrap();
    assert_eq!(unfiltered.len(), 3);
}
3024
// Replacing the index, even twice with the same data, must not wipe the typed
// history snapshot, and the legacy git-history analysis fact must be stored
// exactly once.
#[test]
fn replace_index_preserves_typed_and_legacy_history() {
    use std::path::Path;

    let store = make_store();
    store.put_history_snapshot(&history_snapshot()).unwrap();

    let lib_rs = make_file("f1", "src/lib.rs");
    let manifest = make_manifest();
    let legacy_fact = AnalysisFact {
        id: "legacy-git-1".into(),
        file_id: lib_rs.id.clone(),
        symbol_id: None,
        target: "tests/lib_test.rs".into(),
        target_kind: GraphNodeType::Test,
        edge_type: GraphEdgeType::ChangedBy,
        range: None,
        confidence: Confidence::High,
        source: "git-history:newer".into(),
        source_type: EvidenceSourceType::GitHistory,
        message: "legacy co-change compatibility fact".into(),
    };

    let reindex = || {
        store
            .replace_index(IndexData {
                manifest: &manifest,
                files: std::slice::from_ref(&lib_rs),
                symbols: &[],
                occurrences: &[],
                chunks: &[],
                imports: &[],
                tests: &[],
                analysis_facts: std::slice::from_ref(&legacy_fact),
            })
            .unwrap();
    };
    // Run the identical replacement twice.
    reindex();
    reindex();

    assert_eq!(store.recent_commits(10).unwrap().len(), 2);
    let summary = store
        .history_for_file(Path::new("src/lib.rs"), 10)
        .unwrap();
    assert_eq!(summary.file_touches.len(), 2);

    let stored_legacy = store
        .analysis_facts(Some(EvidenceSourceType::GitHistory), 10)
        .unwrap();
    assert_eq!(stored_legacy.len(), 1);
    assert_eq!(stored_legacy[0].id, legacy_fact.id);
}
3072
// `list_symbols` returns every symbol when no filter is given and only the
// matching one when filtered by `alpha`.
#[test]
fn list_symbols_with_filter() {
    let store = make_store();
    let manifest = make_manifest();
    let lib_rs = make_file("f1", "src/lib.rs");
    let alpha = make_symbol("s1", "alpha_handler", "f1");
    let beta = make_symbol("s2", "beta_worker", "f1");

    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: &[lib_rs],
            symbols: &[alpha, beta],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();

    let everything = store.list_symbols(None, 100, 0).unwrap();
    assert_eq!(everything.len(), 2);

    let only_alpha = store.list_symbols(Some("alpha"), 10, 0).unwrap();
    assert_eq!(only_alpha.len(), 1);
    assert_eq!(only_alpha[0].name, "alpha_handler");
}
3101
/// Stores a two-node graph joined by a single `Defines` edge and checks that
/// `neighbors("file:src/lib.rs", 10)` reports that edge and includes the file
/// node itself.
#[test]
fn replace_graph_and_neighbors() {
    let store = make_store();
    let file = make_file("f1", "src/lib.rs");
    let manifest = make_manifest();
    let files = vec![file];
    let data = IndexData {
        manifest: &manifest,
        files: &files,
        symbols: &[],
        occurrences: &[],
        chunks: &[],
        imports: &[],
        tests: &[],
        analysis_facts: &[],
    };
    store.replace_index(data).unwrap();

    let node_a = GraphNode {
        id: NodeId::new("file:src/lib.rs"),
        node_type: GraphNodeType::File,
        label: "src/lib.rs".into(),
        file_id: Some(FileId::new("f1")),
        symbol_id: None,
        ..Default::default()
    };
    let node_b = GraphNode {
        id: NodeId::new("symbol:s1"),
        node_type: GraphNodeType::Function,
        label: "worker".into(),
        file_id: Some(FileId::new("f1")),
        symbol_id: Some(SymbolId::new("s1")),
        ..Default::default()
    };
    let edge = GraphEdge {
        id: EdgeId::new("e1"),
        from: node_a.id.clone(),
        to: node_b.id.clone(),
        edge_type: GraphEdgeType::Defines,
        evidence: evidence(),
        ..Default::default()
    };

    // `node_a` is cloned because its id is asserted on below; `node_b` is not
    // read again, so it is moved into the slice rather than cloned.
    store
        .replace_graph(&[node_a.clone(), node_b], std::slice::from_ref(&edge))
        .unwrap();

    let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();
    assert_eq!(edges.len(), 1);
    assert_eq!(edges[0].id.0, "e1");
    assert!(nodes.iter().any(|n| n.id == node_a.id));
}
3158
/// Writes a node and an edge that carry properties, schema/extractor metadata
/// and confidence details, reads them back through `neighbors`, and checks
/// that those fields survive the round trip. Also reads the `confidence`
/// column of `graph_edges` directly.
#[test]
fn graph_facts_with_properties_and_confidence_metadata_round_trip() {
    let store = make_store();
    let file = make_file("f1", "src/lib.rs");
    let manifest = make_manifest();
    let files = vec![file];
    let data = IndexData {
        manifest: &manifest,
        files: &files,
        symbols: &[],
        occurrences: &[],
        chunks: &[],
        imports: &[],
        tests: &[],
        analysis_facts: &[],
    };
    store.replace_index(data).unwrap();

    let node_a = GraphNode {
        id: NodeId::new("file:src/lib.rs"),
        node_type: GraphNodeType::File,
        label: "src/lib.rs".into(),
        file_id: Some(FileId::new("f1")),
        properties: BTreeMap::from([("package".into(), serde_json::json!("open-kioku"))]),
        schema_version: Some("graph-v1".into()),
        source_pass: Some("tree_sitter".into()),
        index_mode: Some("full".into()),
        extractor_version: Some("test-extractor".into()),
        ambiguity: vec!["generated file status unknown".into()],
        quality_notes: vec!["file path verified".into()],
        ..Default::default()
    };
    let node_b = GraphNode {
        id: NodeId::new("symbol:s1"),
        node_type: GraphNodeType::Function,
        label: "worker".into(),
        file_id: Some(FileId::new("f1")),
        symbol_id: Some(SymbolId::new("s1")),
        ..Default::default()
    };
    let mut edge_evidence = evidence();
    edge_evidence.confidence_score = Some(0.98);
    edge_evidence.confidence_reason = Some("exact symbol occurrence".into());
    edge_evidence.freshness = Some("fresh".into());
    let edge = GraphEdge {
        id: EdgeId::new("e1"),
        from: node_a.id.clone(),
        to: node_b.id.clone(),
        edge_type: GraphEdgeType::Defines,
        evidence: edge_evidence,
        properties: BTreeMap::from([("relation".into(), serde_json::json!("definition"))]),
        schema_version: Some("graph-v1".into()),
        source_pass: Some("scip".into()),
        index_mode: Some("full".into()),
        extractor_version: Some("test-scip".into()),
        ambiguity: vec!["macro expansion not modeled".into()],
        quality_notes: vec!["exact definition edge".into()],
    };

    // `node_a` is compared against the stored copy below, so it is cloned;
    // `node_b` is not read again and is moved instead of cloned.
    store
        .replace_graph(&[node_a.clone(), node_b], std::slice::from_ref(&edge))
        .unwrap();

    let (nodes, edges) = store.neighbors("file:src/lib.rs", 10).unwrap();

    // Node-side metadata.
    let stored_node = nodes.iter().find(|node| node.id == node_a.id).unwrap();
    assert_eq!(stored_node.properties, node_a.properties);
    assert_eq!(stored_node.schema_version.as_deref(), Some("graph-v1"));
    assert_eq!(stored_node.source_pass.as_deref(), Some("tree_sitter"));
    assert_eq!(stored_node.quality_notes, vec!["file path verified"]);

    // Edge-side metadata, including the evidence fields overridden above.
    assert_eq!(edges.len(), 1);
    let stored_edge = &edges[0];
    assert_eq!(stored_edge.properties, edge.properties);
    assert_eq!(stored_edge.schema_version.as_deref(), Some("graph-v1"));
    assert_eq!(stored_edge.evidence.confidence_score, Some(0.98));
    assert_eq!(
        stored_edge.evidence.confidence_reason.as_deref(),
        Some("exact symbol occurrence")
    );
    assert_eq!(stored_edge.evidence.freshness.as_deref(), Some("fresh"));

    // Read the `confidence` column straight from the table.
    // NOTE(review): "Medium" presumably mirrors the confidence level produced
    // by the `evidence()` helper — confirm against that helper.
    let indexed_confidence: String = store
        .connection
        .lock()
        .unwrap()
        .query_row(
            "SELECT confidence FROM graph_edges WHERE id = 'e1'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(indexed_confidence, "Medium");
}
3255
/// Builds a graph of two nodes, `a` and `b`, linked by the edge `a-b`, and
/// checks that `shortest_path("a", "b", 5)` returns exactly that one edge.
#[test]
fn shortest_path_finds_direct_route() {
    let store = make_store();
    let source_file = make_file("f1", "src/lib.rs");
    let manifest = make_manifest();
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: std::slice::from_ref(&source_file),
            symbols: &[],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();

    let start = GraphNode {
        id: NodeId::new("a"),
        node_type: GraphNodeType::File,
        label: "a".into(),
        file_id: None,
        symbol_id: None,
        ..Default::default()
    };
    let goal = GraphNode {
        id: NodeId::new("b"),
        node_type: GraphNodeType::File,
        label: "b".into(),
        file_id: None,
        symbol_id: None,
        ..Default::default()
    };
    let link = GraphEdge {
        id: EdgeId::new("a-b"),
        from: start.id.clone(),
        to: goal.id.clone(),
        edge_type: GraphEdgeType::Defines,
        evidence: evidence(),
        ..Default::default()
    };
    store.replace_graph(&[start, goal], &[link]).unwrap();

    // The only route from `a` to `b` is the single direct edge.
    let route = store.shortest_path("a", "b", 5).unwrap();
    assert_eq!(route.len(), 1);
    assert_eq!(route[0].id.0, "a-b");
}
3304
/// With an empty graph stored, `shortest_path("x", "y", 5)` must succeed and
/// return an empty path.
#[test]
fn shortest_path_returns_empty_when_no_route() {
    let store = make_store();
    let source_file = make_file("f1", "src/lib.rs");
    let manifest = make_manifest();
    store
        .replace_index(IndexData {
            manifest: &manifest,
            files: std::slice::from_ref(&source_file),
            symbols: &[],
            occurrences: &[],
            chunks: &[],
            imports: &[],
            tests: &[],
            analysis_facts: &[],
        })
        .unwrap();
    // Store a graph with no nodes and no edges.
    store.replace_graph(&[], &[]).unwrap();

    let route = store.shortest_path("x", "y", 5).unwrap();
    assert!(route.is_empty());
}
3327
/// Swaps the graph tables for a minimal legacy layout (id/label/json for
/// nodes; id/from_id/to_id/edge_type/json for edges), re-runs `initialize`,
/// and checks that:
/// - the legacy rows are queryable by node type, edge type and endpoints,
/// - `graph_schema_counts` reports them,
/// - after `replace_graph`, the `node_type` column is filled in for the new node,
/// - `user_version` equals `SQLITE_GRAPH_SCHEMA_VERSION`,
/// - all seven named graph indexes exist.
#[test]
fn test_old_graph_tables_migrate_and_replace_graph_backfills_columns() {
    let store = make_store();

    let legacy_file = GraphNode {
        id: NodeId::new("legacy_file"),
        node_type: GraphNodeType::File,
        label: "legacy.rs".into(),
        file_id: Some(FileId::new("f1")),
        ..Default::default()
    };
    let legacy_symbol = GraphNode {
        id: NodeId::new("legacy_symbol"),
        node_type: GraphNodeType::Function,
        label: "legacy_fn".into(),
        symbol_id: Some(SymbolId::new("s1")),
        ..Default::default()
    };
    let mut scip_evidence = evidence();
    scip_evidence.source_type = EvidenceSourceType::Scip;
    scip_evidence.source = "index.scip".into();
    let legacy_edge = GraphEdge {
        id: EdgeId::new("legacy_edge"),
        from: legacy_file.id.clone(),
        to: legacy_symbol.id.clone(),
        edge_type: GraphEdgeType::Defines,
        evidence: scip_evidence,
        ..Default::default()
    };

    // Recreate the tables in their legacy shape and seed them by hand. The
    // connection guard is scoped so it is released before `initialize` runs.
    {
        let conn = store.connection.lock().unwrap();
        let legacy_ddl = [
            "DROP TABLE graph_nodes",
            "DROP TABLE graph_edges",
            "CREATE TABLE graph_nodes(id TEXT PRIMARY KEY, label TEXT, json TEXT)",
            "CREATE TABLE graph_edges(id TEXT PRIMARY KEY, from_id TEXT, to_id TEXT, edge_type TEXT, json TEXT)",
        ];
        for statement in legacy_ddl {
            conn.execute(statement, []).unwrap();
        }
        for legacy_node in [&legacy_file, &legacy_symbol] {
            conn.execute(
                "INSERT INTO graph_nodes(id, label, json) VALUES(?1, ?2, ?3)",
                params![
                    legacy_node.id.0.as_str(),
                    legacy_node.label.as_str(),
                    serde_json::to_string(legacy_node).unwrap(),
                ],
            )
            .unwrap();
        }
        // The legacy edge row stores an empty `edge_type`; the real type is
        // only present inside the serialized JSON.
        conn.execute(
            "INSERT INTO graph_edges(id, from_id, to_id, edge_type, json)
             VALUES(?1, ?2, ?3, '', ?4)",
            params![
                legacy_edge.id.0.as_str(),
                legacy_edge.from.0.as_str(),
                legacy_edge.to.0.as_str(),
                serde_json::to_string(&legacy_edge).unwrap(),
            ],
        )
        .unwrap();
    }

    // `initialize` is invoked twice in a row; both calls must succeed.
    for _ in 0..2 {
        store.initialize().unwrap();
    }

    let file_nodes = store.nodes_by_type(GraphNodeType::File, 10, 0).unwrap();
    assert_eq!(file_nodes.len(), 1);
    assert_eq!(file_nodes[0].id.0, "legacy_file");

    let defines_edges = store.edges_by_type(GraphEdgeType::Defines, 10, 0).unwrap();
    assert_eq!(defines_edges.len(), 1);
    assert_eq!(defines_edges[0].id.0, "legacy_edge");
    let edges_between = store
        .graph_edges_between("legacy_file", "legacy_symbol", 10)
        .unwrap();
    assert_eq!(edges_between.len(), 1);

    let schema_counts = store.graph_schema_counts().unwrap();
    assert_eq!(schema_counts.node_types.get("File"), Some(&1));
    assert_eq!(schema_counts.edge_types.get("Defines"), Some(&1));

    let replacement = GraphNode {
        id: NodeId::new("test_node"),
        node_type: GraphNodeType::File,
        label: "test".into(),
        ..Default::default()
    };
    store.replace_graph(&[replacement], &[]).unwrap();

    // Runs a parameterless query that yields one integer, taking and
    // releasing the connection lock for just that query.
    let scalar_query = |sql: &str| -> i64 {
        store
            .connection
            .lock()
            .unwrap()
            .query_row(sql, [], |row| row.get(0))
            .unwrap()
    };

    let file_rows = scalar_query("SELECT COUNT(*) FROM graph_nodes WHERE node_type = 'File'");
    assert_eq!(file_rows, 1);

    let user_version: i64 = store
        .connection
        .lock()
        .unwrap()
        .pragma_query_value(None, "user_version", |row| row.get(0))
        .unwrap();
    assert_eq!(user_version, SQLITE_GRAPH_SCHEMA_VERSION);

    let graph_indexes = scalar_query(
        "SELECT COUNT(*) FROM sqlite_master
         WHERE type = 'index'
         AND name IN (
         'idx_graph_nodes_type',
         'idx_graph_nodes_file',
         'idx_graph_nodes_symbol',
         'idx_graph_edges_type',
         'idx_graph_edges_from_type',
         'idx_graph_edges_to_type',
         'idx_graph_edges_source_type'
         )",
    );
    assert_eq!(graph_indexes, 7);
}
3465
/// Stores two `File` nodes and one `Function` node, deliberately out of id
/// order, and checks that `nodes_by_type(File, 10, 0)` returns only the two
/// `File` nodes, as `n1` then `n2`.
#[test]
fn test_nodes_by_type_uses_indexed_column() {
    let store = make_store();
    let node1 = GraphNode {
        id: NodeId::new("n1"),
        node_type: GraphNodeType::File,
        ..Default::default()
    };
    let node2 = GraphNode {
        id: NodeId::new("n2"),
        node_type: GraphNodeType::File,
        ..Default::default()
    };
    let node3 = GraphNode {
        id: NodeId::new("n3"),
        node_type: GraphNodeType::Function,
        ..Default::default()
    };
    // The nodes are not read again after this call, so they are moved into
    // the slice instead of being cloned. Insertion order is n2, n3, n1.
    store.replace_graph(&[node2, node3, node1], &[]).unwrap();

    let nodes = store.nodes_by_type(GraphNodeType::File, 10, 0).unwrap();
    assert_eq!(nodes.len(), 2);
    assert_eq!(nodes[0].id.0, "n1");
    assert_eq!(nodes[1].id.0, "n2");
}
3493
/// Stores two `Calls` edges and one `Defines` edge between `n1` and `n2`,
/// deliberately out of id order, and checks that
/// `edges_by_type(Calls, 10, 0)` returns only the two `Calls` edges, as `e1`
/// then `e2`.
#[test]
fn test_edges_by_type_uses_indexed_column() {
    let store = make_store();
    let node1 = GraphNode {
        id: NodeId::new("n1"),
        ..Default::default()
    };
    let node2 = GraphNode {
        id: NodeId::new("n2"),
        ..Default::default()
    };
    let edge1 = GraphEdge {
        id: EdgeId::new("e1"),
        from: NodeId::new("n1"),
        to: NodeId::new("n2"),
        edge_type: GraphEdgeType::Calls,
        ..Default::default()
    };
    let edge2 = GraphEdge {
        id: EdgeId::new("e2"),
        from: NodeId::new("n1"),
        to: NodeId::new("n2"),
        edge_type: GraphEdgeType::Calls,
        ..Default::default()
    };
    let edge3 = GraphEdge {
        id: EdgeId::new("e3"),
        from: NodeId::new("n1"),
        to: NodeId::new("n2"),
        edge_type: GraphEdgeType::Defines,
        ..Default::default()
    };
    // The edges are not read again after this call, so they are moved into
    // the slice instead of being cloned. Insertion order is e2, e3, e1.
    store
        .replace_graph(&[node1, node2], &[edge2, edge3, edge1])
        .unwrap();

    let edges = store.edges_by_type(GraphEdgeType::Calls, 10, 0).unwrap();
    assert_eq!(edges.len(), 2);
    assert_eq!(edges[0].id.0, "e1");
    assert_eq!(edges[1].id.0, "e2");
}
3538
/// Stores two edges between `n1` and `n2` (inserted as `e2`, then `e1`) and
/// checks that `graph_edges_between("n1", "n2", 1)` returns a single edge,
/// namely `e1`.
#[test]
fn test_graph_edges_between_respects_limit() {
    let store = make_store();
    let node1 = GraphNode {
        id: NodeId::new("n1"),
        ..Default::default()
    };
    let node2 = GraphNode {
        id: NodeId::new("n2"),
        ..Default::default()
    };
    let edge1 = GraphEdge {
        id: EdgeId::new("e1"),
        from: NodeId::new("n1"),
        to: NodeId::new("n2"),
        ..Default::default()
    };
    let edge2 = GraphEdge {
        id: EdgeId::new("e2"),
        from: NodeId::new("n1"),
        to: NodeId::new("n2"),
        ..Default::default()
    };
    // The edges are not read again after this call, so they are moved into
    // the slice instead of being cloned.
    store
        .replace_graph(&[node1, node2], &[edge2, edge1])
        .unwrap();

    let edges = store.graph_edges_between("n1", "n2", 1).unwrap();
    assert_eq!(edges.len(), 1);
    assert_eq!(edges[0].id.0, "e1");
}
3570
/// Checks three sample inputs of `clamp_limit`: 0 maps to 100, 5 is kept
/// as-is, and 5000 is reduced to 1000.
#[test]
fn test_query_limit_is_capped() {
    // (requested limit, expected result)
    let cases = [(0, 100), (5, 5), (5000, 1000)];
    for (requested, expected) in cases {
        assert_eq!(super::clamp_limit(requested), expected);
    }
}
3577
/// Stores two `File` nodes, one `Function` node and one `Calls` edge, then
/// checks the per-type totals reported by `graph_schema_counts`.
#[test]
fn test_graph_schema_counts_returns_sorted_type_counts() {
    let store = make_store();
    let graph_nodes = [
        GraphNode {
            id: NodeId::new("n1"),
            node_type: GraphNodeType::File,
            ..Default::default()
        },
        GraphNode {
            id: NodeId::new("n2"),
            node_type: GraphNodeType::File,
            ..Default::default()
        },
        GraphNode {
            id: NodeId::new("n3"),
            node_type: GraphNodeType::Function,
            ..Default::default()
        },
    ];
    let graph_edges = [GraphEdge {
        id: EdgeId::new("e1"),
        from: NodeId::new("n1"),
        to: NodeId::new("n2"),
        edge_type: GraphEdgeType::Calls,
        ..Default::default()
    }];
    store.replace_graph(&graph_nodes, &graph_edges).unwrap();

    let by_type = store.graph_schema_counts().unwrap();
    assert_eq!(by_type.node_types.get("File"), Some(&2));
    assert_eq!(by_type.node_types.get("Function"), Some(&1));
    assert_eq!(by_type.edge_types.get("Calls"), Some(&1));
}
3612
/// Stores two nodes and one edge, then checks that `graph_counts` reports
/// totals of 2 nodes and 1 edge.
#[test]
fn test_graph_counts_returns_total_nodes_and_edges() {
    let store = make_store();
    let graph_nodes = [
        GraphNode {
            id: NodeId::new("n1"),
            node_type: GraphNodeType::File,
            ..Default::default()
        },
        GraphNode {
            id: NodeId::new("n2"),
            node_type: GraphNodeType::File,
            ..Default::default()
        },
    ];
    let graph_edges = [GraphEdge {
        id: EdgeId::new("e1"),
        from: NodeId::new("n1"),
        to: NodeId::new("n2"),
        edge_type: GraphEdgeType::Calls,
        ..Default::default()
    }];
    store.replace_graph(&graph_nodes, &graph_edges).unwrap();

    let totals = store.graph_counts().unwrap();
    assert_eq!(totals.nodes, 2);
    assert_eq!(totals.edges, 1);
}
3639}