1use std::collections::{HashMap, HashSet};
11use std::path::Path;
12
13use rusqlite::{Connection, OpenFlags, params, params_from_iter};
14
15use crate::error::RepographError;
16use crate::search::chunk::{Chunk, TrackedFile, chunk_file};
17
/// Schema version this build reads and writes. It is stored under the
/// `schema_version` key of the `meta` table and compared whenever a store is
/// opened.
pub const SCHEMA_VERSION: &str = "1";
22
/// Constant added to the denominator of every per-list contribution computed
/// by `fuse`: `1 / (RRF_K + rank + 1)`, where `rank` is zero-based.
const RRF_K: f64 = 60.0;
26
/// Produces embedding vectors for chunk text while a repository is indexed.
pub trait Embedder {
    /// Identifier of the embedding model. It is written next to every stored
    /// vector; the tests in this file also pass it to `Store::ensure_model`
    /// before building with an embedder.
    fn model_id(&self) -> &str;

    /// Embeds `texts`.
    ///
    /// The indexing code in this file expects exactly one vector per input
    /// text: a result of any other length is discarded with a warning, and an
    /// `Err` is logged and leaves the affected file without vectors.
    fn embed(&mut self, texts: &[String]) -> Result<Vec<Vec<f32>>, String>;
}
43
/// Per-repository counters returned by `Store::reconcile_repo`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RepoStats {
    /// Files that were (re)chunked and written during the run.
    pub files_indexed: usize,
    /// Files skipped because the stored content hash matched and no vectors
    /// were missing for them.
    pub files_unchanged: usize,
    /// Files removed from the index because they were absent from the
    /// supplied file list.
    pub files_purged: usize,
}
54
/// The chunk columns returned by `Store::fetch_chunks`.
#[derive(Debug, Clone)]
pub struct ChunkRow {
    /// Value of the chunk's `repo` column.
    pub repo: String,
    /// Value of the chunk's `path` column.
    pub path: String,
    /// Value of the chunk's `start_line` column; a stored value that does not
    /// fit in `u32` is read back as `u32::MAX`.
    pub start_line: u32,
    /// Value of the chunk's `content` column.
    pub content: String,
}
63
/// Handle to the SQLite database that holds the search index.
pub struct Store {
    /// Underlying connection: opened with rusqlite's default flags by
    /// `open_for_build` and with `SQLITE_OPEN_READ_ONLY` by `open_existing`.
    conn: Connection,
}
68
69impl std::fmt::Debug for Store {
70 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
71 f.debug_struct("Store").finish_non_exhaustive()
72 }
73}
74
75impl Store {
76 pub fn open_for_build(db_path: &Path) -> Result<Self, RepographError> {
83 if let Some(parent) = db_path.parent() {
84 fs_err::create_dir_all(parent)?;
85 }
86 let conn = Connection::open(db_path)?;
87 let store = Self { conn };
88 store.ensure_schema()?;
89 Ok(store)
90 }
91
92 pub fn open_existing(db_path: &Path) -> Result<Self, RepographError> {
103 if !db_path.is_file() {
104 return Err(RepographError::IndexMissing);
105 }
106 let conn = Connection::open_with_flags(db_path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
107 let store = Self { conn };
108 let version: Option<String> = store.meta_get("schema_version")?;
109 match version.as_deref() {
110 Some(v) if v == SCHEMA_VERSION => Ok(store),
111 Some(other) => Err(RepographError::Index(format!(
112 "index schema version {other} is not readable by this build (expected {SCHEMA_VERSION}); run `repograph index` to rebuild"
113 ))),
114 None => Err(RepographError::Index(
115 "index is missing its schema marker (corrupt); run `repograph index` to rebuild"
116 .to_string(),
117 )),
118 }
119 }
120
121 fn ensure_schema(&self) -> Result<(), RepographError> {
122 self.conn.execute_batch(
123 "CREATE TABLE IF NOT EXISTS meta (key TEXT PRIMARY KEY, value TEXT NOT NULL)",
124 )?;
125 let version: Option<String> = self.meta_get("schema_version")?;
126 if version.as_deref() == Some(SCHEMA_VERSION) {
127 return Ok(());
128 }
129 if version.is_some() {
130 self.drop_all()?;
131 }
132 self.create_all()?;
133 self.meta_set("schema_version", SCHEMA_VERSION)?;
134 Ok(())
135 }
136
137 fn drop_all(&self) -> Result<(), RepographError> {
138 self.conn.execute_batch(
139 "DROP TABLE IF EXISTS chunks_fts;
140 DROP TABLE IF EXISTS vectors;
141 DROP TABLE IF EXISTS chunks;
142 DROP TABLE IF EXISTS files;
143 DROP TABLE IF EXISTS repos;",
144 )?;
145 Ok(())
146 }
147
148 fn create_all(&self) -> Result<(), RepographError> {
149 self.conn.execute_batch(
150 "CREATE TABLE IF NOT EXISTS repos (
151 repo TEXT PRIMARY KEY,
152 indexed_commit TEXT
153 );
154 CREATE TABLE IF NOT EXISTS files (
155 repo TEXT NOT NULL,
156 path TEXT NOT NULL,
157 content_hash TEXT NOT NULL,
158 PRIMARY KEY (repo, path)
159 );
160 CREATE TABLE IF NOT EXISTS chunks (
161 id INTEGER PRIMARY KEY AUTOINCREMENT,
162 repo TEXT NOT NULL,
163 path TEXT NOT NULL,
164 start_line INTEGER NOT NULL,
165 end_line INTEGER NOT NULL,
166 content TEXT NOT NULL,
167 prefix TEXT NOT NULL
168 );
169 CREATE INDEX IF NOT EXISTS idx_chunks_repo_path ON chunks(repo, path);
170 CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(text, chunk_id UNINDEXED);
171 CREATE TABLE IF NOT EXISTS vectors (
172 chunk_id INTEGER PRIMARY KEY,
173 embedding BLOB NOT NULL,
174 model TEXT NOT NULL
175 );",
176 )?;
177 Ok(())
178 }
179
180 fn meta_get(&self, key: &str) -> Result<Option<String>, RepographError> {
181 match self
182 .conn
183 .query_row("SELECT value FROM meta WHERE key = ?1", [key], |r| {
184 r.get::<_, String>(0)
185 }) {
186 Ok(v) => Ok(Some(v)),
187 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
188 Err(e) => Err(e.into()),
189 }
190 }
191
192 fn meta_set(&self, key: &str, value: &str) -> Result<(), RepographError> {
193 self.conn.execute(
194 "INSERT INTO meta(key, value) VALUES(?1, ?2)
195 ON CONFLICT(key) DO UPDATE SET value = excluded.value",
196 params![key, value],
197 )?;
198 Ok(())
199 }
200
201 pub fn ensure_model(&self, model_id: &str) -> Result<(), RepographError> {
209 let current: Option<String> = self.meta_get("model_id")?;
210 if current.as_deref() != Some(model_id) {
211 self.conn.execute("DELETE FROM vectors", [])?;
212 self.meta_set("model_id", model_id)?;
213 }
214 Ok(())
215 }
216
217 pub fn has_vectors(&self) -> Result<bool, RepographError> {
224 let n: i64 = self
225 .conn
226 .query_row("SELECT COUNT(*) FROM vectors", [], |r| r.get(0))?;
227 Ok(n > 0)
228 }
229
230 pub fn indexed_commits(&self) -> Result<HashMap<String, Option<String>>, RepographError> {
236 let mut stmt = self
237 .conn
238 .prepare("SELECT repo, indexed_commit FROM repos")?;
239 let rows = stmt.query_map([], |r| {
240 Ok((r.get::<_, String>(0)?, r.get::<_, Option<String>>(1)?))
241 })?;
242 let mut out = HashMap::new();
243 for row in rows {
244 let (repo, commit) = row?;
245 out.insert(repo, commit);
246 }
247 Ok(out)
248 }
249
250 pub fn reconcile_repo(
260 &mut self,
261 repo: &str,
262 files: &[TrackedFile],
263 head_commit: Option<&str>,
264 mut embedder: Option<&mut dyn Embedder>,
265 ) -> Result<RepoStats, RepographError> {
266 let mut stats = RepoStats::default();
267 let existing = self.existing_hashes(repo)?;
268 let embedding = embedder.is_some();
269 let vectored: HashSet<String> = if embedding {
275 self.paths_with_vectors(repo)?
276 } else {
277 HashSet::new()
278 };
279 let current: HashSet<&str> = files.iter().map(|f| f.path.as_str()).collect();
280
281 let tx = self.conn.transaction()?;
282
283 for path in existing.keys() {
284 if !current.contains(path.as_str()) {
285 delete_file_chunks(&tx, repo, path)?;
286 tx.execute(
287 "DELETE FROM files WHERE repo = ?1 AND path = ?2",
288 params![repo, path],
289 )?;
290 stats.files_purged += 1;
291 }
292 }
293
294 for f in files {
295 let unchanged = existing.get(&f.path) == Some(&f.content_hash);
296 let needs_vectors = embedding && !vectored.contains(&f.path);
297 if unchanged && !needs_vectors {
298 stats.files_unchanged += 1;
299 continue;
300 }
301 delete_file_chunks(&tx, repo, &f.path)?;
302 let chunks = chunk_file(repo, &f.path, &f.text);
303 #[allow(clippy::option_if_let_else)]
307 let emb: Option<&mut dyn Embedder> = match &mut embedder {
308 Some(e) => Some(&mut **e),
309 None => None,
310 };
311 let embeddings = embed_chunks(emb, &chunks);
312 insert_chunks(&tx, repo, &chunks, embeddings.as_ref())?;
313 tx.execute(
314 "INSERT INTO files(repo, path, content_hash) VALUES(?1, ?2, ?3)
315 ON CONFLICT(repo, path) DO UPDATE SET content_hash = excluded.content_hash",
316 params![repo, f.path, f.content_hash],
317 )?;
318 stats.files_indexed += 1;
319 }
320
321 tx.execute(
322 "INSERT INTO repos(repo, indexed_commit) VALUES(?1, ?2)
323 ON CONFLICT(repo) DO UPDATE SET indexed_commit = excluded.indexed_commit",
324 params![repo, head_commit],
325 )?;
326 tx.commit()?;
327 Ok(stats)
328 }
329
330 fn paths_with_vectors(&self, repo: &str) -> Result<HashSet<String>, RepographError> {
333 let mut stmt = self.conn.prepare(
334 "SELECT DISTINCT c.path FROM chunks c JOIN vectors v ON v.chunk_id = c.id
335 WHERE c.repo = ?1",
336 )?;
337 let rows = stmt.query_map([repo], |r| r.get::<_, String>(0))?;
338 let mut out = HashSet::new();
339 for row in rows {
340 out.insert(row?);
341 }
342 Ok(out)
343 }
344
345 fn existing_hashes(&self, repo: &str) -> Result<HashMap<String, String>, RepographError> {
346 let mut stmt = self
347 .conn
348 .prepare("SELECT path, content_hash FROM files WHERE repo = ?1")?;
349 let rows = stmt.query_map([repo], |r| {
350 Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?))
351 })?;
352 let mut out = HashMap::new();
353 for row in rows {
354 let (path, hash) = row?;
355 out.insert(path, hash);
356 }
357 Ok(out)
358 }
359
360 pub fn search_lexical(
368 &self,
369 query: &str,
370 repos: &[String],
371 pool: usize,
372 ) -> Result<Vec<i64>, RepographError> {
373 let Some(match_expr) = fts_query(query) else {
374 return Ok(Vec::new());
375 };
376 let pool_i = i64::try_from(pool).unwrap_or(i64::MAX);
377 let mut sql = String::from(
380 "SELECT chunks.id FROM chunks_fts JOIN chunks ON chunks.id = chunks_fts.chunk_id
381 WHERE chunks_fts MATCH ?1",
382 );
383 let mut binds: Vec<rusqlite::types::Value> = vec![match_expr.into()];
384 if !repos.is_empty() {
385 let placeholders = repo_placeholders(repos.len(), binds.len() + 1);
386 sql.push_str(" AND chunks.repo IN (");
387 sql.push_str(&placeholders);
388 sql.push(')');
389 for r in repos {
390 binds.push(r.clone().into());
391 }
392 }
393 sql.push_str(" ORDER BY bm25(chunks_fts) LIMIT ");
394 sql.push_str(&pool_i.to_string());
395 let mut stmt = self.conn.prepare(&sql)?;
396 let rows = stmt.query_map(params_from_iter(binds), |r| r.get::<_, i64>(0))?;
397 let mut ids = Vec::new();
398 for row in rows {
399 ids.push(row?);
400 }
401 Ok(ids)
402 }
403
404 pub fn search_vectors(
411 &self,
412 query_embedding: &[f32],
413 repos: &[String],
414 pool: usize,
415 ) -> Result<Vec<i64>, RepographError> {
416 let mut sql = String::from(
417 "SELECT v.chunk_id, v.embedding FROM vectors v JOIN chunks c ON c.id = v.chunk_id",
418 );
419 let mut binds: Vec<rusqlite::types::Value> = Vec::new();
420 if !repos.is_empty() {
421 let placeholders = repo_placeholders(repos.len(), 1);
422 sql.push_str(" WHERE c.repo IN (");
423 sql.push_str(&placeholders);
424 sql.push(')');
425 for r in repos {
426 binds.push(r.clone().into());
427 }
428 }
429 let mut stmt = self.conn.prepare(&sql)?;
430 let rows = stmt.query_map(params_from_iter(binds), |r| {
431 Ok((r.get::<_, i64>(0)?, r.get::<_, Vec<u8>>(1)?))
432 })?;
433 let mut scored: Vec<(i64, f32)> = Vec::new();
434 for row in rows {
435 let (id, blob) = row?;
436 let v = blob_to_vec(&blob);
437 scored.push((id, cosine(query_embedding, &v)));
438 }
439 scored.sort_by(|a, b| b.1.total_cmp(&a.1));
440 scored.truncate(pool);
441 Ok(scored.into_iter().map(|(id, _)| id).collect())
442 }
443
444 pub fn fetch_chunks(&self, ids: &[i64]) -> Result<HashMap<i64, ChunkRow>, RepographError> {
450 if ids.is_empty() {
451 return Ok(HashMap::new());
452 }
453 let placeholders = repo_placeholders(ids.len(), 1);
454 let sql = format!(
455 "SELECT id, repo, path, start_line, content FROM chunks WHERE id IN ({placeholders})"
456 );
457 let mut stmt = self.conn.prepare(&sql)?;
458 let binds: Vec<rusqlite::types::Value> = ids.iter().map(|i| (*i).into()).collect();
459 let rows = stmt.query_map(params_from_iter(binds), |r| {
460 Ok((
461 r.get::<_, i64>(0)?,
462 ChunkRow {
463 repo: r.get::<_, String>(1)?,
464 path: r.get::<_, String>(2)?,
465 start_line: u32::try_from(r.get::<_, i64>(3)?).unwrap_or(u32::MAX),
466 content: r.get::<_, String>(4)?,
467 },
468 ))
469 })?;
470 let mut out = HashMap::new();
471 for row in rows {
472 let (id, chunk) = row?;
473 out.insert(id, chunk);
474 }
475 Ok(out)
476 }
477}
478
479#[must_use]
483pub fn fuse(lists: &[&[i64]]) -> Vec<(i64, f64)> {
484 let mut scores: HashMap<i64, f64> = HashMap::new();
485 for list in lists {
486 for (rank, id) in list.iter().enumerate() {
487 #[allow(clippy::cast_precision_loss)]
488 let contribution = 1.0 / (RRF_K + (rank as f64) + 1.0);
489 *scores.entry(*id).or_insert(0.0) += contribution;
490 }
491 }
492 let mut fused: Vec<(i64, f64)> = scores.into_iter().collect();
493 fused.sort_by(|a, b| b.1.total_cmp(&a.1).then(a.0.cmp(&b.0)));
494 fused
495}
496
497fn embed_chunks(
498 embedder: Option<&mut dyn Embedder>,
499 chunks: &[Chunk],
500) -> Option<(Vec<Vec<f32>>, String)> {
501 let embedder = embedder?;
502 if chunks.is_empty() {
503 return None;
504 }
505 let texts: Vec<String> = chunks.iter().map(Chunk::index_text).collect();
506 let model = embedder.model_id().to_string();
507 match embedder.embed(&texts) {
508 Ok(vectors) if vectors.len() == chunks.len() => Some((vectors, model)),
509 Ok(_) => {
510 tracing::warn!("embedder returned a vector count != chunk count; skipping vectors");
511 None
512 }
513 Err(msg) => {
514 tracing::warn!(error = %msg, "embedding failed; this file is lexical-only");
515 None
516 }
517 }
518}
519
520fn delete_file_chunks(
521 tx: &rusqlite::Transaction<'_>,
522 repo: &str,
523 path: &str,
524) -> Result<(), RepographError> {
525 tx.execute(
526 "DELETE FROM chunks_fts WHERE chunk_id IN
527 (SELECT id FROM chunks WHERE repo = ?1 AND path = ?2)",
528 params![repo, path],
529 )?;
530 tx.execute(
531 "DELETE FROM vectors WHERE chunk_id IN
532 (SELECT id FROM chunks WHERE repo = ?1 AND path = ?2)",
533 params![repo, path],
534 )?;
535 tx.execute(
536 "DELETE FROM chunks WHERE repo = ?1 AND path = ?2",
537 params![repo, path],
538 )?;
539 Ok(())
540}
541
542fn insert_chunks(
543 tx: &rusqlite::Transaction<'_>,
544 repo: &str,
545 chunks: &[Chunk],
546 embeddings: Option<&(Vec<Vec<f32>>, String)>,
547) -> Result<(), RepographError> {
548 for (i, chunk) in chunks.iter().enumerate() {
549 tx.execute(
550 "INSERT INTO chunks(repo, path, start_line, end_line, content, prefix)
551 VALUES(?1, ?2, ?3, ?4, ?5, ?6)",
552 params![
553 repo,
554 chunk.path,
555 chunk.start_line,
556 chunk.end_line,
557 chunk.content,
558 chunk.prefix
559 ],
560 )?;
561 let chunk_id = tx.last_insert_rowid();
562 tx.execute(
563 "INSERT INTO chunks_fts(text, chunk_id) VALUES(?1, ?2)",
564 params![chunk.index_text(), chunk_id],
565 )?;
566 if let Some((vectors, model)) = embeddings {
567 if let Some(v) = vectors.get(i) {
568 tx.execute(
569 "INSERT INTO vectors(chunk_id, embedding, model) VALUES(?1, ?2, ?3)",
570 params![chunk_id, vec_to_blob(v), model],
571 )?;
572 }
573 }
574 }
575 Ok(())
576}
577
/// Builds an FTS5 match expression from free text.
///
/// The query is split on every non-alphanumeric character; each distinct
/// lower-cased term is double-quoted and the terms are joined with ` OR `,
/// in first-seen order. Returns `None` when no term remains.
fn fts_query(query: &str) -> Option<String> {
    let mut seen = HashSet::new();
    let terms: Vec<String> = query
        .split(|c: char| !c.is_alphanumeric())
        .filter(|piece| !piece.is_empty())
        .map(str::to_lowercase)
        .filter(|term| seen.insert(term.clone()))
        .map(|term| format!("\"{term}\""))
        .collect();
    (!terms.is_empty()).then(|| terms.join(" OR "))
}
599
/// Returns `n` numbered SQL placeholders starting at `?start`, separated by
/// `", "` (for example `?2, ?3, ?4`). Returns an empty string for `n == 0`.
fn repo_placeholders(n: usize, start: usize) -> String {
    let mut list = String::new();
    for index in start..start + n {
        if !list.is_empty() {
            list.push_str(", ");
        }
        list.push('?');
        list.push_str(&index.to_string());
    }
    list
}
607
/// Serialises `v` as consecutive little-endian 4-byte floats.
fn vec_to_blob(v: &[f32]) -> Vec<u8> {
    v.iter().flat_map(|value| value.to_le_bytes()).collect()
}
615
/// Decodes consecutive little-endian 4-byte floats from `bytes`.
///
/// Trailing bytes that do not form a complete 4-byte group are ignored.
fn blob_to_vec(bytes: &[u8]) -> Vec<f32> {
    let mut values = Vec::with_capacity(bytes.len() / 4);
    for word in bytes.chunks_exact(4) {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(word);
        values.push(f32::from_le_bytes(raw));
    }
    values
}
622
/// Cosine similarity of `a` and `b`.
///
/// Returns `0.0` when the slices differ in length, are empty, either has
/// zero norm, or the result is not finite (for example when an input holds
/// NaN or infinity). Otherwise the result is approximately within `[-1, 1]`.
///
/// The sums are accumulated in `f64`: squares of `f32` values always fit, so
/// very large or very small components no longer overflow to infinity or
/// underflow to zero as they do with `f32` accumulators.
// The final value is a similarity near [-1, 1], so narrowing to f32 only
// loses precision, never range.
#[allow(clippy::cast_possible_truncation)]
fn cosine(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    let (dot, norm_a, norm_b) = a.iter().zip(b).fold(
        (0.0f64, 0.0f64, 0.0f64),
        |(dot, norm_a, norm_b), (&x, &y)| {
            let (x, y) = (f64::from(x), f64::from(y));
            (dot + x * y, norm_a + x * x, norm_b + y * y)
        },
    );
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }
    let similarity = dot / (norm_a.sqrt() * norm_b.sqrt());
    // A NaN score would sort ahead of every real match under `total_cmp`,
    // so degenerate results are reported as "no similarity".
    if similarity.is_finite() {
        similarity as f32
    } else {
        0.0
    }
}
640
#[cfg(test)]
mod tests {
    #![allow(
        clippy::unwrap_used,
        clippy::float_cmp,
        clippy::cast_precision_loss,
        clippy::cast_possible_truncation,
        clippy::cast_sign_loss,
        clippy::unnecessary_literal_bound
    )]
    use super::*;
    use crate::search::chunk::TrackedFile;
    use tempfile::TempDir;

    /// Deterministic embedder: every text maps to a fixed 3-dimensional
    /// vector derived from its length.
    struct StubEmbedder;

    impl Embedder for StubEmbedder {
        fn model_id(&self) -> &str {
            "stub-v1"
        }

        fn embed(&mut self, texts: &[String]) -> Result<Vec<Vec<f32>>, String> {
            let mut vectors = Vec::with_capacity(texts.len());
            for text in texts {
                vectors.push(vec![(text.len() % 7) as f32 + 1.0, 1.0, 0.5]);
            }
            Ok(vectors)
        }
    }

    /// Builds a tracked file whose "hash" is derived from path and text length.
    fn tf(path: &str, text: &str) -> TrackedFile {
        let content_hash = format!("h:{}:{}", path, text.len());
        TrackedFile {
            path: path.to_owned(),
            content_hash,
            text: text.to_owned(),
        }
    }

    /// Opens a fresh store inside a temp dir; the dir must outlive the store.
    fn build_store() -> (TempDir, Store) {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join("repograph").join("index.db");
        let store = Store::open_for_build(&db_path).unwrap();
        (dir, store)
    }

    #[test]
    fn open_existing_missing_is_index_missing() {
        let dir = TempDir::new().unwrap();
        let missing = dir.path().join("nope.db");
        let outcome = Store::open_existing(&missing);
        assert!(matches!(outcome, Err(RepographError::IndexMissing)));
    }

    #[test]
    fn reconcile_then_lexical_finds_exact_token() {
        let (_tmp, mut store) = build_store();
        let corpus = [
            tf("auth.rs", "fn rotate_refresh_token() { /* logic */ }\n"),
            tf("util.rs", "fn unrelated_helper() {}\n"),
        ];
        let stats = store
            .reconcile_repo("api", &corpus, Some("deadbeef"), None)
            .unwrap();
        assert_eq!(stats.files_indexed, 2);

        let hits = store
            .search_lexical("rotate_refresh_token", &[], 10)
            .unwrap();
        assert!(!hits.is_empty());
        let rows = store.fetch_chunks(&hits).unwrap();
        assert!(
            rows.values().any(|row| row.path == "auth.rs"),
            "exact-symbol query surfaces the right file"
        );
    }

    #[test]
    fn incremental_skips_unchanged_and_reprocesses_changed() {
        let (_tmp, mut store) = build_store();
        let first_pass = [
            tf("a.rs", "fn first() {}\n"),
            tf("b.rs", "fn second() {}\n"),
        ];
        store.reconcile_repo("r", &first_pass, None, None).unwrap();

        // Same a.rs, edited b.rs.
        let second_pass = [
            tf("a.rs", "fn first() {}\n"),
            tf("b.rs", "fn second_renamed() {}\n"),
        ];
        let stats = store.reconcile_repo("r", &second_pass, None, None).unwrap();
        assert_eq!(stats.files_unchanged, 1, "a.rs hash matched");
        assert_eq!(stats.files_indexed, 1, "b.rs re-chunked");

        let fresh_hits = store.search_lexical("second_renamed", &[], 10).unwrap();
        assert!(!fresh_hits.is_empty(), "new content searchable");

        let old_hits = store.search_lexical("second", &[], 10).unwrap();
        let old_rows = store.fetch_chunks(&old_hits).unwrap();
        assert!(
            !old_rows
                .values()
                .any(|row| row.content.contains("fn second()")),
            "stale chunk purged"
        );
    }

    #[test]
    fn semantic_upgrade_embeds_previously_lexical_files() {
        let (_tmp, mut store) = build_store();
        let corpus = [tf("a.rs", "fn a() {}\n"), tf("b.rs", "fn b() {}\n")];

        // Lexical-only build first.
        store.reconcile_repo("r", &corpus, None, None).unwrap();
        assert!(
            !store.has_vectors().unwrap(),
            "lexical build wrote no vectors"
        );

        // Same files, now with an embedder.
        let mut first_embedder = StubEmbedder;
        store.ensure_model(first_embedder.model_id()).unwrap();
        let upgraded = store
            .reconcile_repo("r", &corpus, None, Some(&mut first_embedder))
            .unwrap();
        assert_eq!(
            upgraded.files_indexed, 2,
            "unchanged-but-unvectored files are reprocessed to embed them"
        );
        assert_eq!(upgraded.files_unchanged, 0);
        assert!(
            store.has_vectors().unwrap(),
            "vectors present after the semantic upgrade"
        );

        // A further run has nothing left to do.
        let mut second_embedder = StubEmbedder;
        let settled = store
            .reconcile_repo("r", &corpus, None, Some(&mut second_embedder))
            .unwrap();
        assert_eq!(
            settled.files_unchanged, 2,
            "fully-vectored files are skipped"
        );
        assert_eq!(settled.files_indexed, 0);
    }

    #[test]
    fn purges_deleted_files() {
        let (_tmp, mut store) = build_store();
        let doomed = [tf("gone.rs", "fn doomed() {}\n")];
        store.reconcile_repo("r", &doomed, None, None).unwrap();
        assert!(!store.search_lexical("doomed", &[], 10).unwrap().is_empty());

        // Reconciling against an empty file list removes the file.
        let stats = store.reconcile_repo("r", &[], None, None).unwrap();
        assert_eq!(stats.files_purged, 1);
        assert!(store.search_lexical("doomed", &[], 10).unwrap().is_empty());
    }

    #[test]
    fn repo_filter_scopes_results() {
        let (_tmp, mut store) = build_store();
        for (repo, path) in [("api", "a.rs"), ("ui", "b.rs")] {
            store
                .reconcile_repo(repo, &[tf(path, "fn shared_thing() {}\n")], None, None)
                .unwrap();
        }

        let everywhere = store.search_lexical("shared_thing", &[], 10).unwrap();
        assert_eq!(everywhere.len(), 2);

        let only_api = store
            .search_lexical("shared_thing", &["api".to_string()], 10)
            .unwrap();
        let rows = store.fetch_chunks(&only_api).unwrap();
        assert!(rows.values().all(|row| row.repo == "api"));
    }

    #[test]
    fn indexed_commits_recorded() {
        let (_tmp, mut store) = build_store();
        let corpus = [tf("a.rs", "fn a() {}\n")];
        store
            .reconcile_repo("r", &corpus, Some("c0ffee"), None)
            .unwrap();
        let commits = store.indexed_commits().unwrap();
        assert_eq!(commits.get("r"), Some(&Some("c0ffee".to_string())));
    }

    #[test]
    fn fuse_rewards_agreement() {
        let lexical = [1i64, 2, 4];
        let vector = [2i64, 3, 4];
        let fused = fuse(&[&lexical, &vector]);
        let (top_id, _score) = fused[0];
        assert_eq!(top_id, 2, "id present in both lists ranks first");
    }

    #[test]
    fn fts_query_extracts_tokens() {
        assert_eq!(fts_query(" !! "), None);
        let expected = "\"rotate\" OR \"refresh\"".to_string();
        assert_eq!(fts_query("Rotate Refresh"), Some(expected));
    }

    #[test]
    fn cosine_identical_is_one() {
        let v = [1.0f32, 2.0, 3.0];
        let similarity = cosine(&v, &v);
        assert!((similarity - 1.0).abs() < 1e-6);
    }

    #[test]
    fn blob_round_trips() {
        let original = vec![0.5f32, -1.0, 3.25];
        let decoded = blob_to_vec(&vec_to_blob(&original));
        assert_eq!(decoded, original);
    }

    #[test]
    fn schema_version_mismatch_triggers_rebuild() {
        let dir = TempDir::new().unwrap();
        let db_path = dir.path().join("index.db");

        // Build an index, then mark it as written by an older schema.
        let mut stale = Store::open_for_build(&db_path).unwrap();
        stale
            .reconcile_repo("r", &[tf("a.rs", "fn keep() {}\n")], None, None)
            .unwrap();
        stale.meta_set("schema_version", "0").unwrap();
        drop(stale);

        let rebuilt = Store::open_for_build(&db_path).unwrap();
        assert!(
            rebuilt.search_lexical("keep", &[], 10).unwrap().is_empty(),
            "stale-schema index was rebuilt empty"
        );
    }
}