1pub mod fts;
15pub mod schema;
16
17use crate::index::SparseIndex;
18use crate::{Chunk, ChunkId, Document, Result};
19use rusqlite::Connection;
20use std::path::Path;
21use std::sync::Mutex;
22
23pub struct SqliteIndex {
34 conn: Mutex<Connection>,
35}
36
37impl std::fmt::Debug for SqliteIndex {
41 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 f.debug_struct("SqliteIndex").finish_non_exhaustive()
43 }
44}
45
46fn lock_err<T>(e: &std::sync::PoisonError<T>) -> crate::Error {
48 crate::Error::Query(format!("Mutex poisoned: {e}"))
49}
50
51impl SqliteIndex {
52 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
54 let conn = Connection::open(path.as_ref())
55 .map_err(|e| crate::Error::Query(format!("Failed to open SQLite database: {e}")))?;
56 schema::initialize(&conn)?;
57 Ok(Self { conn: Mutex::new(conn) })
58 }
59
60 pub fn open_in_memory() -> Result<Self> {
62 let conn = Connection::open_in_memory()
63 .map_err(|e| crate::Error::Query(format!("Failed to open in-memory database: {e}")))?;
64 schema::initialize(&conn)?;
65 Ok(Self { conn: Mutex::new(conn) })
66 }
67
68 pub fn document_count(&self) -> Result<usize> {
70 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
71 let count: i64 = conn
72 .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
73 .map_err(|e| crate::Error::Query(format!("Failed to count documents: {e}")))?;
74 Ok(count as usize)
75 }
76
77 pub fn chunk_count(&self) -> Result<usize> {
79 contract_pre_configuration!();
81 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
82 let count: i64 = conn
83 .query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0))
84 .map_err(|e| crate::Error::Query(format!("Failed to count chunks: {e}")))?;
85 Ok(count as usize)
86 }
87
88 pub fn needs_reindex(&self, path: &str, hash: &[u8; 32]) -> Result<bool> {
90 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
91 let stored: Option<Vec<u8>> = conn
92 .query_row("SELECT blake3_hash FROM fingerprints WHERE doc_path = ?1", [path], |row| {
93 row.get(0)
94 })
95 .ok();
96
97 match stored {
98 Some(stored_hash) => Ok(stored_hash.as_slice() != hash),
99 None => Ok(true),
100 }
101 }
102
103 pub fn insert_document(
105 &self,
106 doc_id: &str,
107 title: Option<&str>,
108 source: Option<&str>,
109 content: &str,
110 chunks: &[(String, String)],
111 fingerprint: Option<(&str, &[u8; 32])>,
112 ) -> Result<()> {
113 contract_pre_configuration!(doc_id.as_bytes());
115
116 let mut conn = self.conn.lock().map_err(|e| lock_err(&e))?;
117 let tx = conn
118 .transaction()
119 .map_err(|e| crate::Error::Query(format!("Failed to begin transaction: {e}")))?;
120
121 tx.execute("DELETE FROM chunks WHERE doc_id = ?1", [doc_id])
124 .map_err(|e| crate::Error::Query(format!("Failed to delete old chunks: {e}")))?;
125 tx.execute("DELETE FROM documents WHERE id = ?1", [doc_id])
126 .map_err(|e| crate::Error::Query(format!("Failed to delete old document: {e}")))?;
127
128 tx.execute(
129 "INSERT INTO documents (id, title, source, content, chunk_count) VALUES (?1, ?2, ?3, ?4, ?5)",
130 rusqlite::params![doc_id, title, source, content, chunks.len() as i64],
131 )
132 .map_err(|e| crate::Error::Query(format!("Failed to insert document: {e}")))?;
133
134 {
135 let mut stmt = tx
136 .prepare_cached(
137 "INSERT OR REPLACE INTO chunks (id, doc_id, content, position) VALUES (?1, ?2, ?3, ?4)",
138 )
139 .map_err(|e| crate::Error::Query(format!("Failed to prepare chunk insert: {e}")))?;
140
141 for (i, (chunk_id, chunk_content)) in chunks.iter().enumerate() {
142 stmt.execute(rusqlite::params![chunk_id, doc_id, chunk_content, i as i64])
143 .map_err(|e| crate::Error::Query(format!("Failed to insert chunk: {e}")))?;
144 }
145 }
146
147 if let Some((path, hash)) = fingerprint {
148 tx.execute(
149 "INSERT OR REPLACE INTO fingerprints (doc_path, blake3_hash, chunk_count) VALUES (?1, ?2, ?3)",
150 rusqlite::params![path, hash.as_slice(), chunks.len() as i64],
151 )
152 .map_err(|e| crate::Error::Query(format!("Failed to update fingerprint: {e}")))?;
153 }
154
155 tx.commit()
156 .map_err(|e| crate::Error::Query(format!("Failed to commit transaction: {e}")))?;
157
158 Ok(())
159 }
160
161 pub fn remove_document(&self, doc_id: &str) -> Result<()> {
166 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
167 conn.execute("DELETE FROM chunks WHERE doc_id = ?1", [doc_id])
168 .map_err(|e| crate::Error::Query(format!("Failed to delete chunks: {e}")))?;
169 conn.execute("DELETE FROM documents WHERE id = ?1", [doc_id])
170 .map_err(|e| crate::Error::Query(format!("Failed to remove document: {e}")))?;
171 Ok(())
172 }
173
174 pub fn list_fingerprints(&self) -> Result<Vec<(String, Vec<u8>)>> {
178 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
179 let mut stmt = conn
180 .prepare("SELECT doc_path, blake3_hash FROM fingerprints")
181 .map_err(|e| crate::Error::Query(format!("Failed to list fingerprints: {e}")))?;
182 let rows = stmt
183 .query_map([], |row| {
184 let path: String = row.get(0)?;
185 let hash: Vec<u8> = row.get(1)?;
186 Ok((path, hash))
187 })
188 .map_err(|e| crate::Error::Query(format!("Failed to query fingerprints: {e}")))?;
189 let mut results = Vec::new();
190 for row in rows {
191 results.push(
192 row.map_err(|e| crate::Error::Query(format!("Failed to read fingerprint: {e}")))?,
193 );
194 }
195 Ok(results)
196 }
197
198 pub fn remove_by_source(&self, source: &str) -> Result<usize> {
202 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
203 let mut stmt = conn
205 .prepare("SELECT id FROM documents WHERE source = ?1")
206 .map_err(|e| crate::Error::Query(format!("Failed to find docs by source: {e}")))?;
207 let ids: Vec<String> = stmt
208 .query_map([source], |row| row.get(0))
209 .map_err(|e| crate::Error::Query(format!("Failed to query docs: {e}")))?
210 .filter_map(|r| r.ok())
211 .collect();
212
213 for doc_id in &ids {
214 conn.execute("DELETE FROM chunks WHERE doc_id = ?1", [doc_id])
215 .map_err(|e| crate::Error::Query(format!("Failed to delete chunks: {e}")))?;
216 conn.execute("DELETE FROM documents WHERE id = ?1", [doc_id])
217 .map_err(|e| crate::Error::Query(format!("Failed to delete document: {e}")))?;
218 }
219
220 conn.execute("DELETE FROM fingerprints WHERE doc_path = ?1", [source])
222 .map_err(|e| crate::Error::Query(format!("Failed to delete fingerprint: {e}")))?;
223
224 Ok(ids.len())
225 }
226
227 pub fn search_fts(&self, query: &str, k: usize) -> Result<Vec<fts::FtsResult>> {
229 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
230 fts::search(&conn, query, k)
231 }
232
233 pub fn get_chunk(&self, chunk_id: &str) -> Result<Option<String>> {
235 contract_pre_configuration!(chunk_id.as_bytes());
237 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
238 let content: Option<String> = conn
239 .query_row("SELECT content FROM chunks WHERE id = ?1", [chunk_id], |row| row.get(0))
240 .ok();
241 Ok(content)
242 }
243
244 pub fn get_metadata(&self, key: &str) -> Result<Option<String>> {
246 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
247 let value: Option<String> = conn
248 .query_row("SELECT value FROM metadata WHERE key = ?1", [key], |row| row.get(0))
249 .ok();
250 Ok(value)
251 }
252
253 pub fn set_metadata(&self, key: &str, value: &str) -> Result<()> {
255 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
256 conn.execute("INSERT OR REPLACE INTO metadata (key, value) VALUES (?1, ?2)", [key, value])
257 .map_err(|e| crate::Error::Query(format!("Failed to set metadata: {e}")))?;
258 Ok(())
259 }
260
261 pub fn optimize(&self) -> Result<()> {
263 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
264 fts::optimize(&conn)?;
265 conn.execute_batch("VACUUM;")
266 .map_err(|e| crate::Error::Query(format!("VACUUM failed: {e}")))?;
267 Ok(())
268 }
269}
270
271impl SparseIndex for SqliteIndex {
272 fn add(&mut self, chunk: &Chunk) {
273 let doc_id = chunk.document_id.to_string();
274 let chunk_id = chunk.id.to_string();
275 if let Ok(conn) = self.conn.lock() {
276 let _ = conn.execute(
277 "INSERT OR IGNORE INTO documents (id, content) VALUES (?1, '')",
278 [&doc_id],
279 );
280 let _ = conn.execute(
281 "INSERT OR REPLACE INTO chunks (id, doc_id, content, position) VALUES (?1, ?2, ?3, 0)",
282 rusqlite::params![chunk_id, doc_id, chunk.content],
283 );
284 }
285 }
286
287 fn add_batch(&mut self, chunks: &[Chunk]) {
288 let Ok(mut conn) = self.conn.lock() else {
289 return;
290 };
291 let Ok(tx) = conn.transaction() else {
292 return;
293 };
294
295 let mut doc_positions: std::collections::HashMap<String, i64> =
297 std::collections::HashMap::new();
298
299 for chunk in chunks {
300 let doc_id = chunk.document_id.to_string();
301 let chunk_id = chunk.id.to_string();
302 let pos = doc_positions.entry(doc_id.clone()).or_insert(0);
303 let _ = tx.execute(
304 "INSERT OR IGNORE INTO documents (id, content) VALUES (?1, '')",
305 [&doc_id],
306 );
307 let _ = tx.execute(
308 "INSERT OR REPLACE INTO chunks (id, doc_id, content, position) VALUES (?1, ?2, ?3, ?4)",
309 rusqlite::params![chunk_id, doc_id, chunk.content, *pos],
310 );
311 *pos += 1;
312 }
313
314 let _ = tx.commit();
315 }
316
317 fn search(&self, query: &str, k: usize) -> Vec<(ChunkId, f32)> {
318 let Ok(conn) = self.conn.lock() else {
319 return Vec::new();
320 };
321 let Ok(results) = fts::search(&conn, query, k) else {
322 return Vec::new();
323 };
324
325 results
326 .into_iter()
327 .filter_map(|r| {
328 uuid::Uuid::parse_str(&r.chunk_id).ok().map(|uuid| (ChunkId(uuid), r.score as f32))
329 })
330 .collect()
331 }
332
333 fn remove(&mut self, chunk_id: ChunkId) {
334 let id_str = chunk_id.to_string();
335 if let Ok(conn) = self.conn.lock() {
336 let _ = conn.execute("DELETE FROM chunks WHERE id = ?1", [&id_str]);
337 }
338 }
339
340 fn len(&self) -> usize {
341 self.chunk_count().unwrap_or(0)
342 }
343}
344
/// Counters describing the contents of a `SqliteStore`.
#[derive(Clone, Debug)]
pub struct StoreStats {
    /// Number of rows in the `documents` table.
    pub document_count: usize,
    /// Number of rows in the `chunks` table.
    pub chunk_count: usize,
    /// Number of rows in the `fingerprints` table.
    pub fingerprint_count: usize,
    /// Size of the database file in bytes; 0 when there is no file path or
    /// its metadata cannot be read.
    pub db_size_bytes: u64,
}
359
360pub struct SqliteStore {
365 index: SqliteIndex,
366 path: Option<std::path::PathBuf>,
367}
368
369impl std::fmt::Debug for SqliteStore {
370 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
371 f.debug_struct("SqliteStore").field("path", &self.path).finish_non_exhaustive()
372 }
373}
374
375impl SqliteStore {
376 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
378 let path = path.as_ref().to_path_buf();
379 let index = SqliteIndex::open(&path)?;
380 Ok(Self { index, path: Some(path) })
381 }
382
383 pub fn open_in_memory() -> Result<Self> {
385 let index = SqliteIndex::open_in_memory()?;
386 Ok(Self { index, path: None })
387 }
388
389 pub fn index_document(
391 &self,
392 doc: &Document,
393 chunks: &[Chunk],
394 fingerprint: Option<(&str, &[u8; 32])>,
395 ) -> Result<()> {
396 let doc_id = doc.id.to_string();
397 let chunk_pairs: Vec<(String, String)> =
398 chunks.iter().map(|c| (c.id.to_string(), c.content.clone())).collect();
399
400 self.index.insert_document(
401 &doc_id,
402 doc.title.as_deref(),
403 doc.source.as_deref(),
404 &doc.content,
405 &chunk_pairs,
406 fingerprint,
407 )
408 }
409
410 pub fn search(&self, query: &str, k: usize) -> Result<Vec<fts::FtsResult>> {
414 self.index.search_fts(query, k)
415 }
416
417 pub fn needs_reindex(&self, path: &str, hash: &[u8; 32]) -> Result<bool> {
419 self.index.needs_reindex(path, hash)
420 }
421
422 pub fn list_fingerprints(&self) -> Result<Vec<(String, Vec<u8>)>> {
424 self.index.list_fingerprints()
425 }
426
427 pub fn remove_by_source(&self, source: &str) -> Result<usize> {
429 self.index.remove_by_source(source)
430 }
431
432 pub fn stats(&self) -> Result<StoreStats> {
434 let db_size_bytes = self
435 .path
436 .as_ref()
437 .and_then(|p| std::fs::metadata(p).ok())
438 .map(|m| m.len())
439 .unwrap_or(0);
440
441 Ok(StoreStats {
442 document_count: self.index.document_count()?,
443 chunk_count: self.index.chunk_count()?,
444 fingerprint_count: self.fingerprint_count()?,
445 db_size_bytes,
446 })
447 }
448
449 fn fingerprint_count(&self) -> Result<usize> {
451 let conn = self.index.conn.lock().map_err(|e| lock_err(&e))?;
452 let count: i64 = conn
453 .query_row("SELECT COUNT(*) FROM fingerprints", [], |r| r.get(0))
454 .map_err(|e| crate::Error::Query(format!("Failed to count fingerprints: {e}")))?;
455 Ok(count as usize)
456 }
457
458 pub fn get_metadata(&self, key: &str) -> Result<Option<String>> {
460 self.index.get_metadata(key)
461 }
462
463 pub fn set_metadata(&self, key: &str, value: &str) -> Result<()> {
465 self.index.set_metadata(key, value)
466 }
467
468 pub fn optimize(&self) -> Result<()> {
470 self.index.optimize()
471 }
472
473 pub fn as_index(&self) -> &SqliteIndex {
475 &self.index
476 }
477}
478
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Document, DocumentId};

    /// Builds a document holding `content`.
    fn make_doc(content: &str) -> Document {
        Document::new(content)
    }

    /// Builds a chunk of `doc_id` whose offsets span all of `content`.
    fn make_chunk(doc_id: DocumentId, content: &str) -> Chunk {
        let length = content.len();
        Chunk {
            id: ChunkId::new(),
            document_id: doc_id,
            content: content.to_string(),
            start_offset: 0,
            end_offset: length,
            metadata: crate::ChunkMetadata::default(),
            embedding: None,
        }
    }

    /// Builds an owned `(chunk_id, content)` pair for `insert_document`.
    fn pair(id: &str, text: &str) -> (String, String) {
        (id.to_string(), text.to_string())
    }

    // ---- SqliteIndex ----

    /// Inserted documents and chunks are counted and readable.
    #[test]
    fn test_index_roundtrip() {
        let index = SqliteIndex::open_in_memory().unwrap();
        let chunks = [pair("c1", "SIMD vector operations"), pair("c2", "GPU kernel dispatch")];
        index
            .insert_document(
                "doc1",
                Some("Test Doc"),
                Some("/test.md"),
                "full content here",
                &chunks,
                None,
            )
            .unwrap();

        assert_eq!(index.document_count().unwrap(), 1);
        assert_eq!(index.chunk_count().unwrap(), 2);

        let stored = index.get_chunk("c1").unwrap();
        assert_eq!(stored.as_deref(), Some("SIMD vector operations"));
    }

    /// Full-text search returns only the matching chunk.
    #[test]
    fn test_index_search() {
        let index = SqliteIndex::open_in_memory().unwrap();
        let chunks = [
            pair("c1", "machine learning algorithms for classification"),
            pair("c2", "database indexing and query optimization"),
        ];
        index.insert_document("doc1", None, None, "", &chunks, None).unwrap();

        let hits = index.search_fts("machine learning", 10).unwrap();
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].chunk_id, "c1");
    }

    /// `needs_reindex` is false only for a known path with an equal hash.
    #[test]
    fn test_index_fingerprint_reindex() {
        let index = SqliteIndex::open_in_memory().unwrap();
        let stored_hash = [1u8; 32];
        let other_hash = [2u8; 32];

        let chunks = [pair("c1", "content")];
        index
            .insert_document("doc1", None, None, "", &chunks, Some(("/test.md", &stored_hash)))
            .unwrap();

        // Same path, same hash.
        assert!(!index.needs_reindex("/test.md", &stored_hash).unwrap());
        // Same path, different hash.
        assert!(index.needs_reindex("/test.md", &other_hash).unwrap());
        // Path that was never fingerprinted.
        assert!(index.needs_reindex("/unknown.md", &stored_hash).unwrap());
    }

    /// Removing a document also removes its chunks.
    #[test]
    fn test_index_remove_document() {
        let index = SqliteIndex::open_in_memory().unwrap();
        let chunks = [pair("c1", "some content")];
        index.insert_document("doc1", None, None, "", &chunks, None).unwrap();

        assert_eq!(index.document_count().unwrap(), 1);
        index.remove_document("doc1").unwrap();
        assert_eq!(index.document_count().unwrap(), 0);
        assert_eq!(index.chunk_count().unwrap(), 0);
    }

    /// Metadata round-trips, and a missing key reads as `None`.
    #[test]
    fn test_index_metadata() {
        let index = SqliteIndex::open_in_memory().unwrap();
        index.set_metadata("version", "1.0.0").unwrap();

        let present = index.get_metadata("version").unwrap();
        assert_eq!(present, Some("1.0.0".to_string()));
        let absent = index.get_metadata("nonexistent").unwrap();
        assert_eq!(absent, None);
    }

    /// Re-inserting a document replaces its previous chunks.
    #[test]
    fn test_index_update_document() {
        let index = SqliteIndex::open_in_memory().unwrap();
        let before = [pair("c1", "old content")];
        let after = [pair("c2", "new content")];
        index.insert_document("doc1", None, None, "", &before, None).unwrap();
        index.insert_document("doc1", None, None, "", &after, None).unwrap();

        assert_eq!(index.chunk_count().unwrap(), 1);
        assert!(index.get_chunk("c1").unwrap().is_none());
        assert_eq!(index.get_chunk("c2").unwrap().unwrap(), "new content");
    }

    // ---- SparseIndex trait ----

    /// `add` stores one chunk, visible through `len`.
    #[test]
    fn test_sparse_index_add_and_len() {
        let mut index = SqliteIndex::open_in_memory().unwrap();
        let chunk = make_chunk(DocumentId::new(), "sparse index test content");
        index.add(&chunk);
        assert_eq!(index.len(), 1);
    }

    /// `add_batch` stores every chunk of the batch.
    #[test]
    fn test_sparse_index_add_batch() {
        let mut index = SqliteIndex::open_in_memory().unwrap();
        let doc_id = DocumentId::new();
        let batch = [
            make_chunk(doc_id, "first chunk content"),
            make_chunk(doc_id, "second chunk content"),
        ];
        index.add_batch(&batch);
        assert_eq!(index.len(), 2);
    }

    /// `remove` deletes a previously added chunk.
    #[test]
    fn test_sparse_index_remove() {
        let mut index = SqliteIndex::open_in_memory().unwrap();
        let chunk = make_chunk(DocumentId::new(), "content to remove");
        let removed_id = chunk.id;
        index.add(&chunk);
        assert_eq!(index.len(), 1);
        index.remove(removed_id);
        assert_eq!(index.len(), 0);
    }

    // ---- SqliteStore ----

    /// A document indexed through the store is found by search.
    #[test]
    fn test_store_index_and_search() {
        let store = SqliteStore::open_in_memory().unwrap();
        let text = "SIMD vector operations for tensor computation";
        let doc = make_doc(text);
        let chunks = [make_chunk(doc.id, text)];
        store.index_document(&doc, &chunks, None).unwrap();

        let hits = store.search("SIMD tensor", 10).unwrap();
        assert!(!hits.is_empty());
    }

    /// `stats` reports document, chunk and fingerprint counts.
    #[test]
    fn test_store_stats() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("content");
        let chunks = [make_chunk(doc.id, "chunk one"), make_chunk(doc.id, "chunk two")];
        let zero_hash = [0u8; 32];
        store.index_document(&doc, &chunks, Some(("/test.md", &zero_hash))).unwrap();

        let stats = store.stats().unwrap();
        assert_eq!(stats.document_count, 1);
        assert_eq!(stats.chunk_count, 2);
        assert_eq!(stats.fingerprint_count, 1);
    }

    /// The store forwards `needs_reindex` to the index.
    #[test]
    fn test_store_needs_reindex() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("content");
        let chunks = [make_chunk(doc.id, "chunk")];
        let stored_hash = [42u8; 32];
        store.index_document(&doc, &chunks, Some(("/doc.md", &stored_hash))).unwrap();

        assert!(!store.needs_reindex("/doc.md", &stored_hash).unwrap());
        assert!(store.needs_reindex("/doc.md", &[0u8; 32]).unwrap());
        assert!(store.needs_reindex("/other.md", &stored_hash).unwrap());
    }

    /// Metadata round-trips through the store.
    #[test]
    fn test_store_metadata() {
        let store = SqliteStore::open_in_memory().unwrap();
        store.set_metadata("batuta_version", "0.6.0").unwrap();

        let value = store.get_metadata("batuta_version").unwrap();
        assert_eq!(value, Some("0.6.0".to_string()));
    }

    /// `optimize` succeeds on a populated store.
    #[test]
    fn test_store_optimize() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("content");
        let chunks = [make_chunk(doc.id, "some chunk content")];
        store.index_document(&doc, &chunks, None).unwrap();
        store.optimize().unwrap();
    }

    /// 100 documents of 5 chunks each are counted, and search honours `k`.
    #[test]
    fn test_store_large_batch() {
        let store = SqliteStore::open_in_memory().unwrap();

        for i in 0..100 {
            let doc = make_doc(&format!("Document {i} about machine learning"));
            let mut chunks: Vec<Chunk> = Vec::new();
            for j in 0..5 {
                let text = format!("Chunk {j} of doc {i}: machine learning algorithms topic {j}");
                chunks.push(make_chunk(doc.id, &text));
            }
            store.index_document(&doc, &chunks, None).unwrap();
        }

        let stats = store.stats().unwrap();
        assert_eq!(stats.document_count, 100);
        assert_eq!(stats.chunk_count, 500);

        let hits = store.search("machine learning", 10).unwrap();
        assert_eq!(hits.len(), 10);
    }

    /// Repeating the same search yields the same ids and scores in order.
    #[test]
    fn test_search_deterministic() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("determinism test");
        let chunks = [
            make_chunk(doc.id, "alpha beta gamma delta"),
            make_chunk(doc.id, "epsilon zeta alpha alpha"),
        ];
        store.index_document(&doc, &chunks, None).unwrap();

        let baseline = store.search("alpha", 10).unwrap();
        for _ in 0..10 {
            let repeat = store.search("alpha", 10).unwrap();
            assert_eq!(repeat.len(), baseline.len());
            for (expected, actual) in baseline.iter().zip(repeat.iter()) {
                assert_eq!(expected.chunk_id, actual.chunk_id);
                assert!((expected.score - actual.score).abs() < f64::EPSILON);
            }
        }
    }

    // ---- Fingerprints and removal by source ----

    /// A fresh index has no fingerprints.
    #[test]
    fn test_list_fingerprints_empty() {
        let index = SqliteIndex::open_in_memory().unwrap();
        assert!(index.list_fingerprints().unwrap().is_empty());
    }

    /// Every fingerprinted path is listed.
    #[test]
    fn test_list_fingerprints_populated() {
        let index = SqliteIndex::open_in_memory().unwrap();
        let hash_a = [1u8; 32];
        let hash_b = [2u8; 32];

        let chunks_a = [pair("c1", "content a")];
        index
            .insert_document("doc1", None, Some("/a.md"), "", &chunks_a, Some(("/a.md", &hash_a)))
            .unwrap();
        let chunks_b = [pair("c2", "content b")];
        index
            .insert_document("doc2", None, Some("/b.md"), "", &chunks_b, Some(("/b.md", &hash_b)))
            .unwrap();

        let fingerprints = index.list_fingerprints().unwrap();
        assert_eq!(fingerprints.len(), 2);
        assert!(fingerprints.iter().any(|(p, _)| p == "/a.md"));
        assert!(fingerprints.iter().any(|(p, _)| p == "/b.md"));
    }

    /// Removing by source deletes only that source's rows and fingerprint.
    #[test]
    fn test_remove_by_source() {
        let index = SqliteIndex::open_in_memory().unwrap();
        let hash = [1u8; 32];

        let chunks_a = [pair("c1", "chunk 1"), pair("c2", "chunk 2")];
        index
            .insert_document(
                "doc1",
                None,
                Some("/a.md"),
                "full content",
                &chunks_a,
                Some(("/a.md", &hash)),
            )
            .unwrap();
        let chunks_b = [pair("c3", "chunk 3")];
        index
            .insert_document(
                "doc2",
                None,
                Some("/b.md"),
                "other content",
                &chunks_b,
                Some(("/b.md", &hash)),
            )
            .unwrap();

        assert_eq!(index.document_count().unwrap(), 2);
        assert_eq!(index.chunk_count().unwrap(), 3);

        let removed = index.remove_by_source("/a.md").unwrap();
        assert_eq!(removed, 1);
        assert_eq!(index.document_count().unwrap(), 1);
        assert_eq!(index.chunk_count().unwrap(), 1);

        // The removed source lost its fingerprint; the other one kept it.
        assert!(index.needs_reindex("/a.md", &hash).unwrap());
        assert!(!index.needs_reindex("/b.md", &hash).unwrap());
    }

    /// Removing an unknown source removes nothing and is not an error.
    #[test]
    fn test_remove_by_source_nonexistent() {
        let index = SqliteIndex::open_in_memory().unwrap();
        let removed = index.remove_by_source("/nonexistent.md").unwrap();
        assert_eq!(removed, 0);
    }
}