1pub mod fts;
15pub mod schema;
16
17use crate::index::SparseIndex;
18use crate::{Chunk, ChunkId, Document, Result};
19use rusqlite::Connection;
20use std::path::Path;
21use std::sync::Mutex;
22
/// SQLite-backed full-text index over documents and chunks.
///
/// All access is serialized through an internal [`Mutex`], so `&self`
/// methods are safe to call from multiple threads.
pub struct SqliteIndex {
    // Single connection guarded by a mutex; presumably rusqlite's
    // `Connection` is not `Sync` — confirm against the rusqlite version used.
    conn: Mutex<Connection>,
}

impl std::fmt::Debug for SqliteIndex {
    // Manual impl that hides the connection; prints "SqliteIndex { .. }".
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SqliteIndex").finish_non_exhaustive()
    }
}

/// Convert a poisoned-mutex error into the crate's query error variant.
fn lock_err<T>(e: &std::sync::PoisonError<T>) -> crate::Error {
    crate::Error::Query(format!("Mutex poisoned: {e}"))
}
50
51impl SqliteIndex {
52 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
54 let conn = Connection::open(path.as_ref())
55 .map_err(|e| crate::Error::Query(format!("Failed to open SQLite database: {e}")))?;
56 schema::initialize(&conn)?;
57 Ok(Self { conn: Mutex::new(conn) })
58 }
59
60 pub fn open_in_memory() -> Result<Self> {
62 let conn = Connection::open_in_memory()
63 .map_err(|e| crate::Error::Query(format!("Failed to open in-memory database: {e}")))?;
64 schema::initialize(&conn)?;
65 Ok(Self { conn: Mutex::new(conn) })
66 }
67
68 pub fn document_count(&self) -> Result<usize> {
70 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
71 let count: i64 = conn
72 .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
73 .map_err(|e| crate::Error::Query(format!("Failed to count documents: {e}")))?;
74 Ok(count as usize)
75 }
76
77 pub fn chunk_count(&self) -> Result<usize> {
79 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
80 let count: i64 = conn
81 .query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0))
82 .map_err(|e| crate::Error::Query(format!("Failed to count chunks: {e}")))?;
83 Ok(count as usize)
84 }
85
86 pub fn needs_reindex(&self, path: &str, hash: &[u8; 32]) -> Result<bool> {
88 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
89 let stored: Option<Vec<u8>> = conn
90 .query_row("SELECT blake3_hash FROM fingerprints WHERE doc_path = ?1", [path], |row| {
91 row.get(0)
92 })
93 .ok();
94
95 match stored {
96 Some(stored_hash) => Ok(stored_hash.as_slice() != hash),
97 None => Ok(true),
98 }
99 }
100
    /// Insert (or fully replace) a document and its chunks in one transaction.
    ///
    /// Any existing document with the same `doc_id` — and its chunks — is
    /// removed first, so repeated calls are a full replace. When `fingerprint`
    /// is `Some((path, hash))`, the BLAKE3 hash for `path` is upserted so
    /// `needs_reindex` can later detect unchanged files.
    ///
    /// `chunks` are `(chunk_id, content)` pairs; slice order becomes the
    /// stored `position`.
    pub fn insert_document(
        &self,
        doc_id: &str,
        title: Option<&str>,
        source: Option<&str>,
        content: &str,
        chunks: &[(String, String)],
        fingerprint: Option<(&str, &[u8; 32])>,
    ) -> Result<()> {
        let mut conn = self.conn.lock().map_err(|e| lock_err(&e))?;
        let tx = conn
            .transaction()
            .map_err(|e| crate::Error::Query(format!("Failed to begin transaction: {e}")))?;

        // Replace semantics: clear any previous rows for this document id.
        tx.execute("DELETE FROM chunks WHERE doc_id = ?1", [doc_id])
            .map_err(|e| crate::Error::Query(format!("Failed to delete old chunks: {e}")))?;
        tx.execute("DELETE FROM documents WHERE id = ?1", [doc_id])
            .map_err(|e| crate::Error::Query(format!("Failed to delete old document: {e}")))?;

        tx.execute(
            "INSERT INTO documents (id, title, source, content, chunk_count) VALUES (?1, ?2, ?3, ?4, ?5)",
            rusqlite::params![doc_id, title, source, content, chunks.len() as i64],
        )
        .map_err(|e| crate::Error::Query(format!("Failed to insert document: {e}")))?;

        // Scope the cached statement so its borrow of `tx` ends before commit.
        {
            let mut stmt = tx
                .prepare_cached(
                    "INSERT OR REPLACE INTO chunks (id, doc_id, content, position) VALUES (?1, ?2, ?3, ?4)",
                )
                .map_err(|e| crate::Error::Query(format!("Failed to prepare chunk insert: {e}")))?;

            for (i, (chunk_id, chunk_content)) in chunks.iter().enumerate() {
                stmt.execute(rusqlite::params![chunk_id, doc_id, chunk_content, i as i64])
                    .map_err(|e| crate::Error::Query(format!("Failed to insert chunk: {e}")))?;
            }
        }

        if let Some((path, hash)) = fingerprint {
            tx.execute(
                "INSERT OR REPLACE INTO fingerprints (doc_path, blake3_hash, chunk_count) VALUES (?1, ?2, ?3)",
                rusqlite::params![path, hash.as_slice(), chunks.len() as i64],
            )
            .map_err(|e| crate::Error::Query(format!("Failed to update fingerprint: {e}")))?;
        }

        tx.commit()
            .map_err(|e| crate::Error::Query(format!("Failed to commit transaction: {e}")))?;

        Ok(())
    }
155
156 pub fn remove_document(&self, doc_id: &str) -> Result<()> {
161 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
162 conn.execute("DELETE FROM chunks WHERE doc_id = ?1", [doc_id])
163 .map_err(|e| crate::Error::Query(format!("Failed to delete chunks: {e}")))?;
164 conn.execute("DELETE FROM documents WHERE id = ?1", [doc_id])
165 .map_err(|e| crate::Error::Query(format!("Failed to remove document: {e}")))?;
166 Ok(())
167 }
168
169 pub fn list_fingerprints(&self) -> Result<Vec<(String, Vec<u8>)>> {
173 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
174 let mut stmt = conn
175 .prepare("SELECT doc_path, blake3_hash FROM fingerprints")
176 .map_err(|e| crate::Error::Query(format!("Failed to list fingerprints: {e}")))?;
177 let rows = stmt
178 .query_map([], |row| {
179 let path: String = row.get(0)?;
180 let hash: Vec<u8> = row.get(1)?;
181 Ok((path, hash))
182 })
183 .map_err(|e| crate::Error::Query(format!("Failed to query fingerprints: {e}")))?;
184 let mut results = Vec::new();
185 for row in rows {
186 results.push(
187 row.map_err(|e| crate::Error::Query(format!("Failed to read fingerprint: {e}")))?,
188 );
189 }
190 Ok(results)
191 }
192
193 pub fn remove_by_source(&self, source: &str) -> Result<usize> {
197 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
198 let mut stmt = conn
200 .prepare("SELECT id FROM documents WHERE source = ?1")
201 .map_err(|e| crate::Error::Query(format!("Failed to find docs by source: {e}")))?;
202 let ids: Vec<String> = stmt
203 .query_map([source], |row| row.get(0))
204 .map_err(|e| crate::Error::Query(format!("Failed to query docs: {e}")))?
205 .filter_map(|r| r.ok())
206 .collect();
207
208 for doc_id in &ids {
209 conn.execute("DELETE FROM chunks WHERE doc_id = ?1", [doc_id])
210 .map_err(|e| crate::Error::Query(format!("Failed to delete chunks: {e}")))?;
211 conn.execute("DELETE FROM documents WHERE id = ?1", [doc_id])
212 .map_err(|e| crate::Error::Query(format!("Failed to delete document: {e}")))?;
213 }
214
215 conn.execute("DELETE FROM fingerprints WHERE doc_path = ?1", [source])
217 .map_err(|e| crate::Error::Query(format!("Failed to delete fingerprint: {e}")))?;
218
219 Ok(ids.len())
220 }
221
    /// Run a full-text query via the `fts` module, returning up to `k` results.
    pub fn search_fts(&self, query: &str, k: usize) -> Result<Vec<fts::FtsResult>> {
        let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
        fts::search(&conn, query, k)
    }
227
228 pub fn get_chunk(&self, chunk_id: &str) -> Result<Option<String>> {
230 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
231 let content: Option<String> = conn
232 .query_row("SELECT content FROM chunks WHERE id = ?1", [chunk_id], |row| row.get(0))
233 .ok();
234 Ok(content)
235 }
236
237 pub fn get_metadata(&self, key: &str) -> Result<Option<String>> {
239 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
240 let value: Option<String> = conn
241 .query_row("SELECT value FROM metadata WHERE key = ?1", [key], |row| row.get(0))
242 .ok();
243 Ok(value)
244 }
245
246 pub fn set_metadata(&self, key: &str, value: &str) -> Result<()> {
248 let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
249 conn.execute("INSERT OR REPLACE INTO metadata (key, value) VALUES (?1, ?2)", [key, value])
250 .map_err(|e| crate::Error::Query(format!("Failed to set metadata: {e}")))?;
251 Ok(())
252 }
253
    /// Compact the index: run the fts module's optimize pass, then VACUUM.
    ///
    /// VACUUM cannot run inside a transaction, which is why it is issued via
    /// `execute_batch` on the raw connection rather than through a tx.
    pub fn optimize(&self) -> Result<()> {
        let conn = self.conn.lock().map_err(|e| lock_err(&e))?;
        fts::optimize(&conn)?;
        conn.execute_batch("VACUUM;")
            .map_err(|e| crate::Error::Query(format!("VACUUM failed: {e}")))?;
        Ok(())
    }
262}
263
264impl SparseIndex for SqliteIndex {
    /// Add a single chunk, creating a placeholder document row if needed.
    ///
    /// The trait method returns `()`, so lock and SQL failures are dropped
    /// here by design (`let _ =`). Position is hard-coded to 0: chunks added
    /// one at a time do not get sequential positions — use `add_batch` for
    /// that. NOTE(review): assumes `position` does not affect search results
    /// — confirm against the schema/fts modules.
    fn add(&mut self, chunk: &Chunk) {
        let doc_id = chunk.document_id.to_string();
        let chunk_id = chunk.id.to_string();
        if let Ok(conn) = self.conn.lock() {
            let _ = conn.execute(
                "INSERT OR IGNORE INTO documents (id, content) VALUES (?1, '')",
                [&doc_id],
            );
            let _ = conn.execute(
                "INSERT OR REPLACE INTO chunks (id, doc_id, content, position) VALUES (?1, ?2, ?3, 0)",
                rusqlite::params![chunk_id, doc_id, chunk.content],
            );
        }
    }
279
280 fn add_batch(&mut self, chunks: &[Chunk]) {
281 let Ok(mut conn) = self.conn.lock() else {
282 return;
283 };
284 let Ok(tx) = conn.transaction() else {
285 return;
286 };
287
288 let mut doc_positions: std::collections::HashMap<String, i64> =
290 std::collections::HashMap::new();
291
292 for chunk in chunks {
293 let doc_id = chunk.document_id.to_string();
294 let chunk_id = chunk.id.to_string();
295 let pos = doc_positions.entry(doc_id.clone()).or_insert(0);
296 let _ = tx.execute(
297 "INSERT OR IGNORE INTO documents (id, content) VALUES (?1, '')",
298 [&doc_id],
299 );
300 let _ = tx.execute(
301 "INSERT OR REPLACE INTO chunks (id, doc_id, content, position) VALUES (?1, ?2, ?3, ?4)",
302 rusqlite::params![chunk_id, doc_id, chunk.content, *pos],
303 );
304 *pos += 1;
305 }
306
307 let _ = tx.commit();
308 }
309
310 fn search(&self, query: &str, k: usize) -> Vec<(ChunkId, f32)> {
311 let Ok(conn) = self.conn.lock() else {
312 return Vec::new();
313 };
314 let Ok(results) = fts::search(&conn, query, k) else {
315 return Vec::new();
316 };
317
318 results
319 .into_iter()
320 .filter_map(|r| {
321 uuid::Uuid::parse_str(&r.chunk_id).ok().map(|uuid| (ChunkId(uuid), r.score as f32))
322 })
323 .collect()
324 }
325
326 fn remove(&mut self, chunk_id: ChunkId) {
327 let id_str = chunk_id.to_string();
328 if let Ok(conn) = self.conn.lock() {
329 let _ = conn.execute("DELETE FROM chunks WHERE id = ?1", [&id_str]);
330 }
331 }
332
333 fn len(&self) -> usize {
334 self.chunk_count().unwrap_or(0)
335 }
336}
337
/// Point-in-time size statistics for a [`SqliteStore`].
#[derive(Debug, Clone)]
pub struct StoreStats {
    /// Rows in the `documents` table.
    pub document_count: usize,
    /// Rows in the `chunks` table.
    pub chunk_count: usize,
    /// Rows in the `fingerprints` table.
    pub fingerprint_count: usize,
    /// On-disk database file size in bytes; 0 for in-memory stores.
    pub db_size_bytes: u64,
}

/// Higher-level wrapper around [`SqliteIndex`] that works with `Document`
/// and `Chunk` values directly.
pub struct SqliteStore {
    index: SqliteIndex,
    // `None` for in-memory stores; used only to report the DB file size.
    path: Option<std::path::PathBuf>,
}

impl std::fmt::Debug for SqliteStore {
    // Manual impl: show only the path, keep internals opaque.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SqliteStore").field("path", &self.path).finish_non_exhaustive()
    }
}
367
368impl SqliteStore {
369 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
371 let path = path.as_ref().to_path_buf();
372 let index = SqliteIndex::open(&path)?;
373 Ok(Self { index, path: Some(path) })
374 }
375
376 pub fn open_in_memory() -> Result<Self> {
378 let index = SqliteIndex::open_in_memory()?;
379 Ok(Self { index, path: None })
380 }
381
382 pub fn index_document(
384 &self,
385 doc: &Document,
386 chunks: &[Chunk],
387 fingerprint: Option<(&str, &[u8; 32])>,
388 ) -> Result<()> {
389 let doc_id = doc.id.to_string();
390 let chunk_pairs: Vec<(String, String)> =
391 chunks.iter().map(|c| (c.id.to_string(), c.content.clone())).collect();
392
393 self.index.insert_document(
394 &doc_id,
395 doc.title.as_deref(),
396 doc.source.as_deref(),
397 &doc.content,
398 &chunk_pairs,
399 fingerprint,
400 )
401 }
402
    /// Full-text search over indexed chunks; returns at most `k` results.
    pub fn search(&self, query: &str, k: usize) -> Result<Vec<fts::FtsResult>> {
        self.index.search_fts(query, k)
    }

    /// True when `path` has no stored fingerprint or its hash changed.
    pub fn needs_reindex(&self, path: &str, hash: &[u8; 32]) -> Result<bool> {
        self.index.needs_reindex(path, hash)
    }

    /// All stored `(doc_path, blake3_hash)` fingerprint pairs.
    pub fn list_fingerprints(&self) -> Result<Vec<(String, Vec<u8>)>> {
        self.index.list_fingerprints()
    }

    /// Remove every document indexed from `source`; returns the count removed.
    pub fn remove_by_source(&self, source: &str) -> Result<usize> {
        self.index.remove_by_source(source)
    }
424
425 pub fn stats(&self) -> Result<StoreStats> {
427 let db_size_bytes = self
428 .path
429 .as_ref()
430 .and_then(|p| std::fs::metadata(p).ok())
431 .map(|m| m.len())
432 .unwrap_or(0);
433
434 Ok(StoreStats {
435 document_count: self.index.document_count()?,
436 chunk_count: self.index.chunk_count()?,
437 fingerprint_count: self.fingerprint_count()?,
438 db_size_bytes,
439 })
440 }
441
442 fn fingerprint_count(&self) -> Result<usize> {
444 let conn = self.index.conn.lock().map_err(|e| lock_err(&e))?;
445 let count: i64 = conn
446 .query_row("SELECT COUNT(*) FROM fingerprints", [], |r| r.get(0))
447 .map_err(|e| crate::Error::Query(format!("Failed to count fingerprints: {e}")))?;
448 Ok(count as usize)
449 }
450
    /// Read a value from the `metadata` key/value table.
    pub fn get_metadata(&self, key: &str) -> Result<Option<String>> {
        self.index.get_metadata(key)
    }

    /// Insert or overwrite a `metadata` key/value pair.
    pub fn set_metadata(&self, key: &str, value: &str) -> Result<()> {
        self.index.set_metadata(key, value)
    }

    /// Compact the underlying index (fts optimize + VACUUM).
    pub fn optimize(&self) -> Result<()> {
        self.index.optimize()
    }

    /// Borrow the underlying low-level index.
    pub fn as_index(&self) -> &SqliteIndex {
        &self.index
    }
470}
471
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Document, DocumentId};

    /// Minimal document fixture.
    fn make_doc(content: &str) -> Document {
        Document::new(content)
    }

    /// Chunk fixture owned by `doc_id`, spanning `content` from offset 0.
    fn make_chunk(doc_id: DocumentId, content: &str) -> Chunk {
        Chunk {
            id: ChunkId::new(),
            document_id: doc_id,
            content: content.to_string(),
            start_offset: 0,
            end_offset: content.len(),
            metadata: crate::ChunkMetadata::default(),
            embedding: None,
        }
    }

    // Insert a document with two chunks, then read counts and content back.
    #[test]
    fn test_index_roundtrip() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        idx.insert_document(
            "doc1",
            Some("Test Doc"),
            Some("/test.md"),
            "full content here",
            &[
                ("c1".into(), "SIMD vector operations".into()),
                ("c2".into(), "GPU kernel dispatch".into()),
            ],
            None,
        )
        .unwrap();

        assert_eq!(idx.document_count().unwrap(), 1);
        assert_eq!(idx.chunk_count().unwrap(), 2);

        let content = idx.get_chunk("c1").unwrap();
        assert_eq!(content.unwrap(), "SIMD vector operations");
    }

    // FTS must match only the chunk containing the query terms.
    #[test]
    fn test_index_search() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        idx.insert_document(
            "doc1",
            None,
            None,
            "",
            &[
                ("c1".into(), "machine learning algorithms for classification".into()),
                ("c2".into(), "database indexing and query optimization".into()),
            ],
            None,
        )
        .unwrap();

        let results = idx.search_fts("machine learning", 10).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].chunk_id, "c1");
    }

    // Same hash => no reindex; changed hash or unknown path => reindex.
    #[test]
    fn test_index_fingerprint_reindex() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        let hash1 = [1u8; 32];
        let hash2 = [2u8; 32];

        idx.insert_document(
            "doc1",
            None,
            None,
            "",
            &[("c1".into(), "content".into())],
            Some(("/test.md", &hash1)),
        )
        .unwrap();

        assert!(!idx.needs_reindex("/test.md", &hash1).unwrap());

        assert!(idx.needs_reindex("/test.md", &hash2).unwrap());

        assert!(idx.needs_reindex("/unknown.md", &hash1).unwrap());
    }

    // Removing a document also removes its chunks.
    #[test]
    fn test_index_remove_document() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        idx.insert_document("doc1", None, None, "", &[("c1".into(), "some content".into())], None)
            .unwrap();

        assert_eq!(idx.document_count().unwrap(), 1);
        idx.remove_document("doc1").unwrap();
        assert_eq!(idx.document_count().unwrap(), 0);
        assert_eq!(idx.chunk_count().unwrap(), 0);
    }

    // Metadata is a plain key/value store; missing keys read as None.
    #[test]
    fn test_index_metadata() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        idx.set_metadata("version", "1.0.0").unwrap();
        assert_eq!(idx.get_metadata("version").unwrap(), Some("1.0.0".to_string()));
        assert_eq!(idx.get_metadata("nonexistent").unwrap(), None);
    }

    // Re-inserting the same doc id fully replaces its chunks.
    #[test]
    fn test_index_update_document() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        idx.insert_document("doc1", None, None, "", &[("c1".into(), "old content".into())], None)
            .unwrap();
        idx.insert_document("doc1", None, None, "", &[("c2".into(), "new content".into())], None)
            .unwrap();

        assert_eq!(idx.chunk_count().unwrap(), 1);
        assert!(idx.get_chunk("c1").unwrap().is_none());
        assert_eq!(idx.get_chunk("c2").unwrap().unwrap(), "new content");
    }

    // --- SparseIndex trait surface ---

    #[test]
    fn test_sparse_index_add_and_len() {
        let mut idx = SqliteIndex::open_in_memory().unwrap();
        let doc_id = DocumentId::new();
        let chunk = make_chunk(doc_id, "sparse index test content");
        idx.add(&chunk);
        assert_eq!(idx.len(), 1);
    }

    #[test]
    fn test_sparse_index_add_batch() {
        let mut idx = SqliteIndex::open_in_memory().unwrap();
        let doc_id = DocumentId::new();
        let chunks = vec![
            make_chunk(doc_id, "first chunk content"),
            make_chunk(doc_id, "second chunk content"),
        ];
        idx.add_batch(&chunks);
        assert_eq!(idx.len(), 2);
    }

    #[test]
    fn test_sparse_index_remove() {
        let mut idx = SqliteIndex::open_in_memory().unwrap();
        let doc_id = DocumentId::new();
        let chunk = make_chunk(doc_id, "content to remove");
        let chunk_id = chunk.id;
        idx.add(&chunk);
        assert_eq!(idx.len(), 1);
        idx.remove(chunk_id);
        assert_eq!(idx.len(), 0);
    }

    // --- SqliteStore wrapper ---

    #[test]
    fn test_store_index_and_search() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("SIMD vector operations for tensor computation");
        let chunks = vec![make_chunk(doc.id, "SIMD vector operations for tensor computation")];
        store.index_document(&doc, &chunks, None).unwrap();

        let results = store.search("SIMD tensor", 10).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_store_stats() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("content");
        let chunks = vec![make_chunk(doc.id, "chunk one"), make_chunk(doc.id, "chunk two")];
        store.index_document(&doc, &chunks, Some(("/test.md", &[0u8; 32]))).unwrap();

        let stats = store.stats().unwrap();
        assert_eq!(stats.document_count, 1);
        assert_eq!(stats.chunk_count, 2);
        assert_eq!(stats.fingerprint_count, 1);
    }

    #[test]
    fn test_store_needs_reindex() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("content");
        let chunks = vec![make_chunk(doc.id, "chunk")];
        let hash = [42u8; 32];
        store.index_document(&doc, &chunks, Some(("/doc.md", &hash))).unwrap();

        assert!(!store.needs_reindex("/doc.md", &hash).unwrap());
        assert!(store.needs_reindex("/doc.md", &[0u8; 32]).unwrap());
        assert!(store.needs_reindex("/other.md", &hash).unwrap());
    }

    #[test]
    fn test_store_metadata() {
        let store = SqliteStore::open_in_memory().unwrap();
        store.set_metadata("batuta_version", "0.6.0").unwrap();
        assert_eq!(store.get_metadata("batuta_version").unwrap(), Some("0.6.0".to_string()));
    }

    // optimize() must succeed on a populated store (smoke test).
    #[test]
    fn test_store_optimize() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("content");
        let chunks = vec![make_chunk(doc.id, "some chunk content")];
        store.index_document(&doc, &chunks, None).unwrap();
        store.optimize().unwrap();
    }

    // 100 docs x 5 chunks: counts line up and search honors the k limit.
    #[test]
    fn test_store_large_batch() {
        let store = SqliteStore::open_in_memory().unwrap();

        for i in 0..100 {
            let doc = make_doc(&format!("Document {i} about machine learning"));
            let chunks: Vec<Chunk> = (0..5)
                .map(|j| {
                    make_chunk(
                        doc.id,
                        &format!("Chunk {j} of doc {i}: machine learning algorithms topic {j}"),
                    )
                })
                .collect();
            store.index_document(&doc, &chunks, None).unwrap();
        }

        let stats = store.stats().unwrap();
        assert_eq!(stats.document_count, 100);
        assert_eq!(stats.chunk_count, 500);

        let results = store.search("machine learning", 10).unwrap();
        assert_eq!(results.len(), 10);
    }

    // Repeated identical queries must return identical ordering and scores.
    #[test]
    fn test_search_deterministic() {
        let store = SqliteStore::open_in_memory().unwrap();
        let doc = make_doc("determinism test");
        let chunks = vec![
            make_chunk(doc.id, "alpha beta gamma delta"),
            make_chunk(doc.id, "epsilon zeta alpha alpha"),
        ];
        store.index_document(&doc, &chunks, None).unwrap();

        let baseline = store.search("alpha", 10).unwrap();
        for _ in 0..10 {
            let results = store.search("alpha", 10).unwrap();
            assert_eq!(results.len(), baseline.len());
            for (a, b) in baseline.iter().zip(results.iter()) {
                assert_eq!(a.chunk_id, b.chunk_id);
                assert!((a.score - b.score).abs() < f64::EPSILON);
            }
        }
    }

    #[test]
    fn test_list_fingerprints_empty() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        let fps = idx.list_fingerprints().unwrap();
        assert!(fps.is_empty());
    }

    #[test]
    fn test_list_fingerprints_populated() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        let hash1 = [1u8; 32];
        let hash2 = [2u8; 32];

        idx.insert_document(
            "doc1",
            None,
            Some("/a.md"),
            "",
            &[("c1".into(), "content a".into())],
            Some(("/a.md", &hash1)),
        )
        .unwrap();
        idx.insert_document(
            "doc2",
            None,
            Some("/b.md"),
            "",
            &[("c2".into(), "content b".into())],
            Some(("/b.md", &hash2)),
        )
        .unwrap();

        let fps = idx.list_fingerprints().unwrap();
        assert_eq!(fps.len(), 2);
        let paths: Vec<&str> = fps.iter().map(|(p, _)| p.as_str()).collect();
        assert!(paths.contains(&"/a.md"));
        assert!(paths.contains(&"/b.md"));
    }

    // Removing by source deletes docs, chunks and the fingerprint for that
    // source only; other sources are untouched.
    #[test]
    fn test_remove_by_source() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        let hash = [1u8; 32];

        idx.insert_document(
            "doc1",
            None,
            Some("/a.md"),
            "full content",
            &[("c1".into(), "chunk 1".into()), ("c2".into(), "chunk 2".into())],
            Some(("/a.md", &hash)),
        )
        .unwrap();
        idx.insert_document(
            "doc2",
            None,
            Some("/b.md"),
            "other content",
            &[("c3".into(), "chunk 3".into())],
            Some(("/b.md", &hash)),
        )
        .unwrap();

        assert_eq!(idx.document_count().unwrap(), 2);
        assert_eq!(idx.chunk_count().unwrap(), 3);

        let removed = idx.remove_by_source("/a.md").unwrap();
        assert_eq!(removed, 1);
        assert_eq!(idx.document_count().unwrap(), 1);
        assert_eq!(idx.chunk_count().unwrap(), 1);

        assert!(idx.needs_reindex("/a.md", &hash).unwrap());
        assert!(!idx.needs_reindex("/b.md", &hash).unwrap());
    }

    #[test]
    fn test_remove_by_source_nonexistent() {
        let idx = SqliteIndex::open_in_memory().unwrap();
        let removed = idx.remove_by_source("/nonexistent.md").unwrap();
        assert_eq!(removed, 0);
    }
}