1pub mod hnsw;
13mod rrf;
14
15pub use hnsw::{HnswConfig, HnswIndex, HnswResult};
16pub use rrf::{RrfConfig, reciprocal_rank_fusion, weighted_rrf};
17
18use crate::embedding::{Embedder, cosine_similarity};
19use crate::error::Result;
20use crate::storage::{SqliteStorage, Storage};
21
/// Default value of [`SearchConfig::similarity_threshold`]: semantic matches
/// whose similarity falls below this value are discarded.
pub const DEFAULT_SIMILARITY_THRESHOLD: f32 = 0.3;

/// Default value of [`SearchConfig::top_k`]: the maximum number of results a
/// search returns.
pub const DEFAULT_TOP_K: usize = 10;
27
/// A single hit returned by the search functions in this module.
///
/// `PartialEq` is derived so results can be compared directly (for example in
/// tests or when de-duplicating); `Eq` is not available because the scores are
/// floating-point values.
#[derive(Debug, Clone, PartialEq)]
pub struct SearchResult {
    /// Identifier of the matching chunk.
    pub chunk_id: i64,
    /// Identifier of the buffer the chunk belongs to.
    pub buffer_id: i64,
    /// The `index` value recorded on the stored chunk.
    pub index: usize,
    /// Score the result is ranked by: the fused RRF score when both rankings
    /// are combined, otherwise the raw BM25 or semantic score.
    pub score: f64,
    /// Similarity reported by the semantic ranking, or `None` when that
    /// ranking did not contribute this chunk.
    pub semantic_score: Option<f32>,
    /// Score reported by the BM25 full-text ranking, or `None` when that
    /// ranking did not contribute this chunk.
    pub bm25_score: Option<f64>,
    /// Short excerpt of the chunk content; `None` until it is filled in by
    /// [`populate_previews`].
    pub content_preview: Option<String>,
}
46
/// Tunable parameters for [`hybrid_search`].
///
/// `PartialEq` is derived so configurations can be compared directly; `Eq` is
/// not available because the threshold is a floating-point value.
#[derive(Debug, Clone, PartialEq)]
pub struct SearchConfig {
    /// Maximum number of results to return.
    pub top_k: usize,
    /// Minimum similarity a semantic match must reach to be kept.
    pub similarity_threshold: f32,
    /// The `k` constant handed to reciprocal rank fusion.
    pub rrf_k: u32,
    /// Whether the semantic (embedding) ranking is consulted.
    pub use_semantic: bool,
    /// Whether the BM25 full-text ranking is consulted.
    pub use_bm25: bool,
}
61
62impl Default for SearchConfig {
63 fn default() -> Self {
64 Self {
65 top_k: DEFAULT_TOP_K,
66 similarity_threshold: DEFAULT_SIMILARITY_THRESHOLD,
67 rrf_k: 60,
68 use_semantic: true,
69 use_bm25: true,
70 }
71 }
72}
73
/// Default length of a content preview.
///
/// NOTE(review): nothing in this part of the file references the constant;
/// presumably callers pass it as the `preview_len` argument of
/// [`populate_previews`], which compares it against the content's byte
/// length — confirm against the call sites.
pub const DEFAULT_PREVIEW_LEN: usize = 150;
76
77impl SearchResult {
78 fn from_chunk_id(
82 storage: &SqliteStorage,
83 chunk_id: i64,
84 score: f64,
85 semantic_score: Option<f32>,
86 bm25_score: Option<f64>,
87 ) -> Option<Self> {
88 storage
89 .get_chunk(chunk_id)
90 .ok()
91 .flatten()
92 .map(|chunk| Self {
93 chunk_id,
94 buffer_id: chunk.buffer_id,
95 index: chunk.index,
96 score,
97 semantic_score,
98 bm25_score,
99 content_preview: None,
100 })
101 }
102}
103
104pub fn populate_previews(
116 storage: &SqliteStorage,
117 results: &mut [SearchResult],
118 preview_len: usize,
119) -> Result<()> {
120 if results.is_empty() {
121 return Ok(());
122 }
123
124 let ids: Vec<i64> = results.iter().map(|r| r.chunk_id).collect();
126 let chunk_map = storage.get_chunks_by_ids(&ids)?;
127
128 for result in results.iter_mut() {
129 if let Some(chunk) = chunk_map.get(&result.chunk_id) {
130 let content = &chunk.content;
131 let preview = if content.len() <= preview_len {
132 content.clone()
133 } else {
134 let end = crate::io::find_char_boundary(content, preview_len);
136 let mut preview = content[..end].to_string();
137 if end < content.len() {
138 preview.push_str("...");
139 }
140 preview
141 };
142 result.content_preview = Some(preview);
143 }
144 }
145 Ok(())
146}
147
148impl SearchConfig {
149 #[must_use]
151 pub fn new() -> Self {
152 Self::default()
153 }
154
155 #[must_use]
157 pub const fn with_top_k(mut self, top_k: usize) -> Self {
158 self.top_k = top_k;
159 self
160 }
161
162 #[must_use]
164 pub const fn with_threshold(mut self, threshold: f32) -> Self {
165 self.similarity_threshold = threshold;
166 self
167 }
168
169 #[must_use]
171 pub const fn with_rrf_k(mut self, k: u32) -> Self {
172 self.rrf_k = k;
173 self
174 }
175
176 #[must_use]
178 pub const fn with_semantic(mut self, enabled: bool) -> Self {
179 self.use_semantic = enabled;
180 self
181 }
182
183 #[must_use]
185 pub const fn with_bm25(mut self, enabled: bool) -> Self {
186 self.use_bm25 = enabled;
187 self
188 }
189}
190
191pub fn hybrid_search(
204 storage: &SqliteStorage,
205 embedder: &dyn Embedder,
206 query: &str,
207 config: &SearchConfig,
208) -> Result<Vec<SearchResult>> {
209 let mut semantic_results: Vec<(i64, f32)> = Vec::new();
210 let mut bm25_results: Vec<(i64, f64)> = Vec::new();
211
212 if config.use_semantic {
214 semantic_results = semantic_search(storage, embedder, query, config)?;
215 }
216
217 if config.use_bm25 {
219 bm25_results = storage.search_fts(query, config.top_k * 2)?;
220 }
221
222 if !config.use_semantic {
224 return Ok(bm25_results
225 .into_iter()
226 .take(config.top_k)
227 .filter_map(|(chunk_id, score)| {
228 SearchResult::from_chunk_id(storage, chunk_id, score, None, Some(score))
229 })
230 .collect());
231 }
232
233 if !config.use_bm25 {
234 return Ok(semantic_results
235 .into_iter()
236 .take(config.top_k)
237 .filter_map(|(chunk_id, score)| {
238 SearchResult::from_chunk_id(storage, chunk_id, f64::from(score), Some(score), None)
239 })
240 .collect());
241 }
242
243 let rrf_config = RrfConfig::new(config.rrf_k);
245
246 let semantic_ranked: Vec<i64> = semantic_results.iter().map(|(id, _)| *id).collect();
248 let bm25_ranked: Vec<i64> = bm25_results.iter().map(|(id, _)| *id).collect();
249
250 let fused = reciprocal_rank_fusion(&[&semantic_ranked, &bm25_ranked], &rrf_config);
251
252 let semantic_map: std::collections::HashMap<i64, f32> = semantic_results.into_iter().collect();
254 let bm25_map: std::collections::HashMap<i64, f64> = bm25_results.into_iter().collect();
255
256 let results: Vec<SearchResult> = fused
257 .into_iter()
258 .take(config.top_k)
259 .filter_map(|(chunk_id, rrf_score)| {
260 SearchResult::from_chunk_id(
261 storage,
262 chunk_id,
263 rrf_score,
264 semantic_map.get(&chunk_id).copied(),
265 bm25_map.get(&chunk_id).copied(),
266 )
267 })
268 .collect();
269
270 Ok(results)
271}
272
273fn semantic_search(
277 storage: &SqliteStorage,
278 embedder: &dyn Embedder,
279 query: &str,
280 config: &SearchConfig,
281) -> Result<Vec<(i64, f32)>> {
282 use rayon::prelude::*;
283
284 let query_embedding = embedder.embed(query)?;
286
287 let all_embeddings = storage.get_all_embeddings()?;
289
290 if all_embeddings.is_empty() {
291 return Ok(Vec::new());
292 }
293
294 let mut similarities: Vec<(i64, f32)> = all_embeddings
296 .par_iter()
297 .map(|(chunk_id, embedding)| {
298 let sim = cosine_similarity(&query_embedding, embedding);
299 (*chunk_id, sim)
300 })
301 .filter(|(_, sim)| *sim >= config.similarity_threshold)
302 .collect();
303
304 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
306
307 similarities.truncate(config.top_k * 2);
309
310 Ok(similarities)
311}
312
313pub fn search_semantic(
327 storage: &SqliteStorage,
328 embedder: &dyn Embedder,
329 query: &str,
330 top_k: usize,
331 threshold: f32,
332) -> Result<Vec<SearchResult>> {
333 let config = SearchConfig::new()
334 .with_top_k(top_k)
335 .with_threshold(threshold)
336 .with_semantic(true)
337 .with_bm25(false);
338
339 hybrid_search(storage, embedder, query, &config)
340}
341
342pub fn search_bm25(
354 storage: &SqliteStorage,
355 query: &str,
356 top_k: usize,
357) -> Result<Vec<SearchResult>> {
358 let results = storage.search_fts(query, top_k)?;
359
360 Ok(results
361 .into_iter()
362 .filter_map(|(chunk_id, score)| {
363 SearchResult::from_chunk_id(storage, chunk_id, score, None, Some(score))
364 })
365 .collect())
366}
367
368pub fn embed_buffer_chunks(
384 storage: &mut SqliteStorage,
385 embedder: &dyn Embedder,
386 buffer_id: i64,
387) -> Result<usize> {
388 let chunks = storage.get_chunks(buffer_id)?;
389
390 if chunks.is_empty() {
391 return Ok(0);
392 }
393
394 let texts: Vec<&str> = chunks.iter().map(|c| c.content.as_str()).collect();
396
397 let embeddings = embedder.embed_batch(&texts)?;
399
400 let batch: Vec<(i64, Vec<f32>)> = chunks
402 .iter()
403 .zip(embeddings)
404 .filter_map(|(chunk, embedding)| chunk.id.map(|id| (id, embedding)))
405 .collect();
406
407 let count = batch.len();
408
409 storage.store_embeddings_batch(&batch, Some(embedder.model_name()))?;
411
412 Ok(count)
413}
414
415pub fn buffer_fully_embedded(storage: &SqliteStorage, buffer_id: i64) -> Result<bool> {
424 storage.all_chunks_have_embeddings(buffer_id)
425}
426
427pub fn check_model_mismatch(
435 storage: &SqliteStorage,
436 buffer_id: i64,
437 current_model: &str,
438) -> Result<Option<String>> {
439 let models = storage.get_embedding_models(buffer_id)?;
440
441 if models.is_empty() {
443 return Ok(None);
444 }
445
446 for model in models {
448 if model != current_model {
449 return Ok(Some(model));
450 }
451 }
452
453 Ok(None)
454}
455
/// Summary of which models produced a buffer's embeddings.
///
/// `PartialEq`/`Eq` are derived so summaries can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddingModelInfo {
    /// Model name (or `None` when no name was recorded) paired with the
    /// number of embeddings attributed to it.
    pub models: Vec<(Option<String>, i64)>,
    /// Sum of all per-model counts.
    pub total_embeddings: i64,
    /// `true` when more than one distinct model name is present.
    pub has_mixed_models: bool,
}
466
467pub fn get_embedding_model_info(
473 storage: &SqliteStorage,
474 buffer_id: i64,
475) -> Result<EmbeddingModelInfo> {
476 let models = storage.get_embedding_model_counts(buffer_id)?;
477 let total_embeddings: i64 = models.iter().map(|(_, count)| count).sum();
478 let distinct_models: std::collections::HashSet<_> =
479 models.iter().map(|(name, _)| name.as_deref()).collect();
480 let has_mixed_models = distinct_models.len() > 1;
481
482 Ok(EmbeddingModelInfo {
483 models,
484 total_embeddings,
485 has_mixed_models,
486 })
487}
488
/// Outcome of an incremental embedding run over one buffer.
///
/// `PartialEq`/`Eq` are derived so outcomes can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IncrementalEmbedResult {
    /// Chunks that received an embedding for the first time.
    pub embedded_count: usize,
    /// Chunks that were left untouched.
    pub skipped_count: usize,
    /// Chunks whose existing embedding was overwritten.
    pub replaced_count: usize,
    /// Total number of chunks in the buffer.
    pub total_chunks: usize,
    /// Name of the model used for this run.
    pub model_name: String,
}

impl IncrementalEmbedResult {
    /// Returns `true` when the run wrote at least one embedding, new or
    /// replaced.
    #[must_use]
    pub const fn had_changes(&self) -> bool {
        self.embedded_count > 0 || self.replaced_count > 0
    }

    /// Returns the share of chunks accounted for (embedded, skipped, or
    /// replaced) as a percentage of `total_chunks`.
    ///
    /// A buffer without chunks counts as 100% complete.
    #[must_use]
    // Precision is only lost for counts beyond 2^53, which is irrelevant for
    // a percentage.
    #[allow(clippy::cast_precision_loss)]
    pub fn completion_percentage(&self) -> f64 {
        if self.total_chunks == 0 {
            100.0
        } else {
            let completed = self.embedded_count + self.skipped_count + self.replaced_count;
            (completed as f64 / self.total_chunks as f64) * 100.0
        }
    }
}
523
524pub fn embed_buffer_chunks_incremental(
548 storage: &mut SqliteStorage,
549 embedder: &dyn Embedder,
550 buffer_id: i64,
551 force_reembed: bool,
552) -> Result<IncrementalEmbedResult> {
553 let current_model = embedder.model_name();
554 let stats = storage.get_embedding_stats(buffer_id)?;
555 let total_chunks = stats.total_chunks;
556
557 let model_to_check = if force_reembed {
559 Some(current_model)
560 } else {
561 None
562 };
563
564 let chunk_ids_to_embed = storage.get_chunks_needing_embedding(buffer_id, model_to_check)?;
565
566 if chunk_ids_to_embed.is_empty() {
567 return Ok(IncrementalEmbedResult {
568 embedded_count: 0,
569 skipped_count: total_chunks,
570 replaced_count: 0,
571 total_chunks,
572 model_name: current_model.to_string(),
573 });
574 }
575
576 let all_chunks = storage.get_chunks(buffer_id)?;
578 let chunks_to_embed: Vec<_> = all_chunks
579 .iter()
580 .filter(|c| c.id.is_some_and(|id| chunk_ids_to_embed.contains(&id)))
581 .collect();
582
583 let mut replaced_count = 0;
585 for chunk in &chunks_to_embed {
586 if let Some(id) = chunk.id
587 && storage.has_embedding(id)?
588 {
589 replaced_count += 1;
590 }
591 }
592
593 let texts: Vec<&str> = chunks_to_embed.iter().map(|c| c.content.as_str()).collect();
595 let embeddings = embedder.embed_batch(&texts)?;
596
597 let batch: Vec<(i64, Vec<f32>)> = chunks_to_embed
599 .iter()
600 .zip(embeddings)
601 .filter_map(|(chunk, embedding)| chunk.id.map(|id| (id, embedding)))
602 .collect();
603
604 let embedded_count = batch.len();
605 storage.store_embeddings_batch(&batch, Some(current_model))?;
606
607 let new_embeddings = embedded_count - replaced_count;
608 let skipped_count = total_chunks - embedded_count;
609
610 Ok(IncrementalEmbedResult {
611 embedded_count: new_embeddings,
612 skipped_count,
613 replaced_count,
614 total_chunks,
615 model_name: current_model.to_string(),
616 })
617}
618
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::{Buffer, Chunk};
    use crate::embedding::{DEFAULT_DIMENSIONS, FallbackEmbedder};
    use crate::storage::Storage;

    // ---------------------------------------------------------------------
    // Fixtures
    // ---------------------------------------------------------------------

    /// Opens an initialised in-memory database.
    fn setup_storage() -> SqliteStorage {
        let mut db = SqliteStorage::in_memory().unwrap();
        db.init().unwrap();
        db
    }

    /// Database holding one buffer (id 1) with three unrelated chunks.
    fn setup_storage_with_chunks() -> SqliteStorage {
        let mut db = setup_storage();

        let buffer_id = add_buffer_named(&mut db, "test.txt", "Test content for searching");

        let fixture_chunks = vec![
            Chunk::new(
                buffer_id,
                "The quick brown fox jumps over the lazy dog".to_string(),
                0..44,
                0,
            ),
            Chunk::new(
                buffer_id,
                "Machine learning is a subset of artificial intelligence".to_string(),
                44..100,
                1,
            ),
            Chunk::new(
                buffer_id,
                "Rust is a systems programming language".to_string(),
                100..139,
                2,
            ),
        ];
        db.add_chunks(buffer_id, &fixture_chunks).unwrap();

        db
    }

    /// Embedder used by every test that needs one.
    fn fallback_embedder() -> FallbackEmbedder {
        FallbackEmbedder::new(DEFAULT_DIMENSIONS)
    }

    /// Three-chunk database whose chunks are already embedded, together with
    /// the embedder that produced the vectors.
    fn setup_embedded_storage() -> (SqliteStorage, FallbackEmbedder) {
        let mut db = setup_storage_with_chunks();
        let embedder = fallback_embedder();
        embed_buffer_chunks(&mut db, &embedder, 1).unwrap();
        (db, embedder)
    }

    /// Adds a buffer with the given name and content and returns its id.
    fn add_buffer_named(db: &mut SqliteStorage, name: &str, content: &str) -> i64 {
        let buffer = Buffer::from_named(name.to_string(), content.to_string());
        db.add_buffer(&buffer).unwrap()
    }

    /// A result pointing at `chunk_id` with no scores and no preview.
    fn unscored_hit(chunk_id: i64, buffer_id: i64) -> SearchResult {
        SearchResult {
            chunk_id,
            buffer_id,
            index: 0,
            score: 1.0,
            semantic_score: None,
            bm25_score: None,
            content_preview: None,
        }
    }

    /// Stores `content` as the only chunk of a fresh, otherwise empty buffer
    /// and returns the database plus one result referring to that chunk.
    fn single_chunk_fixture(name: &str, content: &str) -> (SqliteStorage, Vec<SearchResult>) {
        let mut db = setup_storage();
        let buffer_id = add_buffer_named(&mut db, name, "");

        let chunk = Chunk::new(buffer_id, content.to_string(), 0..content.len(), 0);
        db.add_chunks(buffer_id, &[chunk]).unwrap();

        let stored = db.get_chunks(buffer_id).unwrap();
        let chunk_id = stored[0].id.unwrap();

        (db, vec![unscored_hit(chunk_id, buffer_id)])
    }

    /// Shorthand for an [`IncrementalEmbedResult`] with model name `"test"`.
    fn embed_result(
        embedded_count: usize,
        skipped_count: usize,
        replaced_count: usize,
        total_chunks: usize,
    ) -> IncrementalEmbedResult {
        IncrementalEmbedResult {
            embedded_count,
            skipped_count,
            replaced_count,
            total_chunks,
            model_name: "test".to_string(),
        }
    }

    // ---------------------------------------------------------------------
    // SearchConfig
    // ---------------------------------------------------------------------

    #[test]
    fn test_search_config_default() {
        let defaults = SearchConfig::default();

        assert_eq!(defaults.top_k, DEFAULT_TOP_K);
        assert!(
            (defaults.similarity_threshold - DEFAULT_SIMILARITY_THRESHOLD).abs() < f32::EPSILON
        );
        assert_eq!(defaults.rrf_k, 60);
        assert!(defaults.use_semantic);
        assert!(defaults.use_bm25);
    }

    #[test]
    fn test_search_config_builder() {
        let built = SearchConfig::new()
            .with_top_k(20)
            .with_threshold(0.5)
            .with_rrf_k(30)
            .with_semantic(false)
            .with_bm25(true);

        assert_eq!(built.top_k, 20);
        assert!((built.similarity_threshold - 0.5).abs() < f32::EPSILON);
        assert_eq!(built.rrf_k, 30);
        assert!(!built.use_semantic);
        assert!(built.use_bm25);
    }

    // ---------------------------------------------------------------------
    // BM25 search
    // ---------------------------------------------------------------------

    #[test]
    fn test_search_bm25() {
        let db = setup_storage_with_chunks();

        let hits = search_bm25(&db, "fox", 10).unwrap();

        assert!(!hits.is_empty());
        assert!(hits[0].bm25_score.is_some());
        assert!(hits[0].semantic_score.is_none());
    }

    #[test]
    fn test_search_bm25_no_results() {
        let db = setup_storage_with_chunks();

        let hits = search_bm25(&db, "xyz123nonexistent", 10).unwrap();

        assert!(hits.is_empty());
    }

    // ---------------------------------------------------------------------
    // Embedding whole buffers
    // ---------------------------------------------------------------------

    #[test]
    fn test_embed_buffer_chunks() {
        let mut db = setup_storage_with_chunks();
        let embedder = fallback_embedder();

        let written = embed_buffer_chunks(&mut db, &embedder, 1).unwrap();

        assert_eq!(written, 3);
    }

    #[test]
    fn test_embed_buffer_chunks_empty() {
        let mut db = setup_storage();
        let embedder = fallback_embedder();
        let buffer_id = add_buffer_named(&mut db, "empty.txt", "");

        let written = embed_buffer_chunks(&mut db, &embedder, buffer_id).unwrap();

        assert_eq!(written, 0);
    }

    #[test]
    fn test_buffer_fully_embedded_empty() {
        let mut db = setup_storage();
        let buffer_id = add_buffer_named(&mut db, "empty.txt", "");

        // A buffer without chunks counts as fully embedded.
        assert!(buffer_fully_embedded(&db, buffer_id).unwrap());
    }

    #[test]
    fn test_buffer_fully_embedded_with_embeddings() {
        let mut db = setup_storage_with_chunks();
        let embedder = fallback_embedder();

        let before = buffer_fully_embedded(&db, 1).unwrap();
        assert!(!before);

        embed_buffer_chunks(&mut db, &embedder, 1).unwrap();

        let after = buffer_fully_embedded(&db, 1).unwrap();
        assert!(after);
    }

    // ---------------------------------------------------------------------
    // Hybrid and semantic search
    // ---------------------------------------------------------------------

    #[test]
    fn test_hybrid_search_bm25_only() {
        let db = setup_storage_with_chunks();
        let embedder = fallback_embedder();
        let bm25_only = SearchConfig::new().with_semantic(false).with_bm25(true);

        let hits = hybrid_search(&db, &embedder, "programming", &bm25_only).unwrap();

        assert!(!hits.is_empty());
        assert!(hits[0].bm25_score.is_some());
        assert!(hits[0].semantic_score.is_none());
    }

    #[test]
    fn test_hybrid_search_semantic_only() {
        let (db, embedder) = setup_embedded_storage();
        // A zero threshold keeps every embedded chunk as a candidate.
        let semantic_only = SearchConfig::new()
            .with_semantic(true)
            .with_bm25(false)
            .with_threshold(0.0);

        let hits = hybrid_search(&db, &embedder, "programming language", &semantic_only).unwrap();

        assert!(!hits.is_empty());
        assert!(hits[0].semantic_score.is_some());
        assert!(hits[0].bm25_score.is_none());
    }

    #[test]
    fn test_hybrid_search_both() {
        let (db, embedder) = setup_embedded_storage();
        let combined = SearchConfig::new()
            .with_semantic(true)
            .with_bm25(true)
            .with_threshold(0.0);

        let hits = hybrid_search(&db, &embedder, "programming", &combined).unwrap();

        assert!(!hits.is_empty());
    }

    #[test]
    fn test_search_semantic() {
        let (db, embedder) = setup_embedded_storage();

        let hits = search_semantic(&db, &embedder, "test query", 10, 0.0).unwrap();

        for hit in &hits {
            assert!(hit.semantic_score.is_some());
            assert!(hit.bm25_score.is_none());
        }
    }

    #[test]
    fn test_search_semantic_empty_embeddings() {
        // Chunks exist but nothing has been embedded yet.
        let db = setup_storage_with_chunks();
        let embedder = fallback_embedder();

        let hits = search_semantic(&db, &embedder, "test query", 10, 0.5).unwrap();

        assert!(hits.is_empty());
    }

    #[test]
    fn test_parallel_semantic_search() {
        let (db, embedder) = setup_embedded_storage();

        let lenient = SearchConfig::new().with_top_k(10).with_threshold(0.0);
        let ranked = semantic_search(&db, &embedder, "test query", &lenient).unwrap();

        assert!(!ranked.is_empty());

        for pair in ranked.windows(2) {
            assert!(
                pair[0].1 >= pair[1].1,
                "Results should be sorted by descending similarity: {} >= {}",
                pair[0].1,
                pair[1].1,
            );
        }

        let strict = SearchConfig::new().with_top_k(10).with_threshold(0.99);
        let strict_ranked = semantic_search(&db, &embedder, "test query", &strict).unwrap();

        assert!(
            strict_ranked.len() <= ranked.len(),
            "Strict threshold should not return more results than lenient threshold",
        );
    }

    // ---------------------------------------------------------------------
    // Incremental embedding
    // ---------------------------------------------------------------------

    #[test]
    fn test_incremental_embed_new_chunks() {
        let mut db = setup_storage_with_chunks();
        let embedder = fallback_embedder();

        let first = embed_buffer_chunks_incremental(&mut db, &embedder, 1, false).unwrap();
        assert_eq!(first.embedded_count, 3);
        assert_eq!(first.skipped_count, 0);
        assert_eq!(first.replaced_count, 0);
        assert_eq!(first.total_chunks, 3);
        assert!(first.had_changes());

        // A second pass finds nothing left to do.
        let second = embed_buffer_chunks_incremental(&mut db, &embedder, 1, false).unwrap();
        assert_eq!(second.embedded_count, 0);
        assert_eq!(second.skipped_count, 3);
        assert_eq!(second.replaced_count, 0);
        assert!(!second.had_changes());
    }

    #[test]
    fn test_incremental_embed_force_reembed() {
        let mut db = setup_storage_with_chunks();
        let embedder = fallback_embedder();

        embed_buffer_chunks_incremental(&mut db, &embedder, 1, false).unwrap();

        // Forcing with an unchanged model leaves every chunk alone.
        let forced = embed_buffer_chunks_incremental(&mut db, &embedder, 1, true).unwrap();
        assert_eq!(forced.skipped_count, 3);
        assert!(!forced.had_changes());
    }

    #[test]
    fn test_incremental_embed_result_completion() {
        let outcome = embed_result(2, 3, 0, 5);

        assert!(outcome.had_changes());
        assert!((outcome.completion_percentage() - 100.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_completion_percentage_zero_chunks() {
        let outcome = embed_result(0, 0, 0, 0);

        assert!((outcome.completion_percentage() - 100.0).abs() < f64::EPSILON);
        assert!(!outcome.had_changes());
    }

    #[test]
    fn test_completion_percentage_partial() {
        let outcome = embed_result(1, 1, 0, 4);

        assert!((outcome.completion_percentage() - 50.0).abs() < f64::EPSILON);
        assert!(outcome.had_changes());
    }

    #[test]
    fn test_had_changes_replaced_only() {
        let outcome = embed_result(0, 2, 1, 3);

        assert!(outcome.had_changes());
    }

    // ---------------------------------------------------------------------
    // Model bookkeeping
    // ---------------------------------------------------------------------

    #[test]
    fn test_check_model_mismatch_no_embeddings() {
        let mut db = setup_storage();
        let buffer_id = add_buffer_named(&mut db, "x.txt", "hi");

        let mismatch = check_model_mismatch(&db, buffer_id, "model-a").unwrap();

        assert!(mismatch.is_none());
    }

    #[test]
    fn test_check_model_mismatch_same_model() {
        let (db, embedder) = setup_embedded_storage();

        let mismatch = check_model_mismatch(&db, 1, embedder.model_name()).unwrap();

        assert!(mismatch.is_none());
    }

    #[test]
    fn test_check_model_mismatch_different_model() {
        let (db, embedder) = setup_embedded_storage();

        let mismatch = check_model_mismatch(&db, 1, "some-other-model").unwrap();

        assert!(mismatch.is_some());
        assert_eq!(mismatch.unwrap(), embedder.model_name());
    }

    #[test]
    fn test_get_embedding_model_info_no_embeddings() {
        let mut db = setup_storage();
        let buffer_id = add_buffer_named(&mut db, "x.txt", "hi");

        let info = get_embedding_model_info(&db, buffer_id).unwrap();

        assert_eq!(info.total_embeddings, 0);
        assert!(info.models.is_empty());
        assert!(!info.has_mixed_models);
    }

    #[test]
    fn test_get_embedding_model_info_single_model() {
        let (db, _embedder) = setup_embedded_storage();

        let info = get_embedding_model_info(&db, 1).unwrap();

        assert_eq!(info.total_embeddings, 3);
        assert!(!info.has_mixed_models);
    }

    // ---------------------------------------------------------------------
    // Previews
    // ---------------------------------------------------------------------

    #[test]
    fn test_populate_previews_short_content() {
        let db = setup_storage_with_chunks();

        let mut hits = search_bm25(&db, "fox", 1).unwrap();
        assert!(!hits.is_empty());

        // The limit exceeds the chunk length, so nothing is cut off.
        populate_previews(&db, &mut hits, 200).unwrap();

        let preview = hits[0].content_preview.as_ref().unwrap();
        assert!(!preview.ends_with("..."));
        assert!(!preview.is_empty());
    }

    #[test]
    fn test_populate_previews_truncates_long_content() {
        let long_content = "word ".repeat(100);
        let (db, mut hits) = single_chunk_fixture("long.txt", &long_content);

        populate_previews(&db, &mut hits, 20).unwrap();

        let preview = hits[0].content_preview.as_ref().unwrap();
        assert!(
            preview.ends_with("..."),
            "Expected ellipsis, got: {preview}"
        );
        // At most 20 bytes of content plus the three-byte ellipsis.
        assert!(preview.len() <= 23);
    }

    #[test]
    fn test_populate_previews_utf8_boundary() {
        // Six ASCII bytes followed by three 3-byte characters; a limit of 7
        // lands inside the first multi-byte character.
        let content = "hello \u{65E5}\u{672C}\u{8A9E}";
        let (db, mut hits) = single_chunk_fixture("utf8.txt", content);

        populate_previews(&db, &mut hits, 7).unwrap();

        let preview = hits[0].content_preview.as_ref().unwrap();
        assert!(preview.ends_with("..."), "Expected ellipsis in: {preview}");

        let body = preview.trim_end_matches("...");
        assert_eq!(
            body, "hello ",
            "Expected truncation at char boundary, got: {body:?}"
        );
    }
}