1use anyhow::Result;
2use colored::Colorize;
3use rayon::prelude::*;
4use serde::Serialize;
5use std::path::{Path, PathBuf};
6use std::time::{Duration, Instant};
7
8use crate::cache::FileMetaStore;
9use crate::chunker::SemanticChunker;
10use crate::embed::{EmbeddingService, ModelType};
11use crate::file::FileWalker;
12use crate::fts::FtsStore;
13use crate::rerank::{rrf_fusion, vector_only, FusedResult, NeuralReranker, DEFAULT_RRF_K};
14use crate::vectordb::VectorStore;
15
/// User-facing knobs for a single [`search`] invocation.
#[derive(Clone, Debug)]
pub struct SearchOptions {
    /// Maximum number of results reported; also scales how many candidates are retrieved.
    pub max_results: usize,
    /// When `Some(n)` with `0 < n < max_results`, output is grouped by file and each
    /// file shows at most `n` results.
    pub per_file: Option<usize>,
    /// Only tested for `> 0` in this file: a non-zero value prints the full content
    /// section of each result instead of a short snippet.
    pub content_lines: usize,
    /// Print per-result scores and the timing breakdown (adds `timing` to JSON output).
    pub show_scores: bool,
    /// Print only the unique file paths of the results.
    pub compact: bool,
    /// Sync the database with the files on disk before searching.
    pub sync: bool,
    /// Emit the results as a single JSON document.
    pub json: bool,
    /// Keep only results whose path starts with this prefix (a leading `./` is ignored).
    pub filter_path: Option<String>,
    /// Model name to use instead of the one recorded in the database metadata.
    pub model_override: Option<String>,
    /// Skip full-text search and rank by vector similarity alone.
    pub vector_only: bool,
    /// Overrides the default RRF `k` constant used when fusing result lists.
    pub rrf_k: Option<usize>,
    /// Apply neural reranking to the fused candidates.
    pub rerank: bool,
    /// Number of fused candidates handed to the reranker (defaults to `max_results`).
    pub rerank_top: Option<usize>,
}
46
47impl Default for SearchOptions {
48 fn default() -> Self {
49 Self {
50 max_results: 10,
51 per_file: None,
52 content_lines: 3,
53 show_scores: false,
54 compact: false,
55 sync: false,
56 json: false,
57 filter_path: None,
58 model_override: None,
59 vector_only: false,
60 rrf_k: None,
61 rerank: false,
62 rerank_top: None,
63 }
64 }
65}
66
67#[derive(Serialize)]
69struct JsonOutput {
70 query: String,
71 results: Vec<JsonResult>,
72 #[serde(skip_serializing_if = "Option::is_none")]
73 timing: Option<JsonTiming>,
74}
75
76#[derive(Serialize)]
77struct JsonResult {
78 path: String,
79 start_line: usize,
80 end_line: usize,
81 kind: String,
82 content: String,
83 score: f32,
84 #[serde(skip_serializing_if = "Option::is_none")]
85 signature: Option<String>,
86 #[serde(skip_serializing_if = "Option::is_none")]
87 context_prev: Option<String>,
88 #[serde(skip_serializing_if = "Option::is_none")]
89 context_next: Option<String>,
90}
91
92#[derive(Serialize)]
93struct JsonTiming {
94 total_ms: u64,
95 embed_ms: u64,
96 search_ms: u64,
97 #[serde(skip_serializing_if = "Option::is_none")]
98 rerank_ms: Option<u64>,
99}
100
101fn get_db_path(path: Option<PathBuf>) -> Result<(PathBuf, PathBuf)> {
104 use crate::db_discovery::resolve_database_with_message;
105 resolve_database_with_message(path.as_deref(), "searching")
106}
107
108pub fn read_metadata(db_path: &Path) -> Option<(String, usize, Option<String>)> {
110 let metadata_path = db_path.join("metadata.json");
111 if let Ok(content) = std::fs::read_to_string(&metadata_path) {
112 if let Ok(json) = serde_json::from_str::<serde_json::Value>(&content) {
113 let model = json.get("model_short_name")?.as_str()?.to_string();
114 let dims = json.get("dimensions")?.as_u64()? as usize;
115 let primary_language = json
116 .get("primary_language")
117 .and_then(|v| v.as_str())
118 .map(|s| s.to_string());
119 return Some((model, dims, primary_language));
120 }
121 }
122 None
123}
124
/// Extracts the whitespace-separated tokens of `query` that look like code
/// identifiers.
///
/// A token qualifies when it is any of:
/// * PascalCase-like: starts uppercase, contains a lowercase letter and is
///   not one of a few capitalised question words (`Find`, `Show`, ...);
/// * snake_case-like: contains `_` and consists only of alphanumerics and `_`;
/// * camelCase-like: starts lowercase and contains an uppercase letter.
///
/// Tokens are returned in query order, duplicates included.
pub fn detect_identifiers(query: &str) -> Vec<String> {
    const QUESTION_WORDS: [&str; 7] = ["Find", "Show", "Get", "Where", "How", "What", "All"];

    fn looks_like_identifier(word: &str) -> bool {
        let first = word.chars().next();

        let pascal = first.map_or(false, char::is_uppercase)
            && word.chars().any(char::is_lowercase)
            && !QUESTION_WORDS.contains(&word);
        let snake = word.contains('_') && word.chars().all(|c| c.is_alphanumeric() || c == '_');
        let camel = first.map_or(false, char::is_lowercase) && word.chars().any(char::is_uppercase);

        pascal || snake || camel
    }

    query
        .split_whitespace()
        .filter(|word| looks_like_identifier(word))
        .map(str::to_string)
        .collect()
}
156
157pub fn detect_structural_intent(query: &str) -> Option<crate::chunker::ChunkKind> {
166 use crate::chunker::ChunkKind;
167
168 let query_lower = query.to_lowercase();
169
170 let has_identifier = contains_identifier(query);
173
174 eprintln!(
175 "🔍 detect_structural_intent: query='{}', has_identifier={}",
176 query, has_identifier
177 );
178
179 if !has_identifier {
180 return None; }
182
183 let kind = if query_lower.contains("class ") {
184 Some(ChunkKind::Class)
185 } else if query_lower.contains("struct ") {
186 Some(ChunkKind::Struct)
187 } else if query_lower.contains("function ") || query_lower.contains("fn ") {
188 Some(ChunkKind::Function)
189 } else if query_lower.contains("method ") {
190 Some(ChunkKind::Method)
191 } else if query_lower.contains("enum ") {
192 Some(ChunkKind::Enum)
193 } else if query_lower.contains("interface ") {
194 Some(ChunkKind::Interface)
195 } else if query_lower.contains("trait ") {
196 Some(ChunkKind::Trait)
197 } else {
198 None
199 };
200
201 eprintln!("🔍 detect_structural_intent: kind={:?}", kind);
202 kind
203}
204
/// Returns `true` when `query` contains a character pattern typical of code
/// identifiers:
///
/// * an uppercase letter directly followed by a lowercase letter or ASCII digit,
/// * a lowercase letter directly followed by an uppercase letter, or
/// * an underscore with a lowercase letter on both sides.
fn contains_identifier(query: &str) -> bool {
    let chars: Vec<char> = query.chars().collect();

    let has_case_boundary = chars.windows(2).any(|pair| {
        let (left, right) = (pair[0], pair[1]);
        (left.is_uppercase() && (right.is_lowercase() || right.is_ascii_digit()))
            || (left.is_lowercase() && right.is_uppercase())
    });

    has_case_boundary
        || chars
            .windows(3)
            .any(|triple| triple[1] == '_' && triple[0].is_lowercase() && triple[2].is_lowercase())
}
239
240pub fn boost_kind(
242 results: &mut Vec<crate::vectordb::SearchResult>,
243 target_kind: crate::chunker::ChunkKind,
244) {
245 let boost_factor = 0.15; let target_kind_str = format!("{:?}", target_kind);
248 for result in results.iter_mut() {
249 if result.kind == target_kind_str {
250 result.score *= 1.0 + boost_factor;
251 }
252 }
253 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
255}
256
/// Builds the list of query variants that are embedded for vector search.
///
/// The first element is always the original query. Queries shorter than 4 or
/// longer than 50 bytes are returned unexpanded. Otherwise variants are added
/// depending on the query's shape:
///
/// * contains `_` and no space (function-like): `fn`, `async fn`, `pub fn`,
///   `... method`, `Function: ...`;
/// * starts uppercase and has no space (type-like): `struct`, `impl`, `enum`,
///   `class`, `Struct: ...`;
/// * a single lowercase word: `fn ...`, `... function`;
/// * up to two abbreviation expansions (`auth` -> `authentication`, ...).
///
/// At most 9 variants are returned. When `CODESEARCH_VERBOSE` is set and at
/// least one expansion was produced, a summary line is written to stderr.
fn expand_query(query: &str) -> Vec<String> {
    const MAX_FUNCTION_VARIANTS: usize = 5;
    const MAX_TYPE_VARIANTS: usize = 5;
    const MAX_CONCEPT_VARIANTS: usize = 2;
    const MAX_ABBREV_VARIANTS: usize = 2;
    const MAX_TOTAL_VARIANTS: usize = 9;
    const ABBREVIATIONS: [(&str, &str); 6] = [
        ("auth", "authentication"),
        ("config", "configuration"),
        ("db", "database"),
        ("conn", "connection"),
        ("err", "error"),
        ("msg", "message"),
    ];

    let mut variants = vec![query.to_string()];

    // Very short or very long queries are not worth expanding.
    if query.len() < 4 || query.len() > 50 {
        return variants;
    }

    let has_space = query.contains(' ');
    let has_underscore = query.contains('_');
    let first_char = query.chars().next();

    let looks_like_function = has_underscore && !has_space;
    let looks_like_type = first_char.map_or(false, char::is_uppercase) && !has_space;
    let is_single_concept =
        !has_underscore && !has_space && first_char.map_or(false, char::is_lowercase);

    if looks_like_function {
        variants.push(format!("fn {}", query));
        variants.push(format!("async fn {}", query));
        variants.push(format!("pub fn {}", query));

        // `len() - 1` is the number of expansions added so far.
        if variants.len() - 1 < MAX_FUNCTION_VARIANTS {
            variants.push(format!("{} method", query));
        }
        if variants.len() - 1 < MAX_FUNCTION_VARIANTS {
            variants.push(format!("Function: {}", query));
        }
    }

    if looks_like_type {
        variants.push(format!("struct {}", query));
        variants.push(format!("impl {}", query));
        variants.push(format!("enum {}", query));

        if variants.len() - 1 < MAX_TYPE_VARIANTS {
            variants.push(format!("class {}", query));
        }
        if variants.len() - 1 < MAX_TYPE_VARIANTS {
            variants.push(format!("Struct: {}", query));
        }
    }

    if is_single_concept {
        variants.push(format!("fn {}", query));
        if variants.len() - 1 < MAX_CONCEPT_VARIANTS {
            variants.push(format!("{} function", query));
        }
    }

    let mut abbrev_count = 0;
    for (abbr, full) in ABBREVIATIONS.iter() {
        if abbrev_count >= MAX_ABBREV_VARIANTS {
            break;
        }
        if query.contains(abbr) {
            // A plain `replace` would turn "authentication" into
            // "authenticationentication"; the helper leaves occurrences that
            // are already spelled out untouched.
            let expanded = expand_abbreviation(query, abbr, full);
            if expanded != query {
                variants.push(expanded);
                abbrev_count += 1;
            }
        }
    }

    variants.truncate(MAX_TOTAL_VARIANTS);

    if std::env::var("CODESEARCH_VERBOSE").is_ok() && variants.len() > 1 {
        eprintln!(
            "[optimization] Query expansion: {} -> {} variants (original + {} expansions)",
            query,
            variants.len(),
            variants.len() - 1
        );
    }

    variants
}

/// Replaces every occurrence of `abbr` in `query` with `full`, except where
/// the text at that position already reads `full` (that occurrence is copied
/// through unchanged).
fn expand_abbreviation(query: &str, abbr: &str, full: &str) -> String {
    if abbr.is_empty() {
        return query.to_string();
    }

    let mut expanded = String::with_capacity(query.len() + full.len());
    let mut rest = query;
    while let Some(pos) = rest.find(abbr) {
        expanded.push_str(&rest[..pos]);
        expanded.push_str(full);

        let tail = &rest[pos..];
        rest = if tail.starts_with(full) {
            // Already the long form: skip past all of it.
            &tail[full.len()..]
        } else {
            &tail[abbr.len()..]
        };
    }
    expanded.push_str(rest);
    expanded
}
392
393pub fn adapt_rrf_k(query: &str) -> (f64, f64) {
396 let has_identifiers = !detect_identifiers(query).is_empty();
397 let has_structural_intent = detect_structural_intent(query).is_some();
398
399 match (has_identifiers, has_structural_intent) {
400 (true, _) => (12.0, 28.0), (_, true) => (15.0, 25.0),
405
406 _ => (20.0, 20.0),
408 }
409}
410
411pub async fn search(query: &str, path: Option<PathBuf>, options: SearchOptions) -> Result<()> {
413 let (db_path, _project_path) = get_db_path(path)?;
414
415 if !db_path.exists() {
416 println!("{}", "❌ No database found!".red());
417 println!(" Run {} first", "codesearch index".bright_cyan());
418 println!();
419 println!(
420 "{}",
421 "💡 Tip: codesearch can find databases in parent directories. Use 'codesearch list' to see all indexed projects.".dimmed()
422 );
423 return Ok(());
424 }
425
426 let (model_type, dimensions, primary_language) =
428 if let Some(ref model_name) = options.model_override {
429 let mt = ModelType::parse(model_name).unwrap_or_default();
431 (mt, mt.dimensions(), None)
432 } else if let Some((model_name, dims, lang)) = read_metadata(&db_path) {
433 if let Some(mt) = ModelType::parse(&model_name) {
435 (mt, dims, lang)
436 } else {
437 eprintln!(
439 "{}",
440 "⚠️ Unknown model in metadata, using default".yellow()
441 );
442 (ModelType::default(), 384, None)
443 }
444 } else {
445 (ModelType::default(), 384, None)
447 };
448
449 if options.sync {
451 println!("{}", "🔄 Syncing database...".yellow());
452 sync_database(&db_path, model_type)?;
453 }
454
455 let start = Instant::now();
457 let store = VectorStore::new(&db_path, dimensions)?;
458 let load_duration = start.elapsed();
459
460 let start = Instant::now();
462 let cache_dir = crate::constants::get_global_models_cache_dir()?;
463 let mut embedding_service = EmbeddingService::with_cache_dir(model_type, Some(&cache_dir))?;
464 let model_load_duration = start.elapsed();
465
466 let query_variants = expand_query(query);
468
469 let start = Instant::now();
471 let all_query_embeddings = embedding_service.embed_queries_batch(&query_variants)?;
472
473 let embed_duration = start.elapsed();
474
475 let start = Instant::now();
477
478 let has_identifiers = !detect_identifiers(query).is_empty();
482 let retrieval_limit = if options.vector_only {
483 options.max_results
484 } else if has_identifiers {
485 std::cmp::max(options.max_results * 3, 100)
487 } else {
488 std::cmp::max(options.max_results * 5, 200)
490 };
491
492 use std::collections::BinaryHeap;
495
496 let vector_search_results: Vec<Vec<crate::vectordb::SearchResult>> = all_query_embeddings
497 .par_iter()
498 .map(|query_emb| store.search(query_emb, retrieval_limit))
499 .collect::<Result<Vec<_>>>()?;
500
501 struct HeapEntry {
504 id: u32,
505 score: f32,
506 distance: f32,
507 }
508
509 impl PartialEq for HeapEntry {
510 fn eq(&self, other: &Self) -> bool {
511 self.id == other.id
512 }
513 }
514
515 impl Eq for HeapEntry {}
516
517 impl PartialOrd for HeapEntry {
518 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
519 Some(self.cmp(other))
520 }
521 }
522
523 impl Ord for HeapEntry {
524 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
525 self.score
527 .partial_cmp(&other.score)
528 .unwrap_or(std::cmp::Ordering::Equal)
529 }
530 }
531
532 let mut top_by_id: std::collections::HashMap<u32, HeapEntry> = std::collections::HashMap::new();
534 let mut full_results_by_id: std::collections::HashMap<u32, crate::vectordb::SearchResult> =
535 std::collections::HashMap::new();
536
537 for results in vector_search_results {
538 for result in results {
539 top_by_id
540 .entry(result.id)
541 .and_modify(|e| {
542 if result.score > e.score {
543 e.score = result.score;
544 e.distance = result.distance;
545 full_results_by_id.insert(result.id, result.clone());
547 }
548 })
549 .or_insert_with(|| {
550 let entry = HeapEntry {
551 id: result.id,
552 score: result.score,
553 distance: result.distance,
554 };
555 full_results_by_id.insert(result.id, result.clone());
556 entry
557 });
558 }
559 }
560
561 let mut heap: BinaryHeap<HeapEntry> = top_by_id.into_values().collect();
563 let mut vector_results: Vec<crate::vectordb::SearchResult> =
564 Vec::with_capacity(retrieval_limit);
565
566 while let Some(entry) = heap.pop() {
567 if vector_results.len() >= retrieval_limit {
568 break;
569 }
570 if let Some(mut result) = full_results_by_id.get(&entry.id).cloned() {
571 result.score = entry.score;
572 result.distance = entry.distance;
573 vector_results.push(result);
574 }
575 }
576
577 vector_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
579
580 const HIGH_CONFIDENCE_THRESHOLD: f32 = 0.15; const EARLY_TERMINATION_TOP_N: usize = 5; let should_use_vector_only = !options.vector_only && {
587 let top_results: Vec<_> = vector_results
589 .iter()
590 .take(EARLY_TERMINATION_TOP_N.min(vector_results.len()))
591 .collect();
592
593 let all_high_confidence = top_results
594 .iter()
595 .all(|r| r.distance < HIGH_CONFIDENCE_THRESHOLD);
596
597 !top_results.is_empty() && all_high_confidence
599 };
600
601 let vector_only_mode = options.vector_only || should_use_vector_only;
603
604 if should_use_vector_only && !options.vector_only {
606 eprintln!(
607 "{}",
608 "⚡ Early termination: High-confidence results found, skipping FTS search".green()
609 );
610 }
611
612 let fused_results: Vec<FusedResult> = if vector_only_mode {
613 vector_only(&vector_results)
615 } else {
616 match FtsStore::new(&db_path) {
618 Ok(fts_store) => {
619 let identifiers = detect_identifiers(query);
621 let structural_intent = detect_structural_intent(query);
623
624 if identifiers.is_empty() {
625 let fts_results =
627 fts_store.search(query, retrieval_limit, structural_intent)?;
628 let k = options.rrf_k.unwrap_or(DEFAULT_RRF_K as usize) as f32;
629 rrf_fusion(&vector_results, &fts_results, k)
630 } else {
631 let fts_results =
633 fts_store.search(query, retrieval_limit, structural_intent)?;
634
635 let mut all_exact_results = Vec::new();
637 let mut seen_exact_ids = std::collections::HashSet::new();
638
639 for identifier in &identifiers {
640 if let Ok(exact_matches) =
641 fts_store.search_exact(identifier, retrieval_limit, structural_intent)
642 {
643 for exact_match in exact_matches {
644 if seen_exact_ids.insert(exact_match.chunk_id) {
646 all_exact_results.push(exact_match);
647 }
648 }
649 }
650 }
651
652 let (vector_k, fts_k) = adapt_rrf_k(query);
654 let k = options.rrf_k.unwrap_or(DEFAULT_RRF_K as usize) as f32;
655 let vector_k_adaptive = vector_k.min(k as f64) as f32;
657 let fts_k_adaptive = fts_k.min(k as f64) as f32;
658
659 use crate::rerank::{rrf_fusion_with_exact, EXACT_MATCH_RRF_K};
660 rrf_fusion_with_exact(
661 &vector_results,
662 &fts_results,
663 &all_exact_results,
664 vector_k_adaptive,
665 fts_k_adaptive,
666 EXACT_MATCH_RRF_K,
667 )
668 }
669 }
670 Err(_) => {
671 eprintln!(
673 "{}",
674 "⚠️ FTS index not found, using vector-only search".yellow()
675 );
676 vector_only(&vector_results)
677 }
678 }
679 };
680
681 let mut results: Vec<crate::vectordb::SearchResult> = Vec::new();
683 let chunk_id_to_result: std::collections::HashMap<u32, &crate::vectordb::SearchResult> =
684 vector_results.iter().map(|r| (r.id, r)).collect();
685
686 let should_filter_by_path = options.filter_path.is_some();
689 let filter_path_normalized = options
690 .filter_path
691 .as_ref()
692 .map(|f| f.trim_start_matches("./").to_string());
693
694 let take_multiplier = if should_filter_by_path { 3 } else { 1 };
697 let take_count = if options.rerank {
698 options
699 .rerank_top
700 .unwrap_or(options.max_results)
701 .min(fused_results.len())
702 } else {
703 options.max_results * take_multiplier
704 };
705
706 for fused in fused_results.iter().take(take_count) {
707 if let Some(result) = chunk_id_to_result.get(&fused.chunk_id) {
708 if should_filter_by_path {
710 if let Some(ref filter) = filter_path_normalized {
711 let path_normalized = result.path.trim_start_matches("./");
712 if !path_normalized.starts_with(filter) {
713 continue;
714 }
715 }
716 }
717
718 let mut r = (*result).clone();
720 r.score = fused.rrf_score;
721 results.push(r);
722 } else {
723 if let Ok(Some(mut result)) = store.get_chunk_as_result(fused.chunk_id) {
725 if should_filter_by_path {
727 if let Some(ref filter) = filter_path_normalized {
728 let path_normalized = result.path.trim_start_matches("./");
729 if !path_normalized.starts_with(filter) {
730 continue;
731 }
732 }
733 }
734
735 result.score = fused.rrf_score;
736 results.push(result);
737 }
738 }
739 }
740
741 if should_filter_by_path {
743 let candidates_processed = take_count;
744 let results_after_filtering = results.len();
745 let filtered_out = candidates_processed.saturating_sub(results_after_filtering);
746 eprintln!(
747 "{}",
748 format!(
749 "🔍 Path filter '{}': {} candidates → {} results ({} filtered out)",
750 filter_path_normalized.as_ref().unwrap_or(&"".to_string()),
751 candidates_processed,
752 results_after_filtering,
753 filtered_out
754 )
755 .blue()
756 );
757 }
758
759 if let Some(ref lang) = primary_language {
762 use crate::file::Language;
763 let lang_boost = 0.2; for result in results.iter_mut() {
765 let file_lang = format!(
767 "{:?}",
768 Language::from_path(std::path::Path::new(&result.path))
769 );
770 if file_lang == *lang {
771 result.score *= 1.0 + lang_boost;
772 }
773 }
774 results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
776 }
777
778 if let Some(intent) = detect_structural_intent(query) {
780 boost_kind(&mut results, intent);
781 }
782
783 let identifiers = detect_identifiers(query);
785 if !identifiers.is_empty() && results.is_empty() {
786 eprintln!(
787 "{}",
788 format!(
789 "❓ No exact matches found for identifiers: {}",
790 identifiers.join(", ")
791 )
792 .yellow()
793 );
794 eprintln!("{}", " Try using broader search terms or running `codesearch index --sync` if the codebase changed.".dimmed());
795 }
796
797 let search_duration = start.elapsed();
798
799 let mut rerank_duration = Duration::ZERO;
801 if options.rerank && !results.is_empty() {
802 let start = Instant::now();
803
804 match NeuralReranker::new() {
806 Ok(mut reranker) => {
807 let documents: Vec<String> = results.iter().map(|r| r.content.clone()).collect();
809 let rrf_scores: Vec<f32> = results.iter().map(|r| r.score).collect();
810
811 match reranker.rerank_and_blend(query, &documents, &rrf_scores) {
813 Ok(reranked) => {
814 let mut reordered: Vec<crate::vectordb::SearchResult> =
816 Vec::with_capacity(results.len());
817 for (idx, score) in reranked {
818 let mut result = results[idx].clone();
819 result.score = score;
820 reordered.push(result);
821 }
822 results = reordered;
823 println!("{}", "✅ Neural reranking applied".green());
824 }
825 Err(e) => {
826 eprintln!("{}", format!("⚠️ Reranking failed: {}", e).yellow());
827 }
828 }
829 }
830 Err(e) => {
831 eprintln!("{}", format!("⚠️ Could not load reranker: {}", e).yellow());
832 }
833 }
834
835 rerank_duration = start.elapsed();
836 }
837
838 if let Some(ref filter) = options.filter_path {
840 let filter_normalized = filter.trim_start_matches("./");
841 results.retain(|r| {
842 let path_normalized = r.path.trim_start_matches("./");
843 path_normalized.starts_with(filter_normalized)
844 });
845 }
846
847 results.truncate(options.max_results);
849
850 if options.json {
852 let json_results: Vec<JsonResult> = results
853 .iter()
854 .map(|r| JsonResult {
855 path: r.path.clone(),
856 start_line: r.start_line,
857 end_line: r.end_line,
858 kind: r.kind.clone(),
859 content: r.content.clone(),
860 score: r.score,
861 signature: r.signature.clone(),
862 context_prev: r.context_prev.clone(),
863 context_next: r.context_next.clone(),
864 })
865 .collect();
866
867 let timing = if options.show_scores {
868 Some(JsonTiming {
869 total_ms: (load_duration
870 + model_load_duration
871 + embed_duration
872 + search_duration
873 + rerank_duration)
874 .as_millis() as u64,
875 embed_ms: embed_duration.as_millis() as u64,
876 search_ms: search_duration.as_millis() as u64,
877 rerank_ms: if options.rerank {
878 Some(rerank_duration.as_millis() as u64)
879 } else {
880 None
881 },
882 })
883 } else {
884 None
885 };
886
887 let output = JsonOutput {
888 query: query.to_string(),
889 results: json_results,
890 timing,
891 };
892
893 println!("{}", serde_json::to_string(&output)?);
894 return Ok(());
895 }
896
897 if options.compact {
898 let mut seen_files = std::collections::HashSet::new();
900 for result in &results {
901 if !seen_files.contains(&result.path) {
902 println!("{}", result.path);
903 seen_files.insert(result.path.clone());
904 }
905 }
906 return Ok(());
907 }
908
909 println!("{}", "🔍 Search Results".bright_cyan().bold());
911 println!("{}", "=".repeat(60));
912 println!("Query: \"{}\"", query.bright_yellow());
913 println!("Found {} results", results.len());
914 println!();
915
916 if options.show_scores {
917 println!("Timing:");
918 println!(" Database load: {:?}", load_duration);
919 println!(" Model load: {:?}", model_load_duration);
920 println!(" Query embed: {:?}", embed_duration);
921 println!(" Search: {:?}", search_duration);
922 if options.rerank {
923 println!(" Reranking: {:?}", rerank_duration);
924 }
925 println!(
926 " Total: {:?}",
927 load_duration
928 + model_load_duration
929 + embed_duration
930 + search_duration
931 + rerank_duration
932 );
933 println!();
934 }
935
936 if results.is_empty() {
938 println!("{}", "No matches found.".dimmed());
939 println!("Try:");
940 println!(" - Using different keywords");
941 println!(" - Making your query more general");
942 println!(
943 " - Running {} if the codebase changed",
944 "codesearch index --force".bright_cyan()
945 );
946 return Ok(());
947 }
948
949 if let Some(per_file) = options.per_file {
951 if per_file > 0 && per_file < options.max_results {
952 let mut by_file: std::collections::HashMap<String, Vec<_>> =
953 std::collections::HashMap::new();
954
955 for result in results {
956 by_file.entry(result.path.clone()).or_default().push(result);
957 }
958
959 let mut files: Vec<_> = by_file.into_iter().collect();
960 files.sort_by(|a, b| {
961 b.1.iter()
962 .map(|r| r.score)
963 .fold(0.0f32, f32::max)
964 .partial_cmp(&a.1.iter().map(|r| r.score).fold(0.0f32, f32::max))
965 .unwrap()
966 });
967
968 for (_file_path, mut file_results) in files {
969 file_results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
970 file_results.truncate(per_file);
971
972 for (idx, result) in file_results.iter().enumerate() {
973 print_result(
974 result,
975 idx == 0,
976 options.content_lines > 0,
977 options.show_scores,
978 )?;
979 }
980 }
981 } else {
982 for result in &results {
984 print_result(result, true, options.content_lines > 0, options.show_scores)?;
985 }
986 }
987 } else {
988 for result in &results {
990 print_result(result, true, options.content_lines > 0, options.show_scores)?;
991 }
992 }
993
994 Ok(())
995}
996
997fn sync_database(db_path: &Path, model_type: ModelType) -> Result<()> {
999 let project_path = db_path.parent().unwrap_or(std::path::Path::new("."));
1000
1001 let mut file_meta =
1003 FileMetaStore::load_or_create(db_path, model_type.short_name(), model_type.dimensions())?;
1004
1005 let walker = FileWalker::new(project_path.to_path_buf());
1007 let (files, _stats) = walker.walk()?;
1008
1009 let cache_dir = crate::constants::get_global_models_cache_dir()?;
1011 let mut embedding_service = EmbeddingService::with_cache_dir(model_type, Some(&cache_dir))?;
1012 let mut chunker = SemanticChunker::new(100, 2000, 10);
1013 let mut store = VectorStore::new(db_path, model_type.dimensions())?;
1014
1015 let mut changes = 0;
1016
1017 for file in &files {
1019 let (needs_reindex, old_chunk_ids) = file_meta.check_file(&file.path)?;
1020
1021 if !needs_reindex {
1022 continue;
1023 }
1024
1025 changes += 1;
1026 println!(" 📝 {}", file.path.display());
1027
1028 if !old_chunk_ids.is_empty() {
1030 store.delete_chunks(&old_chunk_ids)?;
1031 }
1032
1033 let source_code = match std::fs::read_to_string(&file.path) {
1035 Ok(content) => content,
1036 Err(_) => continue,
1037 };
1038
1039 let chunks = chunker.chunk_semantic(file.language, &file.path, &source_code)?;
1040
1041 if chunks.is_empty() {
1042 file_meta.update_file(&file.path, vec![])?;
1043 continue;
1044 }
1045
1046 let embedded_chunks = embedding_service.embed_chunks(chunks)?;
1048 let chunk_ids = store.insert_chunks_with_ids(embedded_chunks)?;
1049 file_meta.update_file(&file.path, chunk_ids)?;
1050 }
1051
1052 let deleted_files = file_meta.find_deleted_files();
1054 for (path, chunk_ids) in &deleted_files {
1055 changes += 1;
1056 println!(" 🗑️ {} (deleted)", path);
1057 if !chunk_ids.is_empty() {
1058 store.delete_chunks(chunk_ids)?;
1059 }
1060 file_meta.remove_file(std::path::Path::new(path));
1061 }
1062
1063 if changes > 0 {
1065 println!(" 🔨 Rebuilding index...");
1066 store.build_index()?;
1067 file_meta.save(db_path)?;
1068 println!(" ✅ {} file(s) synced", changes);
1069 } else {
1070 println!(" ✅ Already up to date");
1071 }
1072
1073 Ok(())
1074}
1075
1076fn print_result(
1077 result: &crate::vectordb::SearchResult,
1078 show_file: bool,
1079 show_content: bool,
1080 show_scores: bool,
1081) -> Result<()> {
1082 if show_file {
1083 println!("{}", "─".repeat(60));
1084 let file_display = format!("📄 {}", result.path);
1085 println!("{}", file_display.bright_green());
1086 }
1087
1088 let location = format!(
1090 " Lines {}-{} • {}",
1091 result.start_line, result.end_line, result.kind
1092 );
1093 println!("{}", location.dimmed());
1094
1095 if let Some(sig) = &result.signature {
1097 println!(" {}", sig.bright_cyan());
1098 }
1099
1100 if show_scores {
1102 let score_color = if result.score > 0.8 {
1103 "green"
1104 } else if result.score > 0.6 {
1105 "yellow"
1106 } else {
1107 "red"
1108 };
1109
1110 let score_text = format!(" Score: {:.3}", result.score);
1111 println!(
1112 "{}",
1113 match score_color {
1114 "green" => score_text.green(),
1115 "yellow" => score_text.yellow(),
1116 _ => score_text.red(),
1117 }
1118 );
1119 }
1120
1121 if let Some(ctx) = &result.context {
1123 println!(" Context: {}", ctx.dimmed());
1124 }
1125
1126 if show_content {
1128 if let Some(ctx_prev) = &result.context_prev {
1130 println!("\n {}:", "Context (before)".dimmed());
1131 for line in ctx_prev.lines() {
1132 println!(" │ {}", line.bright_black());
1133 }
1134 }
1135
1136 println!("\n {}:", "Content".bright_yellow());
1137 for line in result.content.lines().take(10) {
1138 println!(" │ {}", line.dimmed());
1139 }
1140 if result.content.lines().count() > 10 {
1141 println!(" │ {}", "...".dimmed());
1142 }
1143
1144 if let Some(ctx_next) = &result.context_next {
1146 println!("\n {}:", "Context (after)".dimmed());
1147 for line in ctx_next.lines() {
1148 println!(" │ {}", line.bright_black());
1149 }
1150 }
1151 } else {
1152 let snippet: String = result.content.lines().take(3).collect::<Vec<_>>().join(" ");
1154
1155 let snippet = if snippet.len() > 100 {
1156 format!("{}...", &snippet[..100])
1157 } else {
1158 snippet
1159 };
1160
1161 println!(" {}", snippet.dimmed());
1162 }
1163
1164 println!();
1165
1166 Ok(())
1167}