1use std::collections::HashSet;
2use std::path::Path;
3
4use crate::core::bm25_index::{format_search_results, BM25Index};
5use crate::core::embedding_index::EmbeddingIndex;
6#[cfg(feature = "embeddings")]
7use crate::core::embeddings::EmbeddingEngine;
8use crate::core::hybrid_search::{format_hybrid_results, HybridConfig, HybridResult};
9use crate::tools::CrpMode;
10
11#[allow(clippy::too_many_arguments)]
13pub fn handle(
14 query: &str,
15 path: &str,
16 top_k: usize,
17 crp_mode: CrpMode,
18 languages: Option<&[String]>,
19 path_glob: Option<&str>,
20 mode: Option<&str>,
21 workspace: Option<bool>,
22 artifacts: Option<bool>,
23) -> String {
24 let root = Path::new(path);
25 if !root.exists() {
26 return format!("ERR: path does not exist: {path}");
27 }
28
29 let root = if root.is_file() {
30 root.parent().unwrap_or(root)
31 } else {
32 root
33 };
34
35 let filter = match SearchFilter::new(languages, path_glob) {
36 Ok(f) => f,
37 Err(e) => return format!("ERR: invalid filter: {e}"),
38 };
39
40 let compact = crp_mode.is_tdd();
41 let mode = mode.unwrap_or("hybrid").to_lowercase();
42 let workspace = workspace.unwrap_or(false);
43 let artifacts = artifacts.unwrap_or(false);
44
45 if artifacts {
46 return artifacts_search(query, root, top_k, compact, &filter, workspace);
47 }
48 if workspace {
49 return workspace_search(query, root, top_k, compact, &filter, &mode);
50 }
51
52 let index = match load_or_refresh_bm25(root) {
53 Bm25LoadResult::Ready(idx) => idx,
54 Bm25LoadResult::Building => {
55 return "BM25 index is being built in the background. \
56 Run ctx_semantic_search again in ~30s, or use action=reindex to wait for completion."
57 .to_string();
58 }
59 };
60 if index.doc_count == 0 {
61 return "No code files found to index.".to_string();
62 }
63
64 match mode.as_str() {
65 "bm25" => {
66 let mut results = index.search(query, filtered_candidate_k(top_k, filter.is_active()));
67 if filter.is_active() {
68 results.retain(|x| filter.matches(&x.file_path));
69 }
70 results.truncate(top_k);
71
72 let header = if compact {
73 format!(
74 "semantic_search(bm25,{top_k}) → {} results, {} chunks indexed\n",
75 results.len(),
76 index.doc_count
77 )
78 } else {
79 format!(
80 "Semantic search (BM25): \"{}\" ({} results from {} indexed chunks)\n",
81 truncate_query(query, 60),
82 results.len(),
83 index.doc_count,
84 )
85 };
86 format!("{header}{}", format_search_results(&results, compact))
87 }
88 "dense" => dense_search_mode(query, root, &index, top_k, compact, &filter),
89 _ => hybrid_search_mode(query, root, &index, top_k, compact, &filter),
90 }
91}
92
93pub fn handle_reindex(path: &str) -> String {
95 let root = Path::new(path);
96 if !root.exists() {
97 return format!("ERR: path does not exist: {path}");
98 }
99 let root = if root.is_file() {
100 root.parent().unwrap_or(root)
101 } else {
102 root
103 };
104
105 let idx = BM25Index::build_from_directory(root);
106 let files = idx.files.len();
107 let chunks = idx.doc_count;
108 let _ = idx.save(root);
109
110 format!("Reindexed {path}: {files} files, {chunks} chunks")
111}
112
113pub fn handle_reindex_artifacts(path: &str, workspace: bool) -> String {
114 let root = Path::new(path);
115 if !root.exists() {
116 return format!("ERR: path does not exist: {path}");
117 }
118 let root = if root.is_file() {
119 root.parent().unwrap_or(root)
120 } else {
121 root
122 };
123
124 let mut roots: Vec<std::path::PathBuf> = vec![root.to_path_buf()];
125 let mut warnings: Vec<String> = Vec::new();
126
127 if workspace {
128 let linked = crate::core::workspace_config::load_linked_projects(root);
129 warnings.extend(linked.warnings);
130 roots.extend(linked.roots);
131 }
132
133 let mut total_files = 0usize;
134 let mut total_chunks = 0usize;
135 for r in roots {
136 let (idx, w) = crate::core::artifact_index::rebuild_from_scratch(&r);
137 warnings.extend(w);
138 total_files += idx.files.len();
139 total_chunks += idx.doc_count;
140 }
141
142 if warnings.is_empty() {
143 format!("Reindexed artifacts: {total_files} files, {total_chunks} chunks")
144 } else {
145 format!(
146 "Reindexed artifacts: {total_files} files, {total_chunks} chunks ({} warning(s))",
147 warnings.len()
148 )
149 }
150}
151
152pub fn handle_find_related(
157 file_path: &str,
158 line: usize,
159 project_root: &str,
160 top_k: usize,
161 crp_mode: CrpMode,
162) -> String {
163 let root = Path::new(project_root);
164 if !root.exists() {
165 return format!("ERR: path does not exist: {project_root}");
166 }
167
168 let index = BM25Index::load_or_build(root);
169 if index.doc_count == 0 {
170 return "ERR: empty index. Try action=reindex first.".to_string();
171 }
172
173 let source_chunk = index
174 .chunks
175 .iter()
176 .find(|c| c.file_path == file_path && c.start_line <= line && c.end_line >= line);
177
178 let Some(source_chunk) = source_chunk else {
179 return format!(
180 "ERR: no indexed chunk found at {file_path}:{line}. Try action=reindex first."
181 );
182 };
183
184 let query_text = source_chunk.content.clone();
185 let source_file = source_chunk.file_path.clone();
186 let source_start = source_chunk.start_line;
187
188 let compact = crp_mode != CrpMode::Off;
189
190 let results = find_related_internal(&query_text, root, &index, top_k + 5, compact);
191
192 let mut lines: Vec<String> = results
193 .into_iter()
194 .filter(|l| !l.contains(&format!("{source_file}:{source_start}-")))
195 .take(top_k)
196 .collect();
197
198 let header = if compact {
199 format!(
200 "find_related({file_path}:{line}) → {} results\n",
201 lines.len()
202 )
203 } else {
204 format!("Find related to {file_path}:{line} (semantic similarity)\n")
205 };
206
207 lines.insert(0, header);
208 lines.join("")
209}
210
211fn find_related_internal(
212 query: &str,
213 root: &Path,
214 index: &BM25Index,
215 top_k: usize,
216 compact: bool,
217) -> Vec<String> {
218 let Ok(filter) = SearchFilter::new(None, None) else {
219 return vec!["ERR: filter init failed\n".to_string()];
220 };
221 let output = hybrid_search_mode(query, root, index, top_k, compact, &filter);
222 output.lines().map(|l| format!("{l}\n")).collect()
223}
224
/// Returns at most the first `max` characters of `q`, always cutting on a character
/// boundary.
///
/// Strings whose byte length is already within `max` are returned unchanged without
/// scanning.
fn truncate_query(q: &str, max: usize) -> &str {
    if q.len() > max {
        // Byte offset of the character at index `max`, i.e. the end of the first `max` chars.
        if let Some((cut, _)) = q.char_indices().nth(max) {
            return &q[..cut];
        }
    }
    q
}
234
235std::thread_local! {
236 static BM25_SHARED_CACHE: std::cell::RefCell<Option<crate::core::bm25_cache::SharedBm25Cache>> =
237 const { std::cell::RefCell::new(None) };
238}
239
240pub fn set_thread_cache(cache: crate::core::bm25_cache::SharedBm25Cache) {
242 BM25_SHARED_CACHE.with(|c| {
243 *c.borrow_mut() = Some(cache);
244 });
245}
246
247pub fn get_thread_cache() -> Option<crate::core::bm25_cache::SharedBm25Cache> {
251 BM25_SHARED_CACHE.with(|c| c.borrow().clone())
252}
253
254pub(crate) enum Bm25LoadResult {
256 Ready(std::sync::Arc<BM25Index>),
257 Building,
258}
259
260fn load_or_refresh_bm25(root: &Path) -> Bm25LoadResult {
261 let cached = BM25_SHARED_CACHE.with(|c| {
262 let borrow = c.borrow();
263 borrow
264 .as_ref()
265 .and_then(|cache| crate::core::bm25_cache::get_or_background(cache, root))
266 });
267 if let Some(idx) = cached {
268 return Bm25LoadResult::Ready(idx);
269 }
270
271 let root_str = root.to_string_lossy().to_string();
272
273 if let Some(idx) = crate::core::index_orchestrator::try_load_bm25_index(&root_str) {
274 let idx = std::sync::Arc::new(idx);
275 store_in_thread_cache(root, &idx);
276 return Bm25LoadResult::Ready(idx);
277 }
278
279 if crate::core::index_orchestrator::is_building() {
280 return Bm25LoadResult::Building;
281 }
282
283 crate::core::index_orchestrator::ensure_all_background(&root_str);
284
285 let idx = std::sync::Arc::new(BM25Index::load_or_build(root));
286 store_in_thread_cache(root, &idx);
287 Bm25LoadResult::Ready(idx)
288}
289
290fn store_in_thread_cache(root: &Path, idx: &std::sync::Arc<BM25Index>) {
291 BM25_SHARED_CACHE.with(|c| {
292 let borrow = c.borrow();
293 if let Some(cache) = borrow.as_ref() {
294 let mut guard = cache
295 .lock()
296 .unwrap_or_else(std::sync::PoisonError::into_inner);
297 *guard = Some(crate::core::bm25_cache::Bm25CacheEntry {
298 root: root.to_path_buf(),
299 index: std::sync::Arc::clone(idx),
300 loaded_at: std::time::Instant::now(),
301 fingerprint: crate::core::bm25_cache::index_fingerprint(root),
302 });
303 }
304 });
305}
306
/// Number of candidates to fetch from an index so that up to `top_k` results survive
/// post-filtering.
///
/// Without a filter this is simply `top_k`. With a filter the search over-fetches ten
/// times `top_k`, kept within `100..=500`, but never fewer than `top_k` itself: an active
/// filter must not yield fewer candidates than the unfiltered search would return.
fn filtered_candidate_k(top_k: usize, filtered: bool) -> usize {
    if !filtered {
        return top_k;
    }
    let overfetch = top_k.saturating_mul(10).clamp(100, 500);
    // For `top_k > 500` the cap alone would silently shrink the result set.
    overfetch.max(top_k)
}
314
/// Constant `k` in the reciprocal-rank term `1 / (k + rank + 1)` used when merging the
/// per-project result lists of a workspace search.
const WORKSPACE_RRF_K: f64 = 60.0;
316
317fn artifacts_search(
318 query: &str,
319 root: &Path,
320 top_k: usize,
321 compact: bool,
322 filter: &SearchFilter,
323 workspace: bool,
324) -> String {
325 let mut roots: Vec<std::path::PathBuf> = vec![root.to_path_buf()];
326 let mut warnings: Vec<String> = Vec::new();
327
328 if workspace {
329 let linked = crate::core::workspace_config::load_linked_projects(root);
330 warnings.extend(linked.warnings);
331 roots.extend(linked.roots);
332 }
333 roots.sort();
334 roots.dedup();
335
336 let mut per_project: Vec<(String, Vec<crate::core::bm25_index::SearchResult>)> = Vec::new();
337 let mut total_chunks = 0usize;
338
339 for r in &roots {
340 let label = label_for_root(r);
341 let (idx, w) = crate::core::artifact_index::load_or_build(r);
342 warnings.extend(w);
343 total_chunks += idx.doc_count;
344 if idx.doc_count == 0 {
345 continue;
346 }
347
348 let mut results = idx.search(query, filtered_candidate_k(top_k, filter.is_active()));
349 if filter.is_active() {
350 results.retain(|x| filter.matches(&x.file_path));
351 }
352 results.truncate(top_k);
353
354 for res in &mut results {
355 res.file_path = if workspace {
356 format!("[project:{label}] [artifact] {}", res.file_path)
357 } else {
358 format!("[artifact] {}", res.file_path)
359 };
360 }
361
362 per_project.push((label, results));
363 }
364
365 let mut fused: Vec<crate::core::bm25_index::SearchResult> = if per_project.len() <= 1 {
366 per_project
367 .into_iter()
368 .next()
369 .map(|(_, v)| v)
370 .unwrap_or_default()
371 } else {
372 rrf_merge_bm25(per_project, top_k)
373 };
374
375 if fused.is_empty() {
376 return "No artifact files found to index.".to_string();
377 }
378
379 fused.truncate(top_k);
380
381 let header = if compact {
382 if workspace {
383 format!(
384 "semantic_search(artifacts,workspace,{top_k}) → {} results, projects={}, {} chunks indexed\n",
385 fused.len(),
386 roots.len(),
387 total_chunks
388 )
389 } else {
390 format!(
391 "semantic_search(artifacts,{top_k}) → {} results, {} chunks indexed\n",
392 fused.len(),
393 total_chunks
394 )
395 }
396 } else if workspace {
397 format!(
398 "Semantic search (Artifacts/Workspace): \"{}\" ({} results from {} projects)\n",
399 truncate_query(query, 60),
400 fused.len(),
401 roots.len()
402 )
403 } else {
404 format!(
405 "Semantic search (Artifacts): \"{}\" ({} results)\n",
406 truncate_query(query, 60),
407 fused.len()
408 )
409 };
410
411 let mut out = format!("{header}{}", format_search_results(&fused, compact));
412 if !warnings.is_empty() && !compact {
413 out.push_str(&format!("\nWarnings ({}):\n", warnings.len()));
414 for w in warnings.iter().take(20) {
415 out.push_str(&format!("- {w}\n"));
416 }
417 }
418 out
419}
420
421fn workspace_search(
422 query: &str,
423 root: &Path,
424 top_k: usize,
425 compact: bool,
426 filter: &SearchFilter,
427 mode: &str,
428) -> String {
429 let linked = crate::core::workspace_config::load_linked_projects(root);
430 let mut warnings = linked.warnings;
431
432 let mut roots: Vec<std::path::PathBuf> = vec![root.to_path_buf()];
433 roots.extend(linked.roots);
434 roots.sort();
435 roots.dedup();
436
437 let mut per_project: Vec<(String, Vec<HybridResult>)> = Vec::new();
438 let mut avg_cov: Option<f64> = None;
439 let mut cov_count = 0usize;
440
441 for r in &roots {
442 let label = label_for_root(r);
443 let index = BM25Index::load_or_build(r);
444 if index.doc_count == 0 {
445 continue;
446 }
447
448 let mut results: Vec<HybridResult> = match mode {
449 "bm25" => {
450 let mut bm25 = index.search(query, filtered_candidate_k(top_k, filter.is_active()));
451 if filter.is_active() {
452 bm25.retain(|x| filter.matches(&x.file_path));
453 }
454 bm25.truncate(top_k);
455 bm25.into_iter()
456 .map(HybridResult::from_bm25_public)
457 .collect()
458 }
459 "dense" => {
460 #[cfg(feature = "embeddings")]
461 {
462 match dense_results_for_root(query, r, &index, top_k, filter) {
463 Ok((v, cov)) => {
464 avg_cov = Some(avg_cov.unwrap_or(0.0) + cov);
465 cov_count += 1;
466 v
467 }
468 Err(e) => {
469 warnings.push(format!("[{label}] dense search failed: {e}"));
470 let mut bm25 = index
471 .search(query, filtered_candidate_k(top_k, filter.is_active()));
472 if filter.is_active() {
473 bm25.retain(|x| filter.matches(&x.file_path));
474 }
475 bm25.truncate(top_k);
476 bm25.into_iter()
477 .map(HybridResult::from_bm25_public)
478 .collect()
479 }
480 }
481 }
482 #[cfg(not(feature = "embeddings"))]
483 {
484 let _ = (&label, &warnings);
485 let mut bm25 =
486 index.search(query, filtered_candidate_k(top_k, filter.is_active()));
487 if filter.is_active() {
488 bm25.retain(|x| filter.matches(&x.file_path));
489 }
490 bm25.truncate(top_k);
491 bm25.into_iter()
492 .map(HybridResult::from_bm25_public)
493 .collect()
494 }
495 }
496 _ => {
497 #[cfg(feature = "embeddings")]
498 {
499 match hybrid_results_for_root(query, r, &index, top_k, filter) {
500 Ok((v, cov)) => {
501 avg_cov = Some(avg_cov.unwrap_or(0.0) + cov);
502 cov_count += 1;
503 v
504 }
505 Err(e) => {
506 warnings.push(format!("[{label}] hybrid search failed: {e}"));
507 let mut bm25 = index
508 .search(query, filtered_candidate_k(top_k, filter.is_active()));
509 if filter.is_active() {
510 bm25.retain(|x| filter.matches(&x.file_path));
511 }
512 bm25.truncate(top_k);
513 bm25.into_iter()
514 .map(HybridResult::from_bm25_public)
515 .collect()
516 }
517 }
518 }
519 #[cfg(not(feature = "embeddings"))]
520 {
521 let _ = (&label, &warnings);
522 let mut bm25 =
523 index.search(query, filtered_candidate_k(top_k, filter.is_active()));
524 if filter.is_active() {
525 bm25.retain(|x| filter.matches(&x.file_path));
526 }
527 bm25.truncate(top_k);
528 bm25.into_iter()
529 .map(HybridResult::from_bm25_public)
530 .collect()
531 }
532 }
533 };
534
535 for res in &mut results {
536 res.file_path = format!("[project:{label}] {}", res.file_path);
537 }
538 per_project.push((label, results));
539 }
540
541 let mut fused: Vec<HybridResult> = if per_project.len() <= 1 {
542 per_project
543 .into_iter()
544 .next()
545 .map(|(_, v)| v)
546 .unwrap_or_default()
547 } else {
548 rrf_merge_hybrid(per_project, top_k)
549 };
550
551 if fused.is_empty() {
552 return "No code files found to index.".to_string();
553 }
554
555 fused.truncate(top_k);
556 let cov = avg_cov.and_then(|s| {
557 if cov_count == 0 {
558 None
559 } else {
560 Some(s / cov_count as f64)
561 }
562 });
563
564 let header = if compact {
565 match (mode, cov) {
566 (_, Some(c)) => format!(
567 "semantic_search(workspace,{mode},{top_k}) → {} results, projects={}, embed_cov={:.0}%\n",
568 fused.len(),
569 roots.len(),
570 c * 100.0
571 ),
572 _ => format!(
573 "semantic_search(workspace,{mode},{top_k}) → {} results, projects={}\n",
574 fused.len(),
575 roots.len()
576 ),
577 }
578 } else {
579 format!(
580 "Workspace semantic search ({mode}): \"{}\" ({} results from {} projects)\n",
581 truncate_query(query, 60),
582 fused.len(),
583 roots.len()
584 )
585 };
586
587 let mut out = format!("{header}{}", format_hybrid_results(&fused, compact));
588 if !warnings.is_empty() && !compact {
589 out.push_str(&format!("\nWarnings ({}):\n", warnings.len()));
590 for w in warnings.iter().take(20) {
591 out.push_str(&format!("- {w}\n"));
592 }
593 }
594 out
595}
596
597fn rrf_merge_hybrid(lists: Vec<(String, Vec<HybridResult>)>, top_k: usize) -> Vec<HybridResult> {
598 use std::collections::HashMap;
599
600 let mut acc: HashMap<String, (HybridResult, f64)> = HashMap::new();
601 for (label, results) in lists {
602 for (rank, r) in results.into_iter().enumerate() {
603 let key = format!(
604 "{label}|{}|{}|{}|{}",
605 r.file_path, r.symbol_name, r.start_line, r.end_line
606 );
607 let rrf = 1.0 / (WORKSPACE_RRF_K + (rank as f64) + 1.0);
608 acc.entry(key)
609 .and_modify(|(_, s)| *s += rrf)
610 .or_insert((r, rrf));
611 }
612 }
613
614 let mut out: Vec<HybridResult> = acc
615 .into_values()
616 .map(|(mut r, s)| {
617 r.rrf_score = s;
618 r
619 })
620 .collect();
621 out.sort_by(|a, b| {
622 b.rrf_score
623 .partial_cmp(&a.rrf_score)
624 .unwrap_or(std::cmp::Ordering::Equal)
625 .then_with(|| a.file_path.cmp(&b.file_path))
626 .then_with(|| a.symbol_name.cmp(&b.symbol_name))
627 .then_with(|| a.start_line.cmp(&b.start_line))
628 .then_with(|| a.end_line.cmp(&b.end_line))
629 });
630 out.truncate(top_k);
631 out
632}
633
634fn rrf_merge_bm25(
635 lists: Vec<(String, Vec<crate::core::bm25_index::SearchResult>)>,
636 top_k: usize,
637) -> Vec<crate::core::bm25_index::SearchResult> {
638 use std::collections::HashMap;
639
640 let mut acc: HashMap<String, (crate::core::bm25_index::SearchResult, f64)> = HashMap::new();
641 for (label, results) in lists {
642 for (rank, r) in results.into_iter().enumerate() {
643 let key = format!(
644 "{label}|{}|{}|{}|{}",
645 r.file_path, r.symbol_name, r.start_line, r.end_line
646 );
647 let rrf = 1.0 / (WORKSPACE_RRF_K + (rank as f64) + 1.0);
648 acc.entry(key)
649 .and_modify(|(_, s)| *s += rrf)
650 .or_insert((r, rrf));
651 }
652 }
653
654 let mut out: Vec<crate::core::bm25_index::SearchResult> = acc
655 .into_values()
656 .map(|(mut r, s)| {
657 r.score = s;
658 r
659 })
660 .collect();
661 out.sort_by(|a, b| {
662 b.score
663 .partial_cmp(&a.score)
664 .unwrap_or(std::cmp::Ordering::Equal)
665 .then_with(|| a.file_path.cmp(&b.file_path))
666 .then_with(|| a.symbol_name.cmp(&b.symbol_name))
667 .then_with(|| a.start_line.cmp(&b.start_line))
668 .then_with(|| a.end_line.cmp(&b.end_line))
669 });
670 out.truncate(top_k);
671 out
672}
673
/// Runs a dense (embedding-based) search for `query` in the project at `root`.
///
/// Ensures embeddings are up to date for `index`, queries the dense backend selected from
/// the environment, and returns at most `top_k` results together with the embedding
/// coverage reported by `ensure_embeddings`.
///
/// # Errors
/// Returns the error message of whichever step failed: engine/index loading, embedding
/// refresh, backend selection, or the backend query.
#[cfg(feature = "embeddings")]
fn dense_results_for_root(
    query: &str,
    root: &Path,
    index: &BM25Index,
    top_k: usize,
    filter: &SearchFilter,
) -> Result<(Vec<HybridResult>, f64), String> {
    let (engine, mut embed_idx) = load_engine_and_index(root)?;
    let (aligned, coverage, changed_files) =
        ensure_embeddings(root, index, engine, &mut embed_idx)?;
    let backend = crate::core::dense_backend::DenseBackendKind::try_from_env()?;

    // Hand the backend a path predicate only when a filter is actually configured.
    let path_allowed = |p: &str| filter.matches(p);
    let filter_active = filter.is_active();
    let filter_pred: Option<&dyn Fn(&str) -> bool> = if filter_active {
        Some(&path_allowed as &dyn Fn(&str) -> bool)
    } else {
        None
    };

    let mut hits = crate::core::dense_backend::dense_results_as_hybrid(
        backend,
        root,
        index,
        engine,
        &aligned,
        &changed_files,
        query,
        filtered_candidate_k(top_k, filter_active),
        filter_pred,
    )?;
    hits.truncate(top_k);

    Ok((hits, coverage))
}
708
/// Runs a hybrid search for `query` in the project at `root`.
///
/// Ensures embeddings are up to date for `index`, queries the hybrid backend (passing
/// graph-neighbour ranks when available), optionally boosts the results with SPLADE hits
/// when `splade_weight` is positive, and returns at most `top_k` results together with the
/// embedding coverage reported by `ensure_embeddings`.
///
/// # Errors
/// Returns the error message of whichever step failed: engine/index loading, embedding
/// refresh, backend selection, or the backend query.
#[cfg(feature = "embeddings")]
fn hybrid_results_for_root(
    query: &str,
    root: &Path,
    index: &BM25Index,
    top_k: usize,
    filter: &SearchFilter,
) -> Result<(Vec<HybridResult>, f64), String> {
    let (engine, mut embed_idx) = load_engine_and_index(root)?;
    let (aligned, coverage, changed_files) =
        ensure_embeddings(root, index, engine, &mut embed_idx)?;
    let backend = crate::core::dense_backend::DenseBackendKind::try_from_env()?;
    let cfg = HybridConfig::from_config();

    // Hand the backend a path predicate only when a filter is actually configured.
    let path_allowed = |p: &str| filter.matches(p);
    let filter_active = filter.is_active();
    let filter_pred: Option<&dyn Fn(&str) -> bool> = if filter_active {
        Some(&path_allowed as &dyn Fn(&str) -> bool)
    } else {
        None
    };
    let candidate_k = filtered_candidate_k(top_k, filter_active);
    let graph_ranks = graph_rrf_ranks_for_search_root(root);

    let mut hits = crate::core::dense_backend::hybrid_results(
        backend,
        root,
        index,
        engine,
        &aligned,
        &changed_files,
        query,
        candidate_k,
        &cfg,
        filter_pred,
        graph_ranks.as_ref(),
    )?;

    let splade_enabled = cfg.splade_weight > 0.0;
    if splade_enabled {
        let splade_hits = crate::core::splade_retrieval::hybrid_retrieve(query, index, candidate_k);
        if !splade_hits.is_empty() {
            boost_with_splade(&mut hits, &splade_hits, cfg.splade_weight);
        }
    }

    hits.truncate(top_k);
    Ok((hits, coverage))
}
754
755fn boost_with_splade(
757 results: &mut [HybridResult],
758 splade: &[crate::core::splade_retrieval::SpladeResult],
759 weight: f64,
760) {
761 use std::collections::HashMap;
762 let rrf_k = 60.0_f64;
763
764 let boosts: HashMap<&str, f64> = splade
765 .iter()
766 .enumerate()
767 .map(|(rank, sr)| (sr.file_path.as_str(), weight / (rrf_k + rank as f64 + 1.0)))
768 .collect();
769
770 for r in results.iter_mut() {
771 if let Some(&boost) = boosts.get(r.file_path.as_str()) {
772 r.rrf_score += boost;
773 }
774 }
775
776 results.sort_by(|a, b| {
777 b.rrf_score
778 .partial_cmp(&a.rrf_score)
779 .unwrap_or(std::cmp::Ordering::Equal)
780 });
781}
782
/// Short display label for a project root: its final path component when that is valid,
/// non-empty UTF-8, otherwise the whole path rendered lossily.
fn label_for_root(root: &Path) -> String {
    match root.file_name().and_then(|name| name.to_str()) {
        Some(name) if !name.is_empty() => name.to_owned(),
        _ => root.to_string_lossy().into_owned(),
    }
}
790
791fn graph_rrf_ranks_for_search_root(
792 root: &Path,
793) -> Option<std::collections::HashMap<String, usize>> {
794 let root_s = root.to_string_lossy().to_string();
795 let session = crate::core::session::SessionState::load_latest_for_project_root(&root_s)?;
796
797 if session.files_touched.is_empty() {
798 return None;
799 }
800
801 let recent: Vec<String> = session
802 .files_touched
803 .iter()
804 .rev()
805 .filter(|f| path_under_search_root(&f.path, root))
806 .take(12)
807 .map(|f| f.path.clone())
808 .collect();
809
810 if recent.is_empty() {
811 return None;
812 }
813
814 crate::core::graph_context::graph_neighbor_ranks_for_recent_files(&root_s, &recent, 40, 120)
815}
816
817fn path_under_search_root(path: &str, root: &Path) -> bool {
818 let p = std::path::Path::new(path);
819 if p.is_absolute() {
820 let root_norm = crate::core::pathutil::safe_canonicalize_or_self(root);
821 let path_norm = crate::core::pathutil::safe_canonicalize_or_self(p);
822 path_norm.starts_with(&root_norm)
823 } else {
824 true
825 }
826}
827
828fn hybrid_search_mode(
829 query: &str,
830 root: &Path,
831 index: &BM25Index,
832 top_k: usize,
833 compact: bool,
834 filter: &SearchFilter,
835) -> String {
836 #[cfg(feature = "embeddings")]
837 {
838 let (engine, mut embed_idx) = match load_engine_and_index(root) {
839 Ok(v) => v,
840 Err(e) => return format!("ERR: {e}"),
841 };
842
843 let (aligned, coverage, changed_files) =
844 match ensure_embeddings(root, index, engine, &mut embed_idx) {
845 Ok(v) => v,
846 Err(e) => return format!("ERR: {e}"),
847 };
848
849 let backend = match crate::core::dense_backend::DenseBackendKind::try_from_env() {
850 Ok(v) => v,
851 Err(e) => return format!("ERR: {e}"),
852 };
853
854 let cfg = HybridConfig::from_config();
855 let filter_fn = |p: &str| filter.matches(p);
856 let filter_pred: Option<&dyn Fn(&str) -> bool> = filter
857 .is_active()
858 .then_some(&filter_fn as &dyn Fn(&str) -> bool);
859 let graph_ranks = graph_rrf_ranks_for_search_root(root);
860 let graph_ranks_ref = graph_ranks.as_ref();
861 let mut results = match crate::core::dense_backend::hybrid_results(
862 backend,
863 root,
864 index,
865 engine,
866 &aligned,
867 &changed_files,
868 query,
869 top_k,
870 &cfg,
871 filter_pred,
872 graph_ranks_ref,
873 ) {
874 Ok(v) => v,
875 Err(e) => return format!("ERR: {e}"),
876 };
877
878 if cfg.splade_weight > 0.0 {
879 let splade = crate::core::splade_retrieval::hybrid_retrieve(query, index, top_k);
880 if !splade.is_empty() {
881 boost_with_splade(&mut results, &splade, cfg.splade_weight);
882 }
883 }
884
885 results.truncate(top_k);
886
887 let header = if compact {
888 format!(
889 "semantic_search(hybrid,{top_k}) → {} results, {} chunks, embed_cov={:.0}%\n",
890 results.len(),
891 index.doc_count,
892 coverage * 100.0
893 )
894 } else {
895 format!(
896 "Semantic search (Hybrid): \"{}\" ({} results from {} indexed chunks, embeddings coverage {:.0}%)\n",
897 truncate_query(query, 60),
898 results.len(),
899 index.doc_count,
900 coverage * 100.0
901 )
902 };
903
904 format!("{header}{}", format_hybrid_results(&results, compact))
905 }
906 #[cfg(not(feature = "embeddings"))]
907 {
908 let mut results = index.search(query, filtered_candidate_k(top_k, filter.is_active()));
909 if filter.is_active() {
910 results.retain(|x| filter.matches(&x.file_path));
911 }
912
913 if let Some(graph_ranks) = graph_rrf_ranks_for_search_root(root) {
914 const GRAPH_RRF_K: f64 = 60.0;
915 for r in &mut results {
916 if let Some(&rank) = graph_ranks.get(&r.file_path) {
917 r.score += 1.0 / (GRAPH_RRF_K + rank as f64 + 1.0);
918 }
919 }
920 results.sort_by(|a, b| {
921 b.score
922 .partial_cmp(&a.score)
923 .unwrap_or(std::cmp::Ordering::Equal)
924 });
925 }
926
927 results.truncate(top_k);
928 let graph_tag = if graph_rrf_ranks_for_search_root(root).is_some() {
929 "+graph"
930 } else {
931 ""
932 };
933 let header = if compact {
934 format!(
935 "semantic_search(bm25{graph_tag},{top_k}) → {} results, {} chunks indexed\n",
936 results.len(),
937 index.doc_count
938 )
939 } else {
940 format!(
941 "Semantic search (BM25{graph_tag}): \"{}\" ({} results from {} indexed chunks)\n",
942 truncate_query(query, 60),
943 results.len(),
944 index.doc_count,
945 )
946 };
947 format!("{header}{}", format_search_results(&results, compact))
948 }
949}
950
951fn dense_search_mode(
952 query: &str,
953 root: &Path,
954 index: &BM25Index,
955 top_k: usize,
956 compact: bool,
957 filter: &SearchFilter,
958) -> String {
959 #[cfg(feature = "embeddings")]
960 {
961 let (engine, mut embed_idx) = match load_engine_and_index(root) {
962 Ok(v) => v,
963 Err(e) => return format!("ERR: {e}"),
964 };
965
966 let (aligned, coverage, changed_files) =
967 match ensure_embeddings(root, index, engine, &mut embed_idx) {
968 Ok(v) => v,
969 Err(e) => return format!("ERR: {e}"),
970 };
971
972 let backend = match crate::core::dense_backend::DenseBackendKind::try_from_env() {
973 Ok(v) => v,
974 Err(e) => return format!("ERR: {e}"),
975 };
976
977 let filter_fn = |p: &str| filter.matches(p);
978 let filter_pred: Option<&dyn Fn(&str) -> bool> = filter
979 .is_active()
980 .then_some(&filter_fn as &dyn Fn(&str) -> bool);
981
982 let candidate_k = filtered_candidate_k(top_k, filter.is_active());
983 let mut results = match crate::core::dense_backend::dense_results_as_hybrid(
984 backend,
985 root,
986 index,
987 engine,
988 &aligned,
989 &changed_files,
990 query,
991 candidate_k,
992 filter_pred,
993 ) {
994 Ok(v) => v,
995 Err(e) => return format!("ERR: {e}"),
996 };
997 results.truncate(top_k);
998
999 let header = if compact {
1000 format!(
1001 "semantic_search(dense,{top_k}) → {} results, {} chunks, embed_cov={:.0}%\n",
1002 results.len(),
1003 index.doc_count,
1004 coverage * 100.0
1005 )
1006 } else {
1007 format!(
1008 "Semantic search (Dense): \"{}\" ({} results from {} indexed chunks, embeddings coverage {:.0}%)\n",
1009 truncate_query(query, 60),
1010 results.len(),
1011 index.doc_count,
1012 coverage * 100.0
1013 )
1014 };
1015
1016 format!("{header}{}", format_hybrid_results(&results, compact))
1017 }
1018 #[cfg(not(feature = "embeddings"))]
1019 {
1020 "ERR: embeddings feature not enabled".to_string()
1021 }
1022}
1023
/// Returns the shared embedding engine together with the embedding index for `root`.
///
/// The stored index is loaded when present, otherwise a fresh one is created for the
/// engine's model. A stored index whose model or dimensions do not match the engine is
/// discarded (with a warning) and replaced by a fresh one. The returned index always
/// carries a `model_id`.
///
/// # Errors
/// Fails when the effective memory profile disables embeddings or the engine cannot be
/// loaded.
#[cfg(feature = "embeddings")]
fn load_engine_and_index(
    root: &Path,
) -> Result<(&'static EmbeddingEngine, EmbeddingIndex), String> {
    let cfg = crate::core::config::Config::load();
    if !crate::core::config::MemoryProfile::effective(&cfg).embeddings_enabled() {
        return Err("embeddings disabled by memory_profile=low".into());
    }

    let Some(engine) = crate::core::embeddings::shared_engine() else {
        return Err("embedding engine load failed".to_string());
    };

    let model_name = engine.model_name();
    let mut idx = match EmbeddingIndex::load(root) {
        Some(stored) => stored,
        None => EmbeddingIndex::new_with_model(engine.dimensions(), model_name),
    };

    // A model change is checked first; dimensions are only compared when the model matches.
    let stale = if let Some((stored, current)) = idx.model_mismatch(model_name) {
        tracing::warn!(
            "[embeddings] model changed: {stored} → {current}. Re-indexing all embeddings."
        );
        true
    } else if idx.dimension_mismatch(engine.dimensions()) {
        tracing::warn!(
            "[embeddings] dimension mismatch: index={}d, engine={}d. Re-indexing.",
            idx.dimensions,
            engine.dimensions()
        );
        true
    } else {
        false
    };
    if stale {
        idx = EmbeddingIndex::new_with_model(engine.dimensions(), model_name);
    }

    if idx.model_id.is_none() {
        idx.model_id = Some(model_name.to_string());
    }

    Ok((engine, idx))
}
1061
/// Brings `embed_idx` up to date with the chunks of `index` and returns the embeddings
/// aligned with those chunks.
///
/// First re-embeds only the chunks of files that `files_needing_update` reports and saves
/// the index. If aligned embeddings are still unavailable afterwards, every chunk is
/// re-embedded and the index is saved again.
///
/// Returns `(aligned_embeddings, coverage, files)`, where `files` lists the sorted,
/// de-duplicated files re-embedded by the incremental step, or all indexed files after a
/// full rebuild.
///
/// # Errors
/// Fails when a chunk cannot be embedded, the index cannot be saved, or alignment is still
/// impossible after a full rebuild.
#[cfg(feature = "embeddings")]
fn ensure_embeddings(
    root: &Path,
    index: &BM25Index,
    engine: &EmbeddingEngine,
    embed_idx: &mut EmbeddingIndex,
) -> Result<(Vec<Vec<f32>>, f64, Vec<String>), String> {
    let mut changed_files = embed_idx.files_needing_update(&index.chunks);
    changed_files.sort();
    changed_files.dedup();

    // Incremental pass: only chunks that belong to changed files.
    if !changed_files.is_empty() {
        let changed_set: std::collections::HashSet<&str> =
            changed_files.iter().map(|f| f.as_str()).collect();
        let refreshed = index
            .chunks
            .iter()
            .enumerate()
            .filter(|(_, chunk)| changed_set.contains(chunk.file_path.as_str()))
            .map(|(pos, chunk)| {
                engine
                    .embed(&chunk.content)
                    .map(|emb| (pos, emb))
                    .map_err(|e| format!("embed failed for {}: {e}", chunk.file_path))
            })
            .collect::<Result<Vec<(usize, Vec<f32>)>, String>>()?;
        embed_idx.update(&index.chunks, &refreshed, &changed_files);
        embed_idx
            .save(root)
            .map_err(|e| format!("save embeddings failed: {e}"))?;
    }

    if let Some(aligned) = embed_idx.get_aligned_embeddings(&index.chunks) {
        let coverage = embed_idx.coverage(index.chunks.len());
        return Ok((aligned, coverage, changed_files));
    }

    // Full rebuild: alignment failed, so embed every chunk again.
    let mut all_files: Vec<String> = Vec::with_capacity(index.chunks.len());
    for chunk in index.chunks.iter() {
        all_files.push(chunk.file_path.clone());
    }
    all_files.sort();
    all_files.dedup();

    let rebuilt = index
        .chunks
        .iter()
        .enumerate()
        .map(|(pos, chunk)| {
            engine
                .embed(&chunk.content)
                .map(|emb| (pos, emb))
                .map_err(|e| format!("embed failed for {}: {e}", chunk.file_path))
        })
        .collect::<Result<Vec<(usize, Vec<f32>)>, String>>()?;

    embed_idx.update(&index.chunks, &rebuilt, &all_files);
    embed_idx
        .save(root)
        .map_err(|e| format!("save embeddings failed: {e}"))?;

    match embed_idx.get_aligned_embeddings(&index.chunks) {
        Some(aligned) => {
            let coverage = embed_idx.coverage(index.chunks.len());
            Ok((aligned, coverage, all_files))
        }
        None => Err("embedding alignment failed after full rebuild".to_string()),
    }
}
1123
1124struct SearchFilter {
1125 allowed_exts: Option<HashSet<String>>,
1126 path_glob: Option<glob::Pattern>,
1127}
1128
1129impl SearchFilter {
1130 fn new(languages: Option<&[String]>, path_glob: Option<&str>) -> Result<Self, String> {
1131 let allowed_exts = languages.map(normalize_languages);
1132 let path_glob = match path_glob {
1133 None => None,
1134 Some(s) if s.trim().is_empty() => None,
1135 Some(s) => Some(glob::Pattern::new(s).map_err(|e| e.msg.to_string())?),
1136 };
1137 Ok(Self {
1138 allowed_exts,
1139 path_glob,
1140 })
1141 }
1142
1143 fn is_active(&self) -> bool {
1144 self.allowed_exts.is_some() || self.path_glob.is_some()
1145 }
1146
1147 fn matches(&self, rel_path: &str) -> bool {
1148 let rel_path = rel_path.replace('\\', "/");
1149 if let Some(p) = &self.path_glob {
1150 if !p.matches(&rel_path) {
1151 return false;
1152 }
1153 }
1154 if let Some(exts) = &self.allowed_exts {
1155 let ext = Path::new(&rel_path)
1156 .extension()
1157 .and_then(|e| e.to_str())
1158 .unwrap_or("")
1159 .to_lowercase();
1160 if ext.is_empty() || !exts.contains(&ext) {
1161 return false;
1162 }
1163 }
1164 true
1165 }
1166}
1167
/// Expands user-supplied language names or extensions into the set of file
/// extensions (lower-case, no leading dot) they stand for.
///
/// Each entry is trimmed, stripped of leading dots and lower-cased, then:
/// - known language names and aliases expand to their extension group
///   (e.g. `"typescript"` -> `ts`, `tsx`; `"golang"` -> `go`; `"c#"` -> `cs`),
/// - any other non-empty value is kept verbatim as an extension,
/// - blank entries are ignored.
///
/// Returns an empty set when `langs` is empty or contains only blank entries.
fn normalize_languages(langs: &[String]) -> HashSet<String> {
    let mut out = HashSet::new();
    for lang in langs {
        let name = lang.trim().trim_start_matches('.').to_lowercase();
        let exts: &[&str] = match name.as_str() {
            "" => continue,
            "rust" | "rs" => &["rs"],
            "ts" | "typescript" => &["ts", "tsx"],
            "js" | "javascript" => &["js", "jsx", "mjs", "cjs"],
            "py" | "python" => &["py"],
            // "golang" is a common name for Go; without the alias it would be
            // kept as the extension `golang` and match no file.
            "go" | "golang" => &["go"],
            "java" => &["java"],
            "ruby" | "rb" => &["rb"],
            "php" => &["php"],
            "c" => &["c", "h"],
            "cpp" | "c++" | "cc" => &["cpp", "hpp", "cc", "hh"],
            // Same reasoning as "golang": "c#" is not a usable extension.
            "cs" | "csharp" | "c#" => &["cs"],
            "swift" => &["swift"],
            "kt" | "kotlin" => &["kt", "kts"],
            "json" => &["json"],
            "yaml" | "yml" => &["yaml", "yml"],
            // Unknown value: assume the caller passed a raw extension.
            other => {
                out.insert(other.to_string());
                continue;
            }
        };
        out.extend(exts.iter().map(|ext| (*ext).to_string()));
    }
    out
}
1236
/// Publicly exported wrapper around `load_engine_and_index`.
///
/// Passes `root` through unchanged and returns the callee's result as-is:
/// on success a `'static` reference to the embedding engine together with an
/// [`EmbeddingIndex`], otherwise the error message.
///
/// Only compiled when the `embeddings` feature is enabled.
#[cfg(feature = "embeddings")]
pub fn load_engine_and_index_pub(
    root: &Path,
) -> Result<(&'static EmbeddingEngine, EmbeddingIndex), String> {
    load_engine_and_index(root)
}
1244
/// Publicly exported wrapper around `ensure_embeddings`.
///
/// All arguments are forwarded unchanged. On success the tuple holds, in
/// order: the embeddings aligned with `index.chunks`, the coverage value
/// reported by `embed_idx`, and a list of file paths (the changed files, or
/// every indexed file after a full rebuild).
///
/// # Errors
/// Propagates the error string produced by `ensure_embeddings`.
///
/// Only compiled when the `embeddings` feature is enabled.
#[cfg(feature = "embeddings")]
pub fn ensure_embeddings_for_eval(
    root: &Path,
    index: &BM25Index,
    engine: &EmbeddingEngine,
    embed_idx: &mut EmbeddingIndex,
) -> Result<(Vec<Vec<f32>>, f64, Vec<String>), String> {
    ensure_embeddings(root, index, engine, embed_idx)
}
1255
1256pub fn boost_with_splade_pub(
1258 results: &mut [HybridResult],
1259 splade: &[crate::core::splade_retrieval::SpladeResult],
1260 weight: f64,
1261) {
1262 boost_with_splade(results, splade, weight);
1263}
1264
#[cfg(test)]
mod filter_tests {
    use super::*;

    /// A language name limits matches to that language's file extensions.
    #[test]
    fn filter_language_rust() {
        let langs = [String::from("rust")];
        let filter = SearchFilter::new(Some(langs.as_slice()), None).unwrap();

        assert!(filter.matches("src/main.rs"));
        assert!(!filter.matches("src/main.ts"));
    }

    /// A path glob accepts paths under the pattern and rejects the rest.
    #[test]
    fn filter_path_glob() {
        let pattern = "rust/src/**";
        let filter = SearchFilter::new(None, Some(pattern)).unwrap();

        assert!(filter.matches("rust/src/core/mod.rs"));
        assert!(!filter.matches("website/src/pages/index.astro"));
    }
}
1283
#[cfg(test)]
mod determinism_tests {
    use super::*;

    /// Builds a minimal one-line function hit named `foo` for `file`, with no
    /// BM25 or dense scores or ranks and a zero RRF score.
    fn unscored_hit(file: &str, snippet: &str) -> HybridResult {
        HybridResult {
            file_path: file.to_string(),
            symbol_name: "foo".to_string(),
            kind: crate::core::bm25_index::ChunkKind::Function,
            start_line: 1,
            end_line: 1,
            snippet: snippet.to_string(),
            rrf_score: 0.0,
            bm25_score: None,
            dense_score: None,
            bm25_rank: None,
            dense_rank: None,
        }
    }

    /// Two lists ranking the same two hits in opposite order must still fuse
    /// into the same, stable output order.
    #[test]
    fn rrf_merge_hybrid_is_deterministic_on_ties() {
        let first = unscored_hit("a.rs", "a");
        let second = unscored_hit("b.rs", "b");

        let forward = vec![first.clone(), second.clone()];
        let reversed = vec![second, first];

        let fused = rrf_merge_hybrid(
            vec![
                ("root".to_string(), forward),
                ("root".to_string(), reversed),
            ],
            10,
        );

        assert_eq!(fused.len(), 2);
        for (pos, expected) in ["a.rs", "b.rs"].iter().enumerate() {
            assert_eq!(fused[pos].file_path, *expected);
        }
    }
}