1use std::collections::HashSet;
2use std::path::Path;
3
4use crate::core::bm25_index::{format_search_results, BM25Index};
5use crate::core::embedding_index::EmbeddingIndex;
6#[cfg(feature = "embeddings")]
7use crate::core::embeddings::EmbeddingEngine;
8use crate::core::hybrid_search::{format_hybrid_results, HybridConfig, HybridResult};
9use crate::tools::CrpMode;
10
11#[allow(clippy::too_many_arguments)]
13pub fn handle(
14 query: &str,
15 path: &str,
16 top_k: usize,
17 crp_mode: CrpMode,
18 languages: Option<&[String]>,
19 path_glob: Option<&str>,
20 mode: Option<&str>,
21 workspace: Option<bool>,
22 artifacts: Option<bool>,
23) -> String {
24 let root = Path::new(path);
25 if !root.exists() {
26 return format!("ERR: path does not exist: {path}");
27 }
28
29 let root = if root.is_file() {
30 root.parent().unwrap_or(root)
31 } else {
32 root
33 };
34
35 let filter = match SearchFilter::new(languages, path_glob) {
36 Ok(f) => f,
37 Err(e) => return format!("ERR: invalid filter: {e}"),
38 };
39
40 let compact = crp_mode.is_tdd();
41 let mode = mode.unwrap_or("hybrid").to_lowercase();
42 let workspace = workspace.unwrap_or(false);
43 let artifacts = artifacts.unwrap_or(false);
44
45 if artifacts {
46 return artifacts_search(query, root, top_k, compact, &filter, workspace);
47 }
48 if workspace {
49 return workspace_search(query, root, top_k, compact, &filter, &mode);
50 }
51
52 let index = match load_or_refresh_bm25(root) {
53 Bm25LoadResult::Ready(idx) => idx,
54 Bm25LoadResult::Building => {
55 return "BM25 index is being built in the background. \
56 Run ctx_semantic_search again in ~30s, or use action=reindex to wait for completion."
57 .to_string();
58 }
59 };
60 if index.doc_count == 0 {
61 return "No code files found to index.".to_string();
62 }
63
64 match mode.as_str() {
65 "bm25" => {
66 let mut results = index.search(query, filtered_candidate_k(top_k, filter.is_active()));
67 if filter.is_active() {
68 results.retain(|x| filter.matches(&x.file_path));
69 }
70 results.truncate(top_k);
71
72 let header = if compact {
73 format!(
74 "semantic_search(bm25,{top_k}) → {} results, {} chunks indexed\n",
75 results.len(),
76 index.doc_count
77 )
78 } else {
79 format!(
80 "Semantic search (BM25): \"{}\" ({} results from {} indexed chunks)\n",
81 truncate_query(query, 60),
82 results.len(),
83 index.doc_count,
84 )
85 };
86 format!("{header}{}", format_search_results(&results, compact))
87 }
88 "dense" => dense_search_mode(query, root, &index, top_k, compact, &filter),
89 _ => hybrid_search_mode(query, root, &index, top_k, compact, &filter),
90 }
91}
92
93pub fn handle_reindex(path: &str) -> String {
95 let root = Path::new(path);
96 if !root.exists() {
97 return format!("ERR: path does not exist: {path}");
98 }
99 let root = if root.is_file() {
100 root.parent().unwrap_or(root)
101 } else {
102 root
103 };
104
105 let idx = BM25Index::build_from_directory(root);
106 let files = idx.files.len();
107 let chunks = idx.doc_count;
108 let _ = idx.save(root);
109
110 format!("Reindexed {path}: {files} files, {chunks} chunks")
111}
112
113pub fn handle_reindex_artifacts(path: &str, workspace: bool) -> String {
114 let root = Path::new(path);
115 if !root.exists() {
116 return format!("ERR: path does not exist: {path}");
117 }
118 let root = if root.is_file() {
119 root.parent().unwrap_or(root)
120 } else {
121 root
122 };
123
124 let mut roots: Vec<std::path::PathBuf> = vec![root.to_path_buf()];
125 let mut warnings: Vec<String> = Vec::new();
126
127 if workspace {
128 let linked = crate::core::workspace_config::load_linked_projects(root);
129 warnings.extend(linked.warnings);
130 roots.extend(linked.roots);
131 }
132
133 let mut total_files = 0usize;
134 let mut total_chunks = 0usize;
135 for r in roots {
136 let (idx, w) = crate::core::artifact_index::rebuild_from_scratch(&r);
137 warnings.extend(w);
138 total_files += idx.files.len();
139 total_chunks += idx.doc_count;
140 }
141
142 if warnings.is_empty() {
143 format!("Reindexed artifacts: {total_files} files, {total_chunks} chunks")
144 } else {
145 format!(
146 "Reindexed artifacts: {total_files} files, {total_chunks} chunks ({} warning(s))",
147 warnings.len()
148 )
149 }
150}
151
152pub fn handle_find_related(
157 file_path: &str,
158 line: usize,
159 project_root: &str,
160 top_k: usize,
161 crp_mode: CrpMode,
162) -> String {
163 let root = Path::new(project_root);
164 if !root.exists() {
165 return format!("ERR: path does not exist: {project_root}");
166 }
167
168 let index = BM25Index::load_or_build(root);
169 if index.doc_count == 0 {
170 return "ERR: empty index. Try action=reindex first.".to_string();
171 }
172
173 let source_chunk = index
174 .chunks
175 .iter()
176 .find(|c| c.file_path == file_path && c.start_line <= line && c.end_line >= line);
177
178 let Some(source_chunk) = source_chunk else {
179 return format!(
180 "ERR: no indexed chunk found at {file_path}:{line}. Try action=reindex first."
181 );
182 };
183
184 let query_text = source_chunk.content.clone();
185 let source_file = source_chunk.file_path.clone();
186 let source_start = source_chunk.start_line;
187
188 let compact = crp_mode != CrpMode::Off;
189
190 let results = find_related_internal(&query_text, root, &index, top_k + 5, compact);
191
192 let mut lines: Vec<String> = results
193 .into_iter()
194 .filter(|l| !l.contains(&format!("{source_file}:{source_start}-")))
195 .take(top_k)
196 .collect();
197
198 let header = if compact {
199 format!(
200 "find_related({file_path}:{line}) → {} results\n",
201 lines.len()
202 )
203 } else {
204 format!("Find related to {file_path}:{line} (semantic similarity)\n")
205 };
206
207 lines.insert(0, header);
208 lines.join("")
209}
210
211fn find_related_internal(
212 query: &str,
213 root: &Path,
214 index: &BM25Index,
215 top_k: usize,
216 compact: bool,
217) -> Vec<String> {
218 let Ok(filter) = SearchFilter::new(None, None) else {
219 return vec!["ERR: filter init failed\n".to_string()];
220 };
221 let output = hybrid_search_mode(query, root, index, top_k, compact, &filter);
222 output.lines().map(|l| format!("{l}\n")).collect()
223}
224
/// Returns at most the first `max` characters of `q`, always cutting on a
/// character boundary.
fn truncate_query(q: &str, max: usize) -> &str {
    // A byte length within the limit implies the character count is too,
    // so the scan is only needed for longer inputs.
    if q.len() > max {
        if let Some((cut, _)) = q.char_indices().nth(max) {
            return &q[..cut];
        }
    }
    q
}
234
std::thread_local! {
    // Per-thread slot holding a handle to the shared BM25 cache. It starts out
    // as `None` and is filled by `set_thread_cache`; while it is `None`,
    // `load_or_refresh_bm25` skips the cache lookup and `store_in_thread_cache`
    // does nothing.
    static BM25_SHARED_CACHE: std::cell::RefCell<Option<crate::core::bm25_cache::SharedBm25Cache>> =
        const { std::cell::RefCell::new(None) };
}
239
240pub fn set_thread_cache(cache: crate::core::bm25_cache::SharedBm25Cache) {
242 BM25_SHARED_CACHE.with(|c| {
243 *c.borrow_mut() = Some(cache);
244 });
245}
246
/// Outcome of `load_or_refresh_bm25`.
pub(crate) enum Bm25LoadResult {
    /// An index is available (from the cache, from disk, or freshly built).
    Ready(std::sync::Arc<BM25Index>),
    /// No index could be loaded and the orchestrator reports a build in progress.
    Building,
}
252
253fn load_or_refresh_bm25(root: &Path) -> Bm25LoadResult {
254 let cached = BM25_SHARED_CACHE.with(|c| {
255 let borrow = c.borrow();
256 borrow
257 .as_ref()
258 .and_then(|cache| crate::core::bm25_cache::get_or_background(cache, root))
259 });
260 if let Some(idx) = cached {
261 return Bm25LoadResult::Ready(idx);
262 }
263
264 let root_str = root.to_string_lossy().to_string();
265
266 if let Some(idx) = crate::core::index_orchestrator::try_load_bm25_index(&root_str) {
267 let idx = std::sync::Arc::new(idx);
268 store_in_thread_cache(root, &idx);
269 return Bm25LoadResult::Ready(idx);
270 }
271
272 if crate::core::index_orchestrator::is_building() {
273 return Bm25LoadResult::Building;
274 }
275
276 crate::core::index_orchestrator::ensure_all_background(&root_str);
277
278 let idx = std::sync::Arc::new(BM25Index::load_or_build(root));
279 store_in_thread_cache(root, &idx);
280 Bm25LoadResult::Ready(idx)
281}
282
283fn store_in_thread_cache(root: &Path, idx: &std::sync::Arc<BM25Index>) {
284 BM25_SHARED_CACHE.with(|c| {
285 let borrow = c.borrow();
286 if let Some(cache) = borrow.as_ref() {
287 let mut guard = cache
288 .lock()
289 .unwrap_or_else(std::sync::PoisonError::into_inner);
290 *guard = Some(crate::core::bm25_cache::Bm25CacheEntry {
291 root: root.to_path_buf(),
292 index: std::sync::Arc::clone(idx),
293 loaded_at: std::time::Instant::now(),
294 });
295 }
296 });
297}
298
/// Number of candidates to fetch from an index before post-filtering.
///
/// Without a filter this is exactly `top_k`. With a filter, ten times
/// `max(top_k, 10)` candidates are requested (capped at 500) so that enough
/// hits survive filtering — but never fewer than `top_k` itself, so a filtered
/// search cannot be starved below what an unfiltered one would fetch.
fn filtered_candidate_k(top_k: usize, filtered: bool) -> usize {
    if !filtered {
        return top_k;
    }
    // `max(top_k, 10) * 10` is at least 100, so only the upper cap applies.
    let over_fetch = top_k.max(10).saturating_mul(10).min(500);
    over_fetch.max(top_k)
}
306
/// Constant `k` in the reciprocal-rank-fusion term `1 / (k + rank + 1)`
/// (with a 0-based `rank`) used by `rrf_merge_hybrid` and `rrf_merge_bm25`.
const WORKSPACE_RRF_K: f64 = 60.0;
308
309fn artifacts_search(
310 query: &str,
311 root: &Path,
312 top_k: usize,
313 compact: bool,
314 filter: &SearchFilter,
315 workspace: bool,
316) -> String {
317 let mut roots: Vec<std::path::PathBuf> = vec![root.to_path_buf()];
318 let mut warnings: Vec<String> = Vec::new();
319
320 if workspace {
321 let linked = crate::core::workspace_config::load_linked_projects(root);
322 warnings.extend(linked.warnings);
323 roots.extend(linked.roots);
324 }
325 roots.sort();
326 roots.dedup();
327
328 let mut per_project: Vec<(String, Vec<crate::core::bm25_index::SearchResult>)> = Vec::new();
329 let mut total_chunks = 0usize;
330
331 for r in &roots {
332 let label = label_for_root(r);
333 let (idx, w) = crate::core::artifact_index::load_or_build(r);
334 warnings.extend(w);
335 total_chunks += idx.doc_count;
336 if idx.doc_count == 0 {
337 continue;
338 }
339
340 let mut results = idx.search(query, filtered_candidate_k(top_k, filter.is_active()));
341 if filter.is_active() {
342 results.retain(|x| filter.matches(&x.file_path));
343 }
344 results.truncate(top_k);
345
346 for res in &mut results {
347 res.file_path = if workspace {
348 format!("[project:{label}] [artifact] {}", res.file_path)
349 } else {
350 format!("[artifact] {}", res.file_path)
351 };
352 }
353
354 per_project.push((label, results));
355 }
356
357 let mut fused: Vec<crate::core::bm25_index::SearchResult> = if per_project.len() <= 1 {
358 per_project
359 .into_iter()
360 .next()
361 .map(|(_, v)| v)
362 .unwrap_or_default()
363 } else {
364 rrf_merge_bm25(per_project, top_k)
365 };
366
367 if fused.is_empty() {
368 return "No artifact files found to index.".to_string();
369 }
370
371 fused.truncate(top_k);
372
373 let header = if compact {
374 if workspace {
375 format!(
376 "semantic_search(artifacts,workspace,{top_k}) → {} results, projects={}, {} chunks indexed\n",
377 fused.len(),
378 roots.len(),
379 total_chunks
380 )
381 } else {
382 format!(
383 "semantic_search(artifacts,{top_k}) → {} results, {} chunks indexed\n",
384 fused.len(),
385 total_chunks
386 )
387 }
388 } else if workspace {
389 format!(
390 "Semantic search (Artifacts/Workspace): \"{}\" ({} results from {} projects)\n",
391 truncate_query(query, 60),
392 fused.len(),
393 roots.len()
394 )
395 } else {
396 format!(
397 "Semantic search (Artifacts): \"{}\" ({} results)\n",
398 truncate_query(query, 60),
399 fused.len()
400 )
401 };
402
403 let mut out = format!("{header}{}", format_search_results(&fused, compact));
404 if !warnings.is_empty() && !compact {
405 out.push_str(&format!("\nWarnings ({}):\n", warnings.len()));
406 for w in warnings.iter().take(20) {
407 out.push_str(&format!("- {w}\n"));
408 }
409 }
410 out
411}
412
413fn workspace_search(
414 query: &str,
415 root: &Path,
416 top_k: usize,
417 compact: bool,
418 filter: &SearchFilter,
419 mode: &str,
420) -> String {
421 let linked = crate::core::workspace_config::load_linked_projects(root);
422 let mut warnings = linked.warnings;
423
424 let mut roots: Vec<std::path::PathBuf> = vec![root.to_path_buf()];
425 roots.extend(linked.roots);
426 roots.sort();
427 roots.dedup();
428
429 let mut per_project: Vec<(String, Vec<HybridResult>)> = Vec::new();
430 let mut avg_cov: Option<f64> = None;
431 let mut cov_count = 0usize;
432
433 for r in &roots {
434 let label = label_for_root(r);
435 let index = BM25Index::load_or_build(r);
436 if index.doc_count == 0 {
437 continue;
438 }
439
440 let mut results: Vec<HybridResult> = match mode {
441 "bm25" => {
442 let mut bm25 = index.search(query, filtered_candidate_k(top_k, filter.is_active()));
443 if filter.is_active() {
444 bm25.retain(|x| filter.matches(&x.file_path));
445 }
446 bm25.truncate(top_k);
447 bm25.into_iter()
448 .map(HybridResult::from_bm25_public)
449 .collect()
450 }
451 "dense" => {
452 #[cfg(feature = "embeddings")]
453 {
454 match dense_results_for_root(query, r, &index, top_k, filter) {
455 Ok((v, cov)) => {
456 avg_cov = Some(avg_cov.unwrap_or(0.0) + cov);
457 cov_count += 1;
458 v
459 }
460 Err(e) => {
461 warnings.push(format!("[{label}] dense search failed: {e}"));
462 let mut bm25 = index
463 .search(query, filtered_candidate_k(top_k, filter.is_active()));
464 if filter.is_active() {
465 bm25.retain(|x| filter.matches(&x.file_path));
466 }
467 bm25.truncate(top_k);
468 bm25.into_iter()
469 .map(HybridResult::from_bm25_public)
470 .collect()
471 }
472 }
473 }
474 #[cfg(not(feature = "embeddings"))]
475 {
476 let _ = (&label, &warnings);
477 let mut bm25 =
478 index.search(query, filtered_candidate_k(top_k, filter.is_active()));
479 if filter.is_active() {
480 bm25.retain(|x| filter.matches(&x.file_path));
481 }
482 bm25.truncate(top_k);
483 bm25.into_iter()
484 .map(HybridResult::from_bm25_public)
485 .collect()
486 }
487 }
488 _ => {
489 #[cfg(feature = "embeddings")]
490 {
491 match hybrid_results_for_root(query, r, &index, top_k, filter) {
492 Ok((v, cov)) => {
493 avg_cov = Some(avg_cov.unwrap_or(0.0) + cov);
494 cov_count += 1;
495 v
496 }
497 Err(e) => {
498 warnings.push(format!("[{label}] hybrid search failed: {e}"));
499 let mut bm25 = index
500 .search(query, filtered_candidate_k(top_k, filter.is_active()));
501 if filter.is_active() {
502 bm25.retain(|x| filter.matches(&x.file_path));
503 }
504 bm25.truncate(top_k);
505 bm25.into_iter()
506 .map(HybridResult::from_bm25_public)
507 .collect()
508 }
509 }
510 }
511 #[cfg(not(feature = "embeddings"))]
512 {
513 let _ = (&label, &warnings);
514 let mut bm25 =
515 index.search(query, filtered_candidate_k(top_k, filter.is_active()));
516 if filter.is_active() {
517 bm25.retain(|x| filter.matches(&x.file_path));
518 }
519 bm25.truncate(top_k);
520 bm25.into_iter()
521 .map(HybridResult::from_bm25_public)
522 .collect()
523 }
524 }
525 };
526
527 for res in &mut results {
528 res.file_path = format!("[project:{label}] {}", res.file_path);
529 }
530 per_project.push((label, results));
531 }
532
533 let mut fused: Vec<HybridResult> = if per_project.len() <= 1 {
534 per_project
535 .into_iter()
536 .next()
537 .map(|(_, v)| v)
538 .unwrap_or_default()
539 } else {
540 rrf_merge_hybrid(per_project, top_k)
541 };
542
543 if fused.is_empty() {
544 return "No code files found to index.".to_string();
545 }
546
547 fused.truncate(top_k);
548 let cov = avg_cov.and_then(|s| {
549 if cov_count == 0 {
550 None
551 } else {
552 Some(s / cov_count as f64)
553 }
554 });
555
556 let header = if compact {
557 match (mode, cov) {
558 (_, Some(c)) => format!(
559 "semantic_search(workspace,{mode},{top_k}) → {} results, projects={}, embed_cov={:.0}%\n",
560 fused.len(),
561 roots.len(),
562 c * 100.0
563 ),
564 _ => format!(
565 "semantic_search(workspace,{mode},{top_k}) → {} results, projects={}\n",
566 fused.len(),
567 roots.len()
568 ),
569 }
570 } else {
571 format!(
572 "Workspace semantic search ({mode}): \"{}\" ({} results from {} projects)\n",
573 truncate_query(query, 60),
574 fused.len(),
575 roots.len()
576 )
577 };
578
579 let mut out = format!("{header}{}", format_hybrid_results(&fused, compact));
580 if !warnings.is_empty() && !compact {
581 out.push_str(&format!("\nWarnings ({}):\n", warnings.len()));
582 for w in warnings.iter().take(20) {
583 out.push_str(&format!("- {w}\n"));
584 }
585 }
586 out
587}
588
589fn rrf_merge_hybrid(lists: Vec<(String, Vec<HybridResult>)>, top_k: usize) -> Vec<HybridResult> {
590 use std::collections::HashMap;
591
592 let mut acc: HashMap<String, (HybridResult, f64)> = HashMap::new();
593 for (label, results) in lists {
594 for (rank, r) in results.into_iter().enumerate() {
595 let key = format!(
596 "{label}|{}|{}|{}|{}",
597 r.file_path, r.symbol_name, r.start_line, r.end_line
598 );
599 let rrf = 1.0 / (WORKSPACE_RRF_K + (rank as f64) + 1.0);
600 acc.entry(key)
601 .and_modify(|(_, s)| *s += rrf)
602 .or_insert((r, rrf));
603 }
604 }
605
606 let mut out: Vec<HybridResult> = acc
607 .into_values()
608 .map(|(mut r, s)| {
609 r.rrf_score = s;
610 r
611 })
612 .collect();
613 out.sort_by(|a, b| {
614 b.rrf_score
615 .partial_cmp(&a.rrf_score)
616 .unwrap_or(std::cmp::Ordering::Equal)
617 .then_with(|| a.file_path.cmp(&b.file_path))
618 .then_with(|| a.symbol_name.cmp(&b.symbol_name))
619 .then_with(|| a.start_line.cmp(&b.start_line))
620 .then_with(|| a.end_line.cmp(&b.end_line))
621 });
622 out.truncate(top_k);
623 out
624}
625
626fn rrf_merge_bm25(
627 lists: Vec<(String, Vec<crate::core::bm25_index::SearchResult>)>,
628 top_k: usize,
629) -> Vec<crate::core::bm25_index::SearchResult> {
630 use std::collections::HashMap;
631
632 let mut acc: HashMap<String, (crate::core::bm25_index::SearchResult, f64)> = HashMap::new();
633 for (label, results) in lists {
634 for (rank, r) in results.into_iter().enumerate() {
635 let key = format!(
636 "{label}|{}|{}|{}|{}",
637 r.file_path, r.symbol_name, r.start_line, r.end_line
638 );
639 let rrf = 1.0 / (WORKSPACE_RRF_K + (rank as f64) + 1.0);
640 acc.entry(key)
641 .and_modify(|(_, s)| *s += rrf)
642 .or_insert((r, rrf));
643 }
644 }
645
646 let mut out: Vec<crate::core::bm25_index::SearchResult> = acc
647 .into_values()
648 .map(|(mut r, s)| {
649 r.score = s;
650 r
651 })
652 .collect();
653 out.sort_by(|a, b| {
654 b.score
655 .partial_cmp(&a.score)
656 .unwrap_or(std::cmp::Ordering::Equal)
657 .then_with(|| a.file_path.cmp(&b.file_path))
658 .then_with(|| a.symbol_name.cmp(&b.symbol_name))
659 .then_with(|| a.start_line.cmp(&b.start_line))
660 .then_with(|| a.end_line.cmp(&b.end_line))
661 });
662 out.truncate(top_k);
663 out
664}
665
/// Dense (embedding-only) search over a single project root.
///
/// Loads the embedding engine and index, brings the embeddings up to date
/// with `index`, and queries the dense backend selected from the environment.
/// Returns at most `top_k` rows together with the embedding coverage reported
/// by `ensure_embeddings`; any failing step is returned as an error string.
#[cfg(feature = "embeddings")]
fn dense_results_for_root(
    query: &str,
    root: &Path,
    index: &BM25Index,
    top_k: usize,
    filter: &SearchFilter,
) -> Result<(Vec<HybridResult>, f64), String> {
    let (engine, mut embed_idx) = load_engine_and_index(root)?;
    let (aligned, coverage, changed_files) =
        ensure_embeddings(root, index, engine, &mut embed_idx)?;
    let backend = crate::core::dense_backend::DenseBackendKind::try_from_env()?;

    // The backend only receives a predicate when a filter is actually configured.
    let matches_filter = |p: &str| filter.matches(p);
    let filter_pred: Option<&dyn Fn(&str) -> bool> = if filter.is_active() {
        Some(&matches_filter as &dyn Fn(&str) -> bool)
    } else {
        None
    };

    let mut rows = crate::core::dense_backend::dense_results_as_hybrid(
        backend,
        root,
        index,
        engine,
        &aligned,
        &changed_files,
        query,
        filtered_candidate_k(top_k, filter.is_active()),
        filter_pred,
    )?;
    rows.truncate(top_k);

    Ok((rows, coverage))
}
700
/// Hybrid search over a single project root.
///
/// Loads the embedding engine and index, brings the embeddings up to date
/// with `index`, and runs the backend's hybrid query with the default
/// [`HybridConfig`] and, when available, graph ranks derived from the latest
/// session. Returns at most `top_k` rows together with the embedding coverage
/// reported by `ensure_embeddings`; any failing step is returned as an error
/// string.
#[cfg(feature = "embeddings")]
fn hybrid_results_for_root(
    query: &str,
    root: &Path,
    index: &BM25Index,
    top_k: usize,
    filter: &SearchFilter,
) -> Result<(Vec<HybridResult>, f64), String> {
    let (engine, mut embed_idx) = load_engine_and_index(root)?;
    let (aligned, coverage, changed_files) =
        ensure_embeddings(root, index, engine, &mut embed_idx)?;
    let backend = crate::core::dense_backend::DenseBackendKind::try_from_env()?;
    let cfg = HybridConfig::default();

    // The backend only receives a predicate when a filter is actually configured.
    let matches_filter = |p: &str| filter.matches(p);
    let filter_pred: Option<&dyn Fn(&str) -> bool> = if filter.is_active() {
        Some(&matches_filter as &dyn Fn(&str) -> bool)
    } else {
        None
    };
    let candidate_k = filtered_candidate_k(top_k, filter.is_active());
    let graph_ranks = graph_rrf_ranks_for_search_root(root);

    let mut rows = crate::core::dense_backend::hybrid_results(
        backend,
        root,
        index,
        engine,
        &aligned,
        &changed_files,
        query,
        candidate_k,
        &cfg,
        filter_pred,
        graph_ranks.as_ref(),
    )?;
    rows.truncate(top_k);
    Ok((rows, coverage))
}
738
/// Short display label for a project root: its final path component when that
/// is non-empty valid UTF-8, otherwise the whole path (lossily converted).
fn label_for_root(root: &Path) -> String {
    match root.file_name().and_then(|name| name.to_str()) {
        Some(name) if !name.is_empty() => name.to_string(),
        _ => root.to_string_lossy().to_string(),
    }
}
746
747fn graph_rrf_ranks_for_search_root(
748 root: &Path,
749) -> Option<std::collections::HashMap<String, usize>> {
750 let root_s = root.to_string_lossy().to_string();
751 let session = crate::core::session::SessionState::load_latest_for_project_root(&root_s)?;
752
753 if session.files_touched.is_empty() {
754 return None;
755 }
756
757 let recent: Vec<String> = session
758 .files_touched
759 .iter()
760 .rev()
761 .filter(|f| path_under_search_root(&f.path, root))
762 .take(12)
763 .map(|f| f.path.clone())
764 .collect();
765
766 if recent.is_empty() {
767 return None;
768 }
769
770 crate::core::graph_context::graph_neighbor_ranks_for_recent_files(&root_s, &recent, 40, 120)
771}
772
773fn path_under_search_root(path: &str, root: &Path) -> bool {
774 let p = std::path::Path::new(path);
775 if p.is_absolute() {
776 let root_norm = crate::core::pathutil::safe_canonicalize_or_self(root);
777 let path_norm = crate::core::pathutil::safe_canonicalize_or_self(p);
778 path_norm.starts_with(&root_norm)
779 } else {
780 true
781 }
782}
783
784fn hybrid_search_mode(
785 query: &str,
786 root: &Path,
787 index: &BM25Index,
788 top_k: usize,
789 compact: bool,
790 filter: &SearchFilter,
791) -> String {
792 #[cfg(feature = "embeddings")]
793 {
794 let (engine, mut embed_idx) = match load_engine_and_index(root) {
795 Ok(v) => v,
796 Err(e) => return format!("ERR: {e}"),
797 };
798
799 let (aligned, coverage, changed_files) =
800 match ensure_embeddings(root, index, engine, &mut embed_idx) {
801 Ok(v) => v,
802 Err(e) => return format!("ERR: {e}"),
803 };
804
805 let backend = match crate::core::dense_backend::DenseBackendKind::try_from_env() {
806 Ok(v) => v,
807 Err(e) => return format!("ERR: {e}"),
808 };
809
810 let cfg = HybridConfig::default();
811 let filter_fn = |p: &str| filter.matches(p);
812 let filter_pred: Option<&dyn Fn(&str) -> bool> = filter
813 .is_active()
814 .then_some(&filter_fn as &dyn Fn(&str) -> bool);
815 let graph_ranks = graph_rrf_ranks_for_search_root(root);
816 let graph_ranks_ref = graph_ranks.as_ref();
817 let mut results = match crate::core::dense_backend::hybrid_results(
818 backend,
819 root,
820 index,
821 engine,
822 &aligned,
823 &changed_files,
824 query,
825 top_k,
826 &cfg,
827 filter_pred,
828 graph_ranks_ref,
829 ) {
830 Ok(v) => v,
831 Err(e) => return format!("ERR: {e}"),
832 };
833 results.truncate(top_k);
834
835 let header = if compact {
836 format!(
837 "semantic_search(hybrid,{top_k}) → {} results, {} chunks, embed_cov={:.0}%\n",
838 results.len(),
839 index.doc_count,
840 coverage * 100.0
841 )
842 } else {
843 format!(
844 "Semantic search (Hybrid): \"{}\" ({} results from {} indexed chunks, embeddings coverage {:.0}%)\n",
845 truncate_query(query, 60),
846 results.len(),
847 index.doc_count,
848 coverage * 100.0
849 )
850 };
851
852 format!("{header}{}", format_hybrid_results(&results, compact))
853 }
854 #[cfg(not(feature = "embeddings"))]
855 {
856 let mut results = index.search(query, filtered_candidate_k(top_k, filter.is_active()));
857 if filter.is_active() {
858 results.retain(|x| filter.matches(&x.file_path));
859 }
860
861 if let Some(graph_ranks) = graph_rrf_ranks_for_search_root(root) {
862 const GRAPH_RRF_K: f64 = 60.0;
863 for r in &mut results {
864 if let Some(&rank) = graph_ranks.get(&r.file_path) {
865 r.score += 1.0 / (GRAPH_RRF_K + rank as f64 + 1.0);
866 }
867 }
868 results.sort_by(|a, b| {
869 b.score
870 .partial_cmp(&a.score)
871 .unwrap_or(std::cmp::Ordering::Equal)
872 });
873 }
874
875 results.truncate(top_k);
876 let graph_tag = if graph_rrf_ranks_for_search_root(root).is_some() {
877 "+graph"
878 } else {
879 ""
880 };
881 let header = if compact {
882 format!(
883 "semantic_search(bm25{graph_tag},{top_k}) → {} results, {} chunks indexed\n",
884 results.len(),
885 index.doc_count
886 )
887 } else {
888 format!(
889 "Semantic search (BM25{graph_tag}): \"{}\" ({} results from {} indexed chunks)\n",
890 truncate_query(query, 60),
891 results.len(),
892 index.doc_count,
893 )
894 };
895 format!("{header}{}", format_search_results(&results, compact))
896 }
897}
898
899fn dense_search_mode(
900 query: &str,
901 root: &Path,
902 index: &BM25Index,
903 top_k: usize,
904 compact: bool,
905 filter: &SearchFilter,
906) -> String {
907 #[cfg(feature = "embeddings")]
908 {
909 let (engine, mut embed_idx) = match load_engine_and_index(root) {
910 Ok(v) => v,
911 Err(e) => return format!("ERR: {e}"),
912 };
913
914 let (aligned, coverage, changed_files) =
915 match ensure_embeddings(root, index, engine, &mut embed_idx) {
916 Ok(v) => v,
917 Err(e) => return format!("ERR: {e}"),
918 };
919
920 let backend = match crate::core::dense_backend::DenseBackendKind::try_from_env() {
921 Ok(v) => v,
922 Err(e) => return format!("ERR: {e}"),
923 };
924
925 let filter_fn = |p: &str| filter.matches(p);
926 let filter_pred: Option<&dyn Fn(&str) -> bool> = filter
927 .is_active()
928 .then_some(&filter_fn as &dyn Fn(&str) -> bool);
929
930 let candidate_k = filtered_candidate_k(top_k, filter.is_active());
931 let mut results = match crate::core::dense_backend::dense_results_as_hybrid(
932 backend,
933 root,
934 index,
935 engine,
936 &aligned,
937 &changed_files,
938 query,
939 candidate_k,
940 filter_pred,
941 ) {
942 Ok(v) => v,
943 Err(e) => return format!("ERR: {e}"),
944 };
945 results.truncate(top_k);
946
947 let header = if compact {
948 format!(
949 "semantic_search(dense,{top_k}) → {} results, {} chunks, embed_cov={:.0}%\n",
950 results.len(),
951 index.doc_count,
952 coverage * 100.0
953 )
954 } else {
955 format!(
956 "Semantic search (Dense): \"{}\" ({} results from {} indexed chunks, embeddings coverage {:.0}%)\n",
957 truncate_query(query, 60),
958 results.len(),
959 index.doc_count,
960 coverage * 100.0
961 )
962 };
963
964 format!("{header}{}", format_hybrid_results(&results, compact))
965 }
966 #[cfg(not(feature = "embeddings"))]
967 {
968 "ERR: embeddings feature not enabled".to_string()
969 }
970}
971
/// Returns the shared embedding engine together with the embedding index
/// stored for `root`.
///
/// Fails when the effective memory profile disables embeddings or the shared
/// engine is unavailable. A missing stored index, or one whose dimensionality
/// differs from the engine's, is replaced by a fresh empty index.
#[cfg(feature = "embeddings")]
fn load_engine_and_index(
    root: &Path,
) -> Result<(&'static EmbeddingEngine, EmbeddingIndex), String> {
    let cfg = crate::core::config::Config::load();
    if !crate::core::config::MemoryProfile::effective(&cfg).embeddings_enabled() {
        return Err("embeddings disabled by memory_profile=low".into());
    }

    let Some(engine) = crate::core::embeddings::shared_engine() else {
        return Err("embedding engine load failed".to_string());
    };

    let dims = engine.dimensions();
    let idx = match EmbeddingIndex::load(root) {
        Some(stored) if stored.dimensions == dims => stored,
        _ => EmbeddingIndex::new(dims),
    };
    Ok((engine, idx))
}
992
/// Brings `embed_idx` up to date with the chunks of `index`.
///
/// First the chunks of every file the embedding index reports as needing an
/// update are re-embedded and the index is saved. If aligned embeddings are
/// then available they are returned; otherwise every chunk is re-embedded,
/// the index is saved again, and alignment is retried.
///
/// Returns `(aligned embeddings, coverage, files that were re-embedded)`.
/// Embedding, saving, or a failed alignment after the full rebuild is
/// reported as an error string.
#[cfg(feature = "embeddings")]
fn ensure_embeddings(
    root: &Path,
    index: &BM25Index,
    engine: &EmbeddingEngine,
    embed_idx: &mut EmbeddingIndex,
) -> Result<(Vec<Vec<f32>>, f64, Vec<String>), String> {
    let mut changed_files = embed_idx.files_needing_update(&index.chunks);
    changed_files.sort();
    changed_files.dedup();

    if !changed_files.is_empty() {
        let stale: std::collections::HashSet<&str> = changed_files
            .iter()
            .map(std::string::String::as_str)
            .collect();
        // Stops at the first chunk that fails to embed.
        let refreshed = index
            .chunks
            .iter()
            .enumerate()
            .filter(|(_, chunk)| stale.contains(chunk.file_path.as_str()))
            .map(|(pos, chunk)| {
                engine
                    .embed(&chunk.content)
                    .map(|emb| (pos, emb))
                    .map_err(|e| format!("embed failed for {}: {e}", chunk.file_path))
            })
            .collect::<Result<Vec<(usize, Vec<f32>)>, String>>()?;
        embed_idx.update(&index.chunks, &refreshed, &changed_files);
        embed_idx
            .save(root)
            .map_err(|e| format!("save embeddings failed: {e}"))?;
    }

    if let Some(aligned) = embed_idx.get_aligned_embeddings(&index.chunks) {
        let coverage = embed_idx.coverage(index.chunks.len());
        return Ok((aligned, coverage, changed_files));
    }

    // Alignment is still not possible: re-embed every chunk.
    let mut all_files: Vec<String> = index
        .chunks
        .iter()
        .map(|chunk| chunk.file_path.clone())
        .collect();
    all_files.sort();
    all_files.dedup();

    let rebuilt = index
        .chunks
        .iter()
        .enumerate()
        .map(|(pos, chunk)| {
            engine
                .embed(&chunk.content)
                .map(|emb| (pos, emb))
                .map_err(|e| format!("embed failed for {}: {e}", chunk.file_path))
        })
        .collect::<Result<Vec<(usize, Vec<f32>)>, String>>()?;

    embed_idx.update(&index.chunks, &rebuilt, &all_files);
    embed_idx
        .save(root)
        .map_err(|e| format!("save embeddings failed: {e}"))?;

    let Some(aligned) = embed_idx.get_aligned_embeddings(&index.chunks) else {
        return Err("embedding alignment failed after full rebuild".to_string());
    };
    let coverage = embed_idx.coverage(index.chunks.len());
    Ok((aligned, coverage, all_files))
}
1054
1055struct SearchFilter {
1056 allowed_exts: Option<HashSet<String>>,
1057 path_glob: Option<glob::Pattern>,
1058}
1059
1060impl SearchFilter {
1061 fn new(languages: Option<&[String]>, path_glob: Option<&str>) -> Result<Self, String> {
1062 let allowed_exts = languages.map(normalize_languages);
1063 let path_glob = match path_glob {
1064 None => None,
1065 Some(s) if s.trim().is_empty() => None,
1066 Some(s) => Some(glob::Pattern::new(s).map_err(|e| e.msg.to_string())?),
1067 };
1068 Ok(Self {
1069 allowed_exts,
1070 path_glob,
1071 })
1072 }
1073
1074 fn is_active(&self) -> bool {
1075 self.allowed_exts.is_some() || self.path_glob.is_some()
1076 }
1077
1078 fn matches(&self, rel_path: &str) -> bool {
1079 let rel_path = rel_path.replace('\\', "/");
1080 if let Some(p) = &self.path_glob {
1081 if !p.matches(&rel_path) {
1082 return false;
1083 }
1084 }
1085 if let Some(exts) = &self.allowed_exts {
1086 let ext = Path::new(&rel_path)
1087 .extension()
1088 .and_then(|e| e.to_str())
1089 .unwrap_or("")
1090 .to_lowercase();
1091 if ext.is_empty() || !exts.contains(&ext) {
1092 return false;
1093 }
1094 }
1095 true
1096 }
1097}
1098
/// Expands language names or extensions into the set of file extensions they cover.
///
/// Each entry is trimmed, stripped of leading dots and lower-cased. Known
/// names map to one or more extensions (for example `typescript` → `ts`,
/// `tsx`); any other non-empty entry is used as an extension verbatim, and
/// blank entries are ignored.
fn normalize_languages(langs: &[String]) -> HashSet<String> {
    let mut exts = HashSet::new();
    for lang in langs {
        let name = lang.trim().trim_start_matches('.').to_lowercase();
        // An empty slice means "no known alias".
        let known: &[&str] = match name.as_str() {
            "rust" | "rs" => &["rs"],
            "ts" | "typescript" => &["ts", "tsx"],
            "js" | "javascript" => &["js", "jsx", "mjs", "cjs"],
            "py" | "python" => &["py"],
            "go" => &["go"],
            "java" => &["java"],
            "ruby" | "rb" => &["rb"],
            "php" => &["php"],
            "c" => &["c", "h"],
            "cpp" | "c++" | "cc" => &["cpp", "hpp", "cc", "hh"],
            "cs" | "csharp" => &["cs"],
            "swift" => &["swift"],
            "kt" | "kotlin" => &["kt", "kts"],
            "json" => &["json"],
            "yaml" | "yml" => &["yaml", "yml"],
            _ => &[],
        };
        if !known.is_empty() {
            exts.extend(known.iter().map(|ext| (*ext).to_string()));
        } else if !name.is_empty() {
            exts.insert(name);
        }
    }
    exts
}
1167
#[cfg(test)]
mod filter_tests {
    use super::*;

    /// A language name restricts matches to that language's extensions.
    #[test]
    fn filter_language_rust() {
        let languages = vec!["rust".to_string()];
        let filter = SearchFilter::new(Some(languages.as_slice()), None)
            .expect("language-only filter should build");
        assert!(filter.matches("src/main.rs"));
        assert!(!filter.matches("src/main.ts"));
    }

    /// A path glob restricts matches to paths under the given prefix.
    #[test]
    fn filter_path_glob() {
        let filter =
            SearchFilter::new(None, Some("rust/src/**")).expect("glob should be valid");
        assert!(filter.matches("rust/src/core/mod.rs"));
        assert!(!filter.matches("website/src/pages/index.astro"));
    }
}
1186
#[cfg(test)]
mod determinism_tests {
    use super::*;

    /// Builds a one-line function hit named `foo` in `path` with no scores set.
    fn hit(path: &str, snippet: &str) -> HybridResult {
        HybridResult {
            file_path: path.to_string(),
            symbol_name: "foo".to_string(),
            kind: crate::core::bm25_index::ChunkKind::Function,
            start_line: 1,
            end_line: 1,
            snippet: snippet.to_string(),
            rrf_score: 0.0,
            bm25_score: None,
            dense_score: None,
            bm25_rank: None,
            dense_rank: None,
        }
    }

    /// Two entries that end up with equal fused scores must come out in path
    /// order regardless of the order in which the lists presented them.
    #[test]
    fn rrf_merge_hybrid_is_deterministic_on_ties() {
        let a = hit("a.rs", "a");
        let b = hit("b.rs", "b");

        let lists = vec![
            ("root".to_string(), vec![a.clone(), b.clone()]),
            ("root".to_string(), vec![b.clone(), a.clone()]),
        ];
        let fused = rrf_merge_hybrid(lists, 10);

        let paths: Vec<&str> = fused.iter().map(|r| r.file_path.as_str()).collect();
        assert_eq!(fused.len(), 2);
        assert_eq!(paths[0], "a.rs");
        assert_eq!(paths[1], "b.rs");
    }
}
1233}