1use anyhow::Result;
2use ck_ann::AnnIndex;
3use ck_core::{CkError, SearchMode, SearchOptions, SearchResult, Span};
4use globset::{Glob, GlobSet, GlobSetBuilder};
5use rayon::prelude::*;
6use regex::{Regex, RegexBuilder};
7use std::collections::HashMap;
8use std::fs;
9use std::path::PathBuf as StdPathBuf;
10use std::path::{Path, PathBuf};
11use tantivy::collector::TopDocs;
12use tantivy::query::QueryParser;
13use tantivy::schema::{STORED, Schema, TEXT, Value};
14use tantivy::{Index, ReloadPolicy, TantivyDocument, doc};
15use walkdir::WalkDir;
16
17mod semantic_v3;
18pub use semantic_v3::{semantic_search_v3, semantic_search_v3_with_progress};
19
/// Callback handed to the `*_with_progress` search entry points.
///
/// It is invoked with short human-readable status messages (for example
/// `"Running regex search..."`). The boxed closure must be `Send + Sync`.
pub type SearchProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
21
22async fn extract_content_from_span(file_path: &Path, span: &ck_core::Span) -> Result<String> {
24 let content = tokio::fs::read_to_string(file_path).await?;
25 let lines: Vec<&str> = content.lines().collect();
26
27 if span.line_start == 0 || span.line_start > lines.len() {
28 return Ok(String::new());
29 }
30
31 let start_idx = span.line_start - 1; let end_idx = (span.line_end - 1).min(lines.len().saturating_sub(1));
33
34 if start_idx <= end_idx {
35 Ok(lines[start_idx..=end_idx].join("\n"))
36 } else {
37 Ok(lines[start_idx].to_string())
38 }
39}
40
/// Finds the closest directory at or above `path` that contains a `.ck` entry.
///
/// When `path` is a file the search begins at its parent directory; otherwise
/// it begins at `path` itself. Each ancestor is probed in turn, and `None` is
/// returned once the top of the path is reached without finding `.ck`.
fn find_nearest_index_root(path: &Path) -> Option<StdPathBuf> {
    let start = if path.is_file() {
        path.parent().unwrap_or(path)
    } else {
        path
    };

    // `ancestors()` yields `start` first, then each successive parent.
    start
        .ancestors()
        .find(|dir| dir.join(".ck").exists())
        .map(Path::to_path_buf)
}
57
58pub async fn search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
59 search_with_progress(options, None).await
60}
61
62pub async fn search_with_progress(
63 options: &SearchOptions,
64 progress_callback: Option<SearchProgressCallback>,
65) -> Result<Vec<SearchResult>> {
66 if !options.path.exists() {
68 return Err(ck_core::CkError::Search(format!(
69 "Path does not exist: {}",
70 options.path.display()
71 ))
72 .into());
73 }
74
75 if !matches!(options.mode, SearchMode::Regex) {
77 let need_embeddings = matches!(options.mode, SearchMode::Semantic | SearchMode::Hybrid);
78 ensure_index_updated(&options.path, options.reindex, need_embeddings).await?;
79 }
80
81 match options.mode {
82 SearchMode::Regex => regex_search(options),
83 SearchMode::Lexical => lexical_search(options).await,
84 SearchMode::Semantic => {
85 semantic_search_v3_with_progress(options, progress_callback).await
87 }
88 SearchMode::Hybrid => hybrid_search_with_progress(options, progress_callback).await,
89 }
90}
91
92fn regex_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
93 let pattern = if options.fixed_string {
94 regex::escape(&options.query)
95 } else if options.whole_word {
96 format!(r"\b{}\b", regex::escape(&options.query))
97 } else {
98 options.query.clone()
99 };
100
101 let regex = RegexBuilder::new(&pattern)
102 .case_insensitive(options.case_insensitive)
103 .build()
104 .map_err(CkError::Regex)?;
105
106 let should_recurse = options.path.is_dir() || options.recursive;
108 let files = if should_recurse {
109 ck_index::collect_files(
111 &options.path,
112 options.respect_gitignore,
113 &options.exclude_patterns,
114 )?
115 } else {
116 collect_files(&options.path, should_recurse, &options.exclude_patterns)?
118 };
119
120 let results: Vec<Vec<SearchResult>> = files
121 .par_iter()
122 .filter_map(|file_path| match search_file(®ex, file_path, options) {
123 Ok(matches) => {
124 if matches.is_empty() {
125 None
126 } else {
127 Some(matches)
128 }
129 }
130 Err(e) => {
131 tracing::debug!("Error searching {:?}: {}", file_path, e);
132 None
133 }
134 })
135 .collect();
136
137 let mut all_results: Vec<SearchResult> = results.into_iter().flatten().collect();
138 all_results.sort_by(|a, b| {
140 let path_cmp = a.file.cmp(&b.file);
141 if path_cmp != std::cmp::Ordering::Equal {
142 return path_cmp;
143 }
144 a.span.line_start.cmp(&b.span.line_start)
145 });
146
147 if let Some(top_k) = options.top_k {
148 all_results.truncate(top_k);
149 }
150
151 Ok(all_results)
152}
153
154fn search_file(
155 regex: &Regex,
156 file_path: &Path,
157 options: &SearchOptions,
158) -> Result<Vec<SearchResult>> {
159 let content = fs::read_to_string(file_path)?;
160 let lines: Vec<&str> = content.lines().collect();
161 let mut results = Vec::new();
162
163 let code_sections = if options.full_section {
165 extract_code_sections(file_path, &content)
166 } else {
167 None
168 };
169
170 let mut byte_offset = 0;
172
173 for (line_idx, line) in lines.iter().enumerate() {
174 let line_number = line_idx + 1;
175
176 for mat in regex.find_iter(line) {
178 let preview = if options.full_section {
179 if let Some(ref sections) = code_sections {
181 if let Some(section) = find_containing_section(sections, line_idx) {
182 section.clone()
183 } else {
184 get_context_preview(&lines, line_idx, options)
186 }
187 } else {
188 get_context_preview(&lines, line_idx, options)
189 }
190 } else {
191 get_context_preview(&lines, line_idx, options)
192 };
193
194 results.push(SearchResult {
195 file: file_path.to_path_buf(),
196 span: Span {
197 byte_start: byte_offset + mat.start(),
198 byte_end: byte_offset + mat.end(),
199 line_start: line_number,
200 line_end: line_number,
201 },
202 score: 1.0,
203 preview,
204 lang: ck_core::Language::from_path(file_path),
205 symbol: None,
206 });
207 }
208
209 byte_offset += line.len();
211 if line_idx < lines.len() - 1 {
212 byte_offset += 1; }
214 }
215
216 Ok(results)
217}
218
219async fn lexical_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
220 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
222 if options.path.is_file() {
223 options.path.parent().unwrap_or(&options.path).to_path_buf()
224 } else {
225 options.path.clone()
226 }
227 });
228
229 let index_dir = index_root.join(".ck");
230 if !index_dir.exists() {
231 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
232 }
233
234 let tantivy_index_path = index_dir.join("tantivy_index");
235
236 if !tantivy_index_path.exists() {
237 return build_tantivy_index(options).await;
238 }
239
240 let mut schema_builder = Schema::builder();
241 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
242 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
243 let _schema = schema_builder.build();
244
245 let index = Index::open_in_dir(&tantivy_index_path)
246 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
247
248 let reader = index
249 .reader_builder()
250 .reload_policy(ReloadPolicy::OnCommitWithDelay)
251 .try_into()
252 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
253
254 let searcher = reader.searcher();
255 let query_parser = QueryParser::for_index(&index, vec![content_field]);
256
257 let query = query_parser
258 .parse_query(&options.query)
259 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
260
261 let top_docs = if let Some(top_k) = options.top_k {
262 searcher.search(&query, &TopDocs::with_limit(top_k))?
263 } else {
264 searcher.search(&query, &TopDocs::with_limit(100))?
265 };
266
267 let mut raw_results = Vec::new();
269 for (_score, doc_address) in top_docs {
270 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
271 let path_text = retrieved_doc
272 .get_first(path_field)
273 .map(|field_value| field_value.as_str().unwrap_or(""))
274 .unwrap_or("");
275 let content_text = retrieved_doc
276 .get_first(content_field)
277 .map(|field_value| field_value.as_str().unwrap_or(""))
278 .unwrap_or("");
279
280 let file_path = PathBuf::from(path_text);
281 let preview = if options.full_section {
282 content_text.to_string()
283 } else {
284 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
285 };
286
287 raw_results.push((
288 _score,
289 SearchResult {
290 file: file_path,
291 span: Span {
292 byte_start: 0,
293 byte_end: content_text.len(),
294 line_start: 1,
295 line_end: content_text.lines().count(),
296 },
297 score: _score,
298 preview,
299 lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
300 symbol: None,
301 },
302 ));
303 }
304
305 let mut results = Vec::new();
307 if !raw_results.is_empty() {
308 let max_score = raw_results
309 .iter()
310 .map(|(score, _)| *score)
311 .fold(0.0f32, f32::max);
312 if max_score > 0.0 {
313 for (raw_score, mut result) in raw_results {
314 let normalized_score = raw_score / max_score;
315
316 if let Some(threshold) = options.threshold
318 && normalized_score < threshold
319 {
320 continue;
321 }
322
323 result.score = normalized_score;
324 results.push(result);
325 }
326 }
327 }
328
329 Ok(results)
330}
331
332async fn build_tantivy_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
333 let index_root = if options.path.is_file() {
335 options.path.parent().unwrap_or(&options.path)
336 } else {
337 &options.path
338 };
339
340 let index_dir = index_root.join(".ck");
341 let tantivy_index_path = index_dir.join("tantivy_index");
342
343 fs::create_dir_all(&tantivy_index_path)?;
344
345 let mut schema_builder = Schema::builder();
346 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
347 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
348 let schema = schema_builder.build();
349
350 let index = Index::create_in_dir(&tantivy_index_path, schema.clone())
351 .map_err(|e| CkError::Index(format!("Failed to create tantivy index: {}", e)))?;
352
353 let mut index_writer = index
354 .writer(50_000_000)
355 .map_err(|e| CkError::Index(format!("Failed to create index writer: {}", e)))?;
356
357 let files = collect_files(index_root, true, &options.exclude_patterns)?;
358
359 for file_path in &files {
360 if let Ok(content) = fs::read_to_string(file_path) {
361 let doc = doc!(
362 content_field => content,
363 path_field => file_path.display().to_string()
364 );
365 index_writer.add_document(doc)?;
366 }
367 }
368
369 index_writer
370 .commit()
371 .map_err(|e| CkError::Index(format!("Failed to commit index: {}", e)))?;
372
373 let tantivy_index_path = index_root.join(".ck").join("tantivy_index");
375 let mut schema_builder = Schema::builder();
376 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
377 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
378 let _schema = schema_builder.build();
379
380 let index = Index::open_in_dir(&tantivy_index_path)
381 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
382
383 let reader = index
384 .reader_builder()
385 .reload_policy(ReloadPolicy::OnCommitWithDelay)
386 .try_into()
387 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
388
389 let searcher = reader.searcher();
390 let query_parser = QueryParser::for_index(&index, vec![content_field]);
391
392 let query = query_parser
393 .parse_query(&options.query)
394 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
395
396 let top_docs = if let Some(top_k) = options.top_k {
397 searcher.search(&query, &TopDocs::with_limit(top_k))?
398 } else {
399 searcher.search(&query, &TopDocs::with_limit(100))?
400 };
401
402 let mut raw_results = Vec::new();
404 for (_score, doc_address) in top_docs {
405 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
406 let path_text = retrieved_doc
407 .get_first(path_field)
408 .map(|field_value| field_value.as_str().unwrap_or(""))
409 .unwrap_or("");
410 let content_text = retrieved_doc
411 .get_first(content_field)
412 .map(|field_value| field_value.as_str().unwrap_or(""))
413 .unwrap_or("");
414
415 let file_path = PathBuf::from(path_text);
416 let preview = if options.full_section {
417 content_text.to_string()
418 } else {
419 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
420 };
421
422 raw_results.push((
423 _score,
424 SearchResult {
425 file: file_path,
426 span: Span {
427 byte_start: 0,
428 byte_end: content_text.len(),
429 line_start: 1,
430 line_end: content_text.lines().count(),
431 },
432 score: _score,
433 preview,
434 lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
435 symbol: None,
436 },
437 ));
438 }
439
440 let mut results = Vec::new();
442 if !raw_results.is_empty() {
443 let max_score = raw_results
444 .iter()
445 .map(|(score, _)| *score)
446 .fold(0.0f32, f32::max);
447 if max_score > 0.0 {
448 for (raw_score, mut result) in raw_results {
449 let normalized_score = raw_score / max_score;
450
451 if let Some(threshold) = options.threshold
453 && normalized_score < threshold
454 {
455 continue;
456 }
457
458 result.score = normalized_score;
459 results.push(result);
460 }
461 }
462 }
463
464 Ok(results)
465}
466
467#[allow(dead_code)]
468async fn semantic_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
469 semantic_search_with_progress(options, None).await
470}
471
472async fn semantic_search_with_progress(
473 options: &SearchOptions,
474 progress_callback: Option<SearchProgressCallback>,
475) -> Result<Vec<SearchResult>> {
476 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
478 if options.path.is_file() {
479 options.path.parent().unwrap_or(&options.path).to_path_buf()
480 } else {
481 options.path.clone()
482 }
483 });
484
485 let index_dir = index_root.join(".ck");
486 if !index_dir.exists() {
487 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
488 }
489
490 let ann_index_path = index_dir.join("ann_index.bin");
491 let embeddings_path = index_dir.join("embeddings.json");
492
493 if !ann_index_path.exists() || !embeddings_path.exists() {
494 return build_semantic_index_with_progress(options, progress_callback).await;
495 }
496
497 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
499
500 let embeddings_data = fs::read_to_string(&embeddings_path)?;
502 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
503
504 if let Some(ref callback) = progress_callback {
506 callback("Loading embedding model...");
507 }
508
509 let mut embedder = if let Some(ref callback) = progress_callback {
510 let _cb = callback.as_ref();
511 let model_cb = Box::new(|msg: &str| {
512 eprintln!("Model: {}", msg);
515 }) as ck_embed::ModelDownloadCallback;
516 ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), Some(model_cb))?
517 } else {
518 ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?
519 };
520 let query_embeddings = embedder.embed(std::slice::from_ref(&options.query))?;
521
522 if query_embeddings.is_empty() {
523 return Ok(Vec::new());
524 }
525
526 let query_embedding = &query_embeddings[0];
527
528 let top_k = options.top_k.unwrap_or(10);
530 let similar_docs = ann_index.search(query_embedding, top_k);
531
532 let mut results = Vec::new();
533
534 let filter_by_file = options.path.is_file();
536 let target_file = if filter_by_file {
537 Some(
538 options
539 .path
540 .canonicalize()
541 .unwrap_or_else(|_| options.path.clone()),
542 )
543 } else {
544 None
545 };
546
547 for (doc_id, similarity) in similar_docs {
548 if let Some(threshold) = options.threshold
550 && similarity < threshold
551 {
552 continue;
553 }
554
555 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
556 if let Some(target) = &target_file {
558 let canonical_result = file_path
559 .canonicalize()
560 .unwrap_or_else(|_| file_path.clone());
561 if canonical_result != *target {
562 continue; }
564 }
565
566 let preview = if options.full_section {
568 content.clone()
569 } else {
570 content.lines().take(3).collect::<Vec<_>>().join("\n")
571 };
572
573 results.push(SearchResult {
574 file: file_path.clone(),
575 span: Span {
576 byte_start: 0,
577 byte_end: content.len(),
578 line_start: 1,
579 line_end: content.lines().count(),
580 },
581 score: similarity,
582 preview,
583 lang: ck_core::Language::from_path(file_path),
584 symbol: None,
585 });
586 }
587 }
588
589 Ok(results)
590}
591
592#[allow(dead_code)]
593async fn build_semantic_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
594 build_semantic_index_with_progress(options, None).await
595}
596
597async fn build_semantic_index_with_progress(
598 options: &SearchOptions,
599 progress_callback: Option<SearchProgressCallback>,
600) -> Result<Vec<SearchResult>> {
601 let index_root = if options.path.is_file() {
603 options.path.parent().unwrap_or(&options.path)
604 } else {
605 &options.path
606 };
607
608 let index_dir = index_root.join(".ck");
609 let ann_index_path = index_dir.join("ann_index.bin");
610 let embeddings_path = index_dir.join("embeddings.json");
611
612 fs::create_dir_all(&index_dir)?;
613
614 if let Some(ref callback) = progress_callback {
615 callback("Building semantic index (no index found)...");
616 }
617
618 eprintln!("Building semantic index (no existing index found)...");
620
621 let files = collect_files(index_root, true, &options.exclude_patterns)?;
623
624 if let Some(ref callback) = progress_callback {
625 callback(&format!("Found {} files to index", files.len()));
626 }
627 eprintln!("Found {} files to embed and index", files.len());
628
629 let mut file_embeddings = Vec::new();
630 let mut embeddings = Vec::new();
631
632 if let Some(ref callback) = progress_callback {
634 callback("Loading embedding model...");
635 }
636
637 let model_callback = if progress_callback.is_some() {
638 Some(Box::new(|msg: &str| {
639 eprintln!("Model: {}", msg);
640 }) as ck_embed::ModelDownloadCallback)
641 } else {
642 None
643 };
644
645 let mut embedder =
646 ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), model_callback)?;
647
648 if let Some(ref callback) = progress_callback {
649 callback("Generating embeddings for code chunks...");
650 }
651
652 for (file_idx, file_path) in files.iter().enumerate() {
653 if let Ok(content) = fs::read_to_string(file_path) {
654 if let Some(ref callback) = progress_callback {
655 let file_name = file_path
656 .file_name()
657 .map(|n| n.to_string_lossy().to_string())
658 .unwrap_or_else(|| file_path.to_string_lossy().to_string());
659 callback(&format!(
660 "Processing {}/{}: {}",
661 file_idx + 1,
662 files.len(),
663 file_name
664 ));
665 }
666
667 let chunks = ck_chunk::chunk_text(&content, ck_core::Language::from_path(file_path))?;
669
670 for chunk in chunks {
671 let chunk_embeddings = embedder.embed(std::slice::from_ref(&chunk.text))?;
672 if !chunk_embeddings.is_empty() {
673 embeddings.push(chunk_embeddings[0].clone());
674 file_embeddings.push((file_path.clone(), chunk.text));
675 }
676 }
677 }
678 }
679
680 if let Some(ref callback) = progress_callback {
681 callback(&format!(
682 "Built {} embeddings, creating search index...",
683 embeddings.len()
684 ));
685 }
686 eprintln!(
687 "Generated {} embeddings, building search index...",
688 embeddings.len()
689 );
690
691 let index = ck_ann::SimpleIndex::build(&embeddings)?;
693 index.save(&ann_index_path)?;
694
695 let embeddings_json = serde_json::to_string(&file_embeddings)?;
697 fs::write(&embeddings_path, embeddings_json)?;
698
699 if let Some(ref callback) = progress_callback {
700 callback("Semantic index built successfully, running search...");
701 }
702 eprintln!("Semantic index built successfully!");
703
704 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
706
707 let embeddings_data = fs::read_to_string(&embeddings_path)?;
709 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
710
711 let mut embedder = ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?;
713 let query_embeddings = embedder.embed(std::slice::from_ref(&options.query))?;
714
715 if query_embeddings.is_empty() {
716 return Ok(Vec::new());
717 }
718
719 let query_embedding = &query_embeddings[0];
720
721 let top_k = options.top_k.unwrap_or(10);
723 let similar_docs = ann_index.search(query_embedding, top_k);
724
725 let mut results = Vec::new();
726
727 let filter_by_file = options.path.is_file();
729 let target_file = if filter_by_file {
730 Some(
731 options
732 .path
733 .canonicalize()
734 .unwrap_or_else(|_| options.path.clone()),
735 )
736 } else {
737 None
738 };
739
740 for (doc_id, similarity) in similar_docs {
741 if let Some(threshold) = options.threshold
743 && similarity < threshold
744 {
745 continue;
746 }
747
748 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
749 if let Some(target) = &target_file {
751 let canonical_result = file_path
752 .canonicalize()
753 .unwrap_or_else(|_| file_path.clone());
754 if canonical_result != *target {
755 continue; }
757 }
758
759 let preview = if options.full_section {
761 content.clone()
762 } else {
763 content.lines().take(3).collect::<Vec<_>>().join("\n")
764 };
765
766 results.push(SearchResult {
767 file: file_path.clone(),
768 span: Span {
769 byte_start: 0,
770 byte_end: content.len(),
771 line_start: 1,
772 line_end: content.lines().count(),
773 },
774 score: similarity,
775 preview,
776 lang: ck_core::Language::from_path(file_path),
777 symbol: None,
778 });
779 }
780 }
781
782 Ok(results)
783}
784
785#[allow(dead_code)]
786async fn hybrid_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
787 hybrid_search_with_progress(options, None).await
788}
789
790async fn hybrid_search_with_progress(
791 options: &SearchOptions,
792 progress_callback: Option<SearchProgressCallback>,
793) -> Result<Vec<SearchResult>> {
794 if let Some(ref callback) = progress_callback {
795 callback("Running regex search...");
796 }
797 let regex_results = regex_search(options)?;
798
799 if let Some(ref callback) = progress_callback {
800 callback("Running semantic search...");
801 }
802 let semantic_results = semantic_search_v3_with_progress(options, progress_callback).await?;
803
804 let mut combined = HashMap::new();
805
806 for (rank, result) in regex_results.iter().enumerate() {
807 let key = format!("{}:{}", result.file.display(), result.span.line_start);
808 combined
809 .entry(key)
810 .or_insert(Vec::new())
811 .push((rank + 1, result.clone()));
812 }
813
814 for (rank, result) in semantic_results.iter().enumerate() {
815 let key = format!("{}:{}", result.file.display(), result.span.line_start);
816 combined
817 .entry(key)
818 .or_insert(Vec::new())
819 .push((rank + 1, result.clone()));
820 }
821
822 let mut rrf_results: Vec<SearchResult> = combined
824 .into_values()
825 .map(|ranks| {
826 let mut result = ranks[0].1.clone();
827 let rrf_score = ranks
828 .iter()
829 .map(|(rank, _)| 1.0 / (60.0 + *rank as f32))
830 .sum();
831 result.score = rrf_score;
832 result
833 })
834 .filter(|result| {
835 if let Some(threshold) = options.threshold {
837 result.score >= threshold
838 } else {
839 true
840 }
841 })
842 .collect();
843
844 rrf_results.sort_by(|a, b| {
846 b.score
847 .partial_cmp(&a.score)
848 .unwrap_or(std::cmp::Ordering::Equal)
849 });
850
851 if let Some(top_k) = options.top_k {
852 rrf_results.truncate(top_k);
853 }
854
855 Ok(rrf_results)
856}
857
858fn build_globset(patterns: &[String]) -> GlobSet {
859 let mut builder = GlobSetBuilder::new();
860 for pat in patterns {
861 if let Ok(glob) = Glob::new(pat) {
863 builder.add(glob);
864 }
865 }
866 builder.build().unwrap_or_else(|_| GlobSet::empty())
867}
868
869fn should_exclude_path(path: &Path, exclude_patterns: &[String]) -> bool {
870 let globset = build_globset(exclude_patterns);
871 if globset.is_match(path) {
873 return true;
874 }
875 for component in path.components() {
876 if let std::path::Component::Normal(name) = component
877 && globset.is_match(name)
878 {
879 return true;
880 }
881 }
882 false
883}
884
885fn collect_files(
886 path: &Path,
887 recursive: bool,
888 exclude_patterns: &[String],
889) -> Result<Vec<PathBuf>> {
890 let mut files = Vec::new();
891 let globset = build_globset(exclude_patterns);
892
893 if path.is_file() {
894 files.push(path.to_path_buf());
896 } else if recursive {
897 for entry in WalkDir::new(path).into_iter().filter_entry(|e| {
898 let name = e.file_name();
900 !globset.is_match(e.path()) && !globset.is_match(name)
901 }) {
902 match entry {
903 Ok(entry) => {
904 if entry.file_type().is_file()
905 && !should_exclude_path(entry.path(), exclude_patterns)
906 {
907 files.push(entry.path().to_path_buf());
908 }
909 }
910 Err(e) => {
911 tracing::debug!("Skipping path due to error: {}", e);
913 continue;
914 }
915 }
916 }
917 } else {
918 match fs::read_dir(path) {
919 Ok(read_dir) => {
920 for entry in read_dir {
921 match entry {
922 Ok(entry) => {
923 let path = entry.path();
924 if path.is_file() && !should_exclude_path(&path, exclude_patterns) {
925 files.push(path);
926 }
927 }
928 Err(e) => {
929 tracing::debug!("Skipping directory entry due to error: {}", e);
930 continue;
931 }
932 }
933 }
934 }
935 Err(e) => {
936 tracing::debug!("Cannot read directory {:?}: {}", path, e);
937 return Err(e.into());
938 }
939 }
940 }
941
942 Ok(files)
943}
944
945async fn ensure_index_updated(
946 path: &Path,
947 force_reindex: bool,
948 need_embeddings: bool,
949) -> Result<()> {
950 let index_root_buf = find_nearest_index_root(path).unwrap_or_else(|| {
952 if path.is_file() {
953 path.parent().unwrap_or(path).to_path_buf()
954 } else {
955 path.to_path_buf()
956 }
957 });
958 let index_root = &index_root_buf;
959
960 if force_reindex {
962 let stats = ck_index::smart_update_index_with_progress(
963 index_root,
964 false,
965 None,
966 need_embeddings,
967 true,
968 &[], )
970 .await?;
971 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
972 tracing::info!(
973 "Index updated: {} files indexed, {} orphaned files removed",
974 stats.files_indexed,
975 stats.orphaned_files_removed
976 );
977 }
978 return Ok(());
979 }
980
981 let stats = ck_index::smart_update_index_with_progress(
983 index_root,
984 false,
985 None,
986 need_embeddings,
987 true,
988 &[],
989 )
990 .await?;
991 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
992 tracing::info!(
993 "Index updated: {} files indexed, {} orphaned files removed",
994 stats.files_indexed,
995 stats.orphaned_files_removed
996 );
997 }
998
999 Ok(())
1000}
1001
1002fn get_context_preview(lines: &[&str], line_idx: usize, options: &SearchOptions) -> String {
1003 let before = options.before_context_lines.max(options.context_lines);
1004 let after = options.after_context_lines.max(options.context_lines);
1005
1006 if before > 0 || after > 0 {
1007 let start_idx = line_idx.saturating_sub(before);
1008 let end_idx = (line_idx + after + 1).min(lines.len());
1009 lines[start_idx..end_idx].join("\n")
1010 } else {
1011 lines[line_idx].to_string()
1012 }
1013}
1014
1015fn extract_code_sections(file_path: &Path, content: &str) -> Option<Vec<(usize, usize, String)>> {
1016 let lang = ck_core::Language::from_path(file_path)?;
1017
1018 if let Ok(chunks) = ck_chunk::chunk_text(content, Some(lang)) {
1020 let sections: Vec<(usize, usize, String)> = chunks
1021 .into_iter()
1022 .filter(|chunk| {
1023 matches!(
1024 chunk.chunk_type,
1025 ck_chunk::ChunkType::Function
1026 | ck_chunk::ChunkType::Class
1027 | ck_chunk::ChunkType::Method
1028 )
1029 })
1030 .map(|chunk| {
1031 (
1032 chunk.span.line_start - 1, chunk.span.line_end - 1,
1034 chunk.text,
1035 )
1036 })
1037 .collect();
1038
1039 if sections.is_empty() {
1040 None
1041 } else {
1042 Some(sections)
1043 }
1044 } else {
1045 None
1046 }
1047}
1048
/// Returns the text of the first section whose inclusive `[start, end]` line
/// range contains `line_idx`, or `None` when no section covers that line.
fn find_containing_section(
    sections: &[(usize, usize, String)],
    line_idx: usize,
) -> Option<&String> {
    sections
        .iter()
        .find(|(start, end, _)| (*start..=*end).contains(&line_idx))
        .map(|(_, _, text)| text)
}
1060
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes four small fixture files into `dir` and returns their paths.
    fn create_test_files(dir: &std::path::Path) -> Vec<PathBuf> {
        let files = vec![
            ("test1.txt", "hello world rust programming"),
            ("test2.rs", "fn main() { println!(\"Hello Rust\"); }"),
            ("test3.py", "print('Hello Python')"),
            ("test4.txt", "machine learning artificial intelligence"),
        ];

        let mut paths = Vec::new();
        for (name, content) in files {
            let path = dir.join(name);
            fs::write(&path, content).unwrap();
            paths.push(path);
        }
        paths
    }

    #[test]
    fn test_collect_files() {
        let temp_dir = TempDir::new().unwrap();
        let test_files = create_test_files(temp_dir.path());

        // Non-recursive listing of a flat directory.
        let files = collect_files(temp_dir.path(), false, &[]).unwrap();
        assert_eq!(files.len(), 4);

        // Recursive walk of the same directory.
        let files = collect_files(temp_dir.path(), true, &[]).unwrap();
        assert_eq!(files.len(), 4);

        // A single file is returned as-is.
        let files = collect_files(&test_files[0], false, &[]).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0], test_files[0]);
    }

    #[test]
    fn test_regex_search() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "rust".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());

        let rust_matches: Vec<_> = results
            .iter()
            .filter(|r| r.preview.to_lowercase().contains("rust"))
            .collect();
        assert!(!rust_matches.is_empty());
    }

    #[test]
    fn test_regex_search_case_insensitive() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "HELLO".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_fixed_string() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        // The parentheses would form a regex group if the query were not escaped.
        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "fn main()".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            fixed_string: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_whole_word() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(
            temp_dir.path().join("word_test.txt"),
            "rust rusty rustacean",
        )
        .unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "rust".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            whole_word: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        // Only the standalone "rust" may match; "rusty" and "rustacean" must not.
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.byte_start, 0);
        assert_eq!(results[0].span.byte_end, 4);
    }

    #[test]
    fn test_regex_search_top_k() {
        let temp_dir = TempDir::new().unwrap();

        // Ten files with one match each, so truncation must actually happen.
        for i in 0..10 {
            fs::write(
                temp_dir.path().join(format!("file{}.txt", i)),
                "test content",
            )
            .unwrap();
        }

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            top_k: Some(5),
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert_eq!(results.len(), 5);
    }

    #[test]
    fn test_regex_search_span_offsets() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("spans.txt");
        fs::write(&test_file, "test test test\nline two test\ntest end").unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();

        // Three matches on line 1, one on line 2, one on line 3.
        assert_eq!(results.len(), 5);

        let line1_matches: Vec<_> = results.iter().filter(|r| r.span.line_start == 1).collect();
        assert_eq!(line1_matches.len(), 3);
        assert_eq!(line1_matches[0].span.byte_start, 0);
        assert_eq!(line1_matches[1].span.byte_start, 5);
        assert_eq!(line1_matches[2].span.byte_start, 10);

        let line2_matches: Vec<_> = results.iter().filter(|r| r.span.line_start == 2).collect();
        assert_eq!(line2_matches.len(), 1);
        // 15 bytes for line 1 including its newline, plus 9 for "line two ".
        assert_eq!(line2_matches[0].span.byte_start, 24);

        // Offsets are file-relative, so every match has a distinct start.
        let mut byte_starts: Vec<_> = results.iter().map(|r| r.span.byte_start).collect();
        byte_starts.sort();
        byte_starts.dedup();
        assert_eq!(byte_starts.len(), 5);
    }

    #[test]
    fn test_search_file() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(
            &file_path,
            "line 1: hello\nline 2: world\nline 3: rust programming",
        )
        .unwrap();

        let regex = regex::Regex::new("rust").unwrap();
        let options = SearchOptions::default();

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.line_start, 3);
        assert!(results[0].preview.contains("rust"));
    }

    #[test]
    fn test_search_file_with_context() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1\nline 2\ntarget line\nline 4\nline 5").unwrap();

        let regex = regex::Regex::new("target").unwrap();
        let options = SearchOptions {
            context_lines: 1,
            ..Default::default()
        };

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);

        // One line of context on each side of the match.
        assert!(results[0].preview.contains("line 2"));
        assert!(results[0].preview.contains("target line"));
        assert!(results[0].preview.contains("line 4"));
    }

    #[tokio::test]
    async fn test_search_main_function() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "hello".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert!(!results.is_empty());
    }
}