1use anyhow::Result;
2use ck_ann::AnnIndex;
3use ck_core::{CkError, SearchMode, SearchOptions, SearchResult, Span};
4use globset::{Glob, GlobSet, GlobSetBuilder};
5use rayon::prelude::*;
6use regex::{Regex, RegexBuilder};
7use std::collections::HashMap;
8use std::fs;
9use std::path::PathBuf as StdPathBuf;
10use std::path::{Path, PathBuf};
11use tantivy::collector::TopDocs;
12use tantivy::query::QueryParser;
13use tantivy::schema::{STORED, Schema, TEXT, Value};
14use tantivy::{Index, ReloadPolicy, TantivyDocument, doc};
15use walkdir::WalkDir;
16
17mod semantic_v3;
18pub use semantic_v3::{semantic_search_v3, semantic_search_v3_with_progress};
19
20pub type SearchProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
21
22async fn extract_content_from_span(file_path: &Path, span: &ck_core::Span) -> Result<String> {
24 let content = tokio::fs::read_to_string(file_path).await?;
25 let lines: Vec<&str> = content.lines().collect();
26
27 if span.line_start == 0 || span.line_start > lines.len() {
28 return Ok(String::new());
29 }
30
31 let start_idx = span.line_start - 1; let end_idx = (span.line_end - 1).min(lines.len().saturating_sub(1));
33
34 if start_idx <= end_idx {
35 Ok(lines[start_idx..=end_idx].join("\n"))
36 } else {
37 Ok(lines[start_idx].to_string())
38 }
39}
40
/// Returns the closest directory, walking upwards from `path`, that contains
/// a `.ck` entry.
///
/// The walk starts at the parent directory when `path` is a file (or at `path`
/// itself if it has no parent) and at `path` otherwise. `None` is returned
/// when no directory on the way up contains `.ck`.
fn find_nearest_index_root(path: &Path) -> Option<StdPathBuf> {
    let origin = match path.parent() {
        Some(dir) if path.is_file() => dir,
        _ => path,
    };

    // `ancestors` yields `origin` first, then each successive parent.
    origin
        .ancestors()
        .find(|dir| dir.join(".ck").exists())
        .map(Path::to_path_buf)
}
57
58pub async fn search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
59 search_with_progress(options, None).await
60}
61
62pub async fn search_with_progress(
63 options: &SearchOptions,
64 progress_callback: Option<SearchProgressCallback>,
65) -> Result<Vec<SearchResult>> {
66 if !options.path.exists() {
68 return Err(ck_core::CkError::Search(format!(
69 "Path does not exist: {}",
70 options.path.display()
71 ))
72 .into());
73 }
74
75 if !matches!(options.mode, SearchMode::Regex) {
77 let need_embeddings = matches!(options.mode, SearchMode::Semantic | SearchMode::Hybrid);
78 ensure_index_updated(&options.path, options.reindex, need_embeddings).await?;
79 }
80
81 match options.mode {
82 SearchMode::Regex => regex_search(options),
83 SearchMode::Lexical => lexical_search(options).await,
84 SearchMode::Semantic => {
85 semantic_search_v3_with_progress(options, progress_callback).await
87 }
88 SearchMode::Hybrid => hybrid_search_with_progress(options, progress_callback).await,
89 }
90}
91
92fn regex_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
93 let pattern = if options.fixed_string {
94 regex::escape(&options.query)
95 } else if options.whole_word {
96 format!(r"\b{}\b", regex::escape(&options.query))
97 } else {
98 options.query.clone()
99 };
100
101 let regex = RegexBuilder::new(&pattern)
102 .case_insensitive(options.case_insensitive)
103 .build()
104 .map_err(CkError::Regex)?;
105
106 let should_recurse = options.path.is_dir() || options.recursive;
108 let files = if should_recurse {
109 ck_index::collect_files(
111 &options.path,
112 options.respect_gitignore,
113 &options.exclude_patterns,
114 )?
115 } else {
116 collect_files(&options.path, should_recurse, &options.exclude_patterns)?
118 };
119
120 let results: Vec<Vec<SearchResult>> = files
121 .par_iter()
122 .filter_map(|file_path| match search_file(®ex, file_path, options) {
123 Ok(matches) => {
124 if matches.is_empty() {
125 None
126 } else {
127 Some(matches)
128 }
129 }
130 Err(e) => {
131 tracing::debug!("Error searching {:?}: {}", file_path, e);
132 None
133 }
134 })
135 .collect();
136
137 let mut all_results: Vec<SearchResult> = results.into_iter().flatten().collect();
138 all_results.sort_by(|a, b| {
140 let path_cmp = a.file.cmp(&b.file);
141 if path_cmp != std::cmp::Ordering::Equal {
142 return path_cmp;
143 }
144 a.span.line_start.cmp(&b.span.line_start)
145 });
146
147 if let Some(top_k) = options.top_k {
148 all_results.truncate(top_k);
149 }
150
151 Ok(all_results)
152}
153
154fn search_file(
155 regex: &Regex,
156 file_path: &Path,
157 options: &SearchOptions,
158) -> Result<Vec<SearchResult>> {
159 let content = fs::read_to_string(file_path)?;
160 let lines: Vec<&str> = content.lines().collect();
161 let mut results = Vec::new();
162
163 let code_sections = if options.full_section {
165 extract_code_sections(file_path, &content)
166 } else {
167 None
168 };
169
170 let mut byte_offset = 0;
172
173 for (line_idx, line) in lines.iter().enumerate() {
174 let line_number = line_idx + 1;
175
176 for mat in regex.find_iter(line) {
178 let preview = if options.full_section {
179 if let Some(ref sections) = code_sections {
181 if let Some(section) = find_containing_section(sections, line_idx) {
182 section.clone()
183 } else {
184 get_context_preview(&lines, line_idx, options)
186 }
187 } else {
188 get_context_preview(&lines, line_idx, options)
189 }
190 } else {
191 get_context_preview(&lines, line_idx, options)
192 };
193
194 results.push(SearchResult {
195 file: file_path.to_path_buf(),
196 span: Span {
197 byte_start: byte_offset + mat.start(),
198 byte_end: byte_offset + mat.end(),
199 line_start: line_number,
200 line_end: line_number,
201 },
202 score: 1.0,
203 preview,
204 lang: ck_core::Language::from_path(file_path),
205 symbol: None,
206 chunk_hash: None,
207 index_epoch: None,
208 });
209 }
210
211 byte_offset += line.len();
213 if line_idx < lines.len() - 1 {
214 byte_offset += 1; }
216 }
217
218 Ok(results)
219}
220
221async fn lexical_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
222 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
224 if options.path.is_file() {
225 options.path.parent().unwrap_or(&options.path).to_path_buf()
226 } else {
227 options.path.clone()
228 }
229 });
230
231 let index_dir = index_root.join(".ck");
232 if !index_dir.exists() {
233 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
234 }
235
236 let tantivy_index_path = index_dir.join("tantivy_index");
237
238 if !tantivy_index_path.exists() {
239 return build_tantivy_index(options).await;
240 }
241
242 let mut schema_builder = Schema::builder();
243 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
244 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
245 let _schema = schema_builder.build();
246
247 let index = Index::open_in_dir(&tantivy_index_path)
248 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
249
250 let reader = index
251 .reader_builder()
252 .reload_policy(ReloadPolicy::OnCommitWithDelay)
253 .try_into()
254 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
255
256 let searcher = reader.searcher();
257 let query_parser = QueryParser::for_index(&index, vec![content_field]);
258
259 let query = query_parser
260 .parse_query(&options.query)
261 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
262
263 let top_docs = if let Some(top_k) = options.top_k {
264 searcher.search(&query, &TopDocs::with_limit(top_k))?
265 } else {
266 searcher.search(&query, &TopDocs::with_limit(100))?
267 };
268
269 let mut raw_results = Vec::new();
271 for (_score, doc_address) in top_docs {
272 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
273 let path_text = retrieved_doc
274 .get_first(path_field)
275 .map(|field_value| field_value.as_str().unwrap_or(""))
276 .unwrap_or("");
277 let content_text = retrieved_doc
278 .get_first(content_field)
279 .map(|field_value| field_value.as_str().unwrap_or(""))
280 .unwrap_or("");
281
282 let file_path = PathBuf::from(path_text);
283 let preview = if options.full_section {
284 content_text.to_string()
285 } else {
286 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
287 };
288
289 raw_results.push((
290 _score,
291 SearchResult {
292 file: file_path,
293 span: Span {
294 byte_start: 0,
295 byte_end: content_text.len(),
296 line_start: 1,
297 line_end: content_text.lines().count(),
298 },
299 score: _score,
300 preview,
301 lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
302 symbol: None,
303 chunk_hash: None,
304 index_epoch: None,
305 },
306 ));
307 }
308
309 let mut results = Vec::new();
311 if !raw_results.is_empty() {
312 let max_score = raw_results
313 .iter()
314 .map(|(score, _)| *score)
315 .fold(0.0f32, f32::max);
316 if max_score > 0.0 {
317 for (raw_score, mut result) in raw_results {
318 let normalized_score = raw_score / max_score;
319
320 if let Some(threshold) = options.threshold
322 && normalized_score < threshold
323 {
324 continue;
325 }
326
327 result.score = normalized_score;
328 results.push(result);
329 }
330 }
331 }
332
333 Ok(results)
334}
335
336async fn build_tantivy_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
337 let index_root = if options.path.is_file() {
339 options.path.parent().unwrap_or(&options.path)
340 } else {
341 &options.path
342 };
343
344 let index_dir = index_root.join(".ck");
345 let tantivy_index_path = index_dir.join("tantivy_index");
346
347 fs::create_dir_all(&tantivy_index_path)?;
348
349 let mut schema_builder = Schema::builder();
350 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
351 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
352 let schema = schema_builder.build();
353
354 let index = Index::create_in_dir(&tantivy_index_path, schema.clone())
355 .map_err(|e| CkError::Index(format!("Failed to create tantivy index: {}", e)))?;
356
357 let mut index_writer = index
358 .writer(50_000_000)
359 .map_err(|e| CkError::Index(format!("Failed to create index writer: {}", e)))?;
360
361 let files = collect_files(index_root, true, &options.exclude_patterns)?;
362
363 for file_path in &files {
364 if let Ok(content) = fs::read_to_string(file_path) {
365 let doc = doc!(
366 content_field => content,
367 path_field => file_path.display().to_string()
368 );
369 index_writer.add_document(doc)?;
370 }
371 }
372
373 index_writer
374 .commit()
375 .map_err(|e| CkError::Index(format!("Failed to commit index: {}", e)))?;
376
377 let tantivy_index_path = index_root.join(".ck").join("tantivy_index");
379 let mut schema_builder = Schema::builder();
380 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
381 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
382 let _schema = schema_builder.build();
383
384 let index = Index::open_in_dir(&tantivy_index_path)
385 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
386
387 let reader = index
388 .reader_builder()
389 .reload_policy(ReloadPolicy::OnCommitWithDelay)
390 .try_into()
391 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
392
393 let searcher = reader.searcher();
394 let query_parser = QueryParser::for_index(&index, vec![content_field]);
395
396 let query = query_parser
397 .parse_query(&options.query)
398 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
399
400 let top_docs = if let Some(top_k) = options.top_k {
401 searcher.search(&query, &TopDocs::with_limit(top_k))?
402 } else {
403 searcher.search(&query, &TopDocs::with_limit(100))?
404 };
405
406 let mut raw_results = Vec::new();
408 for (_score, doc_address) in top_docs {
409 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
410 let path_text = retrieved_doc
411 .get_first(path_field)
412 .map(|field_value| field_value.as_str().unwrap_or(""))
413 .unwrap_or("");
414 let content_text = retrieved_doc
415 .get_first(content_field)
416 .map(|field_value| field_value.as_str().unwrap_or(""))
417 .unwrap_or("");
418
419 let file_path = PathBuf::from(path_text);
420 let preview = if options.full_section {
421 content_text.to_string()
422 } else {
423 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
424 };
425
426 raw_results.push((
427 _score,
428 SearchResult {
429 file: file_path,
430 span: Span {
431 byte_start: 0,
432 byte_end: content_text.len(),
433 line_start: 1,
434 line_end: content_text.lines().count(),
435 },
436 score: _score,
437 preview,
438 lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
439 symbol: None,
440 chunk_hash: None,
441 index_epoch: None,
442 },
443 ));
444 }
445
446 let mut results = Vec::new();
448 if !raw_results.is_empty() {
449 let max_score = raw_results
450 .iter()
451 .map(|(score, _)| *score)
452 .fold(0.0f32, f32::max);
453 if max_score > 0.0 {
454 for (raw_score, mut result) in raw_results {
455 let normalized_score = raw_score / max_score;
456
457 if let Some(threshold) = options.threshold
459 && normalized_score < threshold
460 {
461 continue;
462 }
463
464 result.score = normalized_score;
465 results.push(result);
466 }
467 }
468 }
469
470 Ok(results)
471}
472
473#[allow(dead_code)]
474async fn semantic_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
475 semantic_search_with_progress(options, None).await
476}
477
478async fn semantic_search_with_progress(
479 options: &SearchOptions,
480 progress_callback: Option<SearchProgressCallback>,
481) -> Result<Vec<SearchResult>> {
482 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
484 if options.path.is_file() {
485 options.path.parent().unwrap_or(&options.path).to_path_buf()
486 } else {
487 options.path.clone()
488 }
489 });
490
491 let index_dir = index_root.join(".ck");
492 if !index_dir.exists() {
493 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
494 }
495
496 let ann_index_path = index_dir.join("ann_index.bin");
497 let embeddings_path = index_dir.join("embeddings.json");
498
499 if !ann_index_path.exists() || !embeddings_path.exists() {
500 return build_semantic_index_with_progress(options, progress_callback).await;
501 }
502
503 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
505
506 let embeddings_data = fs::read_to_string(&embeddings_path)?;
508 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
509
510 if let Some(ref callback) = progress_callback {
512 callback("Loading embedding model...");
513 }
514
515 let mut embedder = if let Some(ref callback) = progress_callback {
516 let _cb = callback.as_ref();
517 let model_cb = Box::new(|msg: &str| {
518 eprintln!("Model: {}", msg);
521 }) as ck_embed::ModelDownloadCallback;
522 ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), Some(model_cb))?
523 } else {
524 ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?
525 };
526 let query_embeddings = embedder.embed(std::slice::from_ref(&options.query))?;
527
528 if query_embeddings.is_empty() {
529 return Ok(Vec::new());
530 }
531
532 let query_embedding = &query_embeddings[0];
533
534 let top_k = options.top_k.unwrap_or(10);
536 let similar_docs = ann_index.search(query_embedding, top_k);
537
538 let mut results = Vec::new();
539
540 let filter_by_file = options.path.is_file();
542 let target_file = if filter_by_file {
543 Some(
544 options
545 .path
546 .canonicalize()
547 .unwrap_or_else(|_| options.path.clone()),
548 )
549 } else {
550 None
551 };
552
553 for (doc_id, similarity) in similar_docs {
554 if let Some(threshold) = options.threshold
556 && similarity < threshold
557 {
558 continue;
559 }
560
561 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
562 if let Some(target) = &target_file {
564 let canonical_result = file_path
565 .canonicalize()
566 .unwrap_or_else(|_| file_path.clone());
567 if canonical_result != *target {
568 continue; }
570 }
571
572 let preview = if options.full_section {
574 content.clone()
575 } else {
576 content.lines().take(3).collect::<Vec<_>>().join("\n")
577 };
578
579 results.push(SearchResult {
580 file: file_path.clone(),
581 span: Span {
582 byte_start: 0,
583 byte_end: content.len(),
584 line_start: 1,
585 line_end: content.lines().count(),
586 },
587 score: similarity,
588 preview,
589 lang: ck_core::Language::from_path(file_path),
590 symbol: None,
591 chunk_hash: None,
592 index_epoch: None,
593 });
594 }
595 }
596
597 Ok(results)
598}
599
600#[allow(dead_code)]
601async fn build_semantic_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
602 build_semantic_index_with_progress(options, None).await
603}
604
605async fn build_semantic_index_with_progress(
606 options: &SearchOptions,
607 progress_callback: Option<SearchProgressCallback>,
608) -> Result<Vec<SearchResult>> {
609 let index_root = if options.path.is_file() {
611 options.path.parent().unwrap_or(&options.path)
612 } else {
613 &options.path
614 };
615
616 let index_dir = index_root.join(".ck");
617 let ann_index_path = index_dir.join("ann_index.bin");
618 let embeddings_path = index_dir.join("embeddings.json");
619
620 fs::create_dir_all(&index_dir)?;
621
622 if let Some(ref callback) = progress_callback {
623 callback("Building semantic index (no index found)...");
624 }
625
626 eprintln!("Building semantic index (no existing index found)...");
628
629 let files = collect_files(index_root, true, &options.exclude_patterns)?;
631
632 if let Some(ref callback) = progress_callback {
633 callback(&format!("Found {} files to index", files.len()));
634 }
635 eprintln!("Found {} files to embed and index", files.len());
636
637 let mut file_embeddings = Vec::new();
638 let mut embeddings = Vec::new();
639
640 if let Some(ref callback) = progress_callback {
642 callback("Loading embedding model...");
643 }
644
645 let model_callback = if progress_callback.is_some() {
646 Some(Box::new(|msg: &str| {
647 eprintln!("Model: {}", msg);
648 }) as ck_embed::ModelDownloadCallback)
649 } else {
650 None
651 };
652
653 let mut embedder =
654 ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), model_callback)?;
655
656 if let Some(ref callback) = progress_callback {
657 callback("Generating embeddings for code chunks...");
658 }
659
660 for (file_idx, file_path) in files.iter().enumerate() {
661 if let Ok(content) = fs::read_to_string(file_path) {
662 if let Some(ref callback) = progress_callback {
663 let file_name = file_path
664 .file_name()
665 .map(|n| n.to_string_lossy().to_string())
666 .unwrap_or_else(|| file_path.to_string_lossy().to_string());
667 callback(&format!(
668 "Processing {}/{}: {}",
669 file_idx + 1,
670 files.len(),
671 file_name
672 ));
673 }
674
675 let chunks = ck_chunk::chunk_text(&content, ck_core::Language::from_path(file_path))?;
677
678 for chunk in chunks {
679 let chunk_embeddings = embedder.embed(std::slice::from_ref(&chunk.text))?;
680 if !chunk_embeddings.is_empty() {
681 embeddings.push(chunk_embeddings[0].clone());
682 file_embeddings.push((file_path.clone(), chunk.text));
683 }
684 }
685 }
686 }
687
688 if let Some(ref callback) = progress_callback {
689 callback(&format!(
690 "Built {} embeddings, creating search index...",
691 embeddings.len()
692 ));
693 }
694 eprintln!(
695 "Generated {} embeddings, building search index...",
696 embeddings.len()
697 );
698
699 let index = ck_ann::SimpleIndex::build(&embeddings)?;
701 index.save(&ann_index_path)?;
702
703 let embeddings_json = serde_json::to_string(&file_embeddings)?;
705 fs::write(&embeddings_path, embeddings_json)?;
706
707 if let Some(ref callback) = progress_callback {
708 callback("Semantic index built successfully, running search...");
709 }
710 eprintln!("Semantic index built successfully!");
711
712 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
714
715 let embeddings_data = fs::read_to_string(&embeddings_path)?;
717 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
718
719 let mut embedder = ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?;
721 let query_embeddings = embedder.embed(std::slice::from_ref(&options.query))?;
722
723 if query_embeddings.is_empty() {
724 return Ok(Vec::new());
725 }
726
727 let query_embedding = &query_embeddings[0];
728
729 let top_k = options.top_k.unwrap_or(10);
731 let similar_docs = ann_index.search(query_embedding, top_k);
732
733 let mut results = Vec::new();
734
735 let filter_by_file = options.path.is_file();
737 let target_file = if filter_by_file {
738 Some(
739 options
740 .path
741 .canonicalize()
742 .unwrap_or_else(|_| options.path.clone()),
743 )
744 } else {
745 None
746 };
747
748 for (doc_id, similarity) in similar_docs {
749 if let Some(threshold) = options.threshold
751 && similarity < threshold
752 {
753 continue;
754 }
755
756 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
757 if let Some(target) = &target_file {
759 let canonical_result = file_path
760 .canonicalize()
761 .unwrap_or_else(|_| file_path.clone());
762 if canonical_result != *target {
763 continue; }
765 }
766
767 let preview = if options.full_section {
769 content.clone()
770 } else {
771 content.lines().take(3).collect::<Vec<_>>().join("\n")
772 };
773
774 results.push(SearchResult {
775 file: file_path.clone(),
776 span: Span {
777 byte_start: 0,
778 byte_end: content.len(),
779 line_start: 1,
780 line_end: content.lines().count(),
781 },
782 score: similarity,
783 preview,
784 lang: ck_core::Language::from_path(file_path),
785 symbol: None,
786 chunk_hash: None,
787 index_epoch: None,
788 });
789 }
790 }
791
792 Ok(results)
793}
794
795#[allow(dead_code)]
796async fn hybrid_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
797 hybrid_search_with_progress(options, None).await
798}
799
800async fn hybrid_search_with_progress(
801 options: &SearchOptions,
802 progress_callback: Option<SearchProgressCallback>,
803) -> Result<Vec<SearchResult>> {
804 if let Some(ref callback) = progress_callback {
805 callback("Running regex search...");
806 }
807 let regex_results = regex_search(options)?;
808
809 if let Some(ref callback) = progress_callback {
810 callback("Running semantic search...");
811 }
812 let semantic_results = semantic_search_v3_with_progress(options, progress_callback).await?;
813
814 let mut combined = HashMap::new();
815
816 for (rank, result) in regex_results.iter().enumerate() {
817 let key = format!("{}:{}", result.file.display(), result.span.line_start);
818 combined
819 .entry(key)
820 .or_insert(Vec::new())
821 .push((rank + 1, result.clone()));
822 }
823
824 for (rank, result) in semantic_results.iter().enumerate() {
825 let key = format!("{}:{}", result.file.display(), result.span.line_start);
826 combined
827 .entry(key)
828 .or_insert(Vec::new())
829 .push((rank + 1, result.clone()));
830 }
831
832 let mut rrf_results: Vec<SearchResult> = combined
834 .into_values()
835 .map(|ranks| {
836 let mut result = ranks[0].1.clone();
837 let rrf_score = ranks
838 .iter()
839 .map(|(rank, _)| 1.0 / (60.0 + *rank as f32))
840 .sum();
841 result.score = rrf_score;
842 result
843 })
844 .filter(|result| {
845 if let Some(threshold) = options.threshold {
847 result.score >= threshold
848 } else {
849 true
850 }
851 })
852 .collect();
853
854 rrf_results.sort_by(|a, b| {
856 b.score
857 .partial_cmp(&a.score)
858 .unwrap_or(std::cmp::Ordering::Equal)
859 });
860
861 if let Some(top_k) = options.top_k {
862 rrf_results.truncate(top_k);
863 }
864
865 Ok(rrf_results)
866}
867
868fn build_globset(patterns: &[String]) -> GlobSet {
869 let mut builder = GlobSetBuilder::new();
870 for pat in patterns {
871 if let Ok(glob) = Glob::new(pat) {
873 builder.add(glob);
874 }
875 }
876 builder.build().unwrap_or_else(|_| GlobSet::empty())
877}
878
879fn should_exclude_path(path: &Path, exclude_patterns: &[String]) -> bool {
880 let globset = build_globset(exclude_patterns);
881 if globset.is_match(path) {
883 return true;
884 }
885 for component in path.components() {
886 if let std::path::Component::Normal(name) = component
887 && globset.is_match(name)
888 {
889 return true;
890 }
891 }
892 false
893}
894
895fn collect_files(
896 path: &Path,
897 recursive: bool,
898 exclude_patterns: &[String],
899) -> Result<Vec<PathBuf>> {
900 let mut files = Vec::new();
901 let globset = build_globset(exclude_patterns);
902
903 if path.is_file() {
904 files.push(path.to_path_buf());
906 } else if recursive {
907 for entry in WalkDir::new(path).into_iter().filter_entry(|e| {
908 let name = e.file_name();
910 !globset.is_match(e.path()) && !globset.is_match(name)
911 }) {
912 match entry {
913 Ok(entry) => {
914 if entry.file_type().is_file()
915 && !should_exclude_path(entry.path(), exclude_patterns)
916 {
917 files.push(entry.path().to_path_buf());
918 }
919 }
920 Err(e) => {
921 tracing::debug!("Skipping path due to error: {}", e);
923 continue;
924 }
925 }
926 }
927 } else {
928 match fs::read_dir(path) {
929 Ok(read_dir) => {
930 for entry in read_dir {
931 match entry {
932 Ok(entry) => {
933 let path = entry.path();
934 if path.is_file() && !should_exclude_path(&path, exclude_patterns) {
935 files.push(path);
936 }
937 }
938 Err(e) => {
939 tracing::debug!("Skipping directory entry due to error: {}", e);
940 continue;
941 }
942 }
943 }
944 }
945 Err(e) => {
946 tracing::debug!("Cannot read directory {:?}: {}", path, e);
947 return Err(e.into());
948 }
949 }
950 }
951
952 Ok(files)
953}
954
955async fn ensure_index_updated(
956 path: &Path,
957 force_reindex: bool,
958 need_embeddings: bool,
959) -> Result<()> {
960 let index_root_buf = find_nearest_index_root(path).unwrap_or_else(|| {
962 if path.is_file() {
963 path.parent().unwrap_or(path).to_path_buf()
964 } else {
965 path.to_path_buf()
966 }
967 });
968 let index_root = &index_root_buf;
969
970 if force_reindex {
972 let stats = ck_index::smart_update_index_with_progress(
973 index_root,
974 false,
975 None,
976 need_embeddings,
977 true,
978 &[], None, )
981 .await?;
982 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
983 tracing::info!(
984 "Index updated: {} files indexed, {} orphaned files removed",
985 stats.files_indexed,
986 stats.orphaned_files_removed
987 );
988 }
989 return Ok(());
990 }
991
992 let stats = ck_index::smart_update_index_with_progress(
994 index_root,
995 false,
996 None,
997 need_embeddings,
998 true,
999 &[],
1000 None, )
1002 .await?;
1003 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
1004 tracing::info!(
1005 "Index updated: {} files indexed, {} orphaned files removed",
1006 stats.files_indexed,
1007 stats.orphaned_files_removed
1008 );
1009 }
1010
1011 Ok(())
1012}
1013
1014fn get_context_preview(lines: &[&str], line_idx: usize, options: &SearchOptions) -> String {
1015 let before = options.before_context_lines.max(options.context_lines);
1016 let after = options.after_context_lines.max(options.context_lines);
1017
1018 if before > 0 || after > 0 {
1019 let start_idx = line_idx.saturating_sub(before);
1020 let end_idx = (line_idx + after + 1).min(lines.len());
1021 lines[start_idx..end_idx].join("\n")
1022 } else {
1023 lines[line_idx].to_string()
1024 }
1025}
1026
1027fn extract_code_sections(file_path: &Path, content: &str) -> Option<Vec<(usize, usize, String)>> {
1028 let lang = ck_core::Language::from_path(file_path)?;
1029
1030 if let Ok(chunks) = ck_chunk::chunk_text(content, Some(lang)) {
1032 let sections: Vec<(usize, usize, String)> = chunks
1033 .into_iter()
1034 .filter(|chunk| {
1035 matches!(
1036 chunk.chunk_type,
1037 ck_chunk::ChunkType::Function
1038 | ck_chunk::ChunkType::Class
1039 | ck_chunk::ChunkType::Method
1040 )
1041 })
1042 .map(|chunk| {
1043 (
1044 chunk.span.line_start - 1, chunk.span.line_end - 1,
1046 chunk.text,
1047 )
1048 })
1049 .collect();
1050
1051 if sections.is_empty() {
1052 None
1053 } else {
1054 Some(sections)
1055 }
1056 } else {
1057 None
1058 }
1059}
1060
/// Returns the text of the first section whose inclusive `(start, end)` line
/// range contains `line_idx`, or `None` when no section covers it.
fn find_containing_section(
    sections: &[(usize, usize, String)],
    line_idx: usize,
) -> Option<&String> {
    sections
        .iter()
        .find(|(first, last, _)| (*first..=*last).contains(&line_idx))
        .map(|(_, _, body)| body)
}
1072
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes four small fixture files into `dir` and returns their paths.
    fn create_test_files(dir: &std::path::Path) -> Vec<PathBuf> {
        let files = vec![
            ("test1.txt", "hello world rust programming"),
            ("test2.rs", "fn main() { println!(\"Hello Rust\"); }"),
            ("test3.py", "print('Hello Python')"),
            ("test4.txt", "machine learning artificial intelligence"),
        ];

        let mut paths = Vec::new();
        for (name, content) in files {
            let path = dir.join(name);
            fs::write(&path, content).unwrap();
            paths.push(path);
        }
        paths
    }

    #[test]
    fn test_collect_files() {
        let temp_dir = TempDir::new().unwrap();
        let test_files = create_test_files(temp_dir.path());

        // Non-recursive listing of the directory.
        let files = collect_files(temp_dir.path(), false, &[]).unwrap();
        assert_eq!(files.len(), 4);

        // Recursive walk of the same (flat) directory.
        let files = collect_files(temp_dir.path(), true, &[]).unwrap();
        assert_eq!(files.len(), 4);

        // A single file is returned as-is.
        let files = collect_files(&test_files[0], false, &[]).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0], test_files[0]);
    }

    #[test]
    fn test_regex_search() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "rust".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());

        let rust_matches: Vec<_> = results
            .iter()
            .filter(|r| r.preview.to_lowercase().contains("rust"))
            .collect();
        assert!(!rust_matches.is_empty());
    }

    #[test]
    fn test_regex_search_case_insensitive() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "HELLO".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_fixed_string() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "fn main()".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            fixed_string: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_whole_word() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(
            temp_dir.path().join("word_test.txt"),
            "rust rusty rustacean",
        )
        .unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "rust".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            whole_word: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        // Only the standalone word matches; "rusty" and "rustacean" must not.
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.byte_start, 0);
    }

    #[test]
    fn test_regex_search_top_k() {
        let temp_dir = TempDir::new().unwrap();

        for i in 0..10 {
            fs::write(
                temp_dir.path().join(format!("file{}.txt", i)),
                "test content",
            )
            .unwrap();
        }

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            top_k: Some(5),
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(results.len() <= 5);
    }

    #[test]
    fn test_regex_search_span_offsets() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("spans.txt");
        fs::write(&test_file, "test test test\nline two test\ntest end").unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: test_file.clone(),
            recursive: false,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();

        // Three matches on line 1, one on line 2, one on line 3.
        assert_eq!(results.len(), 5);

        let line1_matches: Vec<_> = results.iter().filter(|r| r.span.line_start == 1).collect();
        assert_eq!(line1_matches.len(), 3);
        assert_eq!(line1_matches[0].span.byte_start, 0);
        assert_eq!(line1_matches[1].span.byte_start, 5);
        assert_eq!(line1_matches[2].span.byte_start, 10);

        let line2_matches: Vec<_> = results.iter().filter(|r| r.span.line_start == 2).collect();
        assert_eq!(line2_matches.len(), 1);
        // 15 bytes for the first line including its newline, plus 9 into line 2.
        assert_eq!(line2_matches[0].span.byte_start, 24);

        // Every match must have its own distinct file-wide offset.
        let mut byte_starts: Vec<_> = results.iter().map(|r| r.span.byte_start).collect();
        byte_starts.sort();
        byte_starts.dedup();
        assert_eq!(byte_starts.len(), 5);
    }

    #[test]
    fn test_search_file() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(
            &file_path,
            "line 1: hello\nline 2: world\nline 3: rust programming",
        )
        .unwrap();

        let regex = regex::Regex::new("rust").unwrap();
        let options = SearchOptions::default();

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.line_start, 3);
        assert!(results[0].preview.contains("rust"));
    }

    #[test]
    fn test_search_file_with_context() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1\nline 2\ntarget line\nline 4\nline 5").unwrap();

        let regex = regex::Regex::new("target").unwrap();
        let options = SearchOptions {
            context_lines: 1,
            ..Default::default()
        };

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);

        println!("Preview: '{}'", results[0].preview);

        // One line of context on each side of the match.
        assert!(results[0].preview.contains("line 2"));
        assert!(results[0].preview.contains("target line"));
        assert!(results[0].preview.contains("line 4"));
    }

    #[tokio::test]
    async fn test_search_main_function() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "hello".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert!(!results.is_empty());
    }
}