1use anyhow::Result;
2use ck_core::{CkError, SearchMode, SearchOptions, SearchResult, Span};
3use globset::{Glob, GlobSet, GlobSetBuilder};
4use rayon::prelude::*;
5use regex::{Regex, RegexBuilder};
6use std::collections::HashMap;
7use std::fs;
8use std::path::PathBuf as StdPathBuf;
9use std::path::{Path, PathBuf};
10use tantivy::collector::TopDocs;
11use tantivy::query::QueryParser;
12use tantivy::schema::{STORED, Schema, TEXT, Value};
13use tantivy::{Index, ReloadPolicy, TantivyDocument, doc};
14use walkdir::WalkDir;
15
16mod semantic_v3;
17pub use semantic_v3::{semantic_search_v3, semantic_search_v3_with_progress};
18
/// Callback invoked with a human-readable status message while a search is running.
pub type SearchProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
/// Callback invoked with a status message while the index is refreshed ahead of a search.
pub type IndexingProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
/// Callback receiving structured [`ck_index::EmbeddingProgress`] updates during indexing.
pub type DetailedIndexingProgressCallback = Box<dyn Fn(ck_index::EmbeddingProgress) + Send + Sync>;
22
23fn resolve_content_path(file_path: &Path, repo_root: &Path) -> Result<PathBuf> {
27 if ck_core::pdf::is_pdf_file(file_path) {
28 let cache_path = ck_core::pdf::get_content_cache_path(repo_root, file_path);
30 if !cache_path.exists() {
31 return Err(anyhow::anyhow!(
32 "PDF not preprocessed. Run 'ck --index' first."
33 ));
34 }
35 Ok(cache_path)
36 } else {
37 Ok(file_path.to_path_buf())
39 }
40}
41
42fn read_file_content(file_path: &Path, repo_root: &Path) -> Result<String> {
46 let content_path = resolve_content_path(file_path, repo_root)?;
47 Ok(fs::read_to_string(content_path)?)
48}
49
50async fn extract_content_from_span(file_path: &Path, span: &ck_core::Span) -> Result<String> {
52 let repo_root = find_nearest_index_root(file_path)
54 .unwrap_or_else(|| file_path.parent().unwrap_or(file_path).to_path_buf());
55
56 let content_path = resolve_content_path(file_path, &repo_root)?;
58
59 extract_lines_from_file(&content_path, span.line_start, span.line_end)
61}
62
/// Reads lines `line_start..=line_end` (1-based, inclusive) from `file_path` and joins
/// them with `\n`.
///
/// Returns an empty string when `line_start` is 0 or when the range lies past the end of
/// the file. Reading stops once the end of the range is passed, so the rest of the file is
/// not consumed.
///
/// # Errors
/// Fails if the file cannot be opened or a line up to `line_end` cannot be read.
fn extract_lines_from_file(file_path: &Path, line_start: usize, line_end: usize) -> Result<String> {
    use std::io::{BufRead, BufReader};

    // Line numbers are 1-based; 0 is treated as "no range".
    if line_start == 0 {
        return Ok(String::new());
    }

    // Convert to 0-based indices.
    let first = line_start - 1;
    let last = line_end.saturating_sub(1);

    let reader = BufReader::new(fs::File::open(file_path)?);
    let mut wanted: Vec<String> = Vec::new();

    let in_range = reader
        .lines()
        .enumerate()
        .take_while(|(idx, _)| *idx <= last);
    for (idx, line) in in_range {
        // Read errors on lines before the range still surface, matching a full scan.
        let text = line?;
        if idx >= first {
            wanted.push(text);
        }
    }

    // Joining an empty list yields "", which covers the out-of-range case.
    Ok(wanted.join("\n"))
}
98
/// Walks upward from `path` and returns the first directory that contains a `.ck` entry.
///
/// When `path` is an existing file the walk starts at its parent directory; otherwise it
/// starts at `path` itself. Returns `None` when no ancestor contains `.ck`.
fn find_nearest_index_root(path: &Path) -> Option<StdPathBuf> {
    let start = if path.is_file() {
        path.parent().unwrap_or(path)
    } else {
        path
    };

    // `ancestors` yields `start` first, then each successive parent.
    start
        .ancestors()
        .find(|dir| dir.join(".ck").exists())
        .map(Path::to_path_buf)
}
115
/// The embedding model selected for an index, as produced by `resolve_model_from_root`.
#[derive(Clone, Debug)]
pub struct ResolvedModel {
    /// Full model name: the registry config's `name`, or the name recorded in the index manifest.
    pub canonical_name: String,
    /// Registry key for the model; falls back to the manifest's model name when the
    /// registry has no matching entry.
    pub alias: String,
    /// Embedding vector dimensionality.
    pub dimensions: usize,
}
122
123fn find_model_entry<'a>(
124 registry: &'a ck_models::ModelRegistry,
125 key: &str,
126) -> Option<(String, &'a ck_models::ModelConfig)> {
127 if let Some(config) = registry.get_model(key) {
128 return Some((key.to_string(), config));
129 }
130
131 registry
132 .models
133 .iter()
134 .find(|(_, config)| config.name == key)
135 .map(|(alias, config)| (alias.clone(), config))
136}
137
138pub(crate) fn resolve_model_from_root(
139 index_root: &Path,
140 cli_model: Option<&str>,
141) -> Result<ResolvedModel> {
142 use ck_models::ModelRegistry;
143
144 let registry = ModelRegistry::default();
145 let index_dir = index_root.join(".ck");
146 let manifest_path = index_dir.join("manifest.json");
147
148 if manifest_path.exists() {
149 let data = std::fs::read(&manifest_path)?;
150 let manifest: ck_index::IndexManifest = serde_json::from_slice(&data)?;
151
152 if let Some(existing_model) = manifest.embedding_model {
153 let (alias, config_opt) = find_model_entry(®istry, &existing_model)
154 .map(|(alias, config)| (alias, Some(config)))
155 .unwrap_or_else(|| (existing_model.clone(), None));
156
157 let dims = manifest
158 .embedding_dimensions
159 .or_else(|| config_opt.map(|c| c.dimensions))
160 .unwrap_or(384);
161
162 if let Some(requested) = cli_model {
163 let (_, requested_config) =
164 find_model_entry(®istry, requested).ok_or_else(|| {
165 CkError::Embedding(format!(
166 "Unknown model '{}'. Available models: {}",
167 requested,
168 registry
169 .models
170 .keys()
171 .cloned()
172 .collect::<Vec<_>>()
173 .join(", ")
174 ))
175 })?;
176
177 if requested_config.name != existing_model {
178 let suggested_alias = alias.clone();
179 return Err(CkError::Embedding(format!(
180 "Index was built with embedding model '{}' (alias '{}'), but '--model {}' was requested. To switch models run `ck --clean .` then `ck --index --model {}`. To keep using this index rerun your command with '--model {}'.",
181 existing_model,
182 suggested_alias,
183 requested,
184 requested,
185 suggested_alias
186 ))
187 .into());
188 }
189 }
190
191 return Ok(ResolvedModel {
192 canonical_name: existing_model,
193 alias,
194 dimensions: dims,
195 });
196 }
197 }
198
199 let (alias, config) = if let Some(requested) = cli_model {
200 find_model_entry(®istry, requested).ok_or_else(|| {
201 CkError::Embedding(format!(
202 "Unknown model '{}'. Available models: {}",
203 requested,
204 registry
205 .models
206 .keys()
207 .cloned()
208 .collect::<Vec<_>>()
209 .join(", ")
210 ))
211 })?
212 } else {
213 let alias = registry.default_model.clone();
214 let config = registry.get_default_model().ok_or_else(|| {
215 CkError::Embedding("No default embedding model configured".to_string())
216 })?;
217 (alias, config)
218 };
219
220 Ok(ResolvedModel {
221 canonical_name: config.name.clone(),
222 alias,
223 dimensions: config.dimensions,
224 })
225}
226
227pub fn resolve_model_for_path(path: &Path, cli_model: Option<&str>) -> Result<ResolvedModel> {
228 let index_root = find_nearest_index_root(path).unwrap_or_else(|| {
229 if path.is_file() {
230 path.parent().unwrap_or(path).to_path_buf()
231 } else {
232 path.to_path_buf()
233 }
234 });
235 resolve_model_from_root(&index_root, cli_model)
236}
237
238pub async fn search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
239 let results = search_enhanced(options).await?;
240 Ok(results.matches)
241}
242
243pub async fn search_with_progress(
244 options: &SearchOptions,
245 progress_callback: Option<SearchProgressCallback>,
246) -> Result<Vec<SearchResult>> {
247 let results = search_enhanced_with_progress(options, progress_callback).await?;
248 Ok(results.matches)
249}
250
251pub async fn search_enhanced(options: &SearchOptions) -> Result<ck_core::SearchResults> {
253 search_enhanced_with_progress(options, None).await
254}
255
/// Runs a search with an optional search-progress callback and returns the full
/// [`ck_core::SearchResults`]. No indexing-progress callbacks are installed.
pub async fn search_enhanced_with_progress(
    options: &SearchOptions,
    progress_callback: Option<SearchProgressCallback>,
) -> Result<ck_core::SearchResults> {
    // Both indexing callbacks are left unset.
    search_enhanced_with_indexing_progress(options, progress_callback, None, None).await
}
263
264pub async fn search_enhanced_with_indexing_progress(
266 options: &SearchOptions,
267 progress_callback: Option<SearchProgressCallback>,
268 indexing_progress_callback: Option<IndexingProgressCallback>,
269 detailed_indexing_progress_callback: Option<DetailedIndexingProgressCallback>,
270) -> Result<ck_core::SearchResults> {
271 if !options.path.exists() {
273 return Err(ck_core::CkError::Search(format!(
274 "Path does not exist: {}",
275 options.path.display()
276 ))
277 .into());
278 }
279
280 if !matches!(options.mode, SearchMode::Regex) {
282 let need_embeddings = matches!(options.mode, SearchMode::Semantic | SearchMode::Hybrid);
283 ensure_index_updated_with_progress(
284 &options.path,
285 options.reindex,
286 need_embeddings,
287 indexing_progress_callback,
288 detailed_indexing_progress_callback,
289 options.respect_gitignore,
290 &options.exclude_patterns,
291 options.embedding_model.as_deref(),
292 )
293 .await?;
294 }
295
296 let search_results = match options.mode {
297 SearchMode::Regex => {
298 let matches = regex_search(options)?;
299 ck_core::SearchResults {
300 matches,
301 closest_below_threshold: None,
302 }
303 }
304 SearchMode::Lexical => {
305 let matches = lexical_search(options).await?;
306 ck_core::SearchResults {
307 matches,
308 closest_below_threshold: None,
309 }
310 }
311 SearchMode::Semantic => {
312 semantic_search_v3_with_progress(options, progress_callback).await?
314 }
315 SearchMode::Hybrid => {
316 let matches = hybrid_search_with_progress(options, progress_callback).await?;
317 ck_core::SearchResults {
318 matches,
319 closest_below_threshold: None,
320 }
321 }
322 };
323
324 Ok(search_results)
325}
326
327fn regex_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
328 let pattern = if options.fixed_string {
329 regex::escape(&options.query)
330 } else if options.whole_word {
331 format!(r"\b{}\b", regex::escape(&options.query))
332 } else {
333 options.query.clone()
334 };
335
336 let regex = RegexBuilder::new(&pattern)
337 .case_insensitive(options.case_insensitive)
338 .build()
339 .map_err(CkError::Regex)?;
340
341 let should_recurse = options.path.is_dir() || options.recursive;
343 let files = if should_recurse {
344 ck_index::collect_files(
346 &options.path,
347 options.respect_gitignore,
348 &options.exclude_patterns,
349 )?
350 } else {
351 collect_files(&options.path, should_recurse, &options.exclude_patterns)?
353 };
354
355 let results: Vec<Vec<SearchResult>> = files
356 .par_iter()
357 .filter_map(|file_path| match search_file(®ex, file_path, options) {
358 Ok(matches) => {
359 if matches.is_empty() {
360 None
361 } else {
362 Some(matches)
363 }
364 }
365 Err(e) => {
366 tracing::debug!("Error searching {:?}: {}", file_path, e);
367 None
368 }
369 })
370 .collect();
371
372 let mut all_results: Vec<SearchResult> = results.into_iter().flatten().collect();
373 all_results.sort_by(|a, b| {
375 let path_cmp = a.file.cmp(&b.file);
376 if path_cmp != std::cmp::Ordering::Equal {
377 return path_cmp;
378 }
379 a.span.line_start.cmp(&b.span.line_start)
380 });
381
382 if let Some(top_k) = options.top_k {
383 all_results.truncate(top_k);
384 }
385
386 Ok(all_results)
387}
388
389fn search_file(
390 regex: &Regex,
391 file_path: &Path,
392 options: &SearchOptions,
393) -> Result<Vec<SearchResult>> {
394 let repo_root = find_nearest_index_root(file_path)
396 .unwrap_or_else(|| file_path.parent().unwrap_or(file_path).to_path_buf());
397
398 if options.full_section || options.context_lines > 0 {
402 let content = read_file_content(file_path, &repo_root)?;
404 let lines: Vec<String> = content.lines().map(|s| s.to_string()).collect();
405
406 let code_sections = if options.full_section {
408 extract_code_sections(file_path, &content)
409 } else {
410 None
411 };
412
413 search_file_in_memory(regex, file_path, options, &lines, &code_sections)
414 } else {
415 search_file_streaming(regex, file_path, &repo_root, options)
417 }
418}
419
420fn search_file_in_memory(
422 regex: &Regex,
423 file_path: &Path,
424 options: &SearchOptions,
425 lines: &[String],
426 code_sections: &Option<Vec<(usize, usize, String)>>,
427) -> Result<Vec<SearchResult>> {
428 let mut results = Vec::new();
429 let mut byte_offset = 0;
430
431 for (line_idx, line) in lines.iter().enumerate() {
432 let line_number = line_idx + 1;
433
434 if regex.as_str().is_empty() {
437 let preview = if options.full_section {
439 if let Some(sections) = code_sections {
441 if let Some(section) = find_containing_section(sections, line_idx) {
442 section.clone()
443 } else {
444 get_context_preview(lines, line_idx, options)
446 }
447 } else {
448 get_context_preview(lines, line_idx, options)
449 }
450 } else {
451 get_context_preview(lines, line_idx, options)
452 };
453
454 results.push(SearchResult {
455 file: file_path.to_path_buf(),
456 span: Span {
457 byte_start: byte_offset,
458 byte_end: byte_offset + line.len(),
459 line_start: line_number,
460 line_end: line_number,
461 },
462 score: 1.0,
463 preview,
464 lang: ck_core::Language::from_path(file_path),
465 symbol: None,
466 chunk_hash: None,
467 index_epoch: None,
468 });
469 } else {
470 for mat in regex.find_iter(line) {
472 let preview = if options.full_section {
473 if let Some(sections) = code_sections {
475 if let Some(section) = find_containing_section(sections, line_idx) {
476 section.clone()
477 } else {
478 get_context_preview(lines, line_idx, options)
480 }
481 } else {
482 get_context_preview(lines, line_idx, options)
483 }
484 } else {
485 get_context_preview(lines, line_idx, options)
486 };
487
488 results.push(SearchResult {
489 file: file_path.to_path_buf(),
490 span: Span {
491 byte_start: byte_offset + mat.start(),
492 byte_end: byte_offset + mat.end(),
493 line_start: line_number,
494 line_end: line_number,
495 },
496 score: 1.0,
497 preview,
498 lang: ck_core::Language::from_path(file_path),
499 symbol: None,
500 chunk_hash: None,
501 index_epoch: None,
502 });
503 }
504 }
505
506 byte_offset += line.len();
508 if line_idx < lines.len() - 1 {
509 byte_offset += 1; }
511 }
512
513 Ok(results)
514}
515
516fn search_file_streaming(
518 regex: &Regex,
519 file_path: &Path,
520 repo_root: &Path,
521 _options: &SearchOptions,
522) -> Result<Vec<SearchResult>> {
523 use std::io::{BufRead, BufReader};
524
525 let content_path = resolve_content_path(file_path, repo_root)?;
526 let file = std::fs::File::open(&content_path)?;
527 let reader = BufReader::new(file);
528
529 let mut results = Vec::new();
530 let mut byte_offset = 0;
531
532 for (line_idx, line_result) in reader.lines().enumerate() {
533 let line = line_result?;
534 let line_number = line_idx + 1;
535
536 if regex.as_str().is_empty() {
538 results.push(SearchResult {
539 file: file_path.to_path_buf(),
540 span: Span {
541 byte_start: byte_offset,
542 byte_end: byte_offset + line.len(),
543 line_start: line_number,
544 line_end: line_number,
545 },
546 score: 1.0,
547 preview: line.clone(), lang: ck_core::Language::from_path(file_path),
549 symbol: None,
550 chunk_hash: None,
551 index_epoch: None,
552 });
553 } else {
554 for mat in regex.find_iter(&line) {
556 results.push(SearchResult {
557 file: file_path.to_path_buf(),
558 span: Span {
559 byte_start: byte_offset + mat.start(),
560 byte_end: byte_offset + mat.end(),
561 line_start: line_number,
562 line_end: line_number,
563 },
564 score: 1.0,
565 preview: line.clone(), lang: ck_core::Language::from_path(file_path),
567 symbol: None,
568 chunk_hash: None,
569 index_epoch: None,
570 });
571 }
572 }
573
574 byte_offset += line.len() + 1; }
577
578 Ok(results)
579}
580
581async fn lexical_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
582 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
584 if options.path.is_file() {
585 options.path.parent().unwrap_or(&options.path).to_path_buf()
586 } else {
587 options.path.clone()
588 }
589 });
590
591 let index_dir = index_root.join(".ck");
592 if !index_dir.exists() {
593 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
594 }
595
596 let tantivy_index_path = index_dir.join("tantivy_index");
597
598 if !tantivy_index_path.exists() {
599 return build_tantivy_index(options).await;
600 }
601
602 let mut schema_builder = Schema::builder();
603 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
604 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
605 let _schema = schema_builder.build();
606
607 let index = Index::open_in_dir(&tantivy_index_path)
608 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
609
610 let reader = index
611 .reader_builder()
612 .reload_policy(ReloadPolicy::OnCommitWithDelay)
613 .try_into()
614 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
615
616 let searcher = reader.searcher();
617 let query_parser = QueryParser::for_index(&index, vec![content_field]);
618
619 let query = query_parser
620 .parse_query(&options.query)
621 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
622
623 let top_docs = if let Some(top_k) = options.top_k {
624 searcher.search(&query, &TopDocs::with_limit(top_k))?
625 } else {
626 searcher.search(&query, &TopDocs::with_limit(100))?
627 };
628
629 let mut raw_results = Vec::new();
631 for (_score, doc_address) in top_docs {
632 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
633 let path_text = retrieved_doc
634 .get_first(path_field)
635 .map(|field_value| field_value.as_str().unwrap_or(""))
636 .unwrap_or("");
637 let content_text = retrieved_doc
638 .get_first(content_field)
639 .map(|field_value| field_value.as_str().unwrap_or(""))
640 .unwrap_or("");
641
642 let file_path = PathBuf::from(path_text);
643 let preview = if options.full_section {
644 content_text.to_string()
645 } else {
646 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
647 };
648
649 raw_results.push((
650 _score,
651 SearchResult {
652 file: file_path,
653 span: Span {
654 byte_start: 0,
655 byte_end: content_text.len(),
656 line_start: 1,
657 line_end: content_text.lines().count(),
658 },
659 score: _score,
660 preview,
661 lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
662 symbol: None,
663 chunk_hash: None,
664 index_epoch: None,
665 },
666 ));
667 }
668
669 let mut results = Vec::new();
671 if !raw_results.is_empty() {
672 let max_score = raw_results
673 .iter()
674 .map(|(score, _)| *score)
675 .fold(0.0f32, f32::max);
676 if max_score > 0.0 {
677 for (raw_score, mut result) in raw_results {
678 let normalized_score = raw_score / max_score;
679
680 if let Some(threshold) = options.threshold
682 && normalized_score < threshold
683 {
684 continue;
685 }
686
687 result.score = normalized_score;
688 results.push(result);
689 }
690 }
691 }
692
693 Ok(results)
694}
695
696async fn build_tantivy_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
697 let index_root = if options.path.is_file() {
699 options.path.parent().unwrap_or(&options.path)
700 } else {
701 &options.path
702 };
703
704 let index_dir = index_root.join(".ck");
705 let tantivy_index_path = index_dir.join("tantivy_index");
706
707 fs::create_dir_all(&tantivy_index_path)?;
708
709 let mut schema_builder = Schema::builder();
710 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
711 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
712 let schema = schema_builder.build();
713
714 let index = Index::create_in_dir(&tantivy_index_path, schema.clone())
715 .map_err(|e| CkError::Index(format!("Failed to create tantivy index: {}", e)))?;
716
717 let mut index_writer = index
718 .writer(50_000_000)
719 .map_err(|e| CkError::Index(format!("Failed to create index writer: {}", e)))?;
720
721 let files = collect_files(index_root, true, &options.exclude_patterns)?;
722
723 for file_path in &files {
724 if let Ok(content) = fs::read_to_string(file_path) {
725 let doc = doc!(
726 content_field => content,
727 path_field => file_path.display().to_string()
728 );
729 index_writer.add_document(doc)?;
730 }
731 }
732
733 index_writer
734 .commit()
735 .map_err(|e| CkError::Index(format!("Failed to commit index: {}", e)))?;
736
737 let tantivy_index_path = index_root.join(".ck").join("tantivy_index");
739 let mut schema_builder = Schema::builder();
740 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
741 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
742 let _schema = schema_builder.build();
743
744 let index = Index::open_in_dir(&tantivy_index_path)
745 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
746
747 let reader = index
748 .reader_builder()
749 .reload_policy(ReloadPolicy::OnCommitWithDelay)
750 .try_into()
751 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
752
753 let searcher = reader.searcher();
754 let query_parser = QueryParser::for_index(&index, vec![content_field]);
755
756 let query = query_parser
757 .parse_query(&options.query)
758 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
759
760 let top_docs = if let Some(top_k) = options.top_k {
761 searcher.search(&query, &TopDocs::with_limit(top_k))?
762 } else {
763 searcher.search(&query, &TopDocs::with_limit(100))?
764 };
765
766 let mut raw_results = Vec::new();
768 for (_score, doc_address) in top_docs {
769 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
770 let path_text = retrieved_doc
771 .get_first(path_field)
772 .map(|field_value| field_value.as_str().unwrap_or(""))
773 .unwrap_or("");
774 let content_text = retrieved_doc
775 .get_first(content_field)
776 .map(|field_value| field_value.as_str().unwrap_or(""))
777 .unwrap_or("");
778
779 let file_path = PathBuf::from(path_text);
780 let preview = if options.full_section {
781 content_text.to_string()
782 } else {
783 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
784 };
785
786 raw_results.push((
787 _score,
788 SearchResult {
789 file: file_path,
790 span: Span {
791 byte_start: 0,
792 byte_end: content_text.len(),
793 line_start: 1,
794 line_end: content_text.lines().count(),
795 },
796 score: _score,
797 preview,
798 lang: ck_core::Language::from_path(&PathBuf::from(path_text)),
799 symbol: None,
800 chunk_hash: None,
801 index_epoch: None,
802 },
803 ));
804 }
805
806 let mut results = Vec::new();
808 if !raw_results.is_empty() {
809 let max_score = raw_results
810 .iter()
811 .map(|(score, _)| *score)
812 .fold(0.0f32, f32::max);
813 if max_score > 0.0 {
814 for (raw_score, mut result) in raw_results {
815 let normalized_score = raw_score / max_score;
816
817 if let Some(threshold) = options.threshold
819 && normalized_score < threshold
820 {
821 continue;
822 }
823
824 result.score = normalized_score;
825 results.push(result);
826 }
827 }
828 }
829
830 Ok(results)
831}
832
/// Hybrid (regex + semantic) search without progress reporting.
// Kept as a convenience wrapper; nothing in the visible part of this file calls it, hence
// the lint allowance.
#[allow(dead_code)]
async fn hybrid_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
    hybrid_search_with_progress(options, None).await
}
837
838async fn hybrid_search_with_progress(
839 options: &SearchOptions,
840 progress_callback: Option<SearchProgressCallback>,
841) -> Result<Vec<SearchResult>> {
842 if let Some(ref callback) = progress_callback {
843 callback("Running regex search...");
844 }
845 let regex_results = regex_search(options)?;
846
847 if let Some(ref callback) = progress_callback {
848 callback("Running semantic search...");
849 }
850 let semantic_results = semantic_search_v3_with_progress(options, progress_callback).await?;
851
852 let mut combined = HashMap::new();
853
854 for (rank, result) in regex_results.iter().enumerate() {
855 let key = format!("{}:{}", result.file.display(), result.span.line_start);
856 combined
857 .entry(key)
858 .or_insert(Vec::new())
859 .push((rank + 1, result.clone()));
860 }
861
862 for (rank, result) in semantic_results.matches.iter().enumerate() {
863 let key = format!("{}:{}", result.file.display(), result.span.line_start);
864 combined
865 .entry(key)
866 .or_insert(Vec::new())
867 .push((rank + 1, result.clone()));
868 }
869
870 let mut rrf_results: Vec<SearchResult> = combined
872 .into_values()
873 .map(|ranks| {
874 let mut result = ranks[0].1.clone();
875 let rrf_score = ranks
876 .iter()
877 .map(|(rank, _)| 1.0 / (60.0 + *rank as f32))
878 .sum();
879 result.score = rrf_score;
880 result
881 })
882 .filter(|result| {
883 if let Some(threshold) = options.threshold {
885 result.score >= threshold
886 } else {
887 true
888 }
889 })
890 .collect();
891
892 rrf_results.sort_by(|a, b| {
894 b.score
895 .partial_cmp(&a.score)
896 .unwrap_or(std::cmp::Ordering::Equal)
897 });
898
899 if let Some(top_k) = options.top_k {
900 rrf_results.truncate(top_k);
901 }
902
903 Ok(rrf_results)
904}
905
906fn build_globset(patterns: &[String]) -> GlobSet {
907 let mut builder = GlobSetBuilder::new();
908 for pat in patterns {
909 if let Ok(glob) = Glob::new(pat) {
911 builder.add(glob);
912 }
913 }
914 builder.build().unwrap_or_else(|_| GlobSet::empty())
915}
916
917fn should_exclude_path(path: &Path, exclude_patterns: &[String]) -> bool {
918 let globset = build_globset(exclude_patterns);
919 if globset.is_match(path) {
921 return true;
922 }
923 for component in path.components() {
924 if let std::path::Component::Normal(name) = component
925 && globset.is_match(name)
926 {
927 return true;
928 }
929 }
930 false
931}
932
933fn collect_files(
934 path: &Path,
935 recursive: bool,
936 exclude_patterns: &[String],
937) -> Result<Vec<PathBuf>> {
938 let mut files = Vec::new();
939 let globset = build_globset(exclude_patterns);
940
941 if path.is_file() {
942 files.push(path.to_path_buf());
944 } else if recursive {
945 for entry in WalkDir::new(path).into_iter().filter_entry(|e| {
946 let name = e.file_name();
948 !globset.is_match(e.path()) && !globset.is_match(name)
949 }) {
950 match entry {
951 Ok(entry) => {
952 if entry.file_type().is_file()
953 && !should_exclude_path(entry.path(), exclude_patterns)
954 {
955 files.push(entry.path().to_path_buf());
956 }
957 }
958 Err(e) => {
959 tracing::debug!("Skipping path due to error: {}", e);
961 continue;
962 }
963 }
964 }
965 } else {
966 match fs::read_dir(path) {
967 Ok(read_dir) => {
968 for entry in read_dir {
969 match entry {
970 Ok(entry) => {
971 let path = entry.path();
972 if path.is_file() && !should_exclude_path(&path, exclude_patterns) {
973 files.push(path);
974 }
975 }
976 Err(e) => {
977 tracing::debug!("Skipping directory entry due to error: {}", e);
978 continue;
979 }
980 }
981 }
982 }
983 Err(e) => {
984 tracing::debug!("Cannot read directory {:?}: {}", path, e);
985 return Err(e.into());
986 }
987 }
988 }
989
990 Ok(files)
991}
992
993#[allow(clippy::too_many_arguments)]
994async fn ensure_index_updated_with_progress(
995 path: &Path,
996 force_reindex: bool,
997 need_embeddings: bool,
998 progress_callback: Option<ck_index::ProgressCallback>,
999 detailed_progress_callback: Option<ck_index::DetailedProgressCallback>,
1000 respect_gitignore: bool,
1001 exclude_patterns: &[String],
1002 model_override: Option<&str>,
1003) -> Result<()> {
1004 let index_root_buf = find_nearest_index_root(path).unwrap_or_else(|| {
1006 if path.is_file() {
1007 path.parent().unwrap_or(path).to_path_buf()
1008 } else {
1009 path.to_path_buf()
1010 }
1011 });
1012 let index_root = &index_root_buf;
1013
1014 if force_reindex {
1016 let stats = ck_index::smart_update_index_with_detailed_progress(
1017 index_root,
1018 false,
1019 progress_callback,
1020 detailed_progress_callback,
1021 need_embeddings,
1022 respect_gitignore,
1023 exclude_patterns, model_override,
1025 )
1026 .await?;
1027 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
1028 tracing::info!(
1029 "Index updated: {} files indexed, {} orphaned files removed",
1030 stats.files_indexed,
1031 stats.orphaned_files_removed
1032 );
1033 }
1034 return Ok(());
1035 }
1036
1037 let stats = ck_index::smart_update_index_with_detailed_progress(
1039 index_root,
1040 false,
1041 progress_callback,
1042 detailed_progress_callback,
1043 need_embeddings,
1044 respect_gitignore,
1045 exclude_patterns,
1046 model_override,
1047 )
1048 .await?;
1049 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
1050 tracing::info!(
1051 "Index updated: {} files indexed, {} orphaned files removed",
1052 stats.files_indexed,
1053 stats.orphaned_files_removed
1054 );
1055 }
1056
1057 Ok(())
1058}
1059
1060fn get_context_preview(lines: &[String], line_idx: usize, options: &SearchOptions) -> String {
1061 let before = options.before_context_lines.max(options.context_lines);
1062 let after = options.after_context_lines.max(options.context_lines);
1063
1064 if before > 0 || after > 0 {
1065 let start_idx = line_idx.saturating_sub(before);
1066 let end_idx = (line_idx + after + 1).min(lines.len());
1067 lines[start_idx..end_idx].join("\n")
1068 } else {
1069 lines[line_idx].to_string()
1070 }
1071}
1072
1073fn extract_code_sections(file_path: &Path, content: &str) -> Option<Vec<(usize, usize, String)>> {
1074 let lang = ck_core::Language::from_path(file_path)?;
1075
1076 if let Ok(chunks) = ck_chunk::chunk_text(content, Some(lang)) {
1078 let sections: Vec<(usize, usize, String)> = chunks
1079 .into_iter()
1080 .filter(|chunk| {
1081 matches!(
1082 chunk.chunk_type,
1083 ck_chunk::ChunkType::Function
1084 | ck_chunk::ChunkType::Class
1085 | ck_chunk::ChunkType::Method
1086 )
1087 })
1088 .map(|chunk| {
1089 (
1090 chunk.span.line_start - 1, chunk.span.line_end - 1,
1092 chunk.text,
1093 )
1094 })
1095 .collect();
1096
1097 if sections.is_empty() {
1098 None
1099 } else {
1100 Some(sections)
1101 }
1102 } else {
1103 None
1104 }
1105}
1106
/// Returns the text of the first section whose inclusive `[start, end]` line range
/// contains `line_idx`, or `None` when no section covers that line.
fn find_containing_section(
    sections: &[(usize, usize, String)],
    line_idx: usize,
) -> Option<&String> {
    sections
        .iter()
        .find(|(start, end, _)| (*start..=*end).contains(&line_idx))
        .map(|(_, _, text)| text)
}
1118
1119#[cfg(test)]
1120mod tests {
1121 use super::*;
1122 use std::fs;
1123 use tempfile::TempDir;
1124
/// Writes four small fixture files (two `.txt`, one `.rs`, one `.py`) into `dir` and
/// returns their paths in the order they were created.
fn create_test_files(dir: &std::path::Path) -> Vec<PathBuf> {
    // (file name, file content) pairs.
    let files = vec![
        ("test1.txt", "hello world rust programming"),
        ("test2.rs", "fn main() { println!(\"Hello Rust\"); }"),
        ("test3.py", "print('Hello Python')"),
        ("test4.txt", "machine learning artificial intelligence"),
    ];

    let mut paths = Vec::new();
    for (name, content) in files {
        let path = dir.join(name);
        fs::write(&path, content).unwrap();
        paths.push(path);
    }
    paths
}
1141
/// Covers the inclusive 1-based range handling of `extract_lines_from_file`.
#[test]
fn test_extract_lines_from_file() {
    let temp_dir = TempDir::new().unwrap();
    let test_file = temp_dir.path().join("test_lines.txt");

    // Ten numbered lines, no trailing newline.
    let content =
        "Line 1\nLine 2\nLine 3\nLine 4\nLine 5\nLine 6\nLine 7\nLine 8\nLine 9\nLine 10";
    fs::write(&test_file, content).unwrap();

    // A range in the middle of the file.
    let result = extract_lines_from_file(&test_file, 3, 5).unwrap();
    assert_eq!(result, "Line 3\nLine 4\nLine 5");

    // A single line.
    let result = extract_lines_from_file(&test_file, 7, 7).unwrap();
    assert_eq!(result, "Line 7");

    // An end past EOF is clamped to the last line.
    let result = extract_lines_from_file(&test_file, 8, 100).unwrap();
    assert_eq!(result, "Line 8\nLine 9\nLine 10");

    // Line 0 is invalid and yields an empty string.
    let result = extract_lines_from_file(&test_file, 0, 5).unwrap();
    assert_eq!(result, "");

    // A range entirely past EOF yields an empty string.
    let result = extract_lines_from_file(&test_file, 20, 25).unwrap();
    assert_eq!(result, "");
}
1172
/// Checks that `extract_content_from_span` returns exactly the lines named by the span.
#[tokio::test]
async fn test_extract_content_from_span() {
    let temp_dir = TempDir::new().unwrap();
    let test_file = temp_dir.path().join("code.rs");

    // Three small functions separated by blank lines.
    let content = "fn first() {\n    println!(\"First\");\n}\n\nfn second() {\n    println!(\"Second\");\n}\n\nfn third() {\n    println!(\"Third\");\n}";
    fs::write(&test_file, content).unwrap();

    // Lines 5-7 hold the second function; the byte offsets are left at zero.
    let span = ck_core::Span {
        byte_start: 0, byte_end: 0, line_start: 5,
        line_end: 7,
    };

    let result = extract_content_from_span(&test_file, &span).await.unwrap();
    assert_eq!(result, "fn second() {\n    println!(\"Second\");\n}");

    // A single-line span.
    let span = ck_core::Span {
        byte_start: 0,
        byte_end: 0,
        line_start: 2,
        line_end: 2,
    };

    let result = extract_content_from_span(&test_file, &span).await.unwrap();
    assert_eq!(result, "    println!(\"First\");");
}
1204
/// Checks the three modes of `collect_files`: flat listing, recursive walk, single file.
#[test]
fn test_collect_files() {
    let temp_dir = TempDir::new().unwrap();
    let test_files = create_test_files(temp_dir.path());

    // Non-recursive listing of the directory.
    let files = collect_files(temp_dir.path(), false, &[]).unwrap();
    assert_eq!(files.len(), 4);

    // Recursive walk; the fixture has no subdirectories, so the count is the same.
    let files = collect_files(temp_dir.path(), true, &[]).unwrap();
    assert_eq!(files.len(), 4);

    // A file path yields exactly that file.
    let files = collect_files(&test_files[0], false, &[]).unwrap();
    assert_eq!(files.len(), 1);
    assert_eq!(files[0], test_files[0]);
}
1223
/// Runs a plain regex search for `rust` over the fixture tree and expects at
/// least one result whose preview mentions it (compared case-insensitively).
#[test]
fn test_regex_search() {
    let workspace = TempDir::new().unwrap();
    create_test_files(workspace.path());

    let opts = SearchOptions {
        mode: SearchMode::Regex,
        query: "rust".to_string(),
        path: workspace.path().to_path_buf(),
        recursive: true,
        ..Default::default()
    };

    let hits = regex_search(&opts).unwrap();
    assert!(!hits.is_empty());

    // At least one preview must contain the query once lowercased.
    let mentions_rust = hits
        .iter()
        .any(|hit| hit.preview.to_lowercase().contains("rust"));
    assert!(mentions_rust);
}
1247
/// Searches for the upper-case query `HELLO` with `case_insensitive` enabled
/// and expects the fixture tree to produce at least one result.
#[test]
fn test_regex_search_case_insensitive() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    create_test_files(root);

    let hits = regex_search(&SearchOptions {
        mode: SearchMode::Regex,
        query: "HELLO".to_string(),
        path: root.to_path_buf(),
        recursive: true,
        case_insensitive: true,
        ..Default::default()
    })
    .unwrap();

    assert!(!hits.is_empty());
}
1265
/// Searches for `fn main()` with `fixed_string` enabled and expects the
/// fixture tree to produce at least one result.
#[test]
fn test_regex_search_fixed_string() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();
    create_test_files(root);

    // The query contains parentheses, which are regex metacharacters.
    let opts = SearchOptions {
        mode: SearchMode::Regex,
        query: "fn main()".to_string(),
        path: root.to_path_buf(),
        recursive: true,
        fixed_string: true,
        ..Default::default()
    };

    let hits = regex_search(&opts).unwrap();
    assert!(!hits.is_empty());
}
1283
/// Verifies that `whole_word` limits matches to the standalone word.
///
/// The fixture line contains `rust` once as a complete word and twice as a
/// prefix (`rusty`, `rustacean`). A bare "not empty" check would also pass if
/// the flag were ignored, so the test pins the exact number of hits.
#[test]
fn test_regex_search_whole_word() {
    let temp_dir = TempDir::new().unwrap();
    fs::write(
        temp_dir.path().join("word_test.txt"),
        "rust rusty rustacean",
    )
    .unwrap();

    let options = SearchOptions {
        mode: SearchMode::Regex,
        query: "rust".to_string(),
        path: temp_dir.path().to_path_buf(),
        recursive: true,
        whole_word: true,
        ..Default::default()
    };

    let results = regex_search(&options).unwrap();

    // Only the first token is a whole-word match; `rusty` and `rustacean`
    // must not be reported.
    assert_eq!(
        results.len(),
        1,
        "whole_word should match only the standalone `rust`"
    );
}
1306
/// Creates ten matching files and checks that `top_k: Some(5)` keeps the
/// result count at five or fewer.
#[test]
fn test_regex_search_top_k() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // Every file contains the query, so more than five hits are available.
    (0..10).for_each(|idx| {
        let file = root.join(format!("file{}.txt", idx));
        fs::write(file, "test content").unwrap();
    });

    let opts = SearchOptions {
        mode: SearchMode::Regex,
        query: "test".to_string(),
        path: root.to_path_buf(),
        recursive: true,
        top_k: Some(5),
        ..Default::default()
    };

    let hit_count = regex_search(&opts).unwrap().len();
    assert!(hit_count <= 5);
}
1332
/// Confirms that every regex match produces its own result and that
/// `span.byte_start` is counted from the start of the file, not of the line.
#[test]
fn test_regex_search_span_offsets() {
    let workspace = TempDir::new().unwrap();
    let target = workspace.path().join("spans.txt");
    fs::write(&target, "test test test\nline two test\ntest end").unwrap();

    let opts = SearchOptions {
        mode: SearchMode::Regex,
        query: "test".to_string(),
        path: target.clone(),
        recursive: false,
        ..Default::default()
    };
    let results = regex_search(&opts).unwrap();

    // Three hits on line 1, one on line 2 and one on line 3.
    assert_eq!(results.len(), 5);

    // Byte offsets of the hits reported for a given line, in result order.
    let starts_on_line = |line: usize| {
        results
            .iter()
            .filter(|hit| hit.span.line_start == line)
            .map(|hit| hit.span.byte_start)
            .collect::<Vec<_>>()
    };

    assert_eq!(starts_on_line(1), [0, 5, 10]);

    // Line 2 begins at byte 15 and its match sits 9 bytes in: 15 + 9 = 24.
    assert_eq!(starts_on_line(2), [24]);

    // No two results may share a starting offset.
    let mut distinct_starts: Vec<_> = results.iter().map(|hit| hit.span.byte_start).collect();
    distinct_starts.sort();
    distinct_starts.dedup();
    assert_eq!(distinct_starts.len(), 5);
}
1371
/// Runs `search_file` over a three-line file and expects exactly one hit, on
/// line 3, whose preview contains the matched text.
#[test]
fn test_search_file() {
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("test.txt");
    fs::write(
        &file_path,
        "line 1: hello\nline 2: world\nline 3: rust programming",
    )
    .unwrap();

    let regex = regex::Regex::new("rust").unwrap();
    let options = SearchOptions::default();

    // The compiled pattern is passed by reference (`&regex`).
    let results = search_file(&regex, &file_path, &options).unwrap();
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].span.line_start, 3);
    assert!(results[0].preview.contains("rust"));
}
1390
/// Checks that `context_lines: 1` makes the preview include the line before
/// and the line after the matching line.
#[test]
fn test_search_file_with_context() {
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("test.txt");
    fs::write(&file_path, "line 1\nline 2\ntarget line\nline 4\nline 5").unwrap();

    let regex = regex::Regex::new("target").unwrap();
    let options = SearchOptions {
        context_lines: 1,
        ..Default::default()
    };

    // The compiled pattern is passed by reference (`&regex`).
    let results = search_file(&regex, &file_path, &options).unwrap();
    assert_eq!(results.len(), 1);

    // Printed to help diagnose a failing assertion below.
    println!("Preview: '{}'", results[0].preview);

    // One line of context on each side of the match, plus the match itself.
    assert!(results[0].preview.contains("line 2"));
    assert!(results[0].preview.contains("target line"));
    assert!(results[0].preview.contains("line 4"));
}
1414
1415 #[tokio::test]
1416 async fn test_search_main_function() {
1417 let temp_dir = TempDir::new().unwrap();
1418 create_test_files(temp_dir.path());
1419
1420 let options = SearchOptions {
1421 mode: SearchMode::Regex,
1422 query: "hello".to_string(),
1423 path: temp_dir.path().to_path_buf(),
1424 recursive: true,
1425 case_insensitive: true,
1426 ..Default::default()
1427 };
1428
1429 let results = search(&options).await.unwrap();
1430 assert!(!results.is_empty());
1431 }
1432}