1use anyhow::Result;
2use ck_core::{CkError, SearchMode, SearchOptions, SearchResult, Span};
3use globset::{Glob, GlobSet, GlobSetBuilder};
4use regex::{Regex, RegexBuilder};
5use std::collections::HashMap;
6use std::fs;
7use std::path::{Path, PathBuf};
8use walkdir::WalkDir;
9use rayon::prelude::*;
10use tantivy::collector::TopDocs;
11use tantivy::query::QueryParser;
12use tantivy::schema::{Schema, STORED, TEXT, Value};
13use tantivy::{doc, Index, ReloadPolicy, TantivyDocument};
14use ck_ann::AnnIndex;
15use std::path::PathBuf as StdPathBuf;
16
17mod semantic_v3;
18pub use semantic_v3::{semantic_search_v3, semantic_search_v3_with_progress};
19
/// Boxed callback that receives human-readable progress messages while a search runs.
///
/// The `Send + Sync` bounds allow the callback to be shared across threads.
pub type SearchProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
21
22fn extract_content_from_span(file_path: &Path, span: &ck_core::Span) -> Result<String> {
24 let content = fs::read_to_string(file_path)?;
25 let lines: Vec<&str> = content.lines().collect();
26
27 if span.line_start == 0 || span.line_start > lines.len() {
28 return Ok(String::new());
29 }
30
31 let start_idx = span.line_start - 1; let end_idx = (span.line_end - 1).min(lines.len().saturating_sub(1));
33
34 if start_idx <= end_idx {
35 Ok(lines[start_idx..=end_idx].join("\n"))
36 } else {
37 Ok(lines[start_idx].to_string())
38 }
39}
40
/// Searches `path` and each of its ancestors for a `.ck` entry and returns the
/// closest directory that has one.
///
/// If `path` is an existing file the search starts at its parent directory;
/// otherwise it starts at `path` itself. Returns `None` when no ancestor
/// contains `.ck`.
fn find_nearest_index_root(path: &Path) -> Option<StdPathBuf> {
    let start = if path.is_file() {
        path.parent().unwrap_or(path)
    } else {
        path
    };

    start
        .ancestors()
        .find(|dir| dir.join(".ck").exists())
        .map(Path::to_path_buf)
}
53
54pub async fn search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
55 search_with_progress(options, None).await
56}
57
58pub async fn search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
59 if !options.path.exists() {
61 return Err(ck_core::CkError::Search(format!("Path does not exist: {}", options.path.display())).into());
62 }
63
64 if !matches!(options.mode, SearchMode::Regex) {
66 let need_embeddings = matches!(options.mode, SearchMode::Semantic | SearchMode::Hybrid);
67 ensure_index_updated(&options.path, options.reindex, need_embeddings).await?;
68 }
69
70 match options.mode {
71 SearchMode::Regex => regex_search(options),
72 SearchMode::Lexical => lexical_search(options).await,
73 SearchMode::Semantic => {
74 semantic_search_v3_with_progress(options, progress_callback).await
76 },
77 SearchMode::Hybrid => hybrid_search_with_progress(options, progress_callback).await,
78 }
79}
80
81fn regex_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
82 let pattern = if options.fixed_string {
83 regex::escape(&options.query)
84 } else if options.whole_word {
85 format!(r"\b{}\b", regex::escape(&options.query))
86 } else {
87 options.query.clone()
88 };
89
90 let regex = RegexBuilder::new(&pattern)
91 .case_insensitive(options.case_insensitive)
92 .build()
93 .map_err(|e| CkError::Regex(e))?;
94
95 let should_recurse = options.path.is_dir() || options.recursive;
97 let files = collect_files(&options.path, should_recurse, &options.exclude_patterns)?;
98
99 let results: Vec<Vec<SearchResult>> = files
100 .par_iter()
101 .filter_map(|file_path| {
102 match search_file(®ex, file_path, options) {
103 Ok(matches) => {
104 if matches.is_empty() {
105 None
106 } else {
107 Some(matches)
108 }
109 }
110 Err(e) => {
111 tracing::debug!("Error searching {:?}: {}", file_path, e);
112 None
113 }
114 }
115 })
116 .collect();
117
118 let mut all_results: Vec<SearchResult> = results.into_iter().flatten().collect();
119 all_results.sort_by(|a, b| {
121 let path_cmp = a.file.cmp(&b.file);
122 if path_cmp != std::cmp::Ordering::Equal {
123 return path_cmp;
124 }
125 a.span.line_start.cmp(&b.span.line_start)
126 });
127
128 if let Some(top_k) = options.top_k {
129 all_results.truncate(top_k);
130 }
131
132 Ok(all_results)
133}
134
135fn search_file(regex: &Regex, file_path: &Path, options: &SearchOptions) -> Result<Vec<SearchResult>> {
136 let content = fs::read_to_string(file_path)?;
137 let lines: Vec<&str> = content.lines().collect();
138 let mut results = Vec::new();
139
140 let code_sections = if options.full_section {
142 extract_code_sections(file_path, &content)
143 } else {
144 None
145 };
146
147 for (line_idx, line) in lines.iter().enumerate() {
148 let line_number = line_idx + 1;
149
150 if regex.is_match(line) {
151 let preview = if options.full_section {
152 if let Some(ref sections) = code_sections {
154 if let Some(section) = find_containing_section(sections, line_idx) {
155 section.clone()
156 } else {
157 get_context_preview(&lines, line_idx, options)
159 }
160 } else {
161 get_context_preview(&lines, line_idx, options)
162 }
163 } else {
164 get_context_preview(&lines, line_idx, options)
165 };
166
167 results.push(SearchResult {
168 file: file_path.to_path_buf(),
169 span: Span {
170 byte_start: 0,
171 byte_end: line.len(),
172 line_start: line_number,
173 line_end: line_number,
174 },
175 score: 1.0,
176 preview,
177 lang: detect_language(file_path),
178 symbol: None,
179 });
180 }
181 }
182
183 Ok(results)
184}
185
186async fn lexical_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
187 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
189 if options.path.is_file() {
190 options.path.parent().unwrap_or(&options.path).to_path_buf()
191 } else {
192 options.path.clone()
193 }
194 });
195
196 let index_dir = index_root.join(".ck");
197 if !index_dir.exists() {
198 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
199 }
200
201 let tantivy_index_path = index_dir.join("tantivy_index");
202
203 if !tantivy_index_path.exists() {
204 return build_tantivy_index(options).await;
205 }
206
207 let mut schema_builder = Schema::builder();
208 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
209 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
210 let _schema = schema_builder.build();
211
212 let index = Index::open_in_dir(&tantivy_index_path)
213 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
214
215 let reader = index
216 .reader_builder()
217 .reload_policy(ReloadPolicy::OnCommitWithDelay)
218 .try_into()
219 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
220
221 let searcher = reader.searcher();
222 let query_parser = QueryParser::for_index(&index, vec![content_field]);
223
224 let query = query_parser
225 .parse_query(&options.query)
226 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
227
228 let top_docs = if let Some(top_k) = options.top_k {
229 searcher.search(&query, &TopDocs::with_limit(top_k))?
230 } else {
231 searcher.search(&query, &TopDocs::with_limit(100))?
232 };
233
234 let mut raw_results = Vec::new();
236 for (_score, doc_address) in top_docs {
237 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
238 let path_text = retrieved_doc
239 .get_first(path_field)
240 .map(|field_value| field_value.as_str().unwrap_or(""))
241 .unwrap_or("");
242 let content_text = retrieved_doc
243 .get_first(content_field)
244 .map(|field_value| field_value.as_str().unwrap_or(""))
245 .unwrap_or("");
246
247 let file_path = PathBuf::from(path_text);
248 let preview = if options.full_section {
249 content_text.to_string()
250 } else {
251 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
252 };
253
254 raw_results.push((_score, SearchResult {
255 file: file_path,
256 span: Span {
257 byte_start: 0,
258 byte_end: content_text.len(),
259 line_start: 1,
260 line_end: content_text.lines().count(),
261 },
262 score: _score,
263 preview,
264 lang: detect_language(&PathBuf::from(path_text)),
265 symbol: None,
266 }));
267 }
268
269 let mut results = Vec::new();
271 if !raw_results.is_empty() {
272 let max_score = raw_results.iter().map(|(score, _)| *score).fold(0.0f32, f32::max);
273 if max_score > 0.0 {
274 for (raw_score, mut result) in raw_results {
275 let normalized_score = raw_score / max_score;
276
277 if let Some(threshold) = options.threshold {
279 if normalized_score < threshold {
280 continue;
281 }
282 }
283
284 result.score = normalized_score;
285 results.push(result);
286 }
287 }
288 }
289
290 Ok(results)
291}
292
293async fn build_tantivy_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
294 let index_root = if options.path.is_file() {
296 options.path.parent().unwrap_or(&options.path)
297 } else {
298 &options.path
299 };
300
301 let index_dir = index_root.join(".ck");
302 let tantivy_index_path = index_dir.join("tantivy_index");
303
304 fs::create_dir_all(&tantivy_index_path)?;
305
306 let mut schema_builder = Schema::builder();
307 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
308 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
309 let schema = schema_builder.build();
310
311 let index = Index::create_in_dir(&tantivy_index_path, schema.clone())
312 .map_err(|e| CkError::Index(format!("Failed to create tantivy index: {}", e)))?;
313
314 let mut index_writer = index.writer(50_000_000)
315 .map_err(|e| CkError::Index(format!("Failed to create index writer: {}", e)))?;
316
317 let files = collect_files(&index_root, true, &options.exclude_patterns)?;
318
319 for file_path in &files {
320 if let Ok(content) = fs::read_to_string(file_path) {
321 let doc = doc!(
322 content_field => content,
323 path_field => file_path.display().to_string()
324 );
325 index_writer.add_document(doc)?;
326 }
327 }
328
329 index_writer.commit()
330 .map_err(|e| CkError::Index(format!("Failed to commit index: {}", e)))?;
331
332 let tantivy_index_path = index_root.join(".ck").join("tantivy_index");
334 let mut schema_builder = Schema::builder();
335 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
336 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
337 let _schema = schema_builder.build();
338
339 let index = Index::open_in_dir(&tantivy_index_path)
340 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
341
342 let reader = index
343 .reader_builder()
344 .reload_policy(ReloadPolicy::OnCommitWithDelay)
345 .try_into()
346 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
347
348 let searcher = reader.searcher();
349 let query_parser = QueryParser::for_index(&index, vec![content_field]);
350
351 let query = query_parser
352 .parse_query(&options.query)
353 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
354
355 let top_docs = if let Some(top_k) = options.top_k {
356 searcher.search(&query, &TopDocs::with_limit(top_k))?
357 } else {
358 searcher.search(&query, &TopDocs::with_limit(100))?
359 };
360
361 let mut raw_results = Vec::new();
363 for (_score, doc_address) in top_docs {
364 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
365 let path_text = retrieved_doc
366 .get_first(path_field)
367 .map(|field_value| field_value.as_str().unwrap_or(""))
368 .unwrap_or("");
369 let content_text = retrieved_doc
370 .get_first(content_field)
371 .map(|field_value| field_value.as_str().unwrap_or(""))
372 .unwrap_or("");
373
374 let file_path = PathBuf::from(path_text);
375 let preview = if options.full_section {
376 content_text.to_string()
377 } else {
378 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
379 };
380
381 raw_results.push((_score, SearchResult {
382 file: file_path,
383 span: Span {
384 byte_start: 0,
385 byte_end: content_text.len(),
386 line_start: 1,
387 line_end: content_text.lines().count(),
388 },
389 score: _score,
390 preview,
391 lang: detect_language(&PathBuf::from(path_text)),
392 symbol: None,
393 }));
394 }
395
396 let mut results = Vec::new();
398 if !raw_results.is_empty() {
399 let max_score = raw_results.iter().map(|(score, _)| *score).fold(0.0f32, f32::max);
400 if max_score > 0.0 {
401 for (raw_score, mut result) in raw_results {
402 let normalized_score = raw_score / max_score;
403
404 if let Some(threshold) = options.threshold {
406 if normalized_score < threshold {
407 continue;
408 }
409 }
410
411 result.score = normalized_score;
412 results.push(result);
413 }
414 }
415 }
416
417 Ok(results)
418}
419
420#[allow(dead_code)]
421async fn semantic_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
422 semantic_search_with_progress(options, None).await
423}
424
425async fn semantic_search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
426 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
428 if options.path.is_file() {
429 options.path.parent().unwrap_or(&options.path).to_path_buf()
430 } else {
431 options.path.clone()
432 }
433 });
434
435 let index_dir = index_root.join(".ck");
436 if !index_dir.exists() {
437 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
438 }
439
440 let ann_index_path = index_dir.join("ann_index.bin");
441 let embeddings_path = index_dir.join("embeddings.json");
442
443 if !ann_index_path.exists() || !embeddings_path.exists() {
444 return build_semantic_index_with_progress(options, progress_callback).await;
445 }
446
447 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
449
450 let embeddings_data = fs::read_to_string(&embeddings_path)?;
452 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
453
454 if let Some(ref callback) = progress_callback {
456 callback("Loading embedding model...");
457 }
458
459 let mut embedder = if let Some(ref callback) = progress_callback {
460 let _cb = callback.as_ref();
461 let model_cb = Box::new(|msg: &str| {
462 eprintln!("Model: {}", msg);
465 }) as ck_embed::ModelDownloadCallback;
466 ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), Some(model_cb))?
467 } else {
468 ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?
469 };
470 let query_embeddings = embedder.embed(&[options.query.clone()])?;
471
472 if query_embeddings.is_empty() {
473 return Ok(Vec::new());
474 }
475
476 let query_embedding = &query_embeddings[0];
477
478 let top_k = options.top_k.unwrap_or(10);
480 let similar_docs = ann_index.search(query_embedding, top_k);
481
482 let mut results = Vec::new();
483
484 let filter_by_file = options.path.is_file();
486 let target_file = if filter_by_file {
487 Some(options.path.canonicalize().unwrap_or_else(|_| options.path.clone()))
488 } else {
489 None
490 };
491
492 for (doc_id, similarity) in similar_docs {
493 if let Some(threshold) = options.threshold {
495 if similarity < threshold {
496 continue;
497 }
498 }
499
500 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
501 if let Some(target) = &target_file {
503 let canonical_result = file_path.canonicalize().unwrap_or_else(|_| file_path.clone());
504 if canonical_result != *target {
505 continue; }
507 }
508
509 let preview = if options.full_section {
511 content.clone()
512 } else {
513 content.lines().take(3).collect::<Vec<_>>().join("\n")
514 };
515
516 results.push(SearchResult {
517 file: file_path.clone(),
518 span: Span {
519 byte_start: 0,
520 byte_end: content.len(),
521 line_start: 1,
522 line_end: content.lines().count(),
523 },
524 score: similarity,
525 preview,
526 lang: detect_language(file_path),
527 symbol: None,
528 });
529 }
530 }
531
532 Ok(results)
533}
534
535#[allow(dead_code)]
536async fn build_semantic_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
537 build_semantic_index_with_progress(options, None).await
538}
539
540async fn build_semantic_index_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
541 let index_root = if options.path.is_file() {
543 options.path.parent().unwrap_or(&options.path)
544 } else {
545 &options.path
546 };
547
548 let index_dir = index_root.join(".ck");
549 let ann_index_path = index_dir.join("ann_index.bin");
550 let embeddings_path = index_dir.join("embeddings.json");
551
552 fs::create_dir_all(&index_dir)?;
553
554 if let Some(ref callback) = progress_callback {
555 callback("Building semantic index (no index found)...");
556 }
557
558 eprintln!("Building semantic index (no existing index found)...");
560
561 let files = collect_files(&index_root, true, &options.exclude_patterns)?;
563
564 if let Some(ref callback) = progress_callback {
565 callback(&format!("Found {} files to index", files.len()));
566 }
567 eprintln!("Found {} files to embed and index", files.len());
568
569 let mut file_embeddings = Vec::new();
570 let mut embeddings = Vec::new();
571
572 if let Some(ref callback) = progress_callback {
574 callback("Loading embedding model...");
575 }
576
577 let model_callback = if progress_callback.is_some() {
578 Some(Box::new(|msg: &str| {
579 eprintln!("Model: {}", msg);
580 }) as ck_embed::ModelDownloadCallback)
581 } else {
582 None
583 };
584
585 let mut embedder = ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), model_callback)?;
586
587 if let Some(ref callback) = progress_callback {
588 callback("Generating embeddings for code chunks...");
589 }
590
591 for (file_idx, file_path) in files.iter().enumerate() {
592 if let Ok(content) = fs::read_to_string(file_path) {
593 if let Some(ref callback) = progress_callback {
594 let file_name = file_path.file_name()
595 .map(|n| n.to_string_lossy().to_string())
596 .unwrap_or_else(|| file_path.to_string_lossy().to_string());
597 callback(&format!("Processing {}/{}: {}", file_idx + 1, files.len(), file_name));
598 }
599
600 let chunks = ck_chunk::chunk_text(&content, detect_language(file_path).as_deref())?;
602
603 for chunk in chunks {
604 let chunk_embeddings = embedder.embed(&[chunk.text.clone()])?;
605 if !chunk_embeddings.is_empty() {
606 embeddings.push(chunk_embeddings[0].clone());
607 file_embeddings.push((file_path.clone(), chunk.text));
608 }
609 }
610 }
611 }
612
613 if let Some(ref callback) = progress_callback {
614 callback(&format!("Built {} embeddings, creating search index...", embeddings.len()));
615 }
616 eprintln!("Generated {} embeddings, building search index...", embeddings.len());
617
618 let index = ck_ann::SimpleIndex::build(&embeddings)?;
620 index.save(&ann_index_path)?;
621
622 let embeddings_json = serde_json::to_string(&file_embeddings)?;
624 fs::write(&embeddings_path, embeddings_json)?;
625
626 if let Some(ref callback) = progress_callback {
627 callback("Semantic index built successfully, running search...");
628 }
629 eprintln!("Semantic index built successfully!");
630
631 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
633
634 let embeddings_data = fs::read_to_string(&embeddings_path)?;
636 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
637
638 let mut embedder = ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?;
640 let query_embeddings = embedder.embed(&[options.query.clone()])?;
641
642 if query_embeddings.is_empty() {
643 return Ok(Vec::new());
644 }
645
646 let query_embedding = &query_embeddings[0];
647
648 let top_k = options.top_k.unwrap_or(10);
650 let similar_docs = ann_index.search(query_embedding, top_k);
651
652 let mut results = Vec::new();
653
654 let filter_by_file = options.path.is_file();
656 let target_file = if filter_by_file {
657 Some(options.path.canonicalize().unwrap_or_else(|_| options.path.clone()))
658 } else {
659 None
660 };
661
662 for (doc_id, similarity) in similar_docs {
663 if let Some(threshold) = options.threshold {
665 if similarity < threshold {
666 continue;
667 }
668 }
669
670 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
671 if let Some(target) = &target_file {
673 let canonical_result = file_path.canonicalize().unwrap_or_else(|_| file_path.clone());
674 if canonical_result != *target {
675 continue; }
677 }
678
679 let preview = if options.full_section {
681 content.clone()
682 } else {
683 content.lines().take(3).collect::<Vec<_>>().join("\n")
684 };
685
686 results.push(SearchResult {
687 file: file_path.clone(),
688 span: Span {
689 byte_start: 0,
690 byte_end: content.len(),
691 line_start: 1,
692 line_end: content.lines().count(),
693 },
694 score: similarity,
695 preview,
696 lang: detect_language(file_path),
697 symbol: None,
698 });
699 }
700 }
701
702 Ok(results)
703}
704
705#[allow(dead_code)]
706async fn hybrid_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
707 hybrid_search_with_progress(options, None).await
708}
709
710async fn hybrid_search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
711 if let Some(ref callback) = progress_callback {
712 callback("Running regex search...");
713 }
714 let regex_results = regex_search(options)?;
715
716 if let Some(ref callback) = progress_callback {
717 callback("Running semantic search...");
718 }
719 let semantic_results = semantic_search_v3_with_progress(options, progress_callback).await?;
720
721 let mut combined = HashMap::new();
722
723 for (rank, result) in regex_results.iter().enumerate() {
724 let key = format!("{}:{}", result.file.display(), result.span.line_start);
725 combined.entry(key).or_insert(Vec::new()).push((rank + 1, result.clone()));
726 }
727
728 for (rank, result) in semantic_results.iter().enumerate() {
729 let key = format!("{}:{}", result.file.display(), result.span.line_start);
730 combined.entry(key).or_insert(Vec::new()).push((rank + 1, result.clone()));
731 }
732
733 let mut rrf_results: Vec<SearchResult> = combined
735 .into_iter()
736 .map(|(_, ranks)| {
737 let mut result = ranks[0].1.clone();
738 let rrf_score = ranks.iter().map(|(rank, _)| 1.0 / (60.0 + *rank as f32)).sum();
739 result.score = rrf_score;
740 result
741 })
742 .filter(|result| {
743 if let Some(threshold) = options.threshold {
745 result.score >= threshold
746 } else {
747 true
748 }
749 })
750 .collect();
751
752 rrf_results.sort_by(|a, b| {
754 b.score
755 .partial_cmp(&a.score)
756 .unwrap_or(std::cmp::Ordering::Equal)
757 });
758
759 if let Some(top_k) = options.top_k {
760 rrf_results.truncate(top_k);
761 }
762
763 Ok(rrf_results)
764}
765
766fn build_globset(patterns: &[String]) -> GlobSet {
767 let mut builder = GlobSetBuilder::new();
768 for pat in patterns {
769 if let Ok(glob) = Glob::new(pat) {
771 builder.add(glob);
772 }
773 }
774 builder.build().unwrap_or_else(|_| GlobSet::empty())
775}
776
777fn should_exclude_path(path: &Path, exclude_patterns: &[String]) -> bool {
778 let globset = build_globset(exclude_patterns);
779 if globset.is_match(path) {
781 return true;
782 }
783 for component in path.components() {
784 if let std::path::Component::Normal(name) = component {
785 if globset.is_match(name) {
786 return true;
787 }
788 }
789 }
790 false
791}
792
793fn collect_files(path: &Path, recursive: bool, exclude_patterns: &[String]) -> Result<Vec<PathBuf>> {
794 let mut files = Vec::new();
795 let globset = build_globset(exclude_patterns);
796
797 if path.is_file() {
798 files.push(path.to_path_buf());
800 } else if recursive {
801 for entry in WalkDir::new(path)
802 .into_iter()
803 .filter_entry(|e| {
804 let name = e.file_name();
806 !globset.is_match(e.path()) && !globset.is_match(name)
807 }) {
808 match entry {
809 Ok(entry) => {
810 if entry.file_type().is_file() && !should_exclude_path(entry.path(), exclude_patterns) {
811 files.push(entry.path().to_path_buf());
812 }
813 }
814 Err(e) => {
815 tracing::debug!("Skipping path due to error: {}", e);
817 continue;
818 }
819 }
820 }
821 } else {
822 match fs::read_dir(path) {
823 Ok(read_dir) => {
824 for entry in read_dir {
825 match entry {
826 Ok(entry) => {
827 let path = entry.path();
828 if path.is_file() && !should_exclude_path(&path, exclude_patterns) {
829 files.push(path);
830 }
831 }
832 Err(e) => {
833 tracing::debug!("Skipping directory entry due to error: {}", e);
834 continue;
835 }
836 }
837 }
838 }
839 Err(e) => {
840 tracing::debug!("Cannot read directory {:?}: {}", path, e);
841 return Err(e.into());
842 }
843 }
844 }
845
846 Ok(files)
847}
848
/// Maps the extension of `path` to a language name.
///
/// Known extensions are translated (`rs` → `rust`, `h`/`hpp`/`cc`/`cxx` →
/// `cpp`, …); any other extension is returned unchanged. Returns `None` when
/// the path has no extension or the extension is not valid UTF-8. Matching is
/// case-sensitive.
fn detect_language(path: &Path) -> Option<String> {
    let ext = path.extension()?.to_str()?;

    let language = match ext {
        "rs" => "rust",
        "py" => "python",
        "js" => "javascript",
        "ts" => "typescript",
        "go" => "go",
        "java" => "java",
        "c" => "c",
        "cpp" | "cc" | "cxx" | "h" | "hpp" => "cpp",
        "cs" => "csharp",
        "rb" => "ruby",
        "php" => "php",
        "swift" => "swift",
        "kt" => "kotlin",
        other => other,
    };

    Some(language.to_owned())
}
871
872async fn ensure_index_updated(path: &Path, force_reindex: bool, need_embeddings: bool) -> Result<()> {
873
874 let index_root_buf = find_nearest_index_root(path).unwrap_or_else(|| {
876 if path.is_file() {
877 path.parent().unwrap_or(path).to_path_buf()
878 } else {
879 path.to_path_buf()
880 }
881 });
882 let index_root = &index_root_buf;
883
884 if force_reindex {
886 let stats = ck_index::smart_update_index_with_progress(index_root, false, None, need_embeddings).await?;
887 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
888 tracing::info!("Index updated: {} files indexed, {} orphaned files removed",
889 stats.files_indexed, stats.orphaned_files_removed);
890 }
891 return Ok(());
892 }
893
894 let stats = ck_index::smart_update_index_with_progress(index_root, false, None, need_embeddings).await?;
896 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
897 tracing::info!("Index updated: {} files indexed, {} orphaned files removed",
898 stats.files_indexed, stats.orphaned_files_removed);
899 }
900
901 Ok(())
902}
903
904fn get_context_preview(lines: &[&str], line_idx: usize, options: &SearchOptions) -> String {
905 let before = options.before_context_lines.max(options.context_lines);
906 let after = options.after_context_lines.max(options.context_lines);
907
908 if before > 0 || after > 0 {
909 let start_idx = line_idx.saturating_sub(before);
910 let end_idx = (line_idx + after + 1).min(lines.len());
911 lines[start_idx..end_idx].join("\n")
912 } else {
913 lines[line_idx].to_string()
914 }
915}
916
917fn extract_code_sections(file_path: &Path, content: &str) -> Option<Vec<(usize, usize, String)>> {
918 let lang = match file_path.extension().and_then(|s| s.to_str()) {
920 Some("py") => Some("python"),
921 Some("js") => Some("javascript"),
922 Some("ts") | Some("tsx") => Some("typescript"),
923 _ => return None,
924 };
925
926 if let Ok(chunks) = ck_chunk::chunk_text(content, lang) {
928 let sections: Vec<(usize, usize, String)> = chunks
929 .into_iter()
930 .filter(|chunk| matches!(
931 chunk.chunk_type,
932 ck_chunk::ChunkType::Function |
933 ck_chunk::ChunkType::Class |
934 ck_chunk::ChunkType::Method
935 ))
936 .map(|chunk| {
937 (
938 chunk.span.line_start - 1, chunk.span.line_end - 1,
940 chunk.text,
941 )
942 })
943 .collect();
944
945 if sections.is_empty() {
946 None
947 } else {
948 Some(sections)
949 }
950 } else {
951 None
952 }
953}
954
/// Returns the text of the first section whose inclusive `(start, end)` line
/// range contains `line_idx`, or `None` if no section covers that line.
fn find_containing_section(sections: &[(usize, usize, String)], line_idx: usize) -> Option<&String> {
    sections
        .iter()
        .find(|(first, last, _)| (*first..=*last).contains(&line_idx))
        .map(|(_, _, text)| text)
}
963
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes a small, fixed set of text/source files into `dir` and returns their paths.
    fn create_test_files(dir: &std::path::Path) -> Vec<PathBuf> {
        let files = [
            ("test1.txt", "hello world rust programming"),
            ("test2.rs", "fn main() { println!(\"Hello Rust\"); }"),
            ("test3.py", "print('Hello Python')"),
            ("test4.txt", "machine learning artificial intelligence"),
        ];

        files
            .iter()
            .map(|(name, content)| {
                let path = dir.join(name);
                fs::write(&path, content).unwrap();
                path
            })
            .collect()
    }

    /// Recursive regex-mode options for `query` rooted at `dir`.
    fn regex_options(dir: &std::path::Path, query: &str) -> SearchOptions {
        SearchOptions {
            mode: SearchMode::Regex,
            query: query.to_string(),
            path: dir.to_path_buf(),
            recursive: true,
            ..Default::default()
        }
    }

    #[test]
    fn test_detect_language() {
        assert_eq!(detect_language(&PathBuf::from("test.rs")), Some("rust".to_string()));
        assert_eq!(detect_language(&PathBuf::from("test.py")), Some("python".to_string()));
        assert_eq!(detect_language(&PathBuf::from("test.js")), Some("javascript".to_string()));
        assert_eq!(detect_language(&PathBuf::from("test.unknown")), Some("unknown".to_string()));
        assert_eq!(detect_language(&PathBuf::from("noext")), None);
    }

    #[test]
    fn test_collect_files() {
        let temp_dir = TempDir::new().unwrap();
        let test_files = create_test_files(temp_dir.path());

        // Non-recursive listing of a flat directory.
        let files = collect_files(temp_dir.path(), false, &[]).unwrap();
        assert_eq!(files.len(), 4);

        // Recursive walk of the same flat directory finds the same files.
        let files = collect_files(temp_dir.path(), true, &[]).unwrap();
        assert_eq!(files.len(), 4);

        // A file path is returned as-is.
        let files = collect_files(&test_files[0], false, &[]).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0], test_files[0]);
    }

    #[test]
    fn test_regex_search() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let results = regex_search(&regex_options(temp_dir.path(), "rust")).unwrap();
        assert!(!results.is_empty());
        assert!(results
            .iter()
            .all(|r| r.preview.to_lowercase().contains("rust")));
    }

    #[test]
    fn test_regex_search_case_insensitive() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            case_insensitive: true,
            ..regex_options(temp_dir.path(), "HELLO")
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
        assert!(results
            .iter()
            .all(|r| r.preview.to_lowercase().contains("hello")));
    }

    #[test]
    fn test_regex_search_fixed_string() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            fixed_string: true,
            ..regex_options(temp_dir.path(), "fn main()")
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_fixed_string_escapes_metacharacters() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join("dots.txt"), "a.b\naxb").unwrap();

        let options = SearchOptions {
            fixed_string: true,
            ..regex_options(temp_dir.path(), "a.b")
        };

        // As a regex `a.b` would also match `axb`; as a literal it must not.
        let results = regex_search(&options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.line_start, 1);
    }

    #[test]
    fn test_regex_search_whole_word() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join("word_test.txt"), "rust rusty rustacean").unwrap();

        let options = SearchOptions {
            whole_word: true,
            ..regex_options(temp_dir.path(), "rust")
        };
        let results = regex_search(&options).unwrap();
        assert_eq!(results.len(), 1);

        // A prefix of every word on the line is not a whole word.
        let options = SearchOptions {
            whole_word: true,
            ..regex_options(temp_dir.path(), "rus")
        };
        let results = regex_search(&options).unwrap();
        assert!(results.is_empty());
    }

    #[test]
    fn test_regex_search_top_k() {
        let temp_dir = TempDir::new().unwrap();

        for i in 0..10 {
            fs::write(temp_dir.path().join(format!("file{}.txt", i)), "test content").unwrap();
        }

        let options = SearchOptions {
            top_k: Some(5),
            ..regex_options(temp_dir.path(), "test")
        };

        let results = regex_search(&options).unwrap();
        assert_eq!(results.len(), 5);
        // Results are ordered by file path.
        assert!(results.windows(2).all(|pair| pair[0].file <= pair[1].file));
    }

    #[test]
    fn test_search_file() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1: hello\nline 2: world\nline 3: rust programming").unwrap();

        let regex = regex::Regex::new("rust").unwrap();
        let options = SearchOptions::default();

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.line_start, 3);
        assert!(results[0].preview.contains("rust"));
    }

    #[test]
    fn test_search_file_with_context() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1\nline 2\ntarget line\nline 4\nline 5").unwrap();

        let regex = regex::Regex::new("target").unwrap();
        let options = SearchOptions {
            context_lines: 1,
            ..Default::default()
        };

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);

        let preview = &results[0].preview;
        assert!(preview.contains("line 2"));
        assert!(preview.contains("target line"));
        assert!(preview.contains("line 4"));
    }

    #[tokio::test]
    async fn test_search_main_function() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            case_insensitive: true,
            ..regex_options(temp_dir.path(), "hello")
        };

        let results = search(&options).await.unwrap();
        assert!(!results.is_empty());
    }
}