1use anyhow::Result;
2use ck_core::{CkError, SearchMode, SearchOptions, SearchResult, Span};
3use globset::{Glob, GlobSet, GlobSetBuilder};
4use regex::{Regex, RegexBuilder};
5use std::collections::HashMap;
6use std::fs;
7use std::path::{Path, PathBuf};
8use walkdir::WalkDir;
9use rayon::prelude::*;
10use tantivy::collector::TopDocs;
11use tantivy::query::QueryParser;
12use tantivy::schema::{Schema, STORED, TEXT, Value};
13use tantivy::{doc, Index, ReloadPolicy, TantivyDocument};
14use ck_ann::AnnIndex;
15use std::path::PathBuf as StdPathBuf;
16
17mod semantic_v3;
18pub use semantic_v3::{semantic_search_v3, semantic_search_v3_with_progress};
19
/// Callback that receives human-readable status messages while a search runs.
///
/// The `Send + Sync` bounds allow the boxed closure to be shared across threads.
pub type SearchProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
21
22fn extract_content_from_span(file_path: &Path, span: &ck_core::Span) -> Result<String> {
24 let content = fs::read_to_string(file_path)?;
25 let lines: Vec<&str> = content.lines().collect();
26
27 if span.line_start == 0 || span.line_start > lines.len() {
28 return Ok(String::new());
29 }
30
31 let start_idx = span.line_start - 1; let end_idx = (span.line_end - 1).min(lines.len().saturating_sub(1));
33
34 if start_idx <= end_idx {
35 Ok(lines[start_idx..=end_idx].join("\n"))
36 } else {
37 Ok(lines[start_idx].to_string())
38 }
39}
40
/// Walks upward from `path` and returns the first directory that contains a
/// `.ck` entry, or `None` when no ancestor has one.
///
/// When `path` is a file the walk starts at its parent directory.
fn find_nearest_index_root(path: &Path) -> Option<StdPathBuf> {
    let start = match path.parent() {
        Some(parent) if path.is_file() => parent,
        _ => path,
    };

    // `ancestors` yields `start` itself first, then each successive parent.
    start
        .ancestors()
        .find(|dir| dir.join(".ck").exists())
        .map(Path::to_path_buf)
}
53
54pub async fn search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
55 search_with_progress(options, None).await
56}
57
58pub async fn search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
59 if !options.path.exists() {
61 return Err(ck_core::CkError::Search(format!("Path does not exist: {}", options.path.display())).into());
62 }
63
64 if !matches!(options.mode, SearchMode::Regex) {
66 let need_embeddings = matches!(options.mode, SearchMode::Semantic | SearchMode::Hybrid);
67 ensure_index_updated(&options.path, options.reindex, need_embeddings).await?;
68 }
69
70 match options.mode {
71 SearchMode::Regex => regex_search(options),
72 SearchMode::Lexical => lexical_search(options).await,
73 SearchMode::Semantic => {
74 semantic_search_v3_with_progress(options, progress_callback).await
76 },
77 SearchMode::Hybrid => hybrid_search_with_progress(options, progress_callback).await,
78 }
79}
80
81fn regex_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
82 let pattern = if options.fixed_string {
83 regex::escape(&options.query)
84 } else if options.whole_word {
85 format!(r"\b{}\b", regex::escape(&options.query))
86 } else {
87 options.query.clone()
88 };
89
90 let regex = RegexBuilder::new(&pattern)
91 .case_insensitive(options.case_insensitive)
92 .build()
93 .map_err(|e| CkError::Regex(e))?;
94
95 let files = collect_files(&options.path, options.recursive, &options.exclude_patterns)?;
96
97 let results: Vec<Vec<SearchResult>> = files
98 .par_iter()
99 .filter_map(|file_path| {
100 match search_file(®ex, file_path, options) {
101 Ok(matches) => {
102 if matches.is_empty() {
103 None
104 } else {
105 Some(matches)
106 }
107 }
108 Err(e) => {
109 tracing::debug!("Error searching {:?}: {}", file_path, e);
110 None
111 }
112 }
113 })
114 .collect();
115
116 let mut all_results: Vec<SearchResult> = results.into_iter().flatten().collect();
117 all_results.sort_by(|a, b| {
119 let path_cmp = a.file.cmp(&b.file);
120 if path_cmp != std::cmp::Ordering::Equal {
121 return path_cmp;
122 }
123 a.span.line_start.cmp(&b.span.line_start)
124 });
125
126 if let Some(top_k) = options.top_k {
127 all_results.truncate(top_k);
128 }
129
130 Ok(all_results)
131}
132
133fn search_file(regex: &Regex, file_path: &Path, options: &SearchOptions) -> Result<Vec<SearchResult>> {
134 let content = fs::read_to_string(file_path)?;
135 let lines: Vec<&str> = content.lines().collect();
136 let mut results = Vec::new();
137
138 let code_sections = if options.full_section {
140 extract_code_sections(file_path, &content)
141 } else {
142 None
143 };
144
145 for (line_idx, line) in lines.iter().enumerate() {
146 let line_number = line_idx + 1;
147
148 if regex.is_match(line) {
149 let preview = if options.full_section {
150 if let Some(ref sections) = code_sections {
152 if let Some(section) = find_containing_section(sections, line_idx) {
153 section.clone()
154 } else {
155 get_context_preview(&lines, line_idx, options)
157 }
158 } else {
159 get_context_preview(&lines, line_idx, options)
160 }
161 } else {
162 get_context_preview(&lines, line_idx, options)
163 };
164
165 results.push(SearchResult {
166 file: file_path.to_path_buf(),
167 span: Span {
168 byte_start: 0,
169 byte_end: line.len(),
170 line_start: line_number,
171 line_end: line_number,
172 },
173 score: 1.0,
174 preview,
175 lang: detect_language(file_path),
176 symbol: None,
177 });
178 }
179 }
180
181 Ok(results)
182}
183
184async fn lexical_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
185 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
187 if options.path.is_file() {
188 options.path.parent().unwrap_or(&options.path).to_path_buf()
189 } else {
190 options.path.clone()
191 }
192 });
193
194 let index_dir = index_root.join(".ck");
195 if !index_dir.exists() {
196 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
197 }
198
199 let tantivy_index_path = index_dir.join("tantivy_index");
200
201 if !tantivy_index_path.exists() {
202 return build_tantivy_index(options).await;
203 }
204
205 let mut schema_builder = Schema::builder();
206 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
207 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
208 let _schema = schema_builder.build();
209
210 let index = Index::open_in_dir(&tantivy_index_path)
211 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
212
213 let reader = index
214 .reader_builder()
215 .reload_policy(ReloadPolicy::OnCommitWithDelay)
216 .try_into()
217 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
218
219 let searcher = reader.searcher();
220 let query_parser = QueryParser::for_index(&index, vec![content_field]);
221
222 let query = query_parser
223 .parse_query(&options.query)
224 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
225
226 let top_docs = if let Some(top_k) = options.top_k {
227 searcher.search(&query, &TopDocs::with_limit(top_k))?
228 } else {
229 searcher.search(&query, &TopDocs::with_limit(100))?
230 };
231
232 let mut raw_results = Vec::new();
234 for (_score, doc_address) in top_docs {
235 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
236 let path_text = retrieved_doc
237 .get_first(path_field)
238 .map(|field_value| field_value.as_str().unwrap_or(""))
239 .unwrap_or("");
240 let content_text = retrieved_doc
241 .get_first(content_field)
242 .map(|field_value| field_value.as_str().unwrap_or(""))
243 .unwrap_or("");
244
245 let file_path = PathBuf::from(path_text);
246 let preview = if options.full_section {
247 content_text.to_string()
248 } else {
249 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
250 };
251
252 raw_results.push((_score, SearchResult {
253 file: file_path,
254 span: Span {
255 byte_start: 0,
256 byte_end: content_text.len(),
257 line_start: 1,
258 line_end: content_text.lines().count(),
259 },
260 score: _score,
261 preview,
262 lang: detect_language(&PathBuf::from(path_text)),
263 symbol: None,
264 }));
265 }
266
267 let mut results = Vec::new();
269 if !raw_results.is_empty() {
270 let max_score = raw_results.iter().map(|(score, _)| *score).fold(0.0f32, f32::max);
271 if max_score > 0.0 {
272 for (raw_score, mut result) in raw_results {
273 let normalized_score = raw_score / max_score;
274
275 if let Some(threshold) = options.threshold {
277 if normalized_score < threshold {
278 continue;
279 }
280 }
281
282 result.score = normalized_score;
283 results.push(result);
284 }
285 }
286 }
287
288 Ok(results)
289}
290
291async fn build_tantivy_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
292 let index_root = if options.path.is_file() {
294 options.path.parent().unwrap_or(&options.path)
295 } else {
296 &options.path
297 };
298
299 let index_dir = index_root.join(".ck");
300 let tantivy_index_path = index_dir.join("tantivy_index");
301
302 fs::create_dir_all(&tantivy_index_path)?;
303
304 let mut schema_builder = Schema::builder();
305 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
306 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
307 let schema = schema_builder.build();
308
309 let index = Index::create_in_dir(&tantivy_index_path, schema.clone())
310 .map_err(|e| CkError::Index(format!("Failed to create tantivy index: {}", e)))?;
311
312 let mut index_writer = index.writer(50_000_000)
313 .map_err(|e| CkError::Index(format!("Failed to create index writer: {}", e)))?;
314
315 let files = collect_files(&index_root, true, &options.exclude_patterns)?;
316
317 for file_path in &files {
318 if let Ok(content) = fs::read_to_string(file_path) {
319 let doc = doc!(
320 content_field => content,
321 path_field => file_path.display().to_string()
322 );
323 index_writer.add_document(doc)?;
324 }
325 }
326
327 index_writer.commit()
328 .map_err(|e| CkError::Index(format!("Failed to commit index: {}", e)))?;
329
330 let tantivy_index_path = index_root.join(".ck").join("tantivy_index");
332 let mut schema_builder = Schema::builder();
333 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
334 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
335 let _schema = schema_builder.build();
336
337 let index = Index::open_in_dir(&tantivy_index_path)
338 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
339
340 let reader = index
341 .reader_builder()
342 .reload_policy(ReloadPolicy::OnCommitWithDelay)
343 .try_into()
344 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
345
346 let searcher = reader.searcher();
347 let query_parser = QueryParser::for_index(&index, vec![content_field]);
348
349 let query = query_parser
350 .parse_query(&options.query)
351 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
352
353 let top_docs = if let Some(top_k) = options.top_k {
354 searcher.search(&query, &TopDocs::with_limit(top_k))?
355 } else {
356 searcher.search(&query, &TopDocs::with_limit(100))?
357 };
358
359 let mut raw_results = Vec::new();
361 for (_score, doc_address) in top_docs {
362 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
363 let path_text = retrieved_doc
364 .get_first(path_field)
365 .map(|field_value| field_value.as_str().unwrap_or(""))
366 .unwrap_or("");
367 let content_text = retrieved_doc
368 .get_first(content_field)
369 .map(|field_value| field_value.as_str().unwrap_or(""))
370 .unwrap_or("");
371
372 let file_path = PathBuf::from(path_text);
373 let preview = if options.full_section {
374 content_text.to_string()
375 } else {
376 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
377 };
378
379 raw_results.push((_score, SearchResult {
380 file: file_path,
381 span: Span {
382 byte_start: 0,
383 byte_end: content_text.len(),
384 line_start: 1,
385 line_end: content_text.lines().count(),
386 },
387 score: _score,
388 preview,
389 lang: detect_language(&PathBuf::from(path_text)),
390 symbol: None,
391 }));
392 }
393
394 let mut results = Vec::new();
396 if !raw_results.is_empty() {
397 let max_score = raw_results.iter().map(|(score, _)| *score).fold(0.0f32, f32::max);
398 if max_score > 0.0 {
399 for (raw_score, mut result) in raw_results {
400 let normalized_score = raw_score / max_score;
401
402 if let Some(threshold) = options.threshold {
404 if normalized_score < threshold {
405 continue;
406 }
407 }
408
409 result.score = normalized_score;
410 results.push(result);
411 }
412 }
413 }
414
415 Ok(results)
416}
417
418#[allow(dead_code)]
419async fn semantic_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
420 semantic_search_with_progress(options, None).await
421}
422
423async fn semantic_search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
424 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
426 if options.path.is_file() {
427 options.path.parent().unwrap_or(&options.path).to_path_buf()
428 } else {
429 options.path.clone()
430 }
431 });
432
433 let index_dir = index_root.join(".ck");
434 if !index_dir.exists() {
435 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
436 }
437
438 let ann_index_path = index_dir.join("ann_index.bin");
439 let embeddings_path = index_dir.join("embeddings.json");
440
441 if !ann_index_path.exists() || !embeddings_path.exists() {
442 return build_semantic_index_with_progress(options, progress_callback).await;
443 }
444
445 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
447
448 let embeddings_data = fs::read_to_string(&embeddings_path)?;
450 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
451
452 if let Some(ref callback) = progress_callback {
454 callback("Loading embedding model...");
455 }
456
457 let mut embedder = if let Some(ref callback) = progress_callback {
458 let _cb = callback.as_ref();
459 let model_cb = Box::new(|msg: &str| {
460 eprintln!("Model: {}", msg);
463 }) as ck_embed::ModelDownloadCallback;
464 ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), Some(model_cb))?
465 } else {
466 ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?
467 };
468 let query_embeddings = embedder.embed(&[options.query.clone()])?;
469
470 if query_embeddings.is_empty() {
471 return Ok(Vec::new());
472 }
473
474 let query_embedding = &query_embeddings[0];
475
476 let top_k = options.top_k.unwrap_or(10);
478 let similar_docs = ann_index.search(query_embedding, top_k);
479
480 let mut results = Vec::new();
481
482 let filter_by_file = options.path.is_file();
484 let target_file = if filter_by_file {
485 Some(options.path.canonicalize().unwrap_or_else(|_| options.path.clone()))
486 } else {
487 None
488 };
489
490 for (doc_id, similarity) in similar_docs {
491 if let Some(threshold) = options.threshold {
493 if similarity < threshold {
494 continue;
495 }
496 }
497
498 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
499 if let Some(target) = &target_file {
501 let canonical_result = file_path.canonicalize().unwrap_or_else(|_| file_path.clone());
502 if canonical_result != *target {
503 continue; }
505 }
506
507 let preview = if options.full_section {
509 content.clone()
510 } else {
511 content.lines().take(3).collect::<Vec<_>>().join("\n")
512 };
513
514 results.push(SearchResult {
515 file: file_path.clone(),
516 span: Span {
517 byte_start: 0,
518 byte_end: content.len(),
519 line_start: 1,
520 line_end: content.lines().count(),
521 },
522 score: similarity,
523 preview,
524 lang: detect_language(file_path),
525 symbol: None,
526 });
527 }
528 }
529
530 Ok(results)
531}
532
533#[allow(dead_code)]
534async fn build_semantic_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
535 build_semantic_index_with_progress(options, None).await
536}
537
538async fn build_semantic_index_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
539 let index_root = if options.path.is_file() {
541 options.path.parent().unwrap_or(&options.path)
542 } else {
543 &options.path
544 };
545
546 let index_dir = index_root.join(".ck");
547 let ann_index_path = index_dir.join("ann_index.bin");
548 let embeddings_path = index_dir.join("embeddings.json");
549
550 fs::create_dir_all(&index_dir)?;
551
552 if let Some(ref callback) = progress_callback {
553 callback("Building semantic index (no index found)...");
554 }
555
556 eprintln!("Building semantic index (no existing index found)...");
558
559 let files = collect_files(&index_root, true, &options.exclude_patterns)?;
561
562 if let Some(ref callback) = progress_callback {
563 callback(&format!("Found {} files to index", files.len()));
564 }
565 eprintln!("Found {} files to embed and index", files.len());
566
567 let mut file_embeddings = Vec::new();
568 let mut embeddings = Vec::new();
569
570 if let Some(ref callback) = progress_callback {
572 callback("Loading embedding model...");
573 }
574
575 let model_callback = if progress_callback.is_some() {
576 Some(Box::new(|msg: &str| {
577 eprintln!("Model: {}", msg);
578 }) as ck_embed::ModelDownloadCallback)
579 } else {
580 None
581 };
582
583 let mut embedder = ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), model_callback)?;
584
585 if let Some(ref callback) = progress_callback {
586 callback("Generating embeddings for code chunks...");
587 }
588
589 for (file_idx, file_path) in files.iter().enumerate() {
590 if let Ok(content) = fs::read_to_string(file_path) {
591 if let Some(ref callback) = progress_callback {
592 let file_name = file_path.file_name()
593 .map(|n| n.to_string_lossy().to_string())
594 .unwrap_or_else(|| file_path.to_string_lossy().to_string());
595 callback(&format!("Processing {}/{}: {}", file_idx + 1, files.len(), file_name));
596 }
597
598 let chunks = ck_chunk::chunk_text(&content, detect_language(file_path).as_deref())?;
600
601 for chunk in chunks {
602 let chunk_embeddings = embedder.embed(&[chunk.text.clone()])?;
603 if !chunk_embeddings.is_empty() {
604 embeddings.push(chunk_embeddings[0].clone());
605 file_embeddings.push((file_path.clone(), chunk.text));
606 }
607 }
608 }
609 }
610
611 if let Some(ref callback) = progress_callback {
612 callback(&format!("Built {} embeddings, creating search index...", embeddings.len()));
613 }
614 eprintln!("Generated {} embeddings, building search index...", embeddings.len());
615
616 let index = ck_ann::SimpleIndex::build(&embeddings)?;
618 index.save(&ann_index_path)?;
619
620 let embeddings_json = serde_json::to_string(&file_embeddings)?;
622 fs::write(&embeddings_path, embeddings_json)?;
623
624 if let Some(ref callback) = progress_callback {
625 callback("Semantic index built successfully, running search...");
626 }
627 eprintln!("Semantic index built successfully!");
628
629 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
631
632 let embeddings_data = fs::read_to_string(&embeddings_path)?;
634 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
635
636 let mut embedder = ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?;
638 let query_embeddings = embedder.embed(&[options.query.clone()])?;
639
640 if query_embeddings.is_empty() {
641 return Ok(Vec::new());
642 }
643
644 let query_embedding = &query_embeddings[0];
645
646 let top_k = options.top_k.unwrap_or(10);
648 let similar_docs = ann_index.search(query_embedding, top_k);
649
650 let mut results = Vec::new();
651
652 let filter_by_file = options.path.is_file();
654 let target_file = if filter_by_file {
655 Some(options.path.canonicalize().unwrap_or_else(|_| options.path.clone()))
656 } else {
657 None
658 };
659
660 for (doc_id, similarity) in similar_docs {
661 if let Some(threshold) = options.threshold {
663 if similarity < threshold {
664 continue;
665 }
666 }
667
668 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
669 if let Some(target) = &target_file {
671 let canonical_result = file_path.canonicalize().unwrap_or_else(|_| file_path.clone());
672 if canonical_result != *target {
673 continue; }
675 }
676
677 let preview = if options.full_section {
679 content.clone()
680 } else {
681 content.lines().take(3).collect::<Vec<_>>().join("\n")
682 };
683
684 results.push(SearchResult {
685 file: file_path.clone(),
686 span: Span {
687 byte_start: 0,
688 byte_end: content.len(),
689 line_start: 1,
690 line_end: content.lines().count(),
691 },
692 score: similarity,
693 preview,
694 lang: detect_language(file_path),
695 symbol: None,
696 });
697 }
698 }
699
700 Ok(results)
701}
702
703#[allow(dead_code)]
704async fn hybrid_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
705 hybrid_search_with_progress(options, None).await
706}
707
708async fn hybrid_search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
709 if let Some(ref callback) = progress_callback {
710 callback("Running regex search...");
711 }
712 let regex_results = regex_search(options)?;
713
714 if let Some(ref callback) = progress_callback {
715 callback("Running semantic search...");
716 }
717 let semantic_results = semantic_search_v3_with_progress(options, progress_callback).await?;
718
719 let mut combined = HashMap::new();
720
721 for (rank, result) in regex_results.iter().enumerate() {
722 let key = format!("{}:{}", result.file.display(), result.span.line_start);
723 combined.entry(key).or_insert(Vec::new()).push((rank + 1, result.clone()));
724 }
725
726 for (rank, result) in semantic_results.iter().enumerate() {
727 let key = format!("{}:{}", result.file.display(), result.span.line_start);
728 combined.entry(key).or_insert(Vec::new()).push((rank + 1, result.clone()));
729 }
730
731 let mut rrf_results: Vec<SearchResult> = combined
733 .into_iter()
734 .map(|(_, ranks)| {
735 let mut result = ranks[0].1.clone();
736 let rrf_score = ranks.iter().map(|(rank, _)| 1.0 / (60.0 + *rank as f32)).sum();
737 result.score = rrf_score;
738 result
739 })
740 .filter(|result| {
741 if let Some(threshold) = options.threshold {
743 result.score >= threshold
744 } else {
745 true
746 }
747 })
748 .collect();
749
750 rrf_results.sort_by(|a, b| {
752 b.score
753 .partial_cmp(&a.score)
754 .unwrap_or(std::cmp::Ordering::Equal)
755 });
756
757 if let Some(top_k) = options.top_k {
758 rrf_results.truncate(top_k);
759 }
760
761 Ok(rrf_results)
762}
763
764fn build_globset(patterns: &[String]) -> GlobSet {
765 let mut builder = GlobSetBuilder::new();
766 for pat in patterns {
767 if let Ok(glob) = Glob::new(pat) {
769 builder.add(glob);
770 }
771 }
772 builder.build().unwrap_or_else(|_| GlobSet::empty())
773}
774
775fn should_exclude_path(path: &Path, exclude_patterns: &[String]) -> bool {
776 let globset = build_globset(exclude_patterns);
777 if globset.is_match(path) {
779 return true;
780 }
781 for component in path.components() {
782 if let std::path::Component::Normal(name) = component {
783 if globset.is_match(name) {
784 return true;
785 }
786 }
787 }
788 false
789}
790
791fn collect_files(path: &Path, recursive: bool, exclude_patterns: &[String]) -> Result<Vec<PathBuf>> {
792 let mut files = Vec::new();
793 let globset = build_globset(exclude_patterns);
794
795 if path.is_file() {
796 files.push(path.to_path_buf());
798 } else if recursive {
799 for entry in WalkDir::new(path)
800 .into_iter()
801 .filter_entry(|e| {
802 let name = e.file_name();
804 !globset.is_match(e.path()) && !globset.is_match(name)
805 }) {
806 match entry {
807 Ok(entry) => {
808 if entry.file_type().is_file() && !should_exclude_path(entry.path(), exclude_patterns) {
809 files.push(entry.path().to_path_buf());
810 }
811 }
812 Err(e) => {
813 tracing::debug!("Skipping path due to error: {}", e);
815 continue;
816 }
817 }
818 }
819 } else {
820 match fs::read_dir(path) {
821 Ok(read_dir) => {
822 for entry in read_dir {
823 match entry {
824 Ok(entry) => {
825 let path = entry.path();
826 if path.is_file() && !should_exclude_path(&path, exclude_patterns) {
827 files.push(path);
828 }
829 }
830 Err(e) => {
831 tracing::debug!("Skipping directory entry due to error: {}", e);
832 continue;
833 }
834 }
835 }
836 }
837 Err(e) => {
838 tracing::debug!("Cannot read directory {:?}: {}", path, e);
839 return Err(e.into());
840 }
841 }
842 }
843
844 Ok(files)
845}
846
/// Maps the extension of `path` to a language name.
///
/// Known extensions are translated (for example `rs` to `rust`, and both C++
/// sources and `h`/`hpp` headers to `cpp`); any other extension is returned
/// unchanged. Returns `None` when the path has no extension or the extension
/// is not valid UTF-8. Matching is case-sensitive.
fn detect_language(path: &Path) -> Option<String> {
    let extension = path.extension()?.to_str()?;

    let language = match extension {
        "rs" => "rust",
        "py" => "python",
        "js" => "javascript",
        "ts" => "typescript",
        "go" => "go",
        "java" => "java",
        "c" => "c",
        "cpp" | "cc" | "cxx" | "h" | "hpp" => "cpp",
        "cs" => "csharp",
        "rb" => "ruby",
        "php" => "php",
        "swift" => "swift",
        "kt" => "kotlin",
        other => other,
    };

    Some(language.to_owned())
}
869
870async fn ensure_index_updated(path: &Path, force_reindex: bool, need_embeddings: bool) -> Result<()> {
871
872 let index_root_buf = find_nearest_index_root(path).unwrap_or_else(|| {
874 if path.is_file() {
875 path.parent().unwrap_or(path).to_path_buf()
876 } else {
877 path.to_path_buf()
878 }
879 });
880 let index_root = &index_root_buf;
881
882 if force_reindex {
884 let stats = ck_index::smart_update_index_with_progress(index_root, false, None, need_embeddings).await?;
885 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
886 tracing::info!("Index updated: {} files indexed, {} orphaned files removed",
887 stats.files_indexed, stats.orphaned_files_removed);
888 }
889 return Ok(());
890 }
891
892 let stats = ck_index::smart_update_index_with_progress(index_root, false, None, need_embeddings).await?;
894 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
895 tracing::info!("Index updated: {} files indexed, {} orphaned files removed",
896 stats.files_indexed, stats.orphaned_files_removed);
897 }
898
899 Ok(())
900}
901
902fn get_context_preview(lines: &[&str], line_idx: usize, options: &SearchOptions) -> String {
903 let before = options.before_context_lines.max(options.context_lines);
904 let after = options.after_context_lines.max(options.context_lines);
905
906 if before > 0 || after > 0 {
907 let start_idx = line_idx.saturating_sub(before);
908 let end_idx = (line_idx + after + 1).min(lines.len());
909 lines[start_idx..end_idx].join("\n")
910 } else {
911 lines[line_idx].to_string()
912 }
913}
914
915fn extract_code_sections(file_path: &Path, content: &str) -> Option<Vec<(usize, usize, String)>> {
916 let lang = match file_path.extension().and_then(|s| s.to_str()) {
918 Some("py") => Some("python"),
919 Some("js") => Some("javascript"),
920 Some("ts") | Some("tsx") => Some("typescript"),
921 _ => return None,
922 };
923
924 if let Ok(chunks) = ck_chunk::chunk_text(content, lang) {
926 let sections: Vec<(usize, usize, String)> = chunks
927 .into_iter()
928 .filter(|chunk| matches!(
929 chunk.chunk_type,
930 ck_chunk::ChunkType::Function |
931 ck_chunk::ChunkType::Class |
932 ck_chunk::ChunkType::Method
933 ))
934 .map(|chunk| {
935 (
936 chunk.span.line_start - 1, chunk.span.line_end - 1,
938 chunk.text,
939 )
940 })
941 .collect();
942
943 if sections.is_empty() {
944 None
945 } else {
946 Some(sections)
947 }
948 } else {
949 None
950 }
951}
952
/// Returns the text of the first section whose inclusive `(start, end)` line
/// range contains `line_idx`, or `None` when no section covers that line.
fn find_containing_section(sections: &[(usize, usize, String)], line_idx: usize) -> Option<&String> {
    sections
        .iter()
        .find(|(start, end, _)| (*start..=*end).contains(&line_idx))
        .map(|(_, _, text)| text)
}
961
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes a small fixed set of fixture files into `dir` and returns their paths.
    fn create_test_files(dir: &std::path::Path) -> Vec<PathBuf> {
        let files = vec![
            ("test1.txt", "hello world rust programming"),
            ("test2.rs", "fn main() { println!(\"Hello Rust\"); }"),
            ("test3.py", "print('Hello Python')"),
            ("test4.txt", "machine learning artificial intelligence"),
        ];

        let mut paths = Vec::new();
        for (name, content) in files {
            let path = dir.join(name);
            fs::write(&path, content).unwrap();
            paths.push(path);
        }
        paths
    }

    #[test]
    fn test_detect_language() {
        assert_eq!(detect_language(&PathBuf::from("test.rs")), Some("rust".to_string()));
        assert_eq!(detect_language(&PathBuf::from("test.py")), Some("python".to_string()));
        assert_eq!(detect_language(&PathBuf::from("test.js")), Some("javascript".to_string()));
        assert_eq!(detect_language(&PathBuf::from("test.unknown")), Some("unknown".to_string()));
        assert_eq!(detect_language(&PathBuf::from("noext")), None);
    }

    #[test]
    fn test_collect_files() {
        let temp_dir = TempDir::new().unwrap();
        let test_files = create_test_files(temp_dir.path());

        // Non-recursive listing of the directory.
        let files = collect_files(temp_dir.path(), false, &[]).unwrap();
        assert_eq!(files.len(), 4);

        // Recursive listing finds the same flat set.
        let files = collect_files(temp_dir.path(), true, &[]).unwrap();
        assert_eq!(files.len(), 4);

        // A single file is returned as-is.
        let files = collect_files(&test_files[0], false, &[]).unwrap();
        assert_eq!(files.len(), 1);
        assert_eq!(files[0], test_files[0]);
    }

    #[test]
    fn test_regex_search() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "rust".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());

        let rust_matches: Vec<_> = results
            .iter()
            .filter(|r| r.preview.to_lowercase().contains("rust"))
            .collect();
        assert!(!rust_matches.is_empty());
    }

    #[test]
    fn test_regex_search_case_insensitive() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "HELLO".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_fixed_string() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "fn main()".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            fixed_string: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_whole_word() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join("word_test.txt"), "rust rusty rustacean").unwrap();
        // Contains "rust" only as part of longer words, so it must not match.
        fs::write(temp_dir.path().join("partial.txt"), "rusty rustacean").unwrap();

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "rust".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            whole_word: true,
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(
            results[0].file.file_name().and_then(|n| n.to_str()),
            Some("word_test.txt")
        );
    }

    #[test]
    fn test_regex_search_top_k() {
        let temp_dir = TempDir::new().unwrap();

        // Ten files with one match each, so truncation has something to cut.
        for i in 0..10 {
            fs::write(temp_dir.path().join(format!("file{}.txt", i)), "test content").unwrap();
        }

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "test".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            top_k: Some(5),
            ..Default::default()
        };

        let results = regex_search(&options).unwrap();
        assert_eq!(results.len(), 5);
    }

    #[test]
    fn test_search_file() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1: hello\nline 2: world\nline 3: rust programming").unwrap();

        let regex = regex::Regex::new("rust").unwrap();
        let options = SearchOptions::default();

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].span.line_start, 3);
        assert!(results[0].preview.contains("rust"));
    }

    #[test]
    fn test_search_file_with_context() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1\nline 2\ntarget line\nline 4\nline 5").unwrap();

        let regex = regex::Regex::new("target").unwrap();
        let options = SearchOptions {
            context_lines: 1,
            ..Default::default()
        };

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);

        // Exactly one line of context on each side of the match.
        assert_eq!(results[0].preview, "line 2\ntarget line\nline 4");
    }

    #[tokio::test]
    async fn test_search_main_function() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            mode: SearchMode::Regex,
            query: "hello".to_string(),
            path: temp_dir.path().to_path_buf(),
            recursive: true,
            case_insensitive: true,
            ..Default::default()
        };

        let results = search(&options).await.unwrap();
        assert!(!results.is_empty());
    }
}
1169}