1use anyhow::Result;
2use ck_core::{CkError, SearchMode, SearchOptions, SearchResult, Span};
3use globset::{Glob, GlobSet, GlobSetBuilder};
4use regex::{Regex, RegexBuilder};
5use std::collections::HashMap;
6use std::fs;
7use std::path::{Path, PathBuf};
8use walkdir::WalkDir;
9use rayon::prelude::*;
10use tantivy::collector::TopDocs;
11use tantivy::query::QueryParser;
12use tantivy::schema::{Schema, STORED, TEXT, Value};
13use tantivy::{doc, Index, ReloadPolicy, TantivyDocument};
14use ck_ann::AnnIndex;
15use std::path::PathBuf as StdPathBuf;
16
17mod semantic_v3;
18pub use semantic_v3::{semantic_search_v3, semantic_search_v3_with_progress};
19
/// Callback used to report human-readable progress messages while a search runs.
///
/// The search functions in this module invoke it with short status strings
/// (for example `"Running regex search..."`). It is boxed and `Send + Sync`.
pub type SearchProgressCallback = Box<dyn Fn(&str) + Send + Sync>;
21
22fn extract_content_from_span(file_path: &Path, span: &ck_core::Span) -> Result<String> {
24 let content = fs::read_to_string(file_path)?;
25 let lines: Vec<&str> = content.lines().collect();
26
27 if span.line_start == 0 || span.line_start > lines.len() {
28 return Ok(String::new());
29 }
30
31 let start_idx = span.line_start - 1; let end_idx = (span.line_end - 1).min(lines.len().saturating_sub(1));
33
34 if start_idx <= end_idx {
35 Ok(lines[start_idx..=end_idx].join("\n"))
36 } else {
37 Ok(lines[start_idx].to_string())
38 }
39}
40
/// Walks upward from `path` and returns the first directory that contains a
/// `.ck` entry.
///
/// When `path` is a file the walk starts at its parent directory; otherwise it
/// starts at `path` itself. Returns `None` if no ancestor contains `.ck`.
fn find_nearest_index_root(path: &Path) -> Option<StdPathBuf> {
    let start = if path.is_file() {
        path.parent().unwrap_or(path)
    } else {
        path
    };

    // `ancestors()` yields `start`, then its parent, and so on up to the root.
    start
        .ancestors()
        .find(|dir| dir.join(".ck").exists())
        .map(Path::to_path_buf)
}
53
54pub async fn search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
55 search_with_progress(options, None).await
56}
57
58pub async fn search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
59 if !options.path.exists() {
61 return Err(ck_core::CkError::Search(format!("Path does not exist: {}", options.path.display())).into());
62 }
63
64 if !matches!(options.mode, SearchMode::Regex) {
66 let need_embeddings = matches!(options.mode, SearchMode::Semantic | SearchMode::Hybrid);
67 ensure_index_updated(&options.path, options.reindex, need_embeddings).await?;
68 }
69
70 match options.mode {
71 SearchMode::Regex => regex_search(options),
72 SearchMode::Lexical => lexical_search(options).await,
73 SearchMode::Semantic => {
74 semantic_search_v3_with_progress(options, progress_callback).await
76 },
77 SearchMode::Hybrid => hybrid_search_with_progress(options, progress_callback).await,
78 }
79}
80
81fn regex_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
82 let pattern = if options.fixed_string {
83 regex::escape(&options.query)
84 } else if options.whole_word {
85 format!(r"\b{}\b", regex::escape(&options.query))
86 } else {
87 options.query.clone()
88 };
89
90 let regex = RegexBuilder::new(&pattern)
91 .case_insensitive(options.case_insensitive)
92 .build()
93 .map_err(|e| CkError::Regex(e))?;
94
95 let should_recurse = options.path.is_dir() || options.recursive;
97 let files = collect_files(&options.path, should_recurse, &options.exclude_patterns)?;
98
99 let results: Vec<Vec<SearchResult>> = files
100 .par_iter()
101 .filter_map(|file_path| {
102 match search_file(®ex, file_path, options) {
103 Ok(matches) => {
104 if matches.is_empty() {
105 None
106 } else {
107 Some(matches)
108 }
109 }
110 Err(e) => {
111 tracing::debug!("Error searching {:?}: {}", file_path, e);
112 None
113 }
114 }
115 })
116 .collect();
117
118 let mut all_results: Vec<SearchResult> = results.into_iter().flatten().collect();
119 all_results.sort_by(|a, b| {
121 let path_cmp = a.file.cmp(&b.file);
122 if path_cmp != std::cmp::Ordering::Equal {
123 return path_cmp;
124 }
125 a.span.line_start.cmp(&b.span.line_start)
126 });
127
128 if let Some(top_k) = options.top_k {
129 all_results.truncate(top_k);
130 }
131
132 Ok(all_results)
133}
134
135fn search_file(regex: &Regex, file_path: &Path, options: &SearchOptions) -> Result<Vec<SearchResult>> {
136 let content = fs::read_to_string(file_path)?;
137 let lines: Vec<&str> = content.lines().collect();
138 let mut results = Vec::new();
139
140 let code_sections = if options.full_section {
142 extract_code_sections(file_path, &content)
143 } else {
144 None
145 };
146
147 let mut byte_offset = 0;
149
150 for (line_idx, line) in lines.iter().enumerate() {
151 let line_number = line_idx + 1;
152
153 for mat in regex.find_iter(line) {
155 let preview = if options.full_section {
156 if let Some(ref sections) = code_sections {
158 if let Some(section) = find_containing_section(sections, line_idx) {
159 section.clone()
160 } else {
161 get_context_preview(&lines, line_idx, options)
163 }
164 } else {
165 get_context_preview(&lines, line_idx, options)
166 }
167 } else {
168 get_context_preview(&lines, line_idx, options)
169 };
170
171 results.push(SearchResult {
172 file: file_path.to_path_buf(),
173 span: Span {
174 byte_start: byte_offset + mat.start(),
175 byte_end: byte_offset + mat.end(),
176 line_start: line_number,
177 line_end: line_number,
178 },
179 score: 1.0,
180 preview,
181 lang: detect_language(file_path),
182 symbol: None,
183 });
184 }
185
186 byte_offset += line.len();
188 if line_idx < lines.len() - 1 {
189 byte_offset += 1; }
191 }
192
193 Ok(results)
194}
195
196async fn lexical_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
197 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
199 if options.path.is_file() {
200 options.path.parent().unwrap_or(&options.path).to_path_buf()
201 } else {
202 options.path.clone()
203 }
204 });
205
206 let index_dir = index_root.join(".ck");
207 if !index_dir.exists() {
208 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
209 }
210
211 let tantivy_index_path = index_dir.join("tantivy_index");
212
213 if !tantivy_index_path.exists() {
214 return build_tantivy_index(options).await;
215 }
216
217 let mut schema_builder = Schema::builder();
218 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
219 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
220 let _schema = schema_builder.build();
221
222 let index = Index::open_in_dir(&tantivy_index_path)
223 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
224
225 let reader = index
226 .reader_builder()
227 .reload_policy(ReloadPolicy::OnCommitWithDelay)
228 .try_into()
229 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
230
231 let searcher = reader.searcher();
232 let query_parser = QueryParser::for_index(&index, vec![content_field]);
233
234 let query = query_parser
235 .parse_query(&options.query)
236 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
237
238 let top_docs = if let Some(top_k) = options.top_k {
239 searcher.search(&query, &TopDocs::with_limit(top_k))?
240 } else {
241 searcher.search(&query, &TopDocs::with_limit(100))?
242 };
243
244 let mut raw_results = Vec::new();
246 for (_score, doc_address) in top_docs {
247 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
248 let path_text = retrieved_doc
249 .get_first(path_field)
250 .map(|field_value| field_value.as_str().unwrap_or(""))
251 .unwrap_or("");
252 let content_text = retrieved_doc
253 .get_first(content_field)
254 .map(|field_value| field_value.as_str().unwrap_or(""))
255 .unwrap_or("");
256
257 let file_path = PathBuf::from(path_text);
258 let preview = if options.full_section {
259 content_text.to_string()
260 } else {
261 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
262 };
263
264 raw_results.push((_score, SearchResult {
265 file: file_path,
266 span: Span {
267 byte_start: 0,
268 byte_end: content_text.len(),
269 line_start: 1,
270 line_end: content_text.lines().count(),
271 },
272 score: _score,
273 preview,
274 lang: detect_language(&PathBuf::from(path_text)),
275 symbol: None,
276 }));
277 }
278
279 let mut results = Vec::new();
281 if !raw_results.is_empty() {
282 let max_score = raw_results.iter().map(|(score, _)| *score).fold(0.0f32, f32::max);
283 if max_score > 0.0 {
284 for (raw_score, mut result) in raw_results {
285 let normalized_score = raw_score / max_score;
286
287 if let Some(threshold) = options.threshold {
289 if normalized_score < threshold {
290 continue;
291 }
292 }
293
294 result.score = normalized_score;
295 results.push(result);
296 }
297 }
298 }
299
300 Ok(results)
301}
302
303async fn build_tantivy_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
304 let index_root = if options.path.is_file() {
306 options.path.parent().unwrap_or(&options.path)
307 } else {
308 &options.path
309 };
310
311 let index_dir = index_root.join(".ck");
312 let tantivy_index_path = index_dir.join("tantivy_index");
313
314 fs::create_dir_all(&tantivy_index_path)?;
315
316 let mut schema_builder = Schema::builder();
317 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
318 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
319 let schema = schema_builder.build();
320
321 let index = Index::create_in_dir(&tantivy_index_path, schema.clone())
322 .map_err(|e| CkError::Index(format!("Failed to create tantivy index: {}", e)))?;
323
324 let mut index_writer = index.writer(50_000_000)
325 .map_err(|e| CkError::Index(format!("Failed to create index writer: {}", e)))?;
326
327 let files = collect_files(&index_root, true, &options.exclude_patterns)?;
328
329 for file_path in &files {
330 if let Ok(content) = fs::read_to_string(file_path) {
331 let doc = doc!(
332 content_field => content,
333 path_field => file_path.display().to_string()
334 );
335 index_writer.add_document(doc)?;
336 }
337 }
338
339 index_writer.commit()
340 .map_err(|e| CkError::Index(format!("Failed to commit index: {}", e)))?;
341
342 let tantivy_index_path = index_root.join(".ck").join("tantivy_index");
344 let mut schema_builder = Schema::builder();
345 let content_field = schema_builder.add_text_field("content", TEXT | STORED);
346 let path_field = schema_builder.add_text_field("path", TEXT | STORED);
347 let _schema = schema_builder.build();
348
349 let index = Index::open_in_dir(&tantivy_index_path)
350 .map_err(|e| CkError::Index(format!("Failed to open tantivy index: {}", e)))?;
351
352 let reader = index
353 .reader_builder()
354 .reload_policy(ReloadPolicy::OnCommitWithDelay)
355 .try_into()
356 .map_err(|e| CkError::Index(format!("Failed to create index reader: {}", e)))?;
357
358 let searcher = reader.searcher();
359 let query_parser = QueryParser::for_index(&index, vec![content_field]);
360
361 let query = query_parser
362 .parse_query(&options.query)
363 .map_err(|e| CkError::Search(format!("Failed to parse query: {}", e)))?;
364
365 let top_docs = if let Some(top_k) = options.top_k {
366 searcher.search(&query, &TopDocs::with_limit(top_k))?
367 } else {
368 searcher.search(&query, &TopDocs::with_limit(100))?
369 };
370
371 let mut raw_results = Vec::new();
373 for (_score, doc_address) in top_docs {
374 let retrieved_doc: TantivyDocument = searcher.doc(doc_address)?;
375 let path_text = retrieved_doc
376 .get_first(path_field)
377 .map(|field_value| field_value.as_str().unwrap_or(""))
378 .unwrap_or("");
379 let content_text = retrieved_doc
380 .get_first(content_field)
381 .map(|field_value| field_value.as_str().unwrap_or(""))
382 .unwrap_or("");
383
384 let file_path = PathBuf::from(path_text);
385 let preview = if options.full_section {
386 content_text.to_string()
387 } else {
388 content_text.lines().take(3).collect::<Vec<_>>().join("\n")
389 };
390
391 raw_results.push((_score, SearchResult {
392 file: file_path,
393 span: Span {
394 byte_start: 0,
395 byte_end: content_text.len(),
396 line_start: 1,
397 line_end: content_text.lines().count(),
398 },
399 score: _score,
400 preview,
401 lang: detect_language(&PathBuf::from(path_text)),
402 symbol: None,
403 }));
404 }
405
406 let mut results = Vec::new();
408 if !raw_results.is_empty() {
409 let max_score = raw_results.iter().map(|(score, _)| *score).fold(0.0f32, f32::max);
410 if max_score > 0.0 {
411 for (raw_score, mut result) in raw_results {
412 let normalized_score = raw_score / max_score;
413
414 if let Some(threshold) = options.threshold {
416 if normalized_score < threshold {
417 continue;
418 }
419 }
420
421 result.score = normalized_score;
422 results.push(result);
423 }
424 }
425 }
426
427 Ok(results)
428}
429
430#[allow(dead_code)]
431async fn semantic_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
432 semantic_search_with_progress(options, None).await
433}
434
435async fn semantic_search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
436 let index_root = find_nearest_index_root(&options.path).unwrap_or_else(|| {
438 if options.path.is_file() {
439 options.path.parent().unwrap_or(&options.path).to_path_buf()
440 } else {
441 options.path.clone()
442 }
443 });
444
445 let index_dir = index_root.join(".ck");
446 if !index_dir.exists() {
447 return Err(CkError::Index("No index found. Run 'ck index' first.".to_string()).into());
448 }
449
450 let ann_index_path = index_dir.join("ann_index.bin");
451 let embeddings_path = index_dir.join("embeddings.json");
452
453 if !ann_index_path.exists() || !embeddings_path.exists() {
454 return build_semantic_index_with_progress(options, progress_callback).await;
455 }
456
457 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
459
460 let embeddings_data = fs::read_to_string(&embeddings_path)?;
462 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
463
464 if let Some(ref callback) = progress_callback {
466 callback("Loading embedding model...");
467 }
468
469 let mut embedder = if let Some(ref callback) = progress_callback {
470 let _cb = callback.as_ref();
471 let model_cb = Box::new(|msg: &str| {
472 eprintln!("Model: {}", msg);
475 }) as ck_embed::ModelDownloadCallback;
476 ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), Some(model_cb))?
477 } else {
478 ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?
479 };
480 let query_embeddings = embedder.embed(&[options.query.clone()])?;
481
482 if query_embeddings.is_empty() {
483 return Ok(Vec::new());
484 }
485
486 let query_embedding = &query_embeddings[0];
487
488 let top_k = options.top_k.unwrap_or(10);
490 let similar_docs = ann_index.search(query_embedding, top_k);
491
492 let mut results = Vec::new();
493
494 let filter_by_file = options.path.is_file();
496 let target_file = if filter_by_file {
497 Some(options.path.canonicalize().unwrap_or_else(|_| options.path.clone()))
498 } else {
499 None
500 };
501
502 for (doc_id, similarity) in similar_docs {
503 if let Some(threshold) = options.threshold {
505 if similarity < threshold {
506 continue;
507 }
508 }
509
510 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
511 if let Some(target) = &target_file {
513 let canonical_result = file_path.canonicalize().unwrap_or_else(|_| file_path.clone());
514 if canonical_result != *target {
515 continue; }
517 }
518
519 let preview = if options.full_section {
521 content.clone()
522 } else {
523 content.lines().take(3).collect::<Vec<_>>().join("\n")
524 };
525
526 results.push(SearchResult {
527 file: file_path.clone(),
528 span: Span {
529 byte_start: 0,
530 byte_end: content.len(),
531 line_start: 1,
532 line_end: content.lines().count(),
533 },
534 score: similarity,
535 preview,
536 lang: detect_language(file_path),
537 symbol: None,
538 });
539 }
540 }
541
542 Ok(results)
543}
544
545#[allow(dead_code)]
546async fn build_semantic_index(options: &SearchOptions) -> Result<Vec<SearchResult>> {
547 build_semantic_index_with_progress(options, None).await
548}
549
550async fn build_semantic_index_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
551 let index_root = if options.path.is_file() {
553 options.path.parent().unwrap_or(&options.path)
554 } else {
555 &options.path
556 };
557
558 let index_dir = index_root.join(".ck");
559 let ann_index_path = index_dir.join("ann_index.bin");
560 let embeddings_path = index_dir.join("embeddings.json");
561
562 fs::create_dir_all(&index_dir)?;
563
564 if let Some(ref callback) = progress_callback {
565 callback("Building semantic index (no index found)...");
566 }
567
568 eprintln!("Building semantic index (no existing index found)...");
570
571 let files = collect_files(&index_root, true, &options.exclude_patterns)?;
573
574 if let Some(ref callback) = progress_callback {
575 callback(&format!("Found {} files to index", files.len()));
576 }
577 eprintln!("Found {} files to embed and index", files.len());
578
579 let mut file_embeddings = Vec::new();
580 let mut embeddings = Vec::new();
581
582 if let Some(ref callback) = progress_callback {
584 callback("Loading embedding model...");
585 }
586
587 let model_callback = if progress_callback.is_some() {
588 Some(Box::new(|msg: &str| {
589 eprintln!("Model: {}", msg);
590 }) as ck_embed::ModelDownloadCallback)
591 } else {
592 None
593 };
594
595 let mut embedder = ck_embed::create_embedder_with_progress(Some("BAAI/bge-small-en-v1.5"), model_callback)?;
596
597 if let Some(ref callback) = progress_callback {
598 callback("Generating embeddings for code chunks...");
599 }
600
601 for (file_idx, file_path) in files.iter().enumerate() {
602 if let Ok(content) = fs::read_to_string(file_path) {
603 if let Some(ref callback) = progress_callback {
604 let file_name = file_path.file_name()
605 .map(|n| n.to_string_lossy().to_string())
606 .unwrap_or_else(|| file_path.to_string_lossy().to_string());
607 callback(&format!("Processing {}/{}: {}", file_idx + 1, files.len(), file_name));
608 }
609
610 let chunks = ck_chunk::chunk_text(&content, detect_language(file_path).as_deref())?;
612
613 for chunk in chunks {
614 let chunk_embeddings = embedder.embed(&[chunk.text.clone()])?;
615 if !chunk_embeddings.is_empty() {
616 embeddings.push(chunk_embeddings[0].clone());
617 file_embeddings.push((file_path.clone(), chunk.text));
618 }
619 }
620 }
621 }
622
623 if let Some(ref callback) = progress_callback {
624 callback(&format!("Built {} embeddings, creating search index...", embeddings.len()));
625 }
626 eprintln!("Generated {} embeddings, building search index...", embeddings.len());
627
628 let index = ck_ann::SimpleIndex::build(&embeddings)?;
630 index.save(&ann_index_path)?;
631
632 let embeddings_json = serde_json::to_string(&file_embeddings)?;
634 fs::write(&embeddings_path, embeddings_json)?;
635
636 if let Some(ref callback) = progress_callback {
637 callback("Semantic index built successfully, running search...");
638 }
639 eprintln!("Semantic index built successfully!");
640
641 let ann_index = ck_ann::SimpleIndex::load(&ann_index_path)?;
643
644 let embeddings_data = fs::read_to_string(&embeddings_path)?;
646 let file_embeddings: Vec<(PathBuf, String)> = serde_json::from_str(&embeddings_data)?;
647
648 let mut embedder = ck_embed::create_embedder(Some("BAAI/bge-small-en-v1.5"))?;
650 let query_embeddings = embedder.embed(&[options.query.clone()])?;
651
652 if query_embeddings.is_empty() {
653 return Ok(Vec::new());
654 }
655
656 let query_embedding = &query_embeddings[0];
657
658 let top_k = options.top_k.unwrap_or(10);
660 let similar_docs = ann_index.search(query_embedding, top_k);
661
662 let mut results = Vec::new();
663
664 let filter_by_file = options.path.is_file();
666 let target_file = if filter_by_file {
667 Some(options.path.canonicalize().unwrap_or_else(|_| options.path.clone()))
668 } else {
669 None
670 };
671
672 for (doc_id, similarity) in similar_docs {
673 if let Some(threshold) = options.threshold {
675 if similarity < threshold {
676 continue;
677 }
678 }
679
680 if let Some((file_path, content)) = file_embeddings.get(doc_id as usize) {
681 if let Some(target) = &target_file {
683 let canonical_result = file_path.canonicalize().unwrap_or_else(|_| file_path.clone());
684 if canonical_result != *target {
685 continue; }
687 }
688
689 let preview = if options.full_section {
691 content.clone()
692 } else {
693 content.lines().take(3).collect::<Vec<_>>().join("\n")
694 };
695
696 results.push(SearchResult {
697 file: file_path.clone(),
698 span: Span {
699 byte_start: 0,
700 byte_end: content.len(),
701 line_start: 1,
702 line_end: content.lines().count(),
703 },
704 score: similarity,
705 preview,
706 lang: detect_language(file_path),
707 symbol: None,
708 });
709 }
710 }
711
712 Ok(results)
713}
714
715#[allow(dead_code)]
716async fn hybrid_search(options: &SearchOptions) -> Result<Vec<SearchResult>> {
717 hybrid_search_with_progress(options, None).await
718}
719
720async fn hybrid_search_with_progress(options: &SearchOptions, progress_callback: Option<SearchProgressCallback>) -> Result<Vec<SearchResult>> {
721 if let Some(ref callback) = progress_callback {
722 callback("Running regex search...");
723 }
724 let regex_results = regex_search(options)?;
725
726 if let Some(ref callback) = progress_callback {
727 callback("Running semantic search...");
728 }
729 let semantic_results = semantic_search_v3_with_progress(options, progress_callback).await?;
730
731 let mut combined = HashMap::new();
732
733 for (rank, result) in regex_results.iter().enumerate() {
734 let key = format!("{}:{}", result.file.display(), result.span.line_start);
735 combined.entry(key).or_insert(Vec::new()).push((rank + 1, result.clone()));
736 }
737
738 for (rank, result) in semantic_results.iter().enumerate() {
739 let key = format!("{}:{}", result.file.display(), result.span.line_start);
740 combined.entry(key).or_insert(Vec::new()).push((rank + 1, result.clone()));
741 }
742
743 let mut rrf_results: Vec<SearchResult> = combined
745 .into_iter()
746 .map(|(_, ranks)| {
747 let mut result = ranks[0].1.clone();
748 let rrf_score = ranks.iter().map(|(rank, _)| 1.0 / (60.0 + *rank as f32)).sum();
749 result.score = rrf_score;
750 result
751 })
752 .filter(|result| {
753 if let Some(threshold) = options.threshold {
755 result.score >= threshold
756 } else {
757 true
758 }
759 })
760 .collect();
761
762 rrf_results.sort_by(|a, b| {
764 b.score
765 .partial_cmp(&a.score)
766 .unwrap_or(std::cmp::Ordering::Equal)
767 });
768
769 if let Some(top_k) = options.top_k {
770 rrf_results.truncate(top_k);
771 }
772
773 Ok(rrf_results)
774}
775
776fn build_globset(patterns: &[String]) -> GlobSet {
777 let mut builder = GlobSetBuilder::new();
778 for pat in patterns {
779 if let Ok(glob) = Glob::new(pat) {
781 builder.add(glob);
782 }
783 }
784 builder.build().unwrap_or_else(|_| GlobSet::empty())
785}
786
787fn should_exclude_path(path: &Path, exclude_patterns: &[String]) -> bool {
788 let globset = build_globset(exclude_patterns);
789 if globset.is_match(path) {
791 return true;
792 }
793 for component in path.components() {
794 if let std::path::Component::Normal(name) = component {
795 if globset.is_match(name) {
796 return true;
797 }
798 }
799 }
800 false
801}
802
803fn collect_files(path: &Path, recursive: bool, exclude_patterns: &[String]) -> Result<Vec<PathBuf>> {
804 let mut files = Vec::new();
805 let globset = build_globset(exclude_patterns);
806
807 if path.is_file() {
808 files.push(path.to_path_buf());
810 } else if recursive {
811 for entry in WalkDir::new(path)
812 .into_iter()
813 .filter_entry(|e| {
814 let name = e.file_name();
816 !globset.is_match(e.path()) && !globset.is_match(name)
817 }) {
818 match entry {
819 Ok(entry) => {
820 if entry.file_type().is_file() && !should_exclude_path(entry.path(), exclude_patterns) {
821 files.push(entry.path().to_path_buf());
822 }
823 }
824 Err(e) => {
825 tracing::debug!("Skipping path due to error: {}", e);
827 continue;
828 }
829 }
830 }
831 } else {
832 match fs::read_dir(path) {
833 Ok(read_dir) => {
834 for entry in read_dir {
835 match entry {
836 Ok(entry) => {
837 let path = entry.path();
838 if path.is_file() && !should_exclude_path(&path, exclude_patterns) {
839 files.push(path);
840 }
841 }
842 Err(e) => {
843 tracing::debug!("Skipping directory entry due to error: {}", e);
844 continue;
845 }
846 }
847 }
848 }
849 Err(e) => {
850 tracing::debug!("Cannot read directory {:?}: {}", path, e);
851 return Err(e.into());
852 }
853 }
854 }
855
856 Ok(files)
857}
858
/// Maps the extension of `path` to a language name.
///
/// Known extensions are translated to a canonical name (for example `rs` to
/// `rust`, and `h`/`hpp` to `cpp`). Any other extension is returned unchanged.
/// Returns `None` when the path has no extension or the extension is not
/// valid UTF-8.
fn detect_language(path: &Path) -> Option<String> {
    let extension = path.extension()?.to_str()?;

    let language = match extension {
        "rs" => "rust",
        "py" => "python",
        "js" => "javascript",
        "ts" => "typescript",
        "hs" | "lhs" => "haskell",
        "go" => "go",
        "java" => "java",
        "c" => "c",
        "cpp" | "cc" | "cxx" | "h" | "hpp" => "cpp",
        "cs" => "csharp",
        "rb" => "ruby",
        "php" => "php",
        "swift" => "swift",
        "kt" => "kotlin",
        other => other,
    };

    Some(language.to_owned())
}
882
883async fn ensure_index_updated(path: &Path, force_reindex: bool, need_embeddings: bool) -> Result<()> {
884
885 let index_root_buf = find_nearest_index_root(path).unwrap_or_else(|| {
887 if path.is_file() {
888 path.parent().unwrap_or(path).to_path_buf()
889 } else {
890 path.to_path_buf()
891 }
892 });
893 let index_root = &index_root_buf;
894
895 if force_reindex {
897 let stats = ck_index::smart_update_index_with_progress(index_root, false, None, need_embeddings).await?;
898 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
899 tracing::info!("Index updated: {} files indexed, {} orphaned files removed",
900 stats.files_indexed, stats.orphaned_files_removed);
901 }
902 return Ok(());
903 }
904
905 let stats = ck_index::smart_update_index_with_progress(index_root, false, None, need_embeddings).await?;
907 if stats.files_indexed > 0 || stats.orphaned_files_removed > 0 {
908 tracing::info!("Index updated: {} files indexed, {} orphaned files removed",
909 stats.files_indexed, stats.orphaned_files_removed);
910 }
911
912 Ok(())
913}
914
915fn get_context_preview(lines: &[&str], line_idx: usize, options: &SearchOptions) -> String {
916 let before = options.before_context_lines.max(options.context_lines);
917 let after = options.after_context_lines.max(options.context_lines);
918
919 if before > 0 || after > 0 {
920 let start_idx = line_idx.saturating_sub(before);
921 let end_idx = (line_idx + after + 1).min(lines.len());
922 lines[start_idx..end_idx].join("\n")
923 } else {
924 lines[line_idx].to_string()
925 }
926}
927
928fn extract_code_sections(file_path: &Path, content: &str) -> Option<Vec<(usize, usize, String)>> {
929 let lang = match file_path.extension().and_then(|s| s.to_str()) {
931 Some("py") => Some("python"),
932 Some("js") => Some("javascript"),
933 Some("ts") | Some("tsx") => Some("typescript"),
934 Some("hs") | Some("lhs") => Some("haskell"),
935 _ => return None,
936 };
937
938 if let Ok(chunks) = ck_chunk::chunk_text(content, lang) {
940 let sections: Vec<(usize, usize, String)> = chunks
941 .into_iter()
942 .filter(|chunk| matches!(
943 chunk.chunk_type,
944 ck_chunk::ChunkType::Function |
945 ck_chunk::ChunkType::Class |
946 ck_chunk::ChunkType::Method
947 ))
948 .map(|chunk| {
949 (
950 chunk.span.line_start - 1, chunk.span.line_end - 1,
952 chunk.text,
953 )
954 })
955 .collect();
956
957 if sections.is_empty() {
958 None
959 } else {
960 Some(sections)
961 }
962 } else {
963 None
964 }
965}
966
/// Returns the text of the first section whose inclusive `[start, end]` line
/// range contains `line_idx`, or `None` if no section does.
fn find_containing_section(sections: &[(usize, usize, String)], line_idx: usize) -> Option<&String> {
    sections
        .iter()
        .find(|(start, end, _)| (*start..=*end).contains(&line_idx))
        .map(|(_, _, text)| text)
}
975
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes a small mixed-language fixture set into `dir` and returns the created paths.
    fn create_test_files(dir: &std::path::Path) -> Vec<PathBuf> {
        let fixtures = [
            ("test1.txt", "hello world rust programming"),
            ("test2.rs", "fn main() { println!(\"Hello Rust\"); }"),
            ("test3.py", "print('Hello Python')"),
            ("test4.txt", "machine learning artificial intelligence"),
        ];

        fixtures
            .iter()
            .map(|&(name, content)| {
                let path = dir.join(name);
                fs::write(&path, content).unwrap();
                path
            })
            .collect()
    }

    /// Recursive regex-mode options for `query` rooted at `path`; everything else is default.
    fn regex_options(query: &str, path: PathBuf) -> SearchOptions {
        SearchOptions {
            mode: SearchMode::Regex,
            query: query.to_string(),
            path,
            recursive: true,
            ..Default::default()
        }
    }

    #[test]
    fn test_detect_language() {
        let expectations = [
            ("test.rs", Some("rust")),
            ("test.py", Some("python")),
            ("test.js", Some("javascript")),
            ("test.hs", Some("haskell")),
            ("test.lhs", Some("haskell")),
            ("test.unknown", Some("unknown")),
            ("noext", None),
        ];

        for (file_name, expected) in expectations.iter() {
            assert_eq!(
                detect_language(&PathBuf::from(file_name)),
                expected.map(String::from)
            );
        }
    }

    #[test]
    fn test_collect_files() {
        let temp_dir = TempDir::new().unwrap();
        let test_files = create_test_files(temp_dir.path());

        // Flat listing first, then the recursive walk: both see the four fixtures.
        for recursive in [false, true].iter().copied() {
            let found = collect_files(temp_dir.path(), recursive, &[]).unwrap();
            assert_eq!(found.len(), 4);
        }

        // A single file is returned as-is.
        let single = collect_files(&test_files[0], false, &[]).unwrap();
        assert_eq!(single.len(), 1);
        assert_eq!(single[0], test_files[0]);
    }

    #[test]
    fn test_regex_search() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = regex_options("rust", temp_dir.path().to_path_buf());

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());

        let has_rust_preview = results
            .iter()
            .any(|r| r.preview.to_lowercase().contains("rust"));
        assert!(has_rust_preview);
    }

    #[test]
    fn test_regex_search_case_insensitive() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            case_insensitive: true,
            ..regex_options("HELLO", temp_dir.path().to_path_buf())
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_fixed_string() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            fixed_string: true,
            ..regex_options("fn main()", temp_dir.path().to_path_buf())
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_whole_word() {
        let temp_dir = TempDir::new().unwrap();
        fs::write(temp_dir.path().join("word_test.txt"), "rust rusty rustacean").unwrap();

        let options = SearchOptions {
            whole_word: true,
            ..regex_options("rust", temp_dir.path().to_path_buf())
        };

        let results = regex_search(&options).unwrap();
        assert!(!results.is_empty());
    }

    #[test]
    fn test_regex_search_top_k() {
        let temp_dir = TempDir::new().unwrap();

        for i in 0..10 {
            fs::write(temp_dir.path().join(format!("file{}.txt", i)), "test content").unwrap();
        }

        let options = SearchOptions {
            top_k: Some(5),
            ..regex_options("test", temp_dir.path().to_path_buf())
        };

        let results = regex_search(&options).unwrap();
        assert!(results.len() <= 5);
    }

    #[test]
    fn test_regex_search_span_offsets() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("spans.txt");
        fs::write(&test_file, "test test test\nline two test\ntest end").unwrap();

        let options = SearchOptions {
            recursive: false,
            ..regex_options("test", test_file.clone())
        };

        let results = regex_search(&options).unwrap();

        assert_eq!(results.len(), 5);

        // Three matches on the first line, at columns 0, 5 and 10.
        let first_line: Vec<_> = results.iter().filter(|r| r.span.line_start == 1).collect();
        assert_eq!(first_line.len(), 3);
        assert_eq!(first_line[0].span.byte_start, 0);
        assert_eq!(first_line[1].span.byte_start, 5);
        assert_eq!(first_line[2].span.byte_start, 10);

        // One match on the second line; its offset counts the first line and its newline.
        let second_line: Vec<_> = results.iter().filter(|r| r.span.line_start == 2).collect();
        assert_eq!(second_line.len(), 1);
        assert_eq!(second_line[0].span.byte_start, 24);

        // Every match has a distinct byte offset.
        let mut byte_starts: Vec<_> = results.iter().map(|r| r.span.byte_start).collect();
        byte_starts.sort();
        byte_starts.dedup();
        assert_eq!(byte_starts.len(), 5);
    }

    #[test]
    fn test_search_file() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1: hello\nline 2: world\nline 3: rust programming").unwrap();

        let regex = regex::Regex::new("rust").unwrap();
        let options = SearchOptions::default();

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);

        let only_hit = &results[0];
        assert_eq!(only_hit.span.line_start, 3);
        assert!(only_hit.preview.contains("rust"));
    }

    #[test]
    fn test_search_file_with_context() {
        let temp_dir = TempDir::new().unwrap();
        let file_path = temp_dir.path().join("test.txt");
        fs::write(&file_path, "line 1\nline 2\ntarget line\nline 4\nline 5").unwrap();

        let regex = regex::Regex::new("target").unwrap();
        let options = SearchOptions {
            context_lines: 1,
            ..Default::default()
        };

        let results = search_file(&regex, &file_path, &options).unwrap();
        assert_eq!(results.len(), 1);

        let preview = &results[0].preview;
        println!("Preview: '{}'", preview);

        assert!(preview.contains("line 2"));
        assert!(preview.contains("target line"));
        assert!(preview.contains("line 4"));
    }

    #[tokio::test]
    async fn test_search_main_function() {
        let temp_dir = TempDir::new().unwrap();
        create_test_files(temp_dir.path());

        let options = SearchOptions {
            case_insensitive: true,
            ..regex_options("hello", temp_dir.path().to_path_buf())
        };

        let results = search(&options).await.unwrap();
        assert!(!results.is_empty());
    }
}