1use anyhow::Result;
2use probe_code::search::file_list_cache;
3use std::collections::{HashMap, HashSet};
4use std::path::{Path, PathBuf};
5use std::time::{Duration, Instant};
6use probe_code::models::{LimitedSearchResults, SearchResult};
9use probe_code::path_resolver::resolve_path;
10use probe_code::search::{
11 cache,
12 file_processing::{process_file_with_results, FileProcessingParams},
14 query::{create_query_plan, create_structured_patterns, QueryPlan},
15 result_ranking::rank_search_results,
16 search_limiter::apply_limits,
17 search_options::SearchOptions,
18 timeout,
19};
20
/// Wall-clock timings collected across the phases of a single search run.
///
/// Every field starts as `None` and is filled in once the corresponding
/// phase has been measured; `print_timings` renders the populated fields
/// as a hierarchical report when DEBUG output is enabled.
pub struct SearchTimings {
    // Top-level pipeline phases, in execution order.
    pub query_preprocessing: Option<Duration>,
    pub pattern_generation: Option<Duration>,
    pub file_searching: Option<Duration>,
    pub filename_matching: Option<Duration>,
    pub early_filtering: Option<Duration>,
    pub early_caching: Option<Duration>,
    // Result processing total, followed by its per-stage sub-timings.
    pub result_processing: Option<Duration>,
    pub result_processing_file_io: Option<Duration>,
    pub result_processing_line_collection: Option<Duration>,
    pub result_processing_ast_parsing: Option<Duration>,
    pub result_processing_block_extraction: Option<Duration>,
    pub result_processing_result_building: Option<Duration>,

    // Breakdown of the AST-parsing stage.
    pub result_processing_ast_parsing_language_init: Option<Duration>,
    pub result_processing_ast_parsing_parser_init: Option<Duration>,
    pub result_processing_ast_parsing_tree_parsing: Option<Duration>,
    pub result_processing_ast_parsing_line_map_building: Option<Duration>,

    // Breakdown of the block-extraction stage.
    pub result_processing_block_extraction_code_structure: Option<Duration>,
    pub result_processing_block_extraction_filtering: Option<Duration>,
    pub result_processing_block_extraction_result_building: Option<Duration>,

    // Breakdown of the result-building stage.
    pub result_processing_term_matching: Option<Duration>,
    pub result_processing_compound_processing: Option<Duration>,
    pub result_processing_line_matching: Option<Duration>,
    pub result_processing_result_creation: Option<Duration>,
    pub result_processing_synchronization: Option<Duration>,
    pub result_processing_uncovered_lines: Option<Duration>,

    // Final pipeline phases and the overall total.
    pub result_ranking: Option<Duration>,
    pub limit_application: Option<Duration>,
    pub block_merging: Option<Duration>,
    pub final_caching: Option<Duration>,
    pub total_search_time: Option<Duration>,
}
62
/// Renders a duration for human-readable output.
///
/// Sub-second values are shown as whole milliseconds (e.g. `"250ms"`);
/// anything one second or longer is shown as seconds with two decimal
/// places (e.g. `"1.50s"`).
pub fn format_duration(duration: Duration) -> String {
    let millis = duration.as_millis();
    match millis {
        0..=999 => format!("{millis}ms"),
        _ => format!("{:.2}s", duration.as_secs_f64()),
    }
}
73
74pub fn print_timings(timings: &SearchTimings) {
76 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
77 if !debug_mode {
78 return;
79 }
80
81 println!("\n=== SEARCH TIMING INFORMATION ===");
82
83 if let Some(duration) = timings.query_preprocessing {
84 println!("Query preprocessing: {}", format_duration(duration));
85 }
86
87 if let Some(duration) = timings.pattern_generation {
88 println!("Pattern generation: {}", format_duration(duration));
89 }
90
91 if let Some(duration) = timings.file_searching {
92 println!("File searching: {}", format_duration(duration));
93 }
94
95 if let Some(duration) = timings.filename_matching {
96 println!("Filename matching: {}", format_duration(duration));
97 }
98
99 if let Some(duration) = timings.early_filtering {
100 println!("Early AST filtering: {}", format_duration(duration));
101 }
102
103 if let Some(duration) = timings.early_caching {
104 println!("Early caching: {}", format_duration(duration));
105 }
106
107 if let Some(duration) = timings.result_processing {
108 println!("Result processing: {}", format_duration(duration));
109
110 if let Some(duration) = timings.result_processing_file_io {
112 println!(" - File I/O: {}", format_duration(duration));
113 }
114
115 if let Some(duration) = timings.result_processing_line_collection {
116 println!(" - Line collection: {}", format_duration(duration));
117 }
118
119 if let Some(duration) = timings.result_processing_ast_parsing {
120 println!(" - AST parsing: {}", format_duration(duration));
121
122 if let Some(d) = timings.result_processing_ast_parsing_language_init {
124 println!(" - Language init: {}", format_duration(d));
125 }
126 if let Some(d) = timings.result_processing_ast_parsing_parser_init {
127 println!(" - Parser init: {}", format_duration(d));
128 }
129 if let Some(d) = timings.result_processing_ast_parsing_tree_parsing {
130 println!(" - Tree parsing: {}", format_duration(d));
131 }
132 if let Some(d) = timings.result_processing_ast_parsing_line_map_building {
133 println!(" - Line map building: {}", format_duration(d));
134 }
135 }
136
137 if let Some(duration) = timings.result_processing_block_extraction {
138 println!(" - Block extraction: {}", format_duration(duration));
139
140 if let Some(d) = timings.result_processing_block_extraction_code_structure {
142 println!(" - Code structure: {}", format_duration(d));
143 }
144 if let Some(d) = timings.result_processing_block_extraction_filtering {
145 println!(" - Filtering: {}", format_duration(d));
146 }
147 if let Some(d) = timings.result_processing_block_extraction_result_building {
148 println!(" - Result building: {}", format_duration(d));
149 }
150 }
151
152 if let Some(duration) = timings.result_processing_result_building {
153 println!(" - Result building: {}", format_duration(duration));
154
155 if let Some(d) = timings.result_processing_term_matching {
157 println!(" - Term matching: {}", format_duration(d));
158 }
159 if let Some(d) = timings.result_processing_compound_processing {
160 println!(" - Compound processing: {}", format_duration(d));
161 }
162 if let Some(d) = timings.result_processing_line_matching {
163 println!(" - Line matching: {}", format_duration(d));
164 }
165 if let Some(d) = timings.result_processing_result_creation {
166 println!(" - Result creation: {}", format_duration(d));
167 }
168 if let Some(d) = timings.result_processing_synchronization {
169 println!(" - Synchronization: {}", format_duration(d));
170 }
171 if let Some(d) = timings.result_processing_uncovered_lines {
172 println!(" - Uncovered lines: {}", format_duration(d));
173 }
174 }
175 }
176
177 if let Some(duration) = timings.result_ranking {
178 println!("Result ranking: {}", format_duration(duration));
179 }
180
181 if let Some(duration) = timings.limit_application {
182 println!("Limit application: {}", format_duration(duration));
183 }
184
185 if let Some(duration) = timings.block_merging {
186 println!("Block merging: {}", format_duration(duration));
187 }
188
189 if let Some(duration) = timings.final_caching {
190 println!("Final caching: {}", format_duration(duration));
191 }
192
193 if let Some(duration) = timings.total_search_time {
194 println!("Total search time: {}", format_duration(duration));
195 }
196
197 println!("===================================\n");
198}
199
200pub fn perform_probe(options: &SearchOptions) -> Result<LimitedSearchResults> {
207 let total_start = Instant::now();
209
210 let SearchOptions {
211 path,
212 queries,
213 files_only,
214 custom_ignores,
215 exclude_filenames,
216 reranker,
217 frequency_search: _,
218 exact,
219 language,
220 max_results,
221 max_bytes,
222 max_tokens,
223 allow_tests,
224 no_merge,
225 merge_threshold,
226 dry_run: _, session,
228 timeout,
229 } = options;
230 let timeout_handle = timeout::start_timeout_thread(*timeout);
232
233 let include_filenames = !exclude_filenames;
234 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
235
236 let (effective_session, session_was_generated) = if let Some(s) = session {
239 if s.is_empty() || *s == "new" {
240 if let Ok(env_session_id) = std::env::var("PROBE_SESSION_ID") {
242 if !env_session_id.is_empty() {
243 if debug_mode {
244 println!("DEBUG: Using session ID from environment: {env_session_id}");
245 }
246 let static_id: &'static str = Box::leak(env_session_id.into_boxed_str());
248 (Some(static_id), false)
249 } else {
250 match cache::generate_session_id() {
252 Ok((new_id, _is_new)) => {
253 if debug_mode {
254 println!("DEBUG: Generated new session ID: {new_id}");
255 }
256 (Some(new_id), true)
257 }
258 Err(e) => {
259 eprintln!("Error generating session ID: {e}");
260 (None, false)
261 }
262 }
263 }
264 } else {
265 match cache::generate_session_id() {
267 Ok((new_id, _is_new)) => {
268 if debug_mode {
269 println!("DEBUG: Generated new session ID: {new_id}");
270 }
271 (Some(new_id), true)
272 }
273 Err(e) => {
274 eprintln!("Error generating session ID: {e}");
275 (None, false)
276 }
277 }
278 }
279 } else {
280 (Some(*s), false)
281 }
282 } else {
283 if let Ok(env_session_id) = std::env::var("PROBE_SESSION_ID") {
285 if !env_session_id.is_empty() {
286 if debug_mode {
287 println!("DEBUG: Using session ID from environment: {env_session_id}");
288 }
289 let static_id: &'static str = Box::leak(env_session_id.into_boxed_str());
291 (Some(static_id), false)
292 } else {
293 (None, false)
294 }
295 } else {
296 (None, false)
297 }
298 };
299
300 let mut timings = SearchTimings {
301 query_preprocessing: None,
302 pattern_generation: None,
303 file_searching: None,
304 filename_matching: None,
305 early_filtering: None,
306 early_caching: None,
307 result_processing: None,
308 result_processing_file_io: None,
309 result_processing_line_collection: None,
310 result_processing_ast_parsing: None,
311 result_processing_block_extraction: None,
312 result_processing_result_building: None,
313
314 result_processing_ast_parsing_language_init: None,
316 result_processing_ast_parsing_parser_init: None,
317 result_processing_ast_parsing_tree_parsing: None,
318 result_processing_ast_parsing_line_map_building: None,
319
320 result_processing_block_extraction_code_structure: None,
322 result_processing_block_extraction_filtering: None,
323 result_processing_block_extraction_result_building: None,
324
325 result_processing_term_matching: None,
327 result_processing_compound_processing: None,
328 result_processing_line_matching: None,
329 result_processing_result_creation: None,
330 result_processing_synchronization: None,
331 result_processing_uncovered_lines: None,
332
333 result_ranking: None,
334 limit_application: None,
335 block_merging: None,
336 final_caching: None,
337 total_search_time: None,
338 };
339
340 let qp_start = Instant::now();
342 if debug_mode {
343 println!("DEBUG: Starting query preprocessing...");
344 }
345
346 let parse_res = if queries.len() > 1 {
347 let combined_query = queries.join(" AND ");
349 create_query_plan(&combined_query, *exact)
350 } else {
351 create_query_plan(&queries[0], *exact)
352 };
353
354 let qp_duration = qp_start.elapsed();
355 timings.query_preprocessing = Some(qp_duration);
356
357 if debug_mode {
358 println!(
359 "DEBUG: Query preprocessing completed in {}",
360 format_duration(qp_duration)
361 );
362 }
363
364 if parse_res.is_err() {
366 println!("Failed to parse query as AST expression");
367 return Ok(LimitedSearchResults {
368 results: Vec::new(),
369 skipped_files: Vec::new(),
370 limits_applied: None,
371 cached_blocks_skipped: None,
372 });
373 }
374
375 let plan = parse_res.unwrap();
377
378 let pg_start = Instant::now();
380 if debug_mode {
381 println!("DEBUG: Starting pattern generation...");
382 println!("DEBUG: Using combined pattern approach for more efficient searching");
383 }
384
385 let structured_patterns = create_structured_patterns(&plan);
387
388 let pg_duration = pg_start.elapsed();
389 timings.pattern_generation = Some(pg_duration);
390
391 if debug_mode {
392 println!(
393 "DEBUG: Pattern generation completed in {}",
394 format_duration(pg_duration)
395 );
396 println!(
397 "DEBUG: Generated {patterns_len} patterns",
398 patterns_len = structured_patterns.len()
399 );
400 if structured_patterns.len() == 1 {
401 println!("DEBUG: Successfully created a single combined pattern for all terms");
402 }
403 }
404
405 let fs_start = Instant::now();
407 if debug_mode {
408 println!("DEBUG: Starting file searching...");
409 }
410
411 let lang_param = language.as_ref().map(|lang| normalize_language_alias(lang));
425
426 let mut file_term_map = search_with_structured_patterns(
427 path,
428 &plan,
429 &structured_patterns,
430 custom_ignores,
431 *allow_tests,
432 lang_param,
433 )?;
434
435 let fs_duration = fs_start.elapsed();
436 timings.file_searching = Some(fs_duration);
437
438 if debug_mode {
440 let total_matches: usize = file_term_map
442 .values()
443 .map(|term_map| term_map.values().map(|lines| lines.len()).sum::<usize>())
444 .sum();
445
446 let unique_files = file_term_map.keys().len();
448
449 println!(
450 "DEBUG: File searching completed in {} - Found {} matches in {} unique files",
451 format_duration(fs_duration),
452 total_matches,
453 unique_files
454 );
455 }
456
457 let mut all_files = file_term_map.keys().cloned().collect::<HashSet<_>>();
459
460 let fm_start = Instant::now();
462 if include_filenames && !exact {
463 if debug_mode {
464 println!("DEBUG: Starting filename matching...");
465 }
466 let resolved_path = if let Some(path_str) = path.to_str() {
469 match resolve_path(path_str) {
470 Ok(resolved_path) => {
471 if debug_mode {
472 println!(
473 "DEBUG: Resolved path '{}' to '{}'",
474 path_str,
475 resolved_path.display()
476 );
477 }
478 resolved_path
479 }
480 Err(err) => {
481 if debug_mode {
482 println!("DEBUG: Failed to resolve path '{path_str}': {err}");
483 }
484 path.to_path_buf()
486 }
487 }
488 } else {
489 path.to_path_buf()
491 };
492
493 let filename_matches: HashMap<PathBuf, HashSet<usize>> =
494 file_list_cache::find_matching_filenames(
495 &resolved_path,
496 queries,
497 &all_files,
498 custom_ignores,
499 *allow_tests,
500 &plan.term_indices,
501 lang_param,
502 )?;
503
504 if debug_mode {
505 println!(
506 "DEBUG: Found {} files matching by filename",
507 filename_matches.len()
508 );
509 }
510
511 for (pathbuf, matched_terms) in &filename_matches {
513 const MAX_FILE_SIZE: u64 = 1024 * 1024;
515
516 let resolved_path = match std::fs::canonicalize(pathbuf.as_path()) {
518 Ok(path) => path,
519 Err(e) => {
520 if debug_mode {
521 println!("DEBUG: Error resolving path for {pathbuf:?}: {e:?}");
522 }
523 continue;
524 }
525 };
526
527 let metadata = match std::fs::metadata(&resolved_path) {
529 Ok(meta) => meta,
530 Err(e) => {
531 if debug_mode {
532 println!("DEBUG: Error getting metadata for {resolved_path:?}: {e:?}");
533 }
534 continue;
535 }
536 };
537
538 if metadata.len() > MAX_FILE_SIZE {
540 if debug_mode {
541 println!(
542 "DEBUG: Skipping file {:?} - file too large ({} bytes > {} bytes limit)",
543 resolved_path,
544 metadata.len(),
545 MAX_FILE_SIZE
546 );
547 }
548 continue;
549 }
550
551 let file_content = match std::fs::read_to_string(&resolved_path) {
553 Ok(content) => content,
554 Err(e) => {
555 if debug_mode {
556 println!(
557 "DEBUG: Error reading file {:?}: {:?} (size: {} bytes)",
558 resolved_path,
559 e,
560 metadata.len()
561 );
562 }
563 continue;
564 }
565 };
566
567 let line_count = file_content.lines().count();
569 if line_count == 0 {
570 if debug_mode {
571 println!("DEBUG: File {pathbuf:?} is empty, skipping");
572 }
573 continue;
574 }
575
576 let all_line_numbers: HashSet<usize> = (1..=line_count).collect();
578
579 let mut term_map = if let Some(existing_map) = file_term_map.get(pathbuf) {
581 if debug_mode {
582 println!(
583 "DEBUG: File {pathbuf:?} already has term matches from content search, extending"
584 );
585 }
586 existing_map.clone()
587 } else {
588 if debug_mode {
589 println!("DEBUG: Creating new term map for file {pathbuf:?}");
590 }
591 HashMap::new()
592 };
593
594 for &term_idx in matched_terms {
596 term_map
597 .entry(term_idx)
598 .or_insert_with(HashSet::new)
599 .extend(&all_line_numbers);
600
601 if debug_mode {
602 println!(
603 "DEBUG: Added term index {term_idx} to file {pathbuf:?} with all lines"
604 );
605 }
606 }
607
608 file_term_map.insert(pathbuf.clone(), term_map);
610 all_files.insert(pathbuf.clone());
611
612 if debug_mode {
613 println!("DEBUG: Added file {pathbuf:?} with matching terms to file_term_map");
614 }
615 }
616 }
617
618 if debug_mode {
619 println!("DEBUG: all_files after filename matches: {all_files:?}");
620 }
621
622 let early_filter_start = Instant::now();
625 if debug_mode {
626 println!("DEBUG: Starting early AST filtering...");
627 println!("DEBUG: Before filtering: {} files", all_files.len());
628 }
629
630 let mut filtered_file_term_map = HashMap::new();
632 let mut filtered_all_files = HashSet::new();
633
634 for pathbuf in &all_files {
635 if let Some(term_map) = file_term_map.get(pathbuf) {
636 let matched_terms: HashSet<usize> = term_map.keys().copied().collect();
638
639 if plan.ast.evaluate(&matched_terms, &plan.term_indices, true) {
641 filtered_file_term_map.insert(pathbuf.clone(), term_map.clone());
642 filtered_all_files.insert(pathbuf.clone());
643 } else if debug_mode {
644 println!("DEBUG: Early filtering removed file: {pathbuf:?}");
645 }
646 } else if debug_mode {
647 println!("DEBUG: File {pathbuf:?} not found in file_term_map during early filtering");
648 }
649 }
650
651 file_term_map = filtered_file_term_map;
653 all_files = filtered_all_files;
654
655 if debug_mode {
656 println!(
657 "DEBUG: After early filtering: {} files remain",
658 all_files.len()
659 );
660 println!("DEBUG: all_files after early filtering: {all_files:?}");
661 }
662
663 let early_filter_duration = early_filter_start.elapsed();
664 timings.early_filtering = Some(early_filter_duration);
665
666 if debug_mode {
667 println!(
668 "DEBUG: Early AST filtering completed in {}",
669 format_duration(early_filter_duration)
670 );
671 }
672
673 let fm_duration = fm_start.elapsed();
674 timings.filename_matching = Some(fm_duration);
675
676 if debug_mode && include_filenames {
677 println!(
678 "DEBUG: Filename matching completed in {}",
679 format_duration(fm_duration)
680 );
681 }
682
683 if *files_only {
685 let mut res = Vec::new();
686 for f in all_files {
687 res.push(SearchResult {
688 file: f.to_string_lossy().to_string(),
689 lines: (1, 1),
690 node_type: "file".to_string(),
691 code: String::new(),
692 matched_by_filename: None,
693 rank: None,
694 score: None,
695 tfidf_score: None,
696 bm25_score: None,
697 tfidf_rank: None,
698 bm25_rank: None,
699 new_score: None,
700 hybrid2_rank: None,
701 combined_score_rank: None,
702 file_unique_terms: None,
703 file_total_matches: None,
704 file_match_rank: None,
705 block_unique_terms: None,
706 block_total_matches: None,
707 parent_file_id: None,
708 block_id: None,
709 matched_keywords: None,
710 tokenized_content: None,
711 });
712 }
713 let mut limited = apply_limits(res, *max_results, *max_bytes, *max_tokens);
714
715 limited.cached_blocks_skipped = None;
717
718 timings.total_search_time = Some(total_start.elapsed());
720
721 print_timings(&timings);
723
724 return Ok(limited);
725 }
726
727 let ec_start = Instant::now();
729 let mut early_skipped_count = 0;
730 if let Some(session_id) = effective_session {
731 let raw_query = if queries.len() > 1 {
733 queries.join(" AND ")
734 } else {
735 queries[0].clone()
736 };
737
738 if debug_mode {
739 println!(
740 "DEBUG: Starting early caching for session: {session_id} with query: {raw_query}"
741 );
742 if let Err(e) = cache::debug_print_cache(session_id, &raw_query) {
744 eprintln!("Error printing cache: {e}");
745 }
746 }
747
748 match cache::filter_matched_lines_with_cache(&mut file_term_map, session_id, &raw_query) {
750 Ok(skipped) => {
751 if debug_mode {
752 println!("DEBUG: Early caching skipped {skipped} matched lines");
753 }
754 early_skipped_count = skipped;
755 }
756 Err(e) => {
757 eprintln!("Error applying early cache: {e}");
759 }
760 }
761
762 let cached_files = file_term_map.keys().cloned().collect::<HashSet<_>>();
765 all_files = all_files.intersection(&cached_files).cloned().collect();
766
767 if debug_mode {
768 println!("DEBUG: all_files after caching: {all_files:?}");
769 }
770 }
771
772 let ec_duration = ec_start.elapsed();
773 timings.early_caching = Some(ec_duration);
774
775 if debug_mode && effective_session.is_some() {
776 println!(
777 "DEBUG: Early caching completed in {}",
778 format_duration(ec_duration)
779 );
780 }
781
782 let rp_start = Instant::now();
784 if debug_mode {
785 println!(
786 "DEBUG: Starting result processing for {} files after early caching...",
787 all_files.len()
788 );
789 }
790
791 let mut final_results = Vec::new();
792
793 let mut total_file_io_time = Duration::new(0, 0);
795 let mut total_line_collection_time = Duration::new(0, 0);
796 let mut total_ast_parsing_time = Duration::new(0, 0);
797 let mut total_block_extraction_time = Duration::new(0, 0);
798 let _total_result_building_time = Duration::new(0, 0);
799
800 let mut total_ast_parsing_language_init_time = Duration::new(0, 0);
802 let mut total_ast_parsing_parser_init_time = Duration::new(0, 0);
803 let mut total_ast_parsing_tree_parsing_time = Duration::new(0, 0);
804 let mut total_ast_parsing_line_map_building_time = Duration::new(0, 0);
805
806 let mut total_block_extraction_code_structure_time = Duration::new(0, 0);
808 let mut total_block_extraction_filtering_time = Duration::new(0, 0);
809 let mut total_block_extraction_result_building_time = Duration::new(0, 0);
810
811 let mut total_term_matching_time = Duration::new(0, 0);
813 let mut total_compound_processing_time = Duration::new(0, 0);
814 let mut total_line_matching_time = Duration::new(0, 0);
815 let mut total_result_creation_time = Duration::new(0, 0);
816 let mut total_synchronization_time = Duration::new(0, 0);
817 let mut total_uncovered_lines_time = Duration::new(0, 0);
818 for pathbuf in &all_files {
819 if debug_mode {
820 println!("DEBUG: Processing file: {pathbuf:?}");
821 }
822
823 if let Some(term_map) = file_term_map.get(pathbuf) {
825 if debug_mode {
826 println!("DEBUG: Term map for file: {term_map:?}");
827 }
828
829 let line_collection_start = Instant::now();
831 let mut all_lines = HashSet::new();
832 for lineset in term_map.values() {
833 all_lines.extend(lineset.iter());
834 }
835 let line_collection_duration = line_collection_start.elapsed();
836 total_line_collection_time += line_collection_duration;
837
838 if debug_mode {
839 println!(
840 "DEBUG: Found {} matched lines in file in {}",
841 all_lines.len(),
842 format_duration(line_collection_duration)
843 );
844 }
845
846 let filename_matched_queries = HashSet::new();
848
849 let term_pairs: Vec<(String, String)> = plan
851 .term_indices
852 .keys()
853 .map(|term| (term.clone(), term.clone()))
854 .collect();
855
856 let pparams = FileProcessingParams {
857 path: pathbuf,
858 line_numbers: &all_lines,
859 allow_tests: *allow_tests,
860 term_matches: term_map,
861 num_queries: plan.term_indices.len(),
862 filename_matched_queries,
863 queries_terms: &[term_pairs],
864 preprocessed_queries: None,
865 no_merge: *no_merge,
866 query_plan: &plan,
867 };
868
869 if debug_mode {
870 println!(
871 "DEBUG: Processing file with params: {}",
872 pparams.path.display()
873 );
874 }
875
876 match process_file_with_results(&pparams) {
878 Ok((mut file_res, file_timings)) => {
879 if let Some(duration) = file_timings.file_io {
881 total_file_io_time += duration;
882 }
883 if let Some(duration) = file_timings.ast_parsing {
884 total_ast_parsing_time += duration;
885 }
886 if let Some(duration) = file_timings.block_extraction {
887 total_block_extraction_time += duration;
888 }
889
890 if let Some(duration) = file_timings.ast_parsing_language_init {
892 total_ast_parsing_language_init_time += duration;
893 if debug_mode {
894 println!("DEBUG: - Language init: {}", format_duration(duration));
895 }
896 }
897 if let Some(duration) = file_timings.ast_parsing_parser_init {
898 total_ast_parsing_parser_init_time += duration;
899 if debug_mode {
900 println!("DEBUG: - Parser init: {}", format_duration(duration));
901 }
902 }
903 if let Some(duration) = file_timings.ast_parsing_tree_parsing {
904 total_ast_parsing_tree_parsing_time += duration;
905 if debug_mode {
906 println!("DEBUG: - Tree parsing: {}", format_duration(duration));
907 }
908 }
909 if let Some(duration) = file_timings.ast_parsing_line_map_building {
910 total_ast_parsing_line_map_building_time += duration;
911 if debug_mode {
912 println!(
913 "DEBUG: - Line map building: {}",
914 format_duration(duration)
915 );
916 }
917 }
918
919 if let Some(duration) = file_timings.block_extraction_code_structure {
921 total_block_extraction_code_structure_time += duration;
922 if debug_mode {
923 println!(
924 "DEBUG: - Code structure finding: {}",
925 format_duration(duration)
926 );
927 }
928 }
929 if let Some(duration) = file_timings.block_extraction_filtering {
930 total_block_extraction_filtering_time += duration;
931 if debug_mode {
932 println!("DEBUG: - Filtering: {}", format_duration(duration));
933 }
934 }
935 if let Some(duration) = file_timings.block_extraction_result_building {
936 total_block_extraction_result_building_time += duration;
937 if debug_mode {
938 println!(
939 "DEBUG: - Result building: {}",
940 format_duration(duration)
941 );
942 }
943 }
944
945 if let Some(duration) = file_timings.result_building_term_matching {
947 total_term_matching_time += duration;
948 if debug_mode {
949 println!("DEBUG: - Term matching: {}", format_duration(duration));
950 }
951 }
952 if let Some(duration) = file_timings.result_building_compound_processing {
953 total_compound_processing_time += duration;
954 if debug_mode {
955 println!(
956 "DEBUG: - Compound processing: {}",
957 format_duration(duration)
958 );
959 }
960 }
961 if let Some(duration) = file_timings.result_building_line_matching {
962 total_line_matching_time += duration;
963 if debug_mode {
964 println!("DEBUG: - Line matching: {}", format_duration(duration));
965 }
966 }
967 if let Some(duration) = file_timings.result_building_result_creation {
968 total_result_creation_time += duration;
969 if debug_mode {
970 println!(
971 "DEBUG: - Result creation: {}",
972 format_duration(duration)
973 );
974 }
975 }
976 if let Some(duration) = file_timings.result_building_synchronization {
977 total_synchronization_time += duration;
978 if debug_mode {
979 println!(
980 "DEBUG: - Synchronization: {}",
981 format_duration(duration)
982 );
983 }
984 }
985 if let Some(duration) = file_timings.result_building_uncovered_lines {
986 total_uncovered_lines_time += duration;
987 if debug_mode {
988 println!(
989 "DEBUG: - Uncovered lines: {}",
990 format_duration(duration)
991 );
992 }
993 }
994
995 if debug_mode {
996 println!("DEBUG: Got {} results from file processing", file_res.len());
997 if let Some(duration) = file_timings.file_io {
998 println!("DEBUG: File I/O time: {}", format_duration(duration));
999 }
1000 if let Some(duration) = file_timings.ast_parsing {
1001 println!("DEBUG: AST parsing time: {}", format_duration(duration));
1002 }
1003 if let Some(duration) = file_timings.block_extraction {
1004 println!(
1005 "DEBUG: Block extraction time: {}",
1006 format_duration(duration)
1007 );
1008 }
1009 if let Some(duration) = file_timings.block_extraction_result_building {
1010 println!(
1011 "DEBUG: Result building time: {}",
1012 format_duration(duration)
1013 );
1014 }
1015 }
1016 final_results.append(&mut file_res);
1017 }
1018 Err(e) => {
1019 if debug_mode {
1020 println!("DEBUG: Error processing file: {e:?}");
1021 }
1022 }
1023 }
1024 } else {
1025 if debug_mode {
1027 println!("DEBUG: ERROR - File {pathbuf:?} not found in file_term_map but was in all_files");
1028 }
1029 }
1030 }
1031
1032 let rp_duration = rp_start.elapsed();
1033 let detailed_result_building_time = total_term_matching_time
1035 + total_compound_processing_time
1036 + total_line_matching_time
1037 + total_result_creation_time
1038 + total_synchronization_time
1039 + total_uncovered_lines_time;
1040
1041 let accounted_time = total_file_io_time
1043 + total_line_collection_time
1044 + total_ast_parsing_time
1045 + total_block_extraction_time;
1046 let remaining_time = if rp_duration > accounted_time {
1047 rp_duration - accounted_time
1048 } else {
1049 if detailed_result_building_time > Duration::new(0, 0) {
1051 detailed_result_building_time
1052 } else {
1053 total_block_extraction_result_building_time
1054 }
1055 };
1056
1057 timings.result_processing = Some(rp_duration);
1058 timings.result_processing_file_io = Some(total_file_io_time);
1059 timings.result_processing_line_collection = Some(total_line_collection_time);
1060 timings.result_processing_ast_parsing = Some(total_ast_parsing_time);
1061 timings.result_processing_block_extraction = Some(total_block_extraction_time);
1062 timings.result_processing_result_building = Some(remaining_time);
1063
1064 timings.result_processing_term_matching = Some(total_term_matching_time);
1066 timings.result_processing_compound_processing = Some(total_compound_processing_time);
1067 timings.result_processing_line_matching = Some(total_line_matching_time);
1068 timings.result_processing_result_creation = Some(total_result_creation_time);
1069 timings.result_processing_synchronization = Some(total_synchronization_time);
1070 timings.result_processing_uncovered_lines = Some(total_uncovered_lines_time);
1071
1072 timings.result_processing_ast_parsing_language_init =
1074 Some(total_ast_parsing_language_init_time);
1075 timings.result_processing_ast_parsing_parser_init = Some(total_ast_parsing_parser_init_time);
1076 timings.result_processing_ast_parsing_tree_parsing = Some(total_ast_parsing_tree_parsing_time);
1077 timings.result_processing_ast_parsing_line_map_building =
1078 Some(total_ast_parsing_line_map_building_time);
1079
1080 timings.result_processing_block_extraction_code_structure =
1082 Some(total_block_extraction_code_structure_time);
1083 timings.result_processing_block_extraction_filtering =
1084 Some(total_block_extraction_filtering_time);
1085 timings.result_processing_block_extraction_result_building =
1086 Some(total_block_extraction_result_building_time);
1087
1088 if debug_mode {
1089 println!(
1090 "DEBUG: Result processing completed in {} - Generated {} results",
1091 format_duration(rp_duration),
1092 final_results.len()
1093 );
1094 println!("DEBUG: Granular result processing timings:");
1095 println!("DEBUG: File I/O: {}", format_duration(total_file_io_time));
1096 println!(
1097 "DEBUG: Line collection: {}",
1098 format_duration(total_line_collection_time)
1099 );
1100 println!(
1101 "DEBUG: AST parsing: {}",
1102 format_duration(total_ast_parsing_time)
1103 );
1104 println!(
1105 "DEBUG: - Language init: {}",
1106 format_duration(total_ast_parsing_language_init_time)
1107 );
1108 println!(
1109 "DEBUG: - Parser init: {}",
1110 format_duration(total_ast_parsing_parser_init_time)
1111 );
1112 println!(
1113 "DEBUG: - Tree parsing: {}",
1114 format_duration(total_ast_parsing_tree_parsing_time)
1115 );
1116 println!(
1117 "DEBUG: - Line map building: {}",
1118 format_duration(total_ast_parsing_line_map_building_time)
1119 );
1120 println!(
1121 "DEBUG: Block extraction: {}",
1122 format_duration(total_block_extraction_time)
1123 );
1124 println!(
1125 "DEBUG: - Code structure finding: {}",
1126 format_duration(total_block_extraction_code_structure_time)
1127 );
1128 println!(
1129 "DEBUG: - Filtering: {}",
1130 format_duration(total_block_extraction_filtering_time)
1131 );
1132 println!(
1133 "DEBUG: - Result building: {}",
1134 format_duration(total_block_extraction_result_building_time)
1135 );
1136 println!(
1137 "DEBUG: Result building: {}",
1138 format_duration(remaining_time)
1139 );
1140 }
1141 let rr_start = Instant::now();
1143 if debug_mode {
1144 if *exact {
1145 println!("DEBUG: Skipping result ranking due to exact flag being set");
1146 } else {
1147 println!("DEBUG: Starting result ranking...");
1148 }
1149 }
1150
1151 if !*exact {
1152 rank_search_results(&mut final_results, queries, reranker);
1154 }
1155
1156 let rr_duration = rr_start.elapsed();
1157 timings.result_ranking = Some(rr_duration);
1158
1159 if debug_mode {
1160 if *exact {
1161 println!(
1162 "DEBUG: Result ranking skipped in {}",
1163 format_duration(rr_duration)
1164 );
1165 } else {
1166 println!(
1167 "DEBUG: Result ranking completed in {}",
1168 format_duration(rr_duration)
1169 );
1170 }
1171 }
1172
1173 let mut skipped_count = early_skipped_count;
1175 let filtered_results = final_results;
1176
1177 let la_start = Instant::now();
1179 if debug_mode {
1180 println!("DEBUG: Starting limit application...");
1181 }
1182
1183 let mut limited = apply_limits(filtered_results, *max_results, *max_bytes, *max_tokens);
1185
1186 let fc_start = Instant::now();
1188
1189 if let Some(session_id) = effective_session {
1190 let raw_query = if queries.len() > 1 {
1192 queries.join(" AND ")
1193 } else {
1194 queries[0].clone()
1195 };
1196
1197 if debug_mode {
1198 println!(
1199 "DEBUG: Starting final caching for session: {session_id} with query: {raw_query}"
1200 );
1201 println!("DEBUG: Already skipped {early_skipped_count} lines in early caching");
1202 if let Err(e) = cache::debug_print_cache(session_id, &raw_query) {
1204 eprintln!("Error printing cache: {e}");
1205 }
1206 }
1207
1208 match cache::filter_results_with_cache(&limited.results, session_id, &raw_query) {
1210 Ok((_, cached_skipped)) => {
1211 if debug_mode {
1212 println!("DEBUG: Final caching found {cached_skipped} cached blocks");
1213 println!(
1214 "DEBUG: Total skipped (early + final): {}",
1215 early_skipped_count + cached_skipped
1216 );
1217 }
1218
1219 skipped_count += cached_skipped;
1220 }
1221 Err(e) => {
1222 eprintln!("Error checking cache: {e}");
1224 }
1225 }
1226
1227 if let Err(e) = cache::add_results_to_cache(&limited.results, session_id, &raw_query) {
1229 eprintln!("Error adding results to cache: {e}");
1230 }
1231
1232 if debug_mode {
1233 println!("DEBUG: Added limited results to cache before merging");
1234 if let Err(e) = cache::debug_print_cache(session_id, &raw_query) {
1236 eprintln!("Error printing updated cache: {e}");
1237 }
1238 }
1239 }
1240
1241 limited.cached_blocks_skipped = if skipped_count > 0 {
1243 Some(skipped_count)
1244 } else {
1245 None
1246 };
1247
1248 let fc_duration = fc_start.elapsed();
1249 timings.final_caching = Some(fc_duration);
1250
1251 if debug_mode && effective_session.is_some() {
1252 println!(
1253 "DEBUG: Final caching completed in {}",
1254 format_duration(fc_duration)
1255 );
1256 }
1257
1258 let la_duration = la_start.elapsed();
1259 timings.limit_application = Some(la_duration);
1260
1261 if debug_mode {
1262 println!(
1263 "DEBUG: Limit application completed in {} - Final result count: {}",
1264 format_duration(la_duration),
1265 limited.results.len()
1266 );
1267 }
1268
1269 let bm_start = Instant::now();
1271 if debug_mode && !limited.results.is_empty() && !*no_merge {
1272 println!("DEBUG: Starting block merging...");
1273 }
1274
1275 let final_results = if !limited.results.is_empty() && !*no_merge {
1276 use probe_code::search::block_merging::merge_ranked_blocks;
1277 let merged = merge_ranked_blocks(limited.results.clone(), *merge_threshold);
1278
1279 let bm_duration = bm_start.elapsed();
1280 timings.block_merging = Some(bm_duration);
1281
1282 if debug_mode {
1283 println!(
1284 "DEBUG: Block merging completed in {} - Merged result count: {}",
1285 format_duration(bm_duration),
1286 merged.len()
1287 );
1288 }
1289
1290 let merged_results = LimitedSearchResults {
1292 results: merged.clone(),
1293 skipped_files: limited.skipped_files,
1294 limits_applied: limited.limits_applied,
1295 cached_blocks_skipped: limited.cached_blocks_skipped,
1296 };
1297
1298 if let Some(session_id) = effective_session {
1300 let raw_query = if queries.len() > 1 {
1302 queries.join(" AND ")
1303 } else {
1304 queries[0].clone()
1305 };
1306
1307 if let Err(e) = cache::add_results_to_cache(&merged, session_id, &raw_query) {
1308 eprintln!("Error adding merged results to cache: {e}");
1309 }
1310
1311 if debug_mode {
1312 println!("DEBUG: Added merged results to cache after merging");
1313 if let Err(e) = cache::debug_print_cache(session_id, &raw_query) {
1315 eprintln!("Error printing updated cache: {e}");
1316 }
1317 }
1318 }
1319
1320 merged_results
1321 } else {
1322 let bm_duration = bm_start.elapsed();
1323 timings.block_merging = Some(bm_duration);
1324
1325 if debug_mode && !*no_merge {
1326 println!(
1327 "DEBUG: Block merging skipped (no results or disabled) - {}",
1328 format_duration(bm_duration)
1329 );
1330 }
1331
1332 limited
1333 };
1334
1335 if let Some(session_id) = effective_session {
1337 if session_was_generated {
1338 println!("Session ID: {session_id} (generated - ALWAYS USE IT in future sessions for caching)");
1339 } else {
1340 println!("Session ID: {session_id}");
1341 }
1342 }
1343
1344 timings.total_search_time = Some(total_start.elapsed());
1346
1347 print_timings(&timings);
1349
1350 timeout_handle.store(true, std::sync::atomic::Ordering::SeqCst);
1352
1353 Ok(final_results)
1354}
1355
1356pub fn search_with_structured_patterns(
1368 root_path_str: &Path,
1369 _plan: &QueryPlan,
1370 patterns: &[(String, HashSet<usize>)],
1371 custom_ignores: &[String],
1372 allow_tests: bool,
1373 language: Option<&str>,
1374) -> Result<HashMap<PathBuf, HashMap<usize, HashSet<usize>>>> {
1375 let root_path = if let Some(path_str) = root_path_str.to_str() {
1377 match resolve_path(path_str) {
1378 Ok(resolved_path) => {
1379 if std::env::var("DEBUG").unwrap_or_default() == "1" {
1380 println!(
1381 "DEBUG: Resolved path '{}' to '{}'",
1382 path_str,
1383 resolved_path.display()
1384 );
1385 }
1386 resolved_path
1387 }
1388 Err(err) => {
1389 if std::env::var("DEBUG").unwrap_or_default() == "1" {
1390 println!("DEBUG: Failed to resolve path '{path_str}': {err}");
1391 }
1392 root_path_str.to_path_buf()
1394 }
1395 }
1396 } else {
1397 root_path_str.to_path_buf()
1399 };
1400 use rayon::prelude::*;
1401 use regex::RegexSet;
1402 use std::sync::{Arc, Mutex};
1403
1404 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
1405 let search_start = Instant::now();
1406
1407 if debug_mode {
1409 println!("DEBUG: Starting parallel structured pattern search with RegexSet...");
1410 println!("DEBUG: Creating RegexSet from {} patterns", patterns.len());
1411 }
1412
1413 let pattern_strings: Vec<String> = patterns.iter().map(|(p, _)| format!("(?i){p}")).collect();
1415
1416 let regex_set = RegexSet::new(&pattern_strings)?;
1418
1419 let pattern_to_terms: Vec<HashSet<usize>> =
1421 patterns.iter().map(|(_, terms)| terms.clone()).collect();
1422
1423 if debug_mode {
1424 println!("DEBUG: RegexSet created successfully");
1425 }
1426
1427 if debug_mode {
1429 println!("DEBUG: Getting filtered file list from cache");
1430 println!("DEBUG: Custom ignore patterns: {custom_ignores:?}");
1431 }
1432
1433 let file_list = crate::search::file_list_cache::get_file_list_by_language(
1435 &root_path,
1436 allow_tests,
1437 custom_ignores,
1438 language,
1439 )?;
1440
1441 if debug_mode {
1442 println!("DEBUG: Got {} files from cache", file_list.files.len());
1443 println!("DEBUG: Starting parallel file processing with RegexSet");
1444 }
1445
1446 let regex_set = Arc::new(regex_set);
1449 let pattern_to_terms = Arc::new(pattern_to_terms);
1450 let file_term_maps = Arc::new(Mutex::new(HashMap::new()));
1451
1452 let individual_regexes: Vec<regex::Regex> = pattern_strings
1454 .iter()
1455 .map(|p| regex::Regex::new(p).unwrap())
1456 .collect();
1457 let individual_regexes = Arc::new(individual_regexes);
1458
1459 file_list.files.par_iter().for_each(|file_path| {
1460 let regex_set = Arc::clone(®ex_set);
1461 let pattern_to_terms = Arc::clone(&pattern_to_terms);
1462 let individual_regexes = Arc::clone(&individual_regexes);
1463
1464 match search_file_with_regex_set(
1466 file_path,
1467 ®ex_set,
1468 &individual_regexes,
1469 &pattern_to_terms,
1470 ) {
1471 Ok(term_map) => {
1472 if !term_map.is_empty() {
1473 if debug_mode {
1474 println!(
1475 "DEBUG: File {:?} matched patterns with {} term indices",
1476 file_path,
1477 term_map.len()
1478 );
1479 }
1480
1481 let mut maps = file_term_maps.lock().unwrap();
1483 maps.insert(file_path.clone(), term_map);
1484 }
1485 }
1486 Err(e) => {
1487 if debug_mode {
1488 println!("DEBUG: Error searching file {file_path:?}: {e:?}");
1489 }
1490 }
1491 }
1492 });
1493
1494 let total_duration = search_start.elapsed();
1495
1496 let result = Arc::try_unwrap(file_term_maps)
1498 .unwrap_or_else(|_| panic!("Failed to unwrap Arc"))
1499 .into_inner()
1500 .unwrap();
1501
1502 if debug_mode {
1503 println!(
1504 "DEBUG: Parallel search completed in {} - Found matches in {} files",
1505 format_duration(total_duration),
1506 result.len()
1507 );
1508 }
1509
1510 Ok(result)
1511}
1512
1513fn search_file_with_regex_set(
1520 file_path: &Path,
1521 regex_set: ®ex::RegexSet,
1522 individual_regexes: &[regex::Regex],
1523 pattern_to_terms: &[HashSet<usize>],
1524) -> Result<HashMap<usize, HashSet<usize>>> {
1525 let mut term_map = HashMap::new();
1526 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
1527
1528 const MAX_FILE_SIZE: u64 = 1024 * 1024;
1530
1531 let resolved_path = match std::fs::canonicalize(file_path) {
1533 Ok(path) => path,
1534 Err(e) => {
1535 if debug_mode {
1536 println!("DEBUG: Error resolving path for {file_path:?}: {e:?}");
1537 }
1538 return Err(anyhow::anyhow!("Failed to resolve file path: {}", e));
1539 }
1540 };
1541
1542 let metadata = match std::fs::metadata(&resolved_path) {
1544 Ok(meta) => meta,
1545 Err(e) => {
1546 if debug_mode {
1547 println!("DEBUG: Error getting metadata for {resolved_path:?}: {e:?}");
1548 }
1549 return Err(anyhow::anyhow!("Failed to get file metadata: {}", e));
1550 }
1551 };
1552
1553 if metadata.len() > MAX_FILE_SIZE {
1555 if debug_mode {
1556 println!(
1557 "DEBUG: Skipping file {:?} - file too large ({} bytes > {} bytes limit)",
1558 resolved_path,
1559 metadata.len(),
1560 MAX_FILE_SIZE
1561 );
1562 }
1563 return Err(anyhow::anyhow!(
1564 "File too large: {} bytes (limit: {} bytes)",
1565 metadata.len(),
1566 MAX_FILE_SIZE
1567 ));
1568 }
1569
1570 let content = match std::fs::read_to_string(&resolved_path) {
1572 Ok(content) => content,
1573 Err(e) => {
1574 if debug_mode {
1575 println!(
1576 "DEBUG: Error reading file {:?}: {:?} (size: {} bytes)",
1577 resolved_path,
1578 e,
1579 metadata.len()
1580 );
1581 }
1582 return Err(anyhow::anyhow!("Failed to read file: {}", e));
1583 }
1584 };
1585
1586 for (line_number, line) in content.lines().enumerate() {
1588 if line.len() > 2000 {
1590 if debug_mode {
1591 println!(
1592 "DEBUG: Skipping line {} in file {:?} - line too long ({} characters)",
1593 line_number + 1,
1594 file_path,
1595 line.len()
1596 );
1597 }
1598 continue;
1599 }
1600
1601 let matches = regex_set.matches(line);
1603 if matches.matched_any() {
1604 for pattern_idx in matches.iter() {
1606 if individual_regexes[pattern_idx].is_match(line) {
1608 for &term_idx in &pattern_to_terms[pattern_idx] {
1610 term_map
1611 .entry(term_idx)
1612 .or_insert_with(HashSet::new)
1613 .insert(line_number + 1); }
1615 }
1616 }
1617 }
1618 }
1619
1620 Ok(term_map)
1621}
1622
/// Maps a (case-insensitive) file-extension-style language alias to its
/// canonical language name, e.g. `"rs"` -> `"rust"`, `"TSX"` -> `"typescript"`.
///
/// Unknown aliases are returned unchanged — note this preserves the caller's
/// original casing (`"Rust"` stays `"Rust"`), while known aliases always map
/// to a lowercase canonical name.
fn normalize_language_alias(lang: &str) -> &str {
    // Alias -> canonical-name table, scanned linearly (the list is tiny).
    const ALIASES: &[(&str, &str)] = &[
        ("rs", "rust"),
        ("js", "javascript"),
        ("jsx", "javascript"),
        ("ts", "typescript"),
        ("tsx", "typescript"),
        ("py", "python"),
        ("h", "c"),
        ("cc", "cpp"),
        ("cxx", "cpp"),
        ("hpp", "cpp"),
        ("hxx", "cpp"),
        ("rb", "ruby"),
        ("cs", "csharp"),
    ];

    let lowered = lang.to_lowercase();
    ALIASES
        .iter()
        .find(|(alias, _)| *alias == lowered)
        .map(|&(_, canonical)| canonical)
        .unwrap_or(lang)
}