1mod file_paths;
8mod formatter;
9mod processor;
10mod prompts;
11pub mod symbol_finder;
12
13#[allow(unused_imports)]
15pub use file_paths::{
16 extract_file_paths_from_git_diff, extract_file_paths_from_text, is_git_diff_format,
17 parse_file_with_line,
18};
19#[allow(unused_imports)]
20pub use formatter::{
21 format_and_print_extraction_results, format_extraction_dry_run, format_extraction_results,
22};
23#[allow(unused_imports)]
24pub use processor::process_file_for_extraction;
25#[allow(unused_imports)]
26pub use prompts::PromptTemplate;
27
28use anyhow::Result;
29use probe_code::extract::file_paths::{set_custom_ignores, FilePathInfo};
30use probe_code::models::SearchResult;
31use std::collections::HashSet;
32use std::io::Read;
33#[allow(unused_imports)]
34use std::path::PathBuf;
35
36pub struct ExtractOptions {
38 pub files: Vec<String>,
40 pub custom_ignores: Vec<String>,
42 pub context_lines: usize,
44 pub format: String,
46 pub from_clipboard: bool,
48 pub input_file: Option<String>,
50 pub to_clipboard: bool,
52 pub dry_run: bool,
54 pub diff: bool,
56 pub allow_tests: bool,
58 pub keep_input: bool,
60 pub prompt: Option<prompts::PromptTemplate>,
62 pub instructions: Option<String>,
64}
65
66pub fn handle_extract(options: ExtractOptions) -> Result<()> {
68 use arboard::Clipboard;
69 use colored::*;
70
71 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
73
74 if debug_mode {
75 println!("\n[DEBUG] ===== Extract Command Started =====");
76 println!("[DEBUG] Files to process: {files:?}", files = options.files);
77 println!(
78 "[DEBUG] Custom ignores: {custom_ignores:?}",
79 custom_ignores = options.custom_ignores
80 );
81 println!(
82 "[DEBUG] Context lines: {context_lines}",
83 context_lines = options.context_lines
84 );
85 println!("[DEBUG] Output format: {format}", format = options.format);
86 println!(
87 "[DEBUG] Read from clipboard: {from_clipboard}",
88 from_clipboard = options.from_clipboard
89 );
90 println!(
91 "[DEBUG] Write to clipboard: {to_clipboard}",
92 to_clipboard = options.to_clipboard
93 );
94 println!("[DEBUG] Dry run: {dry_run}", dry_run = options.dry_run);
95 println!("[DEBUG] Parse as git diff: {diff}", diff = options.diff);
96 println!(
97 "[DEBUG] Allow tests: {allow_tests}",
98 allow_tests = options.allow_tests
99 );
100 println!(
101 "[DEBUG] Prompt template: {prompt:?}",
102 prompt = options.prompt
103 );
104 println!(
105 "[DEBUG] Instructions: {instructions:?}",
106 instructions = options.instructions
107 );
108 }
109
110 set_custom_ignores(&options.custom_ignores);
112
113 let mut file_paths: Vec<FilePathInfo> = Vec::new();
114
115 let mut original_input: Option<String> = None;
117
118 if options.from_clipboard {
119 println!("{}", "Reading from clipboard...".bold().blue());
121 let mut clipboard = Clipboard::new()?;
122 let buffer = clipboard.get_text()?;
123
124 if options.keep_input {
126 original_input = Some(buffer.clone());
127 if debug_mode {
128 println!(
129 "[DEBUG] Stored original clipboard input: {} bytes",
130 original_input.as_ref().map_or(0, |s| s.len())
131 );
132 }
133 }
134
135 if debug_mode {
136 println!(
137 "[DEBUG] Reading from clipboard, content length: {} bytes",
138 buffer.len()
139 );
140 }
141
142 let is_diff_format = options.diff || is_git_diff_format(&buffer);
144
145 if is_diff_format {
146 if debug_mode {
148 println!("[DEBUG] Parsing clipboard content as git diff format");
149 }
150 file_paths = extract_file_paths_from_git_diff(&buffer, options.allow_tests);
151 } else {
152 file_paths = file_paths::extract_file_paths_from_text(&buffer, options.allow_tests);
154 }
155
156 if debug_mode {
157 println!(
158 "[DEBUG] Extracted {} file paths from clipboard",
159 file_paths.len()
160 );
161 for (path, start, end, symbol, lines) in &file_paths {
162 println!(
163 "[DEBUG] - {:?} (lines: {:?}-{:?}, symbol: {:?}, specific lines: {:?})",
164 path,
165 start,
166 end,
167 symbol,
168 lines.as_ref().map(|l| l.len())
169 );
170 }
171 }
172
173 if file_paths.is_empty() {
174 println!("{}", "No file paths found in clipboard.".yellow().bold());
175 return Ok(());
176 }
177 } else if let Some(input_file_path) = &options.input_file {
178 println!(
180 "{}",
181 format!("Reading from file: {input_file_path}...")
182 .bold()
183 .blue()
184 );
185
186 let input_path = std::path::Path::new(input_file_path);
188 if !input_path.exists() {
189 return Err(anyhow::anyhow!(
190 "Input file does not exist: {}",
191 input_file_path
192 ));
193 }
194
195 let buffer = std::fs::read_to_string(input_path)?;
197
198 if options.keep_input {
200 original_input = Some(buffer.clone());
201 if debug_mode {
202 println!(
203 "[DEBUG] Stored original file input: {} bytes",
204 original_input.as_ref().map_or(0, |s| s.len())
205 );
206 }
207 }
208
209 if debug_mode {
210 println!(
211 "[DEBUG] Reading from file, content length: {} bytes",
212 buffer.len()
213 );
214 }
215
216 let is_diff_format = options.diff || is_git_diff_format(&buffer);
218
219 if is_diff_format {
220 if debug_mode {
222 println!("[DEBUG] Parsing file content as git diff format");
223 }
224 file_paths = extract_file_paths_from_git_diff(&buffer, options.allow_tests);
225 } else {
226 file_paths = file_paths::extract_file_paths_from_text(&buffer, options.allow_tests);
228 }
229
230 if debug_mode {
231 println!(
232 "[DEBUG] Extracted {} file paths from input file",
233 file_paths.len()
234 );
235 for (path, start, end, symbol, lines) in &file_paths {
236 println!(
237 "[DEBUG] - {:?} (lines: {:?}-{:?}, symbol: {:?}, specific lines: {:?})",
238 path,
239 start,
240 end,
241 symbol,
242 lines.as_ref().map(|l| l.len())
243 );
244 }
245 }
246
247 if file_paths.is_empty() {
248 println!(
249 "{}",
250 format!("No file paths found in input file: {input_file_path}")
251 .yellow()
252 .bold()
253 );
254 return Ok(());
255 }
256 } else if options.files.is_empty() {
257 let is_stdin_available = !atty::is(atty::Stream::Stdin);
259
260 if is_stdin_available {
261 println!("{}", "Reading from stdin...".bold().blue());
263 let mut buffer = String::new();
264 std::io::stdin().read_to_string(&mut buffer)?;
265
266 if options.keep_input {
268 original_input = Some(buffer.clone());
269 if debug_mode {
270 println!(
271 "[DEBUG] Stored original stdin input: {} bytes",
272 original_input.as_ref().map_or(0, |s| s.len())
273 );
274 }
275 }
276
277 if debug_mode {
278 println!(
279 "[DEBUG] Reading from stdin, content length: {} bytes",
280 buffer.len()
281 );
282 }
283
284 let is_diff_format = options.diff || is_git_diff_format(&buffer);
286
287 if is_diff_format {
288 if debug_mode {
290 println!("[DEBUG] Parsing stdin content as git diff format");
291 }
292 file_paths = extract_file_paths_from_git_diff(&buffer, options.allow_tests);
293 } else {
294 file_paths = file_paths::extract_file_paths_from_text(&buffer, options.allow_tests);
296 }
297 } else {
298 println!(
300 "{}",
301 "No files specified and no stdin input detected."
302 .yellow()
303 .bold()
304 );
305 println!("{}", "Use --help for usage information.".blue());
306 return Ok(());
307 }
308
309 if debug_mode {
310 println!(
311 "[DEBUG] Extracted {} file paths from stdin",
312 file_paths.len()
313 );
314 for (path, start, end, symbol, lines) in &file_paths {
315 println!(
316 "[DEBUG] - {:?} (lines: {:?}-{:?}, symbol: {:?}, specific lines: {:?})",
317 path,
318 start,
319 end,
320 symbol,
321 lines.as_ref().map(|l| l.len())
322 );
323 }
324 }
325
326 if file_paths.is_empty() {
327 println!("{}", "No file paths found in stdin.".yellow().bold());
328 return Ok(());
329 }
330 } else {
331 if debug_mode {
333 println!("[DEBUG] Parsing command-line arguments");
334 }
335
336 if options.keep_input {
338 original_input = Some(options.files.join(" "));
339 if debug_mode {
340 println!(
341 "[DEBUG] Stored original command-line input: {}",
342 original_input.as_ref().unwrap_or(&String::new())
343 );
344 }
345 }
346
347 for file in &options.files {
348 if debug_mode {
349 println!("[DEBUG] Parsing file argument: {file}");
350 }
351
352 let paths = file_paths::parse_file_with_line(file, options.allow_tests);
353
354 if debug_mode {
355 println!(
356 "[DEBUG] Parsed {} paths from argument '{}'",
357 paths.len(),
358 file
359 );
360 for (path, start, end, symbol, lines) in &paths {
361 println!(
362 "[DEBUG] - {:?} (lines: {:?}-{:?}, symbol: {:?}, specific lines: {:?})",
363 path,
364 start,
365 end,
366 symbol,
367 lines.as_ref().map(|l| l.len())
368 );
369 }
370 }
371
372 file_paths.extend(paths);
373 }
374 }
375
376 if options.format != "json" && options.format != "xml" {
378 println!("{text}", text = "Files to extract:".bold().green());
379
380 for (path, start_line, end_line, symbol, lines) in &file_paths {
381 if let (Some(start), Some(end)) = (start_line, end_line) {
382 println!(
383 " {path} (lines {start}-{end})",
384 path = path.display(),
385 start = start,
386 end = end
387 );
388 } else if let Some(line_num) = start_line {
389 println!(
390 " {path} (line {line_num})",
391 path = path.display(),
392 line_num = line_num
393 );
394 } else if let Some(sym) = symbol {
395 println!(" {path} (symbol: {sym})", path = path.display());
396 } else if let Some(lines_set) = lines {
397 println!(
398 " {path} (specific lines: {count} lines)",
399 path = path.display(),
400 count = lines_set.len()
401 );
402 } else {
403 println!(" {path}", path = path.display());
404 }
405 }
406
407 if options.context_lines > 0 {
408 println!(
409 "Context lines: {context_lines}",
410 context_lines = options.context_lines
411 );
412 }
413
414 if options.dry_run {
415 println!(
416 "{text}",
417 text = "Dry run (file names and lines only)".yellow()
418 );
419 }
420
421 println!("Format: {format}", format = options.format);
422 println!();
423 }
424
425 let system_prompt = if let Some(prompt_template) = &options.prompt {
427 if debug_mode {
428 println!("[DEBUG] Processing prompt template: {prompt_template:?}");
429 }
430 match prompt_template.get_content() {
431 Ok(content) => {
432 if debug_mode {
433 println!(
434 "[DEBUG] Loaded prompt template content ({} bytes)",
435 content.len()
436 );
437 }
438 Some(content)
439 }
440 Err(e) => {
441 eprintln!(
442 "{text}",
443 text = format!("Error loading prompt template: {e}").red()
444 );
445 if debug_mode {
446 println!("[DEBUG] Error loading prompt template: {e}");
447 }
448 None
449 }
450 }
451 } else {
452 None
453 };
454
455 use rayon::prelude::*;
457 use std::sync::{Arc, Mutex};
458
459 let results_mutex = Arc::new(Mutex::new(Vec::<SearchResult>::new()));
461 let errors_mutex = Arc::new(Mutex::new(Vec::<String>::new()));
462
463 struct FileProcessingParams {
465 path: std::path::PathBuf,
466 start_line: Option<usize>,
467 end_line: Option<usize>,
468 symbol: Option<String>,
469 specific_lines: Option<HashSet<usize>>,
470 allow_tests: bool,
471 context_lines: usize,
472 debug_mode: bool,
473 format: String,
474
475 #[allow(dead_code)]
476 original_input: Option<String>,
477 #[allow(dead_code)]
478 system_prompt: Option<String>,
479 #[allow(dead_code)]
480 user_instructions: Option<String>,
481 }
482
483 let file_params: Vec<FileProcessingParams> = file_paths
485 .into_iter()
486 .map(
487 |(path, start_line, end_line, symbol, specific_lines)| FileProcessingParams {
488 path,
489 start_line,
490 end_line,
491 symbol,
492 specific_lines,
493 allow_tests: options.allow_tests,
494 context_lines: options.context_lines,
495 debug_mode,
496 format: options.format.clone(),
497 original_input: original_input.clone(),
498 system_prompt: system_prompt.clone(),
499 user_instructions: options.instructions.clone(),
500 },
501 )
502 .collect();
503
504 file_params.par_iter().for_each(|params| {
506 if params.debug_mode {
507 println!("\n[DEBUG] Processing file: {:?}", params.path);
508 println!("[DEBUG] Start line: {:?}", params.start_line);
509 println!("[DEBUG] End line: {:?}", params.end_line);
510 println!("[DEBUG] Symbol: {:?}", params.symbol);
511 println!(
512 "[DEBUG] Specific lines: {:?}",
513 params.specific_lines.as_ref().map(|l| l.len())
514 );
515
516 if params.path.exists() {
518 println!("[DEBUG] File exists: Yes");
519
520 if let Some(ext) = params.path.extension().and_then(|e| e.to_str()) {
522 let language = formatter::get_language_from_extension(ext);
523 println!("[DEBUG] File extension: {ext}");
524 println!(
525 "[DEBUG] Detected language: {}",
526 if language.is_empty() {
527 "unknown"
528 } else {
529 language
530 }
531 );
532 } else {
533 println!("[DEBUG] File has no extension");
534 }
535 } else {
536 println!("[DEBUG] File exists: No");
537 }
538 }
539
540 if params.debug_mode && crate::language::is_test_file(¶ms.path) && !params.allow_tests {
543 println!("[DEBUG] Test file detected: {:?}", params.path);
544 }
545
546 match processor::process_file_for_extraction(
547 ¶ms.path,
548 params.start_line,
549 params.end_line,
550 params.symbol.as_deref(),
551 params.allow_tests,
552 params.context_lines,
553 params.specific_lines.as_ref(),
554 ) {
555 Ok(result) => {
556 if params.debug_mode {
557 println!("[DEBUG] Successfully extracted code from {:?}", params.path);
558 println!("[DEBUG] Extracted lines: {:?}", result.lines);
559 println!("[DEBUG] Node type: {}", result.node_type);
560 println!("[DEBUG] Code length: {} bytes", result.code.len());
561 println!(
562 "[DEBUG] Estimated tokens: {}",
563 crate::search::search_tokens::count_tokens(&result.code)
564 );
565 }
566
567 let mut results = results_mutex.lock().unwrap();
569 results.push(result);
570 }
571 Err(e) => {
572 let error_msg = format!(
573 "Error processing file {path:?}: {e}",
574 path = params.path,
575 e = e
576 );
577 if params.debug_mode {
578 println!("[DEBUG] Error: {error_msg}");
579 }
580 if params.format != "json" && params.format != "xml" {
582 eprintln!("{}", error_msg.red());
583 }
584 let mut errors = errors_mutex.lock().unwrap();
586 errors.push(error_msg);
587 }
588 }
589 });
590 let mut results = Arc::try_unwrap(results_mutex)
592 .expect("Failed to unwrap results mutex")
593 .into_inner()
594 .expect("Failed to get inner results");
595
596 let errors = Arc::try_unwrap(errors_mutex)
597 .expect("Failed to unwrap errors mutex")
598 .into_inner()
599 .expect("Failed to get inner errors");
600
601 if debug_mode {
603 println!(
604 "[DEBUG] Before deduplication: {len} results",
605 len = results.len()
606 );
607 }
608
609 results.sort_by(|a, b| {
612 let a_file = &a.file;
613 let b_file = &b.file;
614
615 if a_file != b_file {
617 return a_file.cmp(b_file);
618 }
619
620 let a_range_size = a.lines.1 - a.lines.0;
622 let b_range_size = b.lines.1 - b.lines.0;
623 b_range_size.cmp(&a_range_size)
624 });
625
626 if debug_mode {
627 println!("[DEBUG] Sorted results by file path and range size");
628 for (i, result) in results.iter().enumerate() {
629 println!(
630 "[DEBUG] Result {}: {} (lines {}-{}, size: {})",
631 i,
632 result.file,
633 result.lines.0,
634 result.lines.1,
635 result.lines.1 - result.lines.0
636 );
637 }
638 }
639
640 let mut to_retain = vec![true; results.len()];
642
643 let mut seen_exact = HashSet::new();
645
646 for i in 0..results.len() {
647 if !to_retain[i] {
648 continue; }
650
651 let result_i = &results[i];
652 let file_i = &result_i.file;
653 let start_i = result_i.lines.0;
654 let end_i = result_i.lines.1;
655
656 let key = format!("{file_i}:{start_i}:{end_i}");
658 if !seen_exact.insert(key) {
659 to_retain[i] = false;
660 if debug_mode {
661 println!("[DEBUG] Removing exact duplicate: {file_i} (lines {start_i}-{end_i})");
662 }
663 continue;
664 }
665
666 for j in i + 1..results.len() {
668 if !to_retain[j] {
669 continue; }
671
672 let result_j = &results[j];
673 let file_j = &result_j.file;
674 let start_j = result_j.lines.0;
675 let end_j = result_j.lines.1;
676
677 if file_i != file_j {
679 continue;
680 }
681
682 if start_j >= start_i && end_j <= end_i {
684 to_retain[j] = false;
685 if debug_mode {
686 println!("[DEBUG] Removing nested duplicate: {file_j} (lines {start_j}-{end_j}) contained within (lines {start_i}-{end_i})");
687 }
688 }
689 }
690 }
691
692 let original_len = results.len();
694 let mut new_results = Vec::with_capacity(original_len);
695
696 for i in 0..original_len {
697 if to_retain[i] {
698 new_results.push(results[i].clone());
699 }
700 }
701
702 results = new_results;
703
704 if debug_mode {
705 println!(
706 "[DEBUG] After deduplication: {len} results",
707 len = results.len()
708 );
709 }
710
711 if debug_mode {
712 println!("\n[DEBUG] ===== Extraction Summary =====");
713 println!("[DEBUG] Total results: {}", results.len());
714 println!("[DEBUG] Total errors: {}", errors.len());
715 println!("[DEBUG] Output format: {}", options.format);
716 println!("[DEBUG] Dry run: {}", options.dry_run);
717 }
718
719 let res = {
721 let colors_enabled = if options.to_clipboard {
723 let was_enabled = colored::control::SHOULD_COLORIZE.should_colorize();
724 colored::control::set_override(false);
725 was_enabled
726 } else {
727 false
728 };
729
730 let result = if options.dry_run {
732 formatter::format_extraction_dry_run(
733 &results,
734 &options.format,
735 original_input.as_deref(),
736 system_prompt.as_deref(),
737 options.instructions.as_deref(),
738 )
739 } else {
740 formatter::format_extraction_results(
741 &results,
742 &options.format,
743 original_input.as_deref(),
744 system_prompt.as_deref(),
745 options.instructions.as_deref(),
746 )
747 };
748
749 if options.to_clipboard && colors_enabled {
751 colored::control::set_override(true);
752 }
753
754 result
755 };
756 match res {
757 Ok(formatted_output) => {
758 if options.to_clipboard {
759 let mut clipboard = Clipboard::new()?;
761 clipboard.set_text(&formatted_output)?;
762 println!("{}", "Results copied to clipboard.".green().bold());
763
764 if debug_mode {
765 println!(
766 "[DEBUG] Wrote {} bytes to clipboard",
767 formatted_output.len()
768 );
769 }
770 } else {
771 println!("{formatted_output}");
773 }
774 }
775 Err(e) => {
776 if options.format != "json" && options.format != "xml" {
778 eprintln!("{}", format!("Error formatting results: {e}").red());
779 }
780 if debug_mode {
781 println!("[DEBUG] Error formatting results: {e}");
782 }
783 }
784 }
785
786 if !errors.is_empty() && options.format != "json" && options.format != "xml" {
788 println!();
789 println!(
790 "{} {} {}",
791 "Encountered".red().bold(),
792 errors.len(),
793 if errors.len() == 1 { "error" } else { "errors" }
794 );
795 }
796
797 if debug_mode {
798 println!("[DEBUG] ===== Extract Command Completed =====");
799 }
800
801 Ok(())
802}