1use glob::glob;
7use ignore::WalkBuilder;
8use probe_code::language::is_test_file;
9use probe_code::path_resolver::resolve_path;
10use regex::Regex;
11use std::collections::HashSet;
12use std::path::PathBuf;
13
/// A file reference extracted from user input or a git diff.
///
/// Tuple fields, in order:
/// 1. `PathBuf` - the path of the referenced file.
/// 2. `Option<usize>` - the start line (or the single line for `file:line` references).
/// 3. `Option<usize>` - the end line of a `file:start-end` range or of a diff's changed lines.
/// 4. `Option<String>` - the symbol name from a `file#symbol` reference.
/// 5. `Option<HashSet<usize>>` - the exact set of line numbers of interest, when known.
pub type FilePathInfo = (
    // File path.
    PathBuf,
    // Start line, if any.
    Option<usize>,
    // End line, if any.
    Option<usize>,
    // Symbol name, if any.
    Option<String>,
    // Specific line numbers, if any.
    Option<HashSet<usize>>,
);
/// Reports whether `content` looks like `git diff` output.
///
/// Leading whitespace is skipped; the remainder must begin with the
/// `diff --git` header that git emits for each changed file.
pub fn is_git_diff_format(content: &str) -> bool {
    const DIFF_HEADER: &str = "diff --git";

    let without_leading_ws = content.trim_start();
    without_leading_ws.strip_prefix(DIFF_HEADER).is_some()
}
35
36pub fn extract_file_paths_from_git_diff(text: &str, allow_tests: bool) -> Vec<FilePathInfo> {
51 let mut results = Vec::new();
52 let mut processed_files = HashSet::new();
53 let mut current_file: Option<PathBuf> = None;
54 let mut current_file_lines = HashSet::new();
55
56 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
58
59 let lines: Vec<&str> = text.lines().collect();
61
62 let diff_header_regex = Regex::new(r"^diff --git a/(.*) b/(.*)$").unwrap();
64
65 let hunk_header_regex = Regex::new(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@").unwrap();
70
71 let finalize_file = |results: &mut Vec<FilePathInfo>,
73 processed_files: &mut HashSet<String>,
74 file_path: &PathBuf,
75 changed_lines: &HashSet<usize>,
76 allow_tests: bool,
77 debug_mode: bool| {
78 if !changed_lines.is_empty()
80 && !processed_files.contains(&file_path.to_string_lossy().to_string())
81 {
82 let is_test = is_test_file(file_path);
84 if !is_ignored_by_gitignore(file_path) && (allow_tests || !is_test) {
85 if debug_mode {
86 println!(
87 "[DEBUG] Adding file with {} changed lines: {:?}",
88 changed_lines.len(),
89 file_path
90 );
91 }
92 let start_line = changed_lines.iter().min().cloned();
94 let end_line = changed_lines.iter().max().cloned();
95
96 results.push((
98 file_path.clone(),
99 start_line,
100 end_line,
101 None,
102 Some(changed_lines.clone()),
103 ));
104 processed_files.insert(file_path.to_string_lossy().to_string());
105 } else if debug_mode {
106 if is_ignored_by_gitignore(file_path) {
107 println!("[DEBUG] Skipping ignored file: {file_path:?}");
108 } else if !allow_tests && is_test {
109 println!("[DEBUG] Skipping test file: {file_path:?}");
110 }
111 }
112 }
113 };
114
115 let mut i = 0;
117 while i < lines.len() {
118 let line = lines[i];
119
120 if let Some(cap) = diff_header_regex.captures(line) {
122 if let Some(file_path) = ¤t_file {
124 finalize_file(
125 &mut results,
126 &mut processed_files,
127 file_path,
128 ¤t_file_lines,
129 allow_tests,
130 debug_mode,
131 );
132 }
133
134 let file_path = cap.get(2).unwrap().as_str();
136 current_file = Some(PathBuf::from(file_path));
137 current_file_lines = HashSet::new(); if debug_mode {
140 println!("[DEBUG] Found file in git diff: {file_path:?}");
141 }
142
143 i += 1;
144 continue;
145 }
146 else if let Some(cap) = hunk_header_regex.captures(line) {
148 if let Some(file_path) = ¤t_file {
149 let new_start: usize = cap.get(3).unwrap().as_str().parse().unwrap_or(1);
151 let _new_len: usize = cap
152 .get(4)
153 .map(|m| m.as_str().parse().unwrap_or(1))
154 .unwrap_or(1);
155
156 if debug_mode {
157 println!(
158 "[DEBUG] Found hunk for file {file_path:?}: parsing for actual changed lines"
159 );
160 }
161
162 i += 1;
164
165 let mut current_line = new_start;
167 while i < lines.len() {
168 let hunk_line = lines[i];
169
170 if hunk_line.starts_with("@@") || hunk_line.starts_with("diff --git") {
172 break;
174 }
175
176 if hunk_line.starts_with('+') && !hunk_line.starts_with("+++") {
178 if debug_mode {
180 println!("[DEBUG] Found changed line at {current_line}: {hunk_line}");
181 }
182 current_file_lines.insert(current_line);
183 }
184
185 if !hunk_line.starts_with('-') {
187 current_line += 1;
188 }
189
190 i += 1;
191 }
192
193 continue;
195 }
196 }
197
198 i += 1;
200 }
201
202 if let Some(file_path) = ¤t_file {
204 finalize_file(
205 &mut results,
206 &mut processed_files,
207 file_path,
208 ¤t_file_lines,
209 allow_tests,
210 debug_mode,
211 );
212 }
213
214 results
215}
216
217pub fn extract_file_paths_from_text(text: &str, allow_tests: bool) -> Vec<FilePathInfo> {
233 let mut results = Vec::new();
234 let mut processed_paths = HashSet::new();
235
236 let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
238
239 let mut preprocessed_text = String::with_capacity(text.len());
243 let mut in_quote = false;
244 let mut quote_char = ' ';
245 let mut prev_char = ' ';
246
247 for (i, c) in text.chars().enumerate() {
248 let next_char = text.chars().nth(i + 1).unwrap_or(' ');
249
250 let is_apostrophe_in_word =
255 c == '\'' && prev_char.is_alphanumeric() && next_char.is_alphanumeric();
256
257 if !in_quote && (c == '`' || c == '"' || (c == '\'' && !is_apostrophe_in_word)) {
258 in_quote = true;
260 quote_char = c;
261 preprocessed_text.push(' '); } else if in_quote && c == quote_char {
263 in_quote = false;
265 preprocessed_text.push(' '); } else {
267 preprocessed_text.push(c);
269 }
270
271 prev_char = c;
272 }
273
274 let text = &preprocessed_text;
276
277 let file_symbol_regex =
279 Regex::new(r"(?:^|[\s\r\n])([a-zA-Z0-9_\-./\*\{\}]+\.[a-zA-Z0-9]+)#([a-zA-Z0-9_]+)")
280 .unwrap();
281
282 for cap in file_symbol_regex.captures_iter(text) {
283 let file_path = cap.get(1).unwrap().as_str();
284 let symbol = cap.get(2).unwrap().as_str();
285
286 if file_path.contains('*') || file_path.contains('{') {
291 if let Ok(paths) = glob(file_path) {
292 for entry in paths.flatten() {
293 let is_test = is_test_file(&entry);
295 let should_include =
296 !is_ignored_by_gitignore(&entry) && (allow_tests || !is_test);
297 if should_include {
298 let path_str = entry.to_string_lossy().to_string();
299 processed_paths.insert(path_str.clone());
300 results.push((entry, None, None, Some(symbol.to_string()), None));
302 } else if debug_mode {
303 if is_ignored_by_gitignore(&entry) {
304 println!("DEBUG: Skipping ignored file: {entry:?}");
305 } else if !allow_tests && is_test {
306 println!("DEBUG: Skipping test file: {entry:?}");
307 }
308 }
309 }
310 }
311 } else {
312 match resolve_path(file_path) {
314 Ok(resolved_path) => {
315 let is_test = is_test_file(&resolved_path);
316 if !is_ignored_by_gitignore(&resolved_path) && (allow_tests || !is_test) {
317 processed_paths.insert(file_path.to_string());
318 results.push((resolved_path, None, None, Some(symbol.to_string()), None));
320 } else if debug_mode {
321 if is_ignored_by_gitignore(&resolved_path) {
322 println!("DEBUG: Skipping ignored file: {file_path:?}");
323 } else if !allow_tests && is_test {
324 println!("DEBUG: Skipping test file: {file_path:?}");
325 }
326 }
327 }
328 Err(err) => {
329 if debug_mode {
330 println!("DEBUG: Failed to resolve path '{file_path}': {err}");
331 }
332
333 let path = PathBuf::from(file_path);
335 let is_test = is_test_file(&path);
336 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
337 processed_paths.insert(file_path.to_string());
338 results.push((path, None, None, Some(symbol.to_string()), None));
340 } else if debug_mode {
341 if is_ignored_by_gitignore(&path) {
342 println!("DEBUG: Skipping ignored file: {file_path:?}");
343 } else if !allow_tests && is_test {
344 println!("DEBUG: Skipping test file: {file_path:?}");
345 }
346 }
347 }
348 }
349 }
350 }
351
352 let file_range_regex =
354 Regex::new(r"(?:^|[\s\r\n])([a-zA-Z0-9_\-./\*\{\}]+\.[a-zA-Z0-9]+):(\d+)-(\d+)").unwrap();
355
356 for cap in file_range_regex.captures_iter(text) {
357 let file_path = cap.get(1).unwrap().as_str();
358
359 if processed_paths.contains(file_path) {
361 continue;
362 }
363
364 let start_line = cap.get(2).and_then(|m| m.as_str().parse::<usize>().ok());
365 let end_line = cap.get(3).and_then(|m| m.as_str().parse::<usize>().ok());
366
367 if let (Some(start), Some(end)) = (start_line, end_line) {
368 if file_path.contains('*') || file_path.contains('{') {
370 if let Ok(paths) = glob(file_path) {
371 for entry in paths.flatten() {
372 let is_test = is_test_file(&entry);
374 let should_include =
375 !is_ignored_by_gitignore(&entry) && (allow_tests || !is_test);
376 if should_include {
377 processed_paths.insert(entry.to_string_lossy().to_string());
378 results.push((entry, Some(start), Some(end), None, None));
379 } else if debug_mode {
380 if is_ignored_by_gitignore(&entry) {
381 println!("DEBUG: Skipping ignored file: {entry:?}");
382 } else if !allow_tests && is_test {
383 println!("DEBUG: Skipping test file: {entry:?}");
384 }
385 }
386 }
387 }
388 } else {
389 match resolve_path(file_path) {
391 Ok(resolved_path) => {
392 let is_test = is_test_file(&resolved_path);
393 if !is_ignored_by_gitignore(&resolved_path) && (allow_tests || !is_test) {
394 processed_paths.insert(file_path.to_string());
395 results.push((resolved_path, Some(start), Some(end), None, None));
396 } else if debug_mode {
397 if is_ignored_by_gitignore(&resolved_path) {
398 println!("DEBUG: Skipping ignored file: {file_path:?}");
399 } else if !allow_tests && is_test {
400 println!("DEBUG: Skipping test file: {file_path:?}");
401 }
402 }
403 }
404 Err(err) => {
405 if debug_mode {
406 println!("DEBUG: Failed to resolve path '{file_path}': {err}");
407 }
408
409 let path = PathBuf::from(file_path);
411 let is_test = is_test_file(&path);
412 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
413 processed_paths.insert(file_path.to_string());
414 results.push((path, Some(start), Some(end), None, None));
415 } else if debug_mode {
416 if is_ignored_by_gitignore(&path) {
417 println!("DEBUG: Skipping ignored file: {file_path:?}");
418 } else if !allow_tests && is_test {
419 println!("DEBUG: Skipping test file: {file_path:?}");
420 }
421 }
422 }
423 }
424 }
425 }
426 }
427
428 let file_line_regex =
430 Regex::new(r"(?:^|[\s\r\n])([a-zA-Z0-9_\-./\*\{\}]+\.[a-zA-Z0-9]+):(\d+)(?::\d+)?")
431 .unwrap();
432
433 for cap in file_line_regex.captures_iter(text) {
434 let file_path = cap.get(1).unwrap().as_str();
435
436 if processed_paths.contains(file_path) {
438 continue;
439 }
440
441 let line_num = cap.get(2).and_then(|m| m.as_str().parse::<usize>().ok());
442
443 if file_path.contains('*') || file_path.contains('{') {
445 if let Ok(paths) = glob(file_path) {
446 for entry in paths.flatten() {
447 let path_str = entry.to_string_lossy().to_string();
448 if !processed_paths.contains(&path_str) {
449 let is_test = is_test_file(&entry);
451 let should_include =
452 !is_ignored_by_gitignore(&entry) && (allow_tests || !is_test);
453 if should_include {
454 processed_paths.insert(path_str);
455 results.push((entry, line_num, None, None, None));
456 } else if debug_mode {
457 if is_ignored_by_gitignore(&entry) {
458 println!("DEBUG: Skipping ignored file: {entry:?}");
459 } else if !allow_tests && is_test {
460 println!("DEBUG: Skipping test file: {entry:?}");
461 }
462 }
463 }
464 }
465 }
466 } else {
467 match resolve_path(file_path) {
469 Ok(path) => {
470 let is_test = is_test_file(&path);
471 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
472 processed_paths.insert(file_path.to_string());
473 results.push((path, line_num, None, None, None));
474 } else if debug_mode {
475 if is_ignored_by_gitignore(&path) {
476 println!("DEBUG: Skipping ignored file: {file_path:?}");
477 } else if !allow_tests && is_test {
478 println!("DEBUG: Skipping test file: {file_path:?}");
479 }
480 }
481 }
482 Err(err) => {
483 if debug_mode {
484 println!("DEBUG: Failed to resolve path '{file_path}': {err}");
485 }
486
487 let path = PathBuf::from(file_path);
489 let is_test = is_test_file(&path);
490 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
491 processed_paths.insert(file_path.to_string());
492 results.push((path, line_num, None, None, None));
493 } else if debug_mode {
494 if is_ignored_by_gitignore(&path) {
495 println!("DEBUG: Skipping ignored file: {file_path:?}");
496 } else if !allow_tests && is_test {
497 println!("DEBUG: Skipping test file: {file_path:?}");
498 }
499 }
500 }
501 }
502 }
503 }
504
505 let simple_file_regex =
508 Regex::new(r"(?:^|[\s\r\n])([a-zA-Z0-9_\-./\*\{\}]+\.[a-zA-Z0-9]+)").unwrap();
509
510 for cap in simple_file_regex.captures_iter(text) {
511 let file_path = cap.get(1).unwrap().as_str();
512
513 if !processed_paths.contains(file_path) {
515 if file_path.contains('*') || file_path.contains('{') {
517 if let Ok(paths) = glob(file_path) {
518 for entry in paths.flatten() {
519 let path_str = entry.to_string_lossy().to_string();
520 if !processed_paths.contains(&path_str) {
521 let is_test = is_test_file(&entry);
523 let should_include =
524 !is_ignored_by_gitignore(&entry) && (allow_tests || !is_test);
525 if should_include {
526 processed_paths.insert(path_str);
527 results.push((entry, None, None, None, None));
528 } else if debug_mode {
529 if is_ignored_by_gitignore(&entry) {
530 println!("DEBUG: Skipping ignored file: {entry:?}");
531 } else if !allow_tests && is_test {
532 println!("DEBUG: Skipping test file: {entry:?}");
533 }
534 }
535 }
536 }
537 }
538 } else {
539 match resolve_path(file_path) {
541 Ok(path) => {
542 let is_test = is_test_file(&path);
543 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
544 results.push((path, None, None, None, None));
545 processed_paths.insert(file_path.to_string());
546 } else if debug_mode {
547 if is_ignored_by_gitignore(&path) {
548 println!("DEBUG: Skipping ignored file: {file_path:?}");
549 } else if !allow_tests && is_test {
550 println!("DEBUG: Skipping test file: {file_path:?}");
551 }
552 }
553 }
554 Err(err) => {
555 if debug_mode {
556 println!("DEBUG: Failed to resolve path '{file_path}': {err}");
557 }
558
559 let path = PathBuf::from(file_path);
561 let is_test = is_test_file(&path);
562 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
563 results.push((path, None, None, None, None));
564 processed_paths.insert(file_path.to_string());
565 } else if debug_mode {
566 if is_ignored_by_gitignore(&path) {
567 println!("DEBUG: Skipping ignored file: {file_path:?}");
568 } else if !allow_tests && is_test {
569 println!("DEBUG: Skipping test file: {file_path:?}");
570 }
571 }
572 }
573 }
574 }
575 }
576 }
577
578 results
579}
580
581pub fn parse_file_with_line(input: &str, allow_tests: bool) -> Vec<FilePathInfo> {
585 let mut results = Vec::new();
586
587 let first_char = input.chars().next().unwrap_or(' ');
590 let last_char = input.chars().last().unwrap_or(' ');
591
592 let cleaned_input = if (first_char == '`' || first_char == '\'' || first_char == '"')
593 && first_char == last_char
594 {
595 &input[1..input.len() - 1]
597 } else {
598 input.trim_matches(|c| c == '`' || c == '"')
600 };
601
602 if let Some((file_part, symbol)) = cleaned_input.split_once('#') {
604 match resolve_path(file_part) {
607 Ok(path) => {
608 let is_test = is_test_file(&path);
609 if allow_tests || !is_test {
610 results.push((path, None, None, Some(symbol.to_string()), None));
612 }
613 }
614 Err(err) => {
615 if std::env::var("DEBUG").unwrap_or_default() == "1" {
616 println!("DEBUG: Failed to resolve path '{file_part}': {err}");
617 }
618
619 let path = PathBuf::from(file_part);
621 let is_test = is_test_file(&path);
622 if allow_tests || !is_test {
623 results.push((path, None, None, Some(symbol.to_string()), None));
625 }
626 }
627 }
628 return results;
629 } else if let Some((file_part, rest)) = cleaned_input.split_once(':') {
630 let line_spec = rest.split(':').next().unwrap_or("");
632
633 if let Some((start_str, end_str)) = line_spec.split_once('-') {
635 let start_num = start_str.parse::<usize>().ok();
636 let end_num = end_str.parse::<usize>().ok();
637
638 if let (Some(start), Some(end)) = (start_num, end_num) {
639 if file_part.contains('*') || file_part.contains('{') {
641 let base_dir = std::path::Path::new(".");
643 let mut builder = WalkBuilder::new(base_dir);
644 builder.git_ignore(true);
645 builder.git_global(true);
646 builder.git_exclude(true);
647
648 if let Ok(paths) = glob(file_part) {
650 for entry in paths.flatten() {
651 let is_test = is_test_file(&entry);
653 let should_include =
654 !is_ignored_by_gitignore(&entry) && (allow_tests || !is_test);
655 if should_include {
656 results.push((entry, Some(start), Some(end), None, None));
657 }
658 }
659 }
660 } else {
661 match resolve_path(file_part) {
663 Ok(path) => {
664 let is_test = is_test_file(&path);
665 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
666 results.push((path, Some(start), Some(end), None, None));
667 }
668 }
669 Err(err) => {
670 if std::env::var("DEBUG").unwrap_or_default() == "1" {
671 println!("DEBUG: Failed to resolve path '{file_part}': {err}");
672 }
673
674 let path = PathBuf::from(file_part);
676 let is_test = is_test_file(&path);
677 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
678 results.push((path, Some(start), Some(end), None, None));
679 }
680 }
681 }
682 }
683 }
684 } else {
685 let line_num = line_spec.parse::<usize>().ok();
687
688 if let Some(num) = line_num {
689 if file_part.contains('*') || file_part.contains('{') {
691 if let Ok(paths) = glob(file_part) {
693 for entry in paths.flatten() {
694 let is_test = is_test_file(&entry);
696 let should_include =
697 !is_ignored_by_gitignore(&entry) && (allow_tests || !is_test);
698 if should_include {
699 let mut lines_set = HashSet::new();
701 lines_set.insert(num);
702 results.push((entry, Some(num), None, None, Some(lines_set)));
703 }
704 }
705 }
706 } else {
707 match resolve_path(file_part) {
709 Ok(path) => {
710 let is_test = is_test_file(&path);
711 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
712 let mut lines_set = HashSet::new();
714 lines_set.insert(num);
715 results.push((path, Some(num), None, None, Some(lines_set)));
716 }
717 }
718 Err(err) => {
719 if std::env::var("DEBUG").unwrap_or_default() == "1" {
720 println!("DEBUG: Failed to resolve path '{file_part}': {err}");
721 }
722
723 let path = PathBuf::from(file_part);
725 let is_test = is_test_file(&path);
726 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
727 let mut lines_set = HashSet::new();
729 lines_set.insert(num);
730 results.push((path, Some(num), None, None, Some(lines_set)));
731 }
732 }
733 }
734 }
735 }
736 }
737 } else {
738 if cleaned_input.contains('*') || cleaned_input.contains('{') {
741 if let Ok(paths) = glob(cleaned_input) {
742 for entry in paths.flatten() {
743 let is_test = is_test_file(&entry);
745 let should_include =
746 !is_ignored_by_gitignore(&entry) && (allow_tests || !is_test);
747 if should_include {
748 results.push((entry, None, None, None, None));
749 }
750 }
751 }
752 } else {
753 match resolve_path(cleaned_input) {
755 Ok(path) => {
756 let is_test = is_test_file(&path);
757 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
758 results.push((path, None, None, None, None));
759 }
760 }
761 Err(err) => {
762 if std::env::var("DEBUG").unwrap_or_default() == "1" {
764 println!("DEBUG: Failed to resolve path '{cleaned_input}': {err}");
765 }
766
767 let path = PathBuf::from(cleaned_input);
769 let is_test = is_test_file(&path);
770 if !is_ignored_by_gitignore(&path) && (allow_tests || !is_test) {
771 results.push((path, None, None, None, None));
772 }
773 }
774 }
775 }
776 }
777
778 results
779}
780
thread_local! {
    /// Extra ignore patterns for the current thread, installed by `set_custom_ignores`.
    static CUSTOM_IGNORES: std::cell::RefCell<Vec<String>> = const { std::cell::RefCell::new(Vec::new()) };
}

/// Replaces the current thread's custom ignore patterns with `patterns`.
///
/// The patterns are consulted by `is_ignored_by_gitignore` as case-insensitive
/// substrings of the path. Passing an empty slice clears them.
pub fn set_custom_ignores(patterns: &[String]) {
    CUSTOM_IGNORES.with(|cell| {
        let mut ignores = cell.borrow_mut();
        ignores.clear();
        ignores.extend_from_slice(patterns);
    });
}

/// Reports whether `path` should be skipped as an ignored file.
///
/// This is a heuristic; no `.gitignore` file is read. A path is ignored when:
///
/// - any of its components (split on `/` or `\`) equals, case-insensitively,
///   one of the built-in directory names such as `node_modules` or `target`.
///   Whole components are compared so that files like `build.rs`,
///   `src/builder.rs` or `.github/ci.yml` are not dropped by accident; or
/// - it contains, case-insensitively, any non-empty custom pattern installed
///   with `set_custom_ignores` on the current thread.
///
/// Setting the `DEBUG` environment variable to `1` prints the matching pattern.
fn is_ignored_by_gitignore(path: &std::path::Path) -> bool {
    const COMMON_IGNORE_PATTERNS: [&str; 11] = [
        "node_modules",
        "vendor",
        "target",
        "dist",
        "build",
        ".git",
        ".svn",
        ".hg",
        ".idea",
        ".vscode",
        "__pycache__",
    ];

    let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";

    // Matching is case-insensitive: both sides are compared in lowercase.
    let path_str = path.to_string_lossy().to_lowercase();

    let builtin_hit = path_str
        .split(|c| c == '/' || c == '\\')
        .find_map(|component| {
            COMMON_IGNORE_PATTERNS
                .iter()
                .find(|&&name| name == component)
        });

    if let Some(pattern) = builtin_hit {
        if debug_mode {
            println!("DEBUG: File {path:?} is ignored (contains pattern '{pattern}')");
        }
        return true;
    }

    let custom_hit = CUSTOM_IGNORES.with(|cell| {
        cell.borrow()
            .iter()
            // An empty pattern is a substring of every path; skip it rather than
            // ignore everything.
            .find(|pattern| {
                !pattern.is_empty() && path_str.contains(pattern.to_lowercase().as_str())
            })
            .cloned()
    });

    if let Some(pattern) = custom_hit {
        if debug_mode {
            println!("DEBUG: File {path:?} is ignored (contains custom pattern '{pattern}')");
        }
        return true;
    }

    false
}