1use regex::Regex;
12use std::collections::HashSet;
13use std::sync::LazyLock;
14
/// A position inside the analysed source text.
///
/// Values produced by the detectors in this file are zero-based: the first
/// byte of the input is reported as line 0, column 0. `column` is measured in
/// bytes from the start of the line, not in characters.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Location {
    /// Zero-based line index.
    pub line: usize,
    /// Zero-based byte column within the line.
    pub column: usize,
    /// Byte offset of the position, including any base offset the caller supplied.
    pub offset: usize,
}
28
/// How serious a reported [`Diagnostic`] is.
///
/// The report formatter in this file renders the variants as `ERROR`,
/// `WARNING` and `INFO` respectively.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Severity {
    /// Rendered as `ERROR` in reports.
    Error,
    /// Rendered as `WARNING` in reports.
    Warning,
    /// Rendered as `INFO` in reports.
    Info,
}
39
/// A heredoc-related construct recognised by one of the detectors.
///
/// Every variant carries the [`Location`] at which the construct was found.
#[derive(Debug, Clone, PartialEq)]
pub enum AntiPattern {
    /// A heredoc operator inside the body of a `format NAME =` declaration.
    FormatHeredoc { location: Location, format_name: String, heredoc_delimiter: String },
    /// A heredoc operator inside a `BEGIN { ... }` block.
    BeginTimeHeredoc { location: Location, heredoc_content: String, side_effects: Vec<String> },
    /// A heredoc whose delimiter is a variable or expression; `expression` is the matched text.
    DynamicHeredocDelimiter { location: Location, expression: String },
    /// A `use Filter::...` statement; `module` is the part after `Filter::`.
    SourceFilterHeredoc { location: Location, module: String },
    /// A heredoc operator inside a regex code block.
    RegexCodeBlockHeredoc { location: Location },
    /// A heredoc operator inside a quoted string passed to `eval`.
    EvalStringHeredoc { location: Location },
    /// A `print HANDLE <<...` statement whose handle also appears in a `tie` statement.
    TiedHandleHeredoc { location: Location, handle_name: String },
}
61
/// A human-readable finding describing one detected [`AntiPattern`].
#[derive(Debug, Clone, PartialEq)]
pub struct Diagnostic {
    /// How serious the finding is.
    pub severity: Severity,
    /// The detected construct, including its location.
    pub pattern: AntiPattern,
    /// One-line summary; used as the headline of a report entry.
    pub message: String,
    /// Longer description of why the construct is problematic.
    pub explanation: String,
    /// Optional remediation advice; `None` when the detector offers none.
    pub suggested_fix: Option<String>,
    /// Pointers to further reading (the detectors here use `perldoc ...` names).
    pub references: Vec<String>,
}
82
/// Runs a fixed set of pattern detectors over source text and collects their diagnostics.
pub struct AntiPatternDetector {
    /// The registered detectors, each run once per analysis.
    patterns: Vec<Box<dyn PatternDetector>>,
}
93
/// A single anti-pattern check: finds occurrences and turns them into diagnostics.
trait PatternDetector: Send + Sync {
    /// Scans `code` and returns every occurrence together with its location.
    ///
    /// * `code` - the source text to scan.
    /// * `offset` - base value added to each match position to form `Location::offset`.
    /// * `line_starts` - byte offsets at which each line of `code` begins,
    ///   used to derive line and column numbers.
    fn detect(
        &self,
        code: &str,
        offset: usize,
        line_starts: &[usize],
    ) -> Vec<(AntiPattern, Location)>;

    /// Builds the diagnostic for `pattern`.
    ///
    /// The implementations in this file return `None` when `pattern` is not
    /// the variant the detector is responsible for.
    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic>;
}
103
/// Returns the byte offset at which every line of `code` begins.
///
/// The first entry is always `0`; each `\n` contributes the offset of the
/// byte that follows it, so a trailing newline yields a final entry equal to
/// `code.len()`.
fn build_line_starts(code: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(code.match_indices('\n').map(|(newline_at, _)| newline_at + 1))
        .collect()
}
116
117fn location_from_start(line_starts: &[usize], offset: usize, start: usize) -> Location {
118 let insertion = line_starts.partition_point(|&line_start| line_start <= start);
119 let line = insertion.saturating_sub(1);
120 let line_start = line_starts.get(line).copied().unwrap_or(0);
121 let column = start.saturating_sub(line_start);
122
123 Location { line, column, offset: offset + start }
124}
125
/// Blanks out comments and quoted strings so pattern matching only sees code.
///
/// Every byte of a `#` line comment and of a single- or double-quoted string
/// (delimiters included) is replaced by a space, so the result has exactly the
/// same byte length as `code` and match offsets stay valid for the original
/// text. The newline that ends a comment is kept; newlines inside a quoted
/// string are blanked like any other character. Inside a string a backslash
/// protects the character that follows it from closing the string.
fn mask_non_code_regions(code: &str) -> String {
    /// What kind of text the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Region {
        Code,
        Comment,
        /// Inside a string opened by the given quote character.
        Quoted(char),
    }

    let mut masked = String::with_capacity(code.len());
    let mut region = Region::Code;
    // True when the previous character inside a string was an unescaped backslash.
    let mut after_backslash = false;

    for ch in code.chars() {
        let keep = match region {
            Region::Comment => {
                let ends_comment = ch == '\n';
                if ends_comment {
                    region = Region::Code;
                }
                ends_comment
            }
            Region::Quoted(quote) => {
                if after_backslash {
                    after_backslash = false;
                } else if ch == '\\' {
                    after_backslash = true;
                } else if ch == quote {
                    region = Region::Code;
                }
                false
            }
            Region::Code => match ch {
                '#' => {
                    region = Region::Comment;
                    false
                }
                '\'' | '"' => {
                    region = Region::Quoted(ch);
                    false
                }
                _ => true,
            },
        };

        if keep {
            masked.push(ch);
        } else {
            // One space per byte keeps offsets aligned with the original text.
            masked.extend(std::iter::repeat(' ').take(ch.len_utf8()));
        }
    }

    masked
}
193
/// Detector for heredoc operators that appear inside `format NAME =` declarations.
struct FormatHeredocDetector;
196
197static FORMAT_PATTERN: LazyLock<Regex> =
199 LazyLock::new(|| match Regex::new(r"(?m)^\s*format\s+(\w+)\s*=\s*$") {
200 Ok(re) => re,
201 Err(_) => unreachable!("FORMAT_PATTERN regex failed to compile"),
202 });
203
204impl PatternDetector for FormatHeredocDetector {
205 fn detect(
206 &self,
207 code: &str,
208 offset: usize,
209 line_starts: &[usize],
210 ) -> Vec<(AntiPattern, Location)> {
211 let mut results = Vec::new();
212 let scan_code = mask_non_code_regions(code);
213
214 for cap in FORMAT_PATTERN.captures_iter(&scan_code) {
215 if let (Some(match_pos), Some(name_match)) = (cap.get(0), cap.get(1)) {
216 let format_name = name_match.as_str().to_string();
217 let location = location_from_start(line_starts, offset, match_pos.start());
218
219 let body_start = match_pos.end();
221 let body_end = code[body_start..].find("\n.").unwrap_or(code.len() - body_start);
222 let body = &scan_code[body_start..body_start + body_end];
223
224 if body.contains("<<") {
225 results.push((
226 AntiPattern::FormatHeredoc {
227 location: location.clone(),
228 format_name,
229 heredoc_delimiter: "UNKNOWN".to_string(), },
231 location,
232 ));
233 }
234 }
235 }
236
237 results
238 }
239
240 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
241 let AntiPattern::FormatHeredoc { format_name, .. } = pattern else {
242 return None;
243 };
244
245 Some(Diagnostic {
246 severity: Severity::Warning,
247 pattern: pattern.clone(),
248 message: format!("Heredoc declared inside format '{}'", format_name),
249 explanation: "Heredocs inside format declarations are often handled specially by the Perl interpreter and can be difficult to parse statically.".to_string(),
250 suggested_fix: Some("Consider moving the heredoc outside the format or using a simple string if possible.".to_string()),
251 references: vec!["perldoc perlform".to_string()],
252 })
253 }
254}
255
/// Detector for heredoc operators that appear inside `BEGIN { ... }` blocks.
struct BeginTimeHeredocDetector;
258
259static BEGIN_BLOCK_START_PATTERN: LazyLock<Regex> =
261 LazyLock::new(|| match Regex::new(r"\bBEGIN\s*\{") {
262 Ok(re) => re,
263 Err(_) => unreachable!("BEGIN_BLOCK_START_PATTERN regex failed to compile"),
264 });
265
/// Finds the `}` that closes the brace at `opening_brace_idx`.
///
/// Scanning starts at `opening_brace_idx` (a byte index) and tracks nesting
/// depth. Braces inside single- or double-quoted strings are ignored, and a
/// backslash inside a string protects the following byte. Returns the byte
/// index of the closing brace, or `None` when a `}` is met before any `{`,
/// the braces never balance, or the start index is past the end of `code`.
fn find_matching_brace(code: &str, opening_brace_idx: usize) -> Option<usize> {
    let mut nesting = 0usize;
    // The quote byte that opened the string currently being skipped, if any.
    let mut open_quote: Option<u8> = None;
    let mut skip_next = false;

    for (pos, byte) in code.bytes().enumerate().skip(opening_brace_idx) {
        if skip_next {
            skip_next = false;
            continue;
        }

        if let Some(quote) = open_quote {
            if byte == b'\\' {
                skip_next = true;
            } else if byte == quote {
                open_quote = None;
            }
            continue;
        }

        match byte {
            b'\'' | b'"' => open_quote = Some(byte),
            b'{' => nesting += 1,
            b'}' => {
                // A closing brace with nothing open means the input is unbalanced.
                nesting = nesting.checked_sub(1)?;
                if nesting == 0 {
                    return Some(pos);
                }
            }
            _ => {}
        }
    }

    None
}
318
319impl PatternDetector for BeginTimeHeredocDetector {
320 fn detect(
321 &self,
322 code: &str,
323 offset: usize,
324 line_starts: &[usize],
325 ) -> Vec<(AntiPattern, Location)> {
326 let mut results = Vec::new();
327 let scan_code = mask_non_code_regions(code);
328
329 for begin_match in BEGIN_BLOCK_START_PATTERN.find_iter(&scan_code) {
330 let Some(opening_brace_rel) = begin_match.as_str().rfind('{') else {
331 continue;
332 };
333 let opening_brace_idx = begin_match.start() + opening_brace_rel;
334 let Some(closing_brace_idx) = find_matching_brace(&scan_code, opening_brace_idx) else {
335 continue;
336 };
337 let block_content = &scan_code[opening_brace_idx + 1..closing_brace_idx];
338
339 if !block_content.contains("<<") {
340 continue;
341 }
342
343 let location = location_from_start(line_starts, offset, begin_match.start());
344
345 results.push((
346 AntiPattern::BeginTimeHeredoc {
347 location: location.clone(),
348 heredoc_content: block_content.to_string(),
349 side_effects: vec!["Phase-dependent parsing".to_string()],
350 },
351 location,
352 ));
353 }
354
355 results
356 }
357
358 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
359 if let AntiPattern::BeginTimeHeredoc { .. } = pattern {
360 Some(Diagnostic {
361 severity: Severity::Error,
362 pattern: pattern.clone(),
363 message: "Heredoc declared during BEGIN-time".to_string(),
364 explanation: "Heredocs declared inside BEGIN blocks are evaluated during the compilation phase. This can lead to complex side effects that are difficult to track statically.".to_string(),
365 suggested_fix: Some("Move the heredoc declaration out of the BEGIN block if it doesn't need to be evaluated during compilation.".to_string()),
366 references: vec!["perldoc perlmod".to_string()],
367 })
368 } else {
369 None
370 }
371 }
372}
373
/// Detector for heredocs whose delimiter is a variable, `${...}` expression or backtick command.
struct DynamicDelimiterDetector;
376
377static DYNAMIC_DELIMITER_PATTERN: LazyLock<Regex> =
379 LazyLock::new(|| match Regex::new(r"<<\s*\$\{[^}]+\}|<<\s*\$\w+|<<\s*`[^`]+`") {
380 Ok(re) => re,
381 Err(_) => unreachable!("DYNAMIC_DELIMITER_PATTERN regex failed to compile"),
382 });
383
384impl PatternDetector for DynamicDelimiterDetector {
385 fn detect(
386 &self,
387 code: &str,
388 offset: usize,
389 line_starts: &[usize],
390 ) -> Vec<(AntiPattern, Location)> {
391 let mut results = Vec::new();
392 let scan_code = mask_non_code_regions(code);
393
394 for cap in DYNAMIC_DELIMITER_PATTERN.captures_iter(&scan_code) {
395 if let Some(match_pos) = cap.get(0) {
396 let expression = match_pos.as_str().to_string();
397 let location = location_from_start(line_starts, offset, match_pos.start());
398
399 results.push((
400 AntiPattern::DynamicHeredocDelimiter { location: location.clone(), expression },
401 location,
402 ));
403 }
404 }
405
406 results
407 }
408
409 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
410 let AntiPattern::DynamicHeredocDelimiter { expression, .. } = pattern else {
411 return None;
412 };
413
414 Some(Diagnostic {
415 severity: Severity::Warning,
416 pattern: pattern.clone(),
417 message: format!("Dynamic heredoc delimiter: {}", expression),
418 explanation: "Using variables or expressions as heredoc delimiters makes it impossible to know the terminator without executing the code.".to_string(),
419 suggested_fix: Some("Use a literal string as the heredoc terminator.".to_string()),
420 references: vec!["perldoc perlop".to_string()],
421 })
422 }
423}
424
/// Detector for `use Filter::...` source-filter statements.
struct SourceFilterDetector;
427
428static SOURCE_FILTER_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
430 match Regex::new(r"use\s+Filter::(Simple|Util::Call|cpp|exec|sh|decrypt|tee)") {
431 Ok(re) => re,
432 Err(_) => unreachable!("SOURCE_FILTER_PATTERN regex failed to compile"),
433 }
434});
435
436impl PatternDetector for SourceFilterDetector {
437 fn detect(
438 &self,
439 code: &str,
440 offset: usize,
441 line_starts: &[usize],
442 ) -> Vec<(AntiPattern, Location)> {
443 let mut results = Vec::new();
444 let scan_code = mask_non_code_regions(code);
445
446 for cap in SOURCE_FILTER_PATTERN.captures_iter(&scan_code) {
447 if let (Some(match_pos), Some(module_match)) = (cap.get(0), cap.get(1)) {
448 let filter_module = module_match.as_str().to_string();
449 let location = location_from_start(line_starts, offset, match_pos.start());
450
451 results.push((
452 AntiPattern::SourceFilterHeredoc {
453 location: location.clone(),
454 module: filter_module,
455 },
456 location,
457 ));
458 }
459 }
460
461 results
462 }
463
464 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
465 let AntiPattern::SourceFilterHeredoc { module, .. } = pattern else {
466 return None;
467 };
468
469 Some(Diagnostic {
470 severity: Severity::Error,
471 pattern: pattern.clone(),
472 message: format!("Source filter detected: Filter::{}", module),
473 explanation: "Source filters rewrite the source code before it's parsed. Static analysis cannot reliably predict the state of the code after filtering.".to_string(),
474 suggested_fix: Some("Avoid using source filters. They are considered problematic and often replaced by better alternatives like Devel::Declare or modern Perl features.".to_string()),
475 references: vec!["perldoc Filter::Simple".to_string()],
476 })
477 }
478}
479
/// Detector for heredoc operators that appear inside regex code blocks.
struct RegexHeredocDetector;
482
483static REGEX_HEREDOC_PATTERN: LazyLock<Regex> =
485 LazyLock::new(|| match Regex::new(r"\(\?\{[^}]*<<[^}]*\}") {
486 Ok(re) => re,
487 Err(_) => unreachable!("REGEX_HEREDOC_PATTERN regex failed to compile"),
488 });
489
490impl PatternDetector for RegexHeredocDetector {
491 fn detect(
492 &self,
493 code: &str,
494 offset: usize,
495 line_starts: &[usize],
496 ) -> Vec<(AntiPattern, Location)> {
497 let mut results = Vec::new();
498 let scan_code = mask_non_code_regions(code);
499
500 for cap in REGEX_HEREDOC_PATTERN.captures_iter(&scan_code) {
501 if let Some(match_pos) = cap.get(0) {
502 let location = location_from_start(line_starts, offset, match_pos.start());
503
504 results.push((
505 AntiPattern::RegexCodeBlockHeredoc { location: location.clone() },
506 location,
507 ));
508 }
509 }
510
511 results
512 }
513
514 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
515 if let AntiPattern::RegexCodeBlockHeredoc { .. } = pattern {
516 Some(Diagnostic {
517 severity: Severity::Warning,
518 pattern: pattern.clone(),
519 message: "Heredoc inside regex code block".to_string(),
520 explanation: "Declaring heredocs inside (?{ ... }) or (??{ ... }) blocks is extremely rare and difficult to parse correctly.".to_string(),
521 suggested_fix: None,
522 references: vec!["perldoc perlre".to_string()],
523 })
524 } else {
525 None
526 }
527 }
528}
529
/// Detector for heredoc operators inside quoted strings passed to `eval`.
struct EvalHeredocDetector;
532
533static EVAL_HEREDOC_PATTERN: LazyLock<Regex> =
535 LazyLock::new(|| match Regex::new(r#"eval\s+(?:'[^']*<<[^']*'|"[^"]*<<[^"]*")"#) {
536 Ok(re) => re,
537 Err(_) => unreachable!("EVAL_HEREDOC_PATTERN regex failed to compile"),
538 });
539
540impl PatternDetector for EvalHeredocDetector {
541 fn detect(
542 &self,
543 code: &str,
544 offset: usize,
545 line_starts: &[usize],
546 ) -> Vec<(AntiPattern, Location)> {
547 let mut results = Vec::new();
548
549 for cap in EVAL_HEREDOC_PATTERN.captures_iter(code) {
550 if let Some(match_pos) = cap.get(0) {
551 let location = location_from_start(line_starts, offset, match_pos.start());
552
553 results.push((
554 AntiPattern::EvalStringHeredoc { location: location.clone() },
555 location,
556 ));
557 }
558 }
559
560 results
561 }
562
563 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
564 if let AntiPattern::EvalStringHeredoc { .. } = pattern {
565 Some(Diagnostic {
566 severity: Severity::Warning,
567 pattern: pattern.clone(),
568 message: "Heredoc inside eval string".to_string(),
569 explanation: "Heredocs declared inside strings passed to eval require double parsing and can hide malicious or complex code.".to_string(),
570 suggested_fix: Some("Consider using a block eval or moving the heredoc outside the eval string.".to_string()),
571 references: vec!["perldoc -f eval".to_string()],
572 })
573 } else {
574 None
575 }
576 }
577}
578
/// Detector for heredocs printed to a handle that also appears in a `tie` statement.
struct TiedHandleDetector;
581
582static TIE_PATTERN: LazyLock<Regex> = LazyLock::new(|| match Regex::new(r"tie\s+([*$]\w+)") {
584 Ok(re) => re,
585 Err(_) => unreachable!("TIE_PATTERN regex failed to compile"),
586});
587
588static PRINT_HEREDOC_PATTERN: LazyLock<Regex> =
590 LazyLock::new(|| match Regex::new(r"print\s+([*$]?\w+)\s+<<") {
591 Ok(re) => re,
592 Err(_) => unreachable!("PRINT_HEREDOC_PATTERN regex failed to compile"),
593 });
594
595impl PatternDetector for TiedHandleDetector {
596 fn detect(
597 &self,
598 code: &str,
599 offset: usize,
600 line_starts: &[usize],
601 ) -> Vec<(AntiPattern, Location)> {
602 let mut results = Vec::new();
603 let scan_code = mask_non_code_regions(code);
604
605 let mut tied_handles = HashSet::new();
608 for cap in TIE_PATTERN.captures_iter(&scan_code) {
609 if let Some(handle_match) = cap.get(1) {
610 let raw_handle = handle_match.as_str();
611 let normalized = raw_handle.strip_prefix('*').unwrap_or(raw_handle);
612 tied_handles.insert(normalized.to_string());
613 }
614 }
615
616 for cap in PRINT_HEREDOC_PATTERN.captures_iter(&scan_code) {
620 let (Some(match_pos), Some(handle_match)) = (cap.get(0), cap.get(1)) else {
621 continue;
622 };
623
624 let raw_print_handle = handle_match.as_str();
625 let normalized_print_handle =
626 raw_print_handle.strip_prefix('*').unwrap_or(raw_print_handle);
627
628 if tied_handles.contains(normalized_print_handle) {
629 let location = location_from_start(line_starts, offset, match_pos.start());
630 results.push((
631 AntiPattern::TiedHandleHeredoc {
632 location: location.clone(),
633 handle_name: normalized_print_handle.to_string(),
634 },
635 location,
636 ));
637 }
638 }
639
640 results
641 }
642
643 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
644 let AntiPattern::TiedHandleHeredoc { handle_name, .. } = pattern else {
645 return None;
646 };
647
648 Some(Diagnostic {
649 severity: Severity::Info,
650 pattern: pattern.clone(),
651 message: format!("Heredoc written to tied handle '{}'", handle_name),
652 explanation: "Writing to a tied handle invokes custom code. The behavior of heredoc output depends on the tied class implementation.".to_string(),
653 suggested_fix: None,
654 references: vec!["perldoc -f tie".to_string()],
655 })
656 }
657}
658
659impl Default for AntiPatternDetector {
660 fn default() -> Self {
661 Self::new()
662 }
663}
664
665impl AntiPatternDetector {
666 pub fn new() -> Self {
668 Self {
669 patterns: vec![
670 Box::new(FormatHeredocDetector),
671 Box::new(BeginTimeHeredocDetector),
672 Box::new(DynamicDelimiterDetector),
673 Box::new(SourceFilterDetector),
674 Box::new(RegexHeredocDetector),
675 Box::new(EvalHeredocDetector),
676 Box::new(TiedHandleDetector),
677 ],
678 }
679 }
680
681 pub fn detect_all(&self, code: &str) -> Vec<Diagnostic> {
683 let mut diagnostics = Vec::new();
684 let line_starts = build_line_starts(code);
685
686 for detector in &self.patterns {
687 let patterns = detector.detect(code, 0, &line_starts);
688 for (pattern, _) in patterns {
689 if let Some(diagnostic) = detector.diagnose(&pattern) {
690 diagnostics.push(diagnostic);
691 }
692 }
693 }
694
695 diagnostics.sort_by_key(|d| match &d.pattern {
696 AntiPattern::FormatHeredoc { location, .. }
697 | AntiPattern::BeginTimeHeredoc { location, .. }
698 | AntiPattern::DynamicHeredocDelimiter { location, .. }
699 | AntiPattern::SourceFilterHeredoc { location, .. }
700 | AntiPattern::RegexCodeBlockHeredoc { location, .. }
701 | AntiPattern::EvalStringHeredoc { location, .. }
702 | AntiPattern::TiedHandleHeredoc { location, .. } => location.offset,
703 });
704
705 diagnostics
706 }
707
708 pub fn format_report(&self, diagnostics: &[Diagnostic]) -> String {
713 let mut report = String::from("Anti-Pattern Analysis Report\n");
714 report.push_str("============================\n\n");
715
716 if diagnostics.is_empty() {
717 report.push_str("No problematic patterns detected.\n");
718 return report;
719 }
720
721 report.push_str(&format!("Found {} problematic patterns:\n\n", diagnostics.len()));
722
723 for (i, diag) in diagnostics.iter().enumerate() {
724 report.push_str(&format!(
725 "{}. {} ({})\n",
726 i + 1,
727 diag.message,
728 match diag.severity {
729 Severity::Error => "ERROR",
730 Severity::Warning => "WARNING",
731 Severity::Info => "INFO",
732 }
733 ));
734
735 report.push_str(&format!(
736 " Location: {}\n",
737 match &diag.pattern {
738 AntiPattern::FormatHeredoc { location, .. }
739 | AntiPattern::BeginTimeHeredoc { location, .. }
740 | AntiPattern::DynamicHeredocDelimiter { location, .. }
741 | AntiPattern::SourceFilterHeredoc { location, .. }
742 | AntiPattern::RegexCodeBlockHeredoc { location, .. }
743 | AntiPattern::EvalStringHeredoc { location, .. }
744 | AntiPattern::TiedHandleHeredoc { location, .. } =>
745 format!("line {}, column {}", location.line, location.column),
746 }
747 ));
748
749 report.push_str(&format!(" Explanation: {}\n", diag.explanation));
750
751 if let Some(fix) = &diag.suggested_fix {
752 report.push_str(&format!(
753 " Suggested fix:\n {}\n",
754 fix.lines().collect::<Vec<_>>().join("\n ")
755 ));
756 }
757
758 if !diag.references.is_empty() {
759 report.push_str(&format!(" References: {}\n", diag.references.join(", ")));
760 }
761
762 report.push('\n');
763 }
764
765 report
766 }
767}
768
#[cfg(test)]
mod tests {
    use super::*;

    /// A heredoc inside a format body is reported as `FormatHeredoc`.
    #[test]
    fn test_format_heredoc_detection() {
        let detector = AntiPatternDetector::new();
        let code = r#"
format REPORT =
<<'END'
Name: @<<<<<<<<<<<<
$name
END
.
"#;

        let diagnostics = detector.detect_all(code);
        assert!(!diagnostics.is_empty());
        assert!(matches!(diagnostics[0].pattern, AntiPattern::FormatHeredoc { .. }));
    }

    /// A heredoc directly inside a BEGIN block yields exactly one finding.
    #[test]
    fn test_begin_heredoc_detection() {
        let detector = AntiPatternDetector::new();
        let code = r###"
BEGIN {
 $config = <<'END';
 server = localhost
END
}
"###;

        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        assert!(matches!(diagnostics[0].pattern, AntiPattern::BeginTimeHeredoc { .. }));
    }

    /// Nested braces inside the BEGIN block do not cut the block short.
    #[test]
    fn test_begin_heredoc_detection_with_nested_braces() {
        let detector = AntiPatternDetector::new();
        let code = r###"
BEGIN {
 if ($ENV{DEV}) {
 $config = <<'END';
 server = localhost
END
 }
}
"###;

        let diagnostics = detector.detect_all(code);
        let begin_count = diagnostics
            .iter()
            .filter(|diag| matches!(diag.pattern, AntiPattern::BeginTimeHeredoc { .. }))
            .count();
        assert_eq!(begin_count, 1);
    }

    /// A variable used as heredoc delimiter is reported.
    #[test]
    fn test_dynamic_delimiter_detection() {
        let detector = AntiPatternDetector::new();
        let code = r###"
my $delimiter = "EOF";
my $content = <<$delimiter;
This is dynamic
EOF
"###;

        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        assert!(matches!(diagnostics[0].pattern, AntiPattern::DynamicHeredocDelimiter { .. }));
    }

    /// A `use Filter::Simple` statement is reported.
    #[test]
    fn test_source_filter_detection() {
        let detector = AntiPatternDetector::new();
        let code = r###"
use Filter::Simple;
print <<EOF;
Filtered content
EOF
"###;
        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        assert!(matches!(diagnostics[0].pattern, AntiPattern::SourceFilterHeredoc { .. }));
    }

    /// A heredoc inside a `(?{ ... })` regex code block is reported.
    #[test]
    fn test_regex_heredoc_detection() {
        let detector = AntiPatternDetector::new();
        let code = r###"
m/pattern(?{
 print <<'MATCH';
 Match text
MATCH
})/
"###;
        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        assert!(matches!(diagnostics[0].pattern, AntiPattern::RegexCodeBlockHeredoc { .. }));
    }

    /// A heredoc inside a string passed to `eval` is reported.
    #[test]
    fn test_eval_heredoc_detection() {
        let detector = AntiPatternDetector::new();
        let code = r###"
eval 'print <<"EVAL";
Eval content
EVAL';
"###;
        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        assert!(matches!(diagnostics[0].pattern, AntiPattern::EvalStringHeredoc { .. }));
    }

    /// Printing a heredoc to a tied glob handle is reported.
    #[test]
    fn test_tied_handle_detection() {
        let detector = AntiPatternDetector::new();
        let code = r###"
tie *FH, 'Tie::Handle';
print FH <<'DATA';
Tied output
DATA
"###;
        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        assert!(matches!(diagnostics[0].pattern, AntiPattern::TiedHandleHeredoc { .. }));
    }

    /// Printing a heredoc to a tied scalar handle is reported.
    #[test]
    fn test_tied_scalar_handle_detection() {
        let detector = AntiPatternDetector::new();
        let code = r###"
tie $fh, 'Tie::Handle';
print $fh <<'DATA';
Tied output
DATA
"###;
        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        assert!(matches!(diagnostics[0].pattern, AntiPattern::TiedHandleHeredoc { .. }));
    }

    /// Every write to a tied handle is reported, not just the first.
    #[test]
    fn test_tied_handle_reports_multiple_writes() {
        let detector = AntiPatternDetector::new();
        let code = r###"
tie *FH, 'Tie::Handle';
print FH <<'FIRST';
One
FIRST
print FH <<'SECOND';
Two
SECOND
"###;

        let diagnostics = detector.detect_all(code);
        let tied_handle_count = diagnostics
            .iter()
            .filter(|diag| matches!(diag.pattern, AntiPattern::TiedHandleHeredoc { .. }))
            .count();
        assert_eq!(tied_handle_count, 2);
    }

    /// Writes to a handle that was never tied are not reported.
    #[test]
    fn test_tied_handle_does_not_report_other_handles() {
        let detector = AntiPatternDetector::new();
        let code = r###"
tie *FH, 'Tie::Handle';
print OTHER <<'DATA';
Not tied
DATA
"###;

        let diagnostics = detector.detect_all(code);
        let tied_handle_count = diagnostics
            .iter()
            .filter(|diag| matches!(diag.pattern, AntiPattern::TiedHandleHeredoc { .. }))
            .count();
        assert_eq!(tied_handle_count, 0);
    }

    /// A match at the start of the second line has line 1, column 0.
    #[test]
    fn test_location_column_is_zero_based_for_new_line_matches() {
        let detector = AntiPatternDetector::new();
        let code = "my $x = 1;\nuse Filter::Simple;\n";

        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);

        assert!(
            matches!(diagnostics[0].pattern, AntiPattern::SourceFilterHeredoc { .. }),
            "expected SourceFilterHeredoc pattern, got: {:?}",
            diagnostics[0].pattern
        );
        // Fail loudly instead of returning early, matching the sibling location tests.
        let AntiPattern::SourceFilterHeredoc { location, .. } = &diagnostics[0].pattern else {
            unreachable!("expected SourceFilterHeredoc");
        };

        assert_eq!(location.line, 1);
        assert_eq!(location.column, 0);
        assert_eq!(location.offset, 11);
    }

    /// A match at the very first byte has line 0, column 0, offset 0.
    #[test]
    fn test_location_first_byte_is_line_zero_column_zero() {
        let detector = AntiPatternDetector::new();
        let code = "use Filter::Simple;\n";

        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        let AntiPattern::SourceFilterHeredoc { location, .. } = &diagnostics[0].pattern else {
            unreachable!("expected SourceFilterHeredoc");
        };
        assert_eq!(location.line, 0, "first-byte match must be on line 0");
        assert_eq!(location.column, 0, "first-byte match must be at column 0");
        assert_eq!(location.offset, 0);
    }

    /// A match on the third line reports line 2 and the right byte offset.
    #[test]
    fn test_location_third_line_accurate() {
        let detector = AntiPatternDetector::new();
        let code = "my $a = 1;\nmy $b = 2;\nuse Filter::Simple;\n";

        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        let AntiPattern::SourceFilterHeredoc { location, .. } = &diagnostics[0].pattern else {
            unreachable!("expected SourceFilterHeredoc");
        };
        assert_eq!(location.line, 2, "match on third line must report line 2");
        assert_eq!(location.column, 0, "match at start of line must report column 0");
        assert_eq!(location.offset, 22, "byte offset of third-line start");
    }

    /// A match after leading indentation reports a non-zero column.
    #[test]
    fn test_location_mid_line_column_nonzero() {
        let detector = AntiPatternDetector::new();
        // The second line is indented by exactly four spaces; the column and
        // offset assertions below depend on that.
        let code = "# comment\n    use Filter::Simple;\n";

        let diagnostics = detector.detect_all(code);
        assert_eq!(diagnostics.len(), 1);
        let AntiPattern::SourceFilterHeredoc { location, .. } = &diagnostics[0].pattern else {
            unreachable!("expected SourceFilterHeredoc");
        };
        assert_eq!(location.line, 1);
        assert_eq!(location.column, 4, "mid-line match must report correct column");
        assert_eq!(location.offset, 14, "byte offset = 10 (first line) + 4 spaces");
    }

    /// Source-filter statements in comments or strings are ignored.
    #[test]
    fn test_source_filter_detection_ignores_comments_and_strings() {
        let detector = AntiPatternDetector::new();
        let code = r#"
# use Filter::Simple;
my $s = "use Filter::Simple";
"#;

        let diagnostics = detector.detect_all(code);
        assert!(diagnostics.is_empty());
    }

    /// BEGIN blocks in comments or strings are ignored.
    #[test]
    fn test_begin_detection_ignores_comments_and_strings() {
        let detector = AntiPatternDetector::new();
        let code = r#"
# BEGIN { my $x = <<'END'; END }
my $s = "BEGIN { my $x = <<'END'; END }";
"#;

        let diagnostics = detector.detect_all(code);
        assert!(diagnostics.is_empty());
    }

    /// Multi-byte characters in a masked comment do not break format detection.
    #[test]
    fn test_format_detection_handles_utf8_in_masked_regions() {
        let detector = AntiPatternDetector::new();
        let code = r#"# comment with emoji 😀
format REPORT =
<<'END'
Body
END
.
"#;

        let diagnostics = detector.detect_all(code);
        assert!(
            diagnostics
                .iter()
                .any(|diag| matches!(diag.pattern, AntiPattern::FormatHeredoc { .. }))
        );
    }
}