1use regex::Regex;
12use std::sync::LazyLock;
13
/// Position of a detected anti-pattern inside the analysed source text.
///
/// All fields are plain integers, so the type supports total equality (`Eq`)
/// in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Location {
    /// Line of the match within the analysed code.
    pub line: usize,
    /// Column of the match on its line, derived from byte positions.
    pub column: usize,
    /// Byte offset of the match: the base offset handed to the detector plus
    /// the position of the match inside the analysed code.
    pub offset: usize,
}
20
/// How serious a reported anti-pattern is.
///
/// The enum carries no data, so it supports total equality (`Eq`) in addition
/// to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Severity {
    /// Rendered as `ERROR` in formatted reports.
    Error,
    /// Rendered as `WARNING` in formatted reports.
    Warning,
    /// Rendered as `INFO` in formatted reports.
    Info,
}
27
28#[derive(Debug, Clone, PartialEq)]
29pub enum AntiPattern {
30 FormatHeredoc { location: Location, format_name: String, heredoc_delimiter: String },
31 BeginTimeHeredoc { location: Location, heredoc_content: String, side_effects: Vec<String> },
32 DynamicHeredocDelimiter { location: Location, expression: String },
33 SourceFilterHeredoc { location: Location, module: String },
34 RegexCodeBlockHeredoc { location: Location },
35 EvalStringHeredoc { location: Location },
36 TiedHandleHeredoc { location: Location, handle_name: String },
37}
38
39#[derive(Debug, Clone, PartialEq)]
40pub struct Diagnostic {
41 pub severity: Severity,
42 pub pattern: AntiPattern,
43 pub message: String,
44 pub explanation: String,
45 pub suggested_fix: Option<String>,
46 pub references: Vec<String>,
47}
48
49pub struct AntiPatternDetector {
50 patterns: Vec<Box<dyn PatternDetector>>,
51}
52
53trait PatternDetector: Send + Sync {
54 fn detect(&self, code: &str, offset: usize) -> Vec<(AntiPattern, Location)>;
55 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic>;
56}
57
58struct FormatHeredocDetector;
60
61static FORMAT_PATTERN: LazyLock<Regex> =
63 LazyLock::new(|| match Regex::new(r"(?m)^\s*format\s+(\w+)\s*=\s*$") {
64 Ok(re) => re,
65 Err(_) => unreachable!("FORMAT_PATTERN regex failed to compile"),
66 });
67
68impl PatternDetector for FormatHeredocDetector {
69 fn detect(&self, code: &str, offset: usize) -> Vec<(AntiPattern, Location)> {
70 let mut results = Vec::new();
71
72 for cap in FORMAT_PATTERN.captures_iter(code) {
73 if let (Some(match_pos), Some(name_match)) = (cap.get(0), cap.get(1)) {
74 let format_name = name_match.as_str().to_string();
75 let location = Location {
76 line: code[..match_pos.start()].lines().count(),
77 column: match_pos.start() - code[..match_pos.start()].rfind('\n').unwrap_or(0),
78 offset: offset + match_pos.start(),
79 };
80
81 let body_start = match_pos.end();
83 let body_end = code[body_start..].find("\n.").unwrap_or(code.len() - body_start);
84 let body = &code[body_start..body_start + body_end];
85
86 if body.contains("<<") {
87 results.push((
88 AntiPattern::FormatHeredoc {
89 location: location.clone(),
90 format_name,
91 heredoc_delimiter: "UNKNOWN".to_string(), },
93 location,
94 ));
95 }
96 }
97 }
98
99 results
100 }
101
102 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
103 let AntiPattern::FormatHeredoc { format_name, .. } = pattern else {
104 return None;
105 };
106
107 Some(Diagnostic {
108 severity: Severity::Warning,
109 pattern: pattern.clone(),
110 message: format!("Heredoc declared inside format '{}'", format_name),
111 explanation: "Heredocs inside format declarations are often handled specially by the Perl interpreter and can be difficult to parse statically.".to_string(),
112 suggested_fix: Some("Consider moving the heredoc outside the format or using a simple string if possible.".to_string()),
113 references: vec!["perldoc perlform".to_string()],
114 })
115 }
116}
117
118struct BeginTimeHeredocDetector;
120
121static BEGIN_BLOCK_PATTERN: LazyLock<Regex> =
123 LazyLock::new(|| match Regex::new(r"(?s)\bBEGIN\s*\{([^}]*<<[^}]*)\}") {
124 Ok(re) => re,
125 Err(_) => unreachable!("BEGIN_BLOCK_PATTERN regex failed to compile"),
126 });
127
128impl PatternDetector for BeginTimeHeredocDetector {
129 fn detect(&self, code: &str, offset: usize) -> Vec<(AntiPattern, Location)> {
130 let mut results = Vec::new();
131
132 for cap in BEGIN_BLOCK_PATTERN.captures_iter(code) {
133 if let (Some(match_pos), Some(content_match)) = (cap.get(0), cap.get(1)) {
134 let block_content = content_match.as_str();
135 let location = Location {
136 line: code[..match_pos.start()].lines().count(),
137 column: match_pos.start() - code[..match_pos.start()].rfind('\n').unwrap_or(0),
138 offset: offset + match_pos.start(),
139 };
140
141 results.push((
142 AntiPattern::BeginTimeHeredoc {
143 location: location.clone(),
144 heredoc_content: block_content.to_string(),
145 side_effects: vec!["Phase-dependent parsing".to_string()],
146 },
147 location,
148 ));
149 }
150 }
151
152 results
153 }
154
155 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
156 if let AntiPattern::BeginTimeHeredoc { .. } = pattern {
157 Some(Diagnostic {
158 severity: Severity::Error,
159 pattern: pattern.clone(),
160 message: "Heredoc declared during BEGIN-time".to_string(),
161 explanation: "Heredocs declared inside BEGIN blocks are evaluated during the compilation phase. This can lead to complex side effects that are difficult to track statically.".to_string(),
162 suggested_fix: Some("Move the heredoc declaration out of the BEGIN block if it doesn't need to be evaluated during compilation.".to_string()),
163 references: vec!["perldoc perlmod".to_string()],
164 })
165 } else {
166 None
167 }
168 }
169}
170
171struct DynamicDelimiterDetector;
173
174static DYNAMIC_DELIMITER_PATTERN: LazyLock<Regex> =
176 LazyLock::new(|| match Regex::new(r"<<\s*\$\{[^}]+\}|<<\s*\$\w+|<<\s*`[^`]+`") {
177 Ok(re) => re,
178 Err(_) => unreachable!("DYNAMIC_DELIMITER_PATTERN regex failed to compile"),
179 });
180
181impl PatternDetector for DynamicDelimiterDetector {
182 fn detect(&self, code: &str, offset: usize) -> Vec<(AntiPattern, Location)> {
183 let mut results = Vec::new();
184
185 for cap in DYNAMIC_DELIMITER_PATTERN.captures_iter(code) {
186 if let Some(match_pos) = cap.get(0) {
187 let expression = match_pos.as_str().to_string();
188 let location = Location {
189 line: code[..match_pos.start()].lines().count(),
190 column: match_pos.start() - code[..match_pos.start()].rfind('\n').unwrap_or(0),
191 offset: offset + match_pos.start(),
192 };
193
194 results.push((
195 AntiPattern::DynamicHeredocDelimiter { location: location.clone(), expression },
196 location,
197 ));
198 }
199 }
200
201 results
202 }
203
204 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
205 let AntiPattern::DynamicHeredocDelimiter { expression, .. } = pattern else {
206 return None;
207 };
208
209 Some(Diagnostic {
210 severity: Severity::Warning,
211 pattern: pattern.clone(),
212 message: format!("Dynamic heredoc delimiter: {}", expression),
213 explanation: "Using variables or expressions as heredoc delimiters makes it impossible to know the terminator without executing the code.".to_string(),
214 suggested_fix: Some("Use a literal string as the heredoc terminator.".to_string()),
215 references: vec!["perldoc perlop".to_string()],
216 })
217 }
218}
219
220struct SourceFilterDetector;
222
223static SOURCE_FILTER_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
225 match Regex::new(r"use\s+Filter::(Simple|Util::Call|cpp|exec|sh|decrypt|tee)") {
226 Ok(re) => re,
227 Err(_) => unreachable!("SOURCE_FILTER_PATTERN regex failed to compile"),
228 }
229});
230
231impl PatternDetector for SourceFilterDetector {
232 fn detect(&self, code: &str, offset: usize) -> Vec<(AntiPattern, Location)> {
233 let mut results = Vec::new();
234
235 for cap in SOURCE_FILTER_PATTERN.captures_iter(code) {
236 if let (Some(match_pos), Some(module_match)) = (cap.get(0), cap.get(1)) {
237 let filter_module = module_match.as_str().to_string();
238 let location = Location {
239 line: code[..match_pos.start()].lines().count(),
240 column: match_pos.start() - code[..match_pos.start()].rfind('\n').unwrap_or(0),
241 offset: offset + match_pos.start(),
242 };
243
244 results.push((
245 AntiPattern::SourceFilterHeredoc {
246 location: location.clone(),
247 module: filter_module,
248 },
249 location,
250 ));
251 }
252 }
253
254 results
255 }
256
257 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
258 let AntiPattern::SourceFilterHeredoc { module, .. } = pattern else {
259 return None;
260 };
261
262 Some(Diagnostic {
263 severity: Severity::Error,
264 pattern: pattern.clone(),
265 message: format!("Source filter detected: Filter::{}", module),
266 explanation: "Source filters rewrite the source code before it's parsed. Static analysis cannot reliably predict the state of the code after filtering.".to_string(),
267 suggested_fix: Some("Avoid using source filters. They are considered problematic and often replaced by better alternatives like Devel::Declare or modern Perl features.".to_string()),
268 references: vec!["perldoc Filter::Simple".to_string()],
269 })
270 }
271}
272
273struct RegexHeredocDetector;
275
276static REGEX_HEREDOC_PATTERN: LazyLock<Regex> =
278 LazyLock::new(|| match Regex::new(r"\(\?\{[^}]*<<[^}]*\}") {
279 Ok(re) => re,
280 Err(_) => unreachable!("REGEX_HEREDOC_PATTERN regex failed to compile"),
281 });
282
283impl PatternDetector for RegexHeredocDetector {
284 fn detect(&self, code: &str, offset: usize) -> Vec<(AntiPattern, Location)> {
285 let mut results = Vec::new();
286
287 for cap in REGEX_HEREDOC_PATTERN.captures_iter(code) {
288 if let Some(match_pos) = cap.get(0) {
289 let location = Location {
290 line: code[..match_pos.start()].lines().count(),
291 column: match_pos.start() - code[..match_pos.start()].rfind('\n').unwrap_or(0),
292 offset: offset + match_pos.start(),
293 };
294
295 results.push((
296 AntiPattern::RegexCodeBlockHeredoc { location: location.clone() },
297 location,
298 ));
299 }
300 }
301
302 results
303 }
304
305 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
306 if let AntiPattern::RegexCodeBlockHeredoc { .. } = pattern {
307 Some(Diagnostic {
308 severity: Severity::Warning,
309 pattern: pattern.clone(),
310 message: "Heredoc inside regex code block".to_string(),
311 explanation: "Declaring heredocs inside (?{ ... }) or (??{ ... }) blocks is extremely rare and difficult to parse correctly.".to_string(),
312 suggested_fix: None,
313 references: vec!["perldoc perlre".to_string()],
314 })
315 } else {
316 None
317 }
318 }
319}
320
321struct EvalHeredocDetector;
323
324static EVAL_HEREDOC_PATTERN: LazyLock<Regex> =
326 LazyLock::new(|| match Regex::new(r#"eval\s+(?:'[^']*<<[^']*'|"[^"]*<<[^"]*")"#) {
327 Ok(re) => re,
328 Err(_) => unreachable!("EVAL_HEREDOC_PATTERN regex failed to compile"),
329 });
330
331impl PatternDetector for EvalHeredocDetector {
332 fn detect(&self, code: &str, offset: usize) -> Vec<(AntiPattern, Location)> {
333 let mut results = Vec::new();
334
335 for cap in EVAL_HEREDOC_PATTERN.captures_iter(code) {
336 if let Some(match_pos) = cap.get(0) {
337 let location = Location {
338 line: code[..match_pos.start()].lines().count(),
339 column: match_pos.start() - code[..match_pos.start()].rfind('\n').unwrap_or(0),
340 offset: offset + match_pos.start(),
341 };
342
343 results.push((
344 AntiPattern::EvalStringHeredoc { location: location.clone() },
345 location,
346 ));
347 }
348 }
349
350 results
351 }
352
353 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
354 if let AntiPattern::EvalStringHeredoc { .. } = pattern {
355 Some(Diagnostic {
356 severity: Severity::Warning,
357 pattern: pattern.clone(),
358 message: "Heredoc inside eval string".to_string(),
359 explanation: "Heredocs declared inside strings passed to eval require double parsing and can hide malicious or complex code.".to_string(),
360 suggested_fix: Some("Consider using a block eval or moving the heredoc outside the eval string.".to_string()),
361 references: vec!["perldoc -f eval".to_string()],
362 })
363 } else {
364 None
365 }
366 }
367}
368
369struct TiedHandleDetector;
371
372static TIE_PATTERN: LazyLock<Regex> = LazyLock::new(|| match Regex::new(r"tie\s+([*$]\w+)") {
374 Ok(re) => re,
375 Err(_) => unreachable!("TIE_PATTERN regex failed to compile"),
376});
377
378impl PatternDetector for TiedHandleDetector {
379 fn detect(&self, code: &str, offset: usize) -> Vec<(AntiPattern, Location)> {
380 let mut results = Vec::new();
381
382 let mut tied_handles = Vec::new();
384 for cap in TIE_PATTERN.captures_iter(code) {
385 if let Some(handle_match) = cap.get(1) {
386 tied_handles.push(handle_match.as_str());
387 }
388 }
389
390 for raw_handle in tied_handles {
391 let handle_to_search = raw_handle.strip_prefix('*').unwrap_or(raw_handle);
394
395 let usage_pattern = format!(r"print\s+{}\s+<<", regex::escape(handle_to_search));
397 if let Ok(re) = Regex::new(&usage_pattern)
398 && let Some(usage_match) = re.find(code)
399 {
400 let location = Location {
401 line: code[..usage_match.start()].lines().count(),
402 column: usage_match.start()
403 - code[..usage_match.start()].rfind('\n').unwrap_or(0),
404 offset: offset + usage_match.start(),
405 };
406
407 results.push((
408 AntiPattern::TiedHandleHeredoc {
409 location: location.clone(),
410 handle_name: handle_to_search.to_string(),
411 },
412 location,
413 ));
414 }
415 }
416
417 results
418 }
419
420 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
421 let AntiPattern::TiedHandleHeredoc { handle_name, .. } = pattern else {
422 return None;
423 };
424
425 Some(Diagnostic {
426 severity: Severity::Info,
427 pattern: pattern.clone(),
428 message: format!("Heredoc written to tied handle '{}'", handle_name),
429 explanation: "Writing to a tied handle invokes custom code. The behavior of heredoc output depends on the tied class implementation.".to_string(),
430 suggested_fix: None,
431 references: vec!["perldoc -f tie".to_string()],
432 })
433 }
434}
435
436impl Default for AntiPatternDetector {
437 fn default() -> Self {
438 Self::new()
439 }
440}
441
442impl AntiPatternDetector {
443 pub fn new() -> Self {
444 Self {
445 patterns: vec![
446 Box::new(FormatHeredocDetector),
447 Box::new(BeginTimeHeredocDetector),
448 Box::new(DynamicDelimiterDetector),
449 Box::new(SourceFilterDetector),
450 Box::new(RegexHeredocDetector),
451 Box::new(EvalHeredocDetector),
452 Box::new(TiedHandleDetector),
453 ],
454 }
455 }
456
457 pub fn detect_all(&self, code: &str) -> Vec<Diagnostic> {
458 let mut diagnostics = Vec::new();
459
460 for detector in &self.patterns {
461 let patterns = detector.detect(code, 0);
462 for (pattern, _) in patterns {
463 if let Some(diagnostic) = detector.diagnose(&pattern) {
464 diagnostics.push(diagnostic);
465 }
466 }
467 }
468
469 diagnostics.sort_by_key(|d| match &d.pattern {
470 AntiPattern::FormatHeredoc { location, .. }
471 | AntiPattern::BeginTimeHeredoc { location, .. }
472 | AntiPattern::DynamicHeredocDelimiter { location, .. }
473 | AntiPattern::SourceFilterHeredoc { location, .. }
474 | AntiPattern::RegexCodeBlockHeredoc { location, .. }
475 | AntiPattern::EvalStringHeredoc { location, .. }
476 | AntiPattern::TiedHandleHeredoc { location, .. } => location.offset,
477 });
478
479 diagnostics
480 }
481
482 pub fn format_report(&self, diagnostics: &[Diagnostic]) -> String {
483 let mut report = String::from("Anti-Pattern Analysis Report\n");
484 report.push_str("============================\n\n");
485
486 if diagnostics.is_empty() {
487 report.push_str("No problematic patterns detected.\n");
488 return report;
489 }
490
491 report.push_str(&format!("Found {} problematic patterns:\n\n", diagnostics.len()));
492
493 for (i, diag) in diagnostics.iter().enumerate() {
494 report.push_str(&format!(
495 "{}. {} ({})\n",
496 i + 1,
497 diag.message,
498 match diag.severity {
499 Severity::Error => "ERROR",
500 Severity::Warning => "WARNING",
501 Severity::Info => "INFO",
502 }
503 ));
504
505 report.push_str(&format!(
506 " Location: {}\n",
507 match &diag.pattern {
508 AntiPattern::FormatHeredoc { location, .. }
509 | AntiPattern::BeginTimeHeredoc { location, .. }
510 | AntiPattern::DynamicHeredocDelimiter { location, .. }
511 | AntiPattern::SourceFilterHeredoc { location, .. }
512 | AntiPattern::RegexCodeBlockHeredoc { location, .. }
513 | AntiPattern::EvalStringHeredoc { location, .. }
514 | AntiPattern::TiedHandleHeredoc { location, .. } =>
515 format!("line {}, column {}", location.line, location.column),
516 }
517 ));
518
519 report.push_str(&format!(" Explanation: {}\n", diag.explanation));
520
521 if let Some(fix) = &diag.suggested_fix {
522 report.push_str(&format!(
523 " Suggested fix:\n {}\n",
524 fix.lines().collect::<Vec<_>>().join("\n ")
525 ));
526 }
527
528 if !diag.references.is_empty() {
529 report.push_str(&format!(" References: {}\n", diag.references.join(", ")));
530 }
531
532 report.push('\n');
533 }
534
535 report
536 }
537}
538
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a fresh detector over `code` and returns its diagnostics.
    fn analyze(code: &str) -> Vec<Diagnostic> {
        AntiPatternDetector::new().detect_all(code)
    }

    #[test]
    fn test_format_heredoc_detection() {
        let code = r#"
format REPORT =
<<'END'
Name: @<<<<<<<<<<<<
$name
END
.
"#;

        let found = analyze(code);
        assert!(!found.is_empty());
        assert!(matches!(found[0].pattern, AntiPattern::FormatHeredoc { .. }));
    }

    #[test]
    fn test_begin_heredoc_detection() {
        let code = r###"
BEGIN {
 $config = <<'END';
 server = localhost
END
}
"###;

        let found = analyze(code);
        assert_eq!(found.len(), 1);
        assert!(matches!(found[0].pattern, AntiPattern::BeginTimeHeredoc { .. }));
    }

    #[test]
    fn test_dynamic_delimiter_detection() {
        let code = r###"
my $delimiter = "EOF";
my $content = <<$delimiter;
This is dynamic
EOF
"###;

        let found = analyze(code);
        assert_eq!(found.len(), 1);
        assert!(matches!(found[0].pattern, AntiPattern::DynamicHeredocDelimiter { .. }));
    }

    #[test]
    fn test_source_filter_detection() {
        let code = r###"
use Filter::Simple;
print <<EOF;
Filtered content
EOF
"###;

        let found = analyze(code);
        assert_eq!(found.len(), 1);
        assert!(matches!(found[0].pattern, AntiPattern::SourceFilterHeredoc { .. }));
    }

    #[test]
    fn test_regex_heredoc_detection() {
        let code = r###"
m/pattern(?{
 print <<'MATCH';
 Match text
MATCH
})/
"###;

        let found = analyze(code);
        assert_eq!(found.len(), 1);
        assert!(matches!(found[0].pattern, AntiPattern::RegexCodeBlockHeredoc { .. }));
    }

    #[test]
    fn test_eval_heredoc_detection() {
        let code = r###"
eval 'print <<"EVAL";
Eval content
EVAL';
"###;

        let found = analyze(code);
        assert_eq!(found.len(), 1);
        assert!(matches!(found[0].pattern, AntiPattern::EvalStringHeredoc { .. }));
    }

    #[test]
    fn test_tied_handle_detection() {
        let code = r###"
tie *FH, 'Tie::Handle';
print FH <<'DATA';
Tied output
DATA
"###;

        let found = analyze(code);
        assert_eq!(found.len(), 1);
        assert!(matches!(found[0].pattern, AntiPattern::TiedHandleHeredoc { .. }));
    }

    #[test]
    fn test_tied_scalar_handle_detection() {
        let code = r###"
tie $fh, 'Tie::Handle';
print $fh <<'DATA';
Tied output
DATA
"###;

        let found = analyze(code);
        assert_eq!(found.len(), 1);
        assert!(matches!(found[0].pattern, AntiPattern::TiedHandleHeredoc { .. }));
    }
}