1use regex::Regex;
2use std::collections::HashSet;
3use std::sync::LazyLock;
4
5use crate::heredoc_anti_patterns::model::{AntiPattern, Diagnostic, Location, Severity};
6use crate::heredoc_anti_patterns::utils::{
7 build_line_starts, location_from_start, mask_non_code_regions,
8};
9
/// Runs a fixed set of heredoc anti-pattern detectors over source text and
/// turns their findings into diagnostics.
pub struct AntiPatternDetector {
    /// Registered detectors; `detect_all` consults them in this order.
    patterns: Vec<Box<dyn PatternDetector>>,
}
20
/// A single anti-pattern check: it locates occurrences in source text and
/// describes each occurrence as a diagnostic.
trait PatternDetector: Send + Sync {
    /// Scans `code` and returns every finding together with its location.
    ///
    /// `offset` and `line_starts` are forwarded to `location_from_start` by the
    /// implementations in this file to turn a byte position into a `Location`.
    fn detect(
        &self,
        code: &str,
        offset: usize,
        line_starts: &[usize],
    ) -> Vec<(AntiPattern, Location)>;

    /// Builds the diagnostic for `pattern`, or returns `None` when `pattern`
    /// is not the variant this detector is responsible for.
    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic>;
}
30
31struct FormatHeredocDetector;
33
34static FORMAT_PATTERN: LazyLock<Regex> =
36 LazyLock::new(|| match Regex::new(r"(?m)^\s*format\s+(\w+)\s*=\s*$") {
37 Ok(re) => re,
38 Err(_) => unreachable!("FORMAT_PATTERN regex failed to compile"),
39 });
40
41impl PatternDetector for FormatHeredocDetector {
42 fn detect(
43 &self,
44 code: &str,
45 offset: usize,
46 line_starts: &[usize],
47 ) -> Vec<(AntiPattern, Location)> {
48 let mut results = Vec::new();
49 let scan_code = mask_non_code_regions(code);
50
51 for cap in FORMAT_PATTERN.captures_iter(&scan_code) {
52 if let (Some(match_pos), Some(name_match)) = (cap.get(0), cap.get(1)) {
53 let format_name = name_match.as_str().to_string();
54 let location = location_from_start(line_starts, offset, match_pos.start());
55
56 let body_start = match_pos.end();
58 let body_end = code[body_start..].find("\n.").unwrap_or(code.len() - body_start);
59 let body = &scan_code[body_start..body_start + body_end];
60
61 if body.contains("<<") {
62 results.push((
63 AntiPattern::FormatHeredoc {
64 location: location.clone(),
65 format_name,
66 heredoc_delimiter: "UNKNOWN".to_string(), },
68 location,
69 ));
70 }
71 }
72 }
73
74 results
75 }
76
77 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
78 let AntiPattern::FormatHeredoc { format_name, .. } = pattern else {
79 return None;
80 };
81
82 Some(Diagnostic {
83 severity: Severity::Warning,
84 pattern: pattern.clone(),
85 message: format!("Heredoc declared inside format '{}'", format_name),
86 explanation: "Heredocs inside format declarations are often handled specially by the Perl interpreter and can be difficult to parse statically.".to_string(),
87 suggested_fix: Some("Consider moving the heredoc outside the format or using a simple string if possible.".to_string()),
88 references: vec!["perldoc perlform".to_string()],
89 })
90 }
91}
92
93struct BeginTimeHeredocDetector;
95
96static BEGIN_BLOCK_START_PATTERN: LazyLock<Regex> =
98 LazyLock::new(|| match Regex::new(r"\bBEGIN\s*\{") {
99 Ok(re) => re,
100 Err(_) => unreachable!("BEGIN_BLOCK_START_PATTERN regex failed to compile"),
101 });
102
/// Finds the byte index of the `}` that closes the `{` at `opening_brace_idx`.
///
/// Braces inside single- or double-quoted strings are ignored, and a backslash
/// inside a string escapes the byte that follows it. The scan works on bytes,
/// so multi-byte characters never get mistaken for a quote or a brace.
///
/// Returns `None` when `opening_brace_idx` does not point at a `{` (including
/// when it is out of range) or when the block is never closed.
fn find_matching_brace(code: &str, opening_brace_idx: usize) -> Option<usize> {
    /// Lexical context of the byte currently being examined.
    #[derive(Clone, Copy)]
    enum Scan {
        Code,
        SingleQuoted,
        DoubleQuoted,
    }

    let bytes = code.as_bytes();

    // Without this guard a start index that is not on `{` would pair up some
    // later, unrelated block and report its closing brace.
    if bytes.get(opening_brace_idx) != Some(&b'{') {
        return None;
    }

    let mut depth = 0usize;
    let mut state = Scan::Code;
    let mut skip_next = false;

    for (idx, &byte) in bytes.iter().enumerate().skip(opening_brace_idx) {
        if skip_next {
            skip_next = false;
            continue;
        }

        match (state, byte) {
            (Scan::SingleQuoted | Scan::DoubleQuoted, b'\\') => skip_next = true,
            (Scan::SingleQuoted, b'\'') | (Scan::DoubleQuoted, b'"') => state = Scan::Code,
            (Scan::Code, b'\'') => state = Scan::SingleQuoted,
            (Scan::Code, b'"') => state = Scan::DoubleQuoted,
            (Scan::Code, b'{') => depth += 1,
            (Scan::Code, b'}') => {
                // An unmatched `}` means there is no enclosing block to close.
                depth = depth.checked_sub(1)?;
                if depth == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }

    None
}
155
156impl PatternDetector for BeginTimeHeredocDetector {
157 fn detect(
158 &self,
159 code: &str,
160 offset: usize,
161 line_starts: &[usize],
162 ) -> Vec<(AntiPattern, Location)> {
163 let mut results = Vec::new();
164 let scan_code = mask_non_code_regions(code);
165
166 for begin_match in BEGIN_BLOCK_START_PATTERN.find_iter(&scan_code) {
167 let Some(opening_brace_rel) = begin_match.as_str().rfind('{') else {
168 continue;
169 };
170 let opening_brace_idx = begin_match.start() + opening_brace_rel;
171 let Some(closing_brace_idx) = find_matching_brace(&scan_code, opening_brace_idx) else {
172 continue;
173 };
174 let block_content = &scan_code[opening_brace_idx + 1..closing_brace_idx];
175
176 if !block_content.contains("<<") {
177 continue;
178 }
179
180 let location = location_from_start(line_starts, offset, begin_match.start());
181
182 results.push((
183 AntiPattern::BeginTimeHeredoc {
184 location: location.clone(),
185 heredoc_content: block_content.to_string(),
186 side_effects: vec!["Phase-dependent parsing".to_string()],
187 },
188 location,
189 ));
190 }
191
192 results
193 }
194
195 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
196 if let AntiPattern::BeginTimeHeredoc { .. } = pattern {
197 Some(Diagnostic {
198 severity: Severity::Error,
199 pattern: pattern.clone(),
200 message: "Heredoc declared during BEGIN-time".to_string(),
201 explanation: "Heredocs declared inside BEGIN blocks are evaluated during the compilation phase. This can lead to complex side effects that are difficult to track statically.".to_string(),
202 suggested_fix: Some("Move the heredoc declaration out of the BEGIN block if it doesn't need to be evaluated during compilation.".to_string()),
203 references: vec!["perldoc perlmod".to_string()],
204 })
205 } else {
206 None
207 }
208 }
209}
210
211struct DynamicDelimiterDetector;
213
214static DYNAMIC_DELIMITER_PATTERN: LazyLock<Regex> =
216 LazyLock::new(|| match Regex::new(r"<<\s*\$\{[^}]+\}|<<\s*\$\w+|<<\s*`[^`]+`") {
217 Ok(re) => re,
218 Err(_) => unreachable!("DYNAMIC_DELIMITER_PATTERN regex failed to compile"),
219 });
220
221impl PatternDetector for DynamicDelimiterDetector {
222 fn detect(
223 &self,
224 code: &str,
225 offset: usize,
226 line_starts: &[usize],
227 ) -> Vec<(AntiPattern, Location)> {
228 let mut results = Vec::new();
229 let scan_code = mask_non_code_regions(code);
230
231 for cap in DYNAMIC_DELIMITER_PATTERN.captures_iter(&scan_code) {
232 if let Some(match_pos) = cap.get(0) {
233 let expression = match_pos.as_str().to_string();
234 let location = location_from_start(line_starts, offset, match_pos.start());
235
236 results.push((
237 AntiPattern::DynamicHeredocDelimiter { location: location.clone(), expression },
238 location,
239 ));
240 }
241 }
242
243 results
244 }
245
246 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
247 let AntiPattern::DynamicHeredocDelimiter { expression, .. } = pattern else {
248 return None;
249 };
250
251 Some(Diagnostic {
252 severity: Severity::Warning,
253 pattern: pattern.clone(),
254 message: format!("Dynamic heredoc delimiter: {}", expression),
255 explanation: "Using variables or expressions as heredoc delimiters makes it impossible to know the terminator without executing the code.".to_string(),
256 suggested_fix: Some("Use a literal string as the heredoc terminator.".to_string()),
257 references: vec!["perldoc perlop".to_string()],
258 })
259 }
260}
261
262struct SourceFilterDetector;
264
265static SOURCE_FILTER_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
267 match Regex::new(r"use\s+Filter::(Simple|Util::Call|cpp|exec|sh|decrypt|tee)") {
268 Ok(re) => re,
269 Err(_) => unreachable!("SOURCE_FILTER_PATTERN regex failed to compile"),
270 }
271});
272
273impl PatternDetector for SourceFilterDetector {
274 fn detect(
275 &self,
276 code: &str,
277 offset: usize,
278 line_starts: &[usize],
279 ) -> Vec<(AntiPattern, Location)> {
280 let mut results = Vec::new();
281 let scan_code = mask_non_code_regions(code);
282
283 for cap in SOURCE_FILTER_PATTERN.captures_iter(&scan_code) {
284 if let (Some(match_pos), Some(module_match)) = (cap.get(0), cap.get(1)) {
285 let filter_module = module_match.as_str().to_string();
286 let location = location_from_start(line_starts, offset, match_pos.start());
287
288 results.push((
289 AntiPattern::SourceFilterHeredoc {
290 location: location.clone(),
291 module: filter_module,
292 },
293 location,
294 ));
295 }
296 }
297
298 results
299 }
300
301 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
302 let AntiPattern::SourceFilterHeredoc { module, .. } = pattern else {
303 return None;
304 };
305
306 Some(Diagnostic {
307 severity: Severity::Error,
308 pattern: pattern.clone(),
309 message: format!("Source filter detected: Filter::{}", module),
310 explanation: "Source filters rewrite the source code before it's parsed. Static analysis cannot reliably predict the state of the code after filtering.".to_string(),
311 suggested_fix: Some("Avoid using source filters. They are considered problematic and often replaced by better alternatives like Devel::Declare or modern Perl features.".to_string()),
312 references: vec!["perldoc Filter::Simple".to_string()],
313 })
314 }
315}
316
317struct RegexHeredocDetector;
319
320static REGEX_HEREDOC_PATTERN: LazyLock<Regex> =
322 LazyLock::new(|| match Regex::new(r"\(\?\{[^}]*<<[^}]*\}") {
323 Ok(re) => re,
324 Err(_) => unreachable!("REGEX_HEREDOC_PATTERN regex failed to compile"),
325 });
326
327impl PatternDetector for RegexHeredocDetector {
328 fn detect(
329 &self,
330 code: &str,
331 offset: usize,
332 line_starts: &[usize],
333 ) -> Vec<(AntiPattern, Location)> {
334 let mut results = Vec::new();
335 let scan_code = mask_non_code_regions(code);
336
337 for cap in REGEX_HEREDOC_PATTERN.captures_iter(&scan_code) {
338 if let Some(match_pos) = cap.get(0) {
339 let location = location_from_start(line_starts, offset, match_pos.start());
340
341 results.push((
342 AntiPattern::RegexCodeBlockHeredoc { location: location.clone() },
343 location,
344 ));
345 }
346 }
347
348 results
349 }
350
351 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
352 if let AntiPattern::RegexCodeBlockHeredoc { .. } = pattern {
353 Some(Diagnostic {
354 severity: Severity::Warning,
355 pattern: pattern.clone(),
356 message: "Heredoc inside regex code block".to_string(),
357 explanation: "Declaring heredocs inside (?{ ... }) or (??{ ... }) blocks is extremely rare and difficult to parse correctly.".to_string(),
358 suggested_fix: None,
359 references: vec!["perldoc perlre".to_string()],
360 })
361 } else {
362 None
363 }
364 }
365}
366
367struct EvalHeredocDetector;
369
370static EVAL_HEREDOC_PATTERN: LazyLock<Regex> =
372 LazyLock::new(|| match Regex::new(r#"eval\s+(?:'[^']*<<[^']*'|"[^"]*<<[^"]*")"#) {
373 Ok(re) => re,
374 Err(_) => unreachable!("EVAL_HEREDOC_PATTERN regex failed to compile"),
375 });
376
377impl PatternDetector for EvalHeredocDetector {
378 fn detect(
379 &self,
380 code: &str,
381 offset: usize,
382 line_starts: &[usize],
383 ) -> Vec<(AntiPattern, Location)> {
384 let mut results = Vec::new();
385
386 for cap in EVAL_HEREDOC_PATTERN.captures_iter(code) {
387 if let Some(match_pos) = cap.get(0) {
388 let location = location_from_start(line_starts, offset, match_pos.start());
389
390 results.push((
391 AntiPattern::EvalStringHeredoc { location: location.clone() },
392 location,
393 ));
394 }
395 }
396
397 results
398 }
399
400 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
401 if let AntiPattern::EvalStringHeredoc { .. } = pattern {
402 Some(Diagnostic {
403 severity: Severity::Warning,
404 pattern: pattern.clone(),
405 message: "Heredoc inside eval string".to_string(),
406 explanation: "Heredocs declared inside strings passed to eval require double parsing and can hide malicious or complex code.".to_string(),
407 suggested_fix: Some("Consider using a block eval or moving the heredoc outside the eval string.".to_string()),
408 references: vec!["perldoc -f eval".to_string()],
409 })
410 } else {
411 None
412 }
413 }
414}
415
416struct TiedHandleDetector;
418
419static TIE_PATTERN: LazyLock<Regex> = LazyLock::new(|| match Regex::new(r"tie\s+([*$]\w+)") {
421 Ok(re) => re,
422 Err(_) => unreachable!("TIE_PATTERN regex failed to compile"),
423});
424
425static PRINT_HEREDOC_PATTERN: LazyLock<Regex> =
427 LazyLock::new(|| match Regex::new(r"print\s+([*$]?\w+)\s+<<") {
428 Ok(re) => re,
429 Err(_) => unreachable!("PRINT_HEREDOC_PATTERN regex failed to compile"),
430 });
431
432impl PatternDetector for TiedHandleDetector {
433 fn detect(
434 &self,
435 code: &str,
436 offset: usize,
437 line_starts: &[usize],
438 ) -> Vec<(AntiPattern, Location)> {
439 let mut results = Vec::new();
440 let scan_code = mask_non_code_regions(code);
441
442 let mut tied_handles = HashSet::new();
445 for cap in TIE_PATTERN.captures_iter(&scan_code) {
446 if let Some(handle_match) = cap.get(1) {
447 let raw_handle = handle_match.as_str();
448 let normalized = raw_handle.strip_prefix('*').unwrap_or(raw_handle);
449 tied_handles.insert(normalized.to_string());
450 }
451 }
452
453 for cap in PRINT_HEREDOC_PATTERN.captures_iter(&scan_code) {
457 let (Some(match_pos), Some(handle_match)) = (cap.get(0), cap.get(1)) else {
458 continue;
459 };
460
461 let raw_print_handle = handle_match.as_str();
462 let normalized_print_handle =
463 raw_print_handle.strip_prefix('*').unwrap_or(raw_print_handle);
464
465 if tied_handles.contains(normalized_print_handle) {
466 let location = location_from_start(line_starts, offset, match_pos.start());
467 results.push((
468 AntiPattern::TiedHandleHeredoc {
469 location: location.clone(),
470 handle_name: normalized_print_handle.to_string(),
471 },
472 location,
473 ));
474 }
475 }
476
477 results
478 }
479
480 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
481 let AntiPattern::TiedHandleHeredoc { handle_name, .. } = pattern else {
482 return None;
483 };
484
485 Some(Diagnostic {
486 severity: Severity::Info,
487 pattern: pattern.clone(),
488 message: format!("Heredoc written to tied handle '{}'", handle_name),
489 explanation: "Writing to a tied handle invokes custom code. The behavior of heredoc output depends on the tied class implementation.".to_string(),
490 suggested_fix: None,
491 references: vec!["perldoc -f tie".to_string()],
492 })
493 }
494}
495
impl Default for AntiPatternDetector {
    /// Equivalent to [`AntiPatternDetector::new`].
    fn default() -> Self {
        Self::new()
    }
}
501
502impl AntiPatternDetector {
503 pub fn new() -> Self {
505 Self {
506 patterns: vec![
507 Box::new(FormatHeredocDetector),
508 Box::new(BeginTimeHeredocDetector),
509 Box::new(DynamicDelimiterDetector),
510 Box::new(SourceFilterDetector),
511 Box::new(RegexHeredocDetector),
512 Box::new(EvalHeredocDetector),
513 Box::new(TiedHandleDetector),
514 ],
515 }
516 }
517
518 pub fn detect_all(&self, code: &str) -> Vec<Diagnostic> {
520 let mut diagnostics = Vec::new();
521 let line_starts = build_line_starts(code);
522
523 for detector in &self.patterns {
524 let patterns = detector.detect(code, 0, &line_starts);
525 for (pattern, _) in patterns {
526 if let Some(diagnostic) = detector.diagnose(&pattern) {
527 diagnostics.push(diagnostic);
528 }
529 }
530 }
531
532 diagnostics.sort_by_key(|d| match &d.pattern {
533 AntiPattern::FormatHeredoc { location, .. }
534 | AntiPattern::BeginTimeHeredoc { location, .. }
535 | AntiPattern::DynamicHeredocDelimiter { location, .. }
536 | AntiPattern::SourceFilterHeredoc { location, .. }
537 | AntiPattern::RegexCodeBlockHeredoc { location, .. }
538 | AntiPattern::EvalStringHeredoc { location, .. }
539 | AntiPattern::TiedHandleHeredoc { location, .. } => location.offset,
540 });
541
542 diagnostics
543 }
544
545 pub fn format_report(&self, diagnostics: &[Diagnostic]) -> String {
550 let mut report = String::from("Anti-Pattern Analysis Report\n");
551 report.push_str("============================\n\n");
552
553 if diagnostics.is_empty() {
554 report.push_str("No problematic patterns detected.\n");
555 return report;
556 }
557
558 report.push_str(&format!("Found {} problematic patterns:\n\n", diagnostics.len()));
559
560 for (i, diag) in diagnostics.iter().enumerate() {
561 report.push_str(&format!(
562 "{}. {} ({})\n",
563 i + 1,
564 diag.message,
565 match diag.severity {
566 Severity::Error => "ERROR",
567 Severity::Warning => "WARNING",
568 Severity::Info => "INFO",
569 }
570 ));
571
572 report.push_str(&format!(
573 " Location: {}\n",
574 match &diag.pattern {
575 AntiPattern::FormatHeredoc { location, .. }
576 | AntiPattern::BeginTimeHeredoc { location, .. }
577 | AntiPattern::DynamicHeredocDelimiter { location, .. }
578 | AntiPattern::SourceFilterHeredoc { location, .. }
579 | AntiPattern::RegexCodeBlockHeredoc { location, .. }
580 | AntiPattern::EvalStringHeredoc { location, .. }
581 | AntiPattern::TiedHandleHeredoc { location, .. } =>
582 format!("line {}, column {}", location.line, location.column),
583 }
584 ));
585
586 report.push_str(&format!(" Explanation: {}\n", diag.explanation));
587
588 if let Some(fix) = &diag.suggested_fix {
589 report.push_str(&format!(
590 " Suggested fix:\n {}\n",
591 fix.lines().collect::<Vec<_>>().join("\n ")
592 ));
593 }
594
595 if !diag.references.is_empty() {
596 report.push_str(&format!(" References: {}\n", diag.references.join(", ")));
597 }
598
599 report.push('\n');
600 }
601
602 report
603 }
604}
605
606#[cfg(test)]
607mod tests;