1use regex::Regex;
2use std::collections::HashSet;
3use std::sync::LazyLock;
4
5use crate::heredoc_anti_patterns::model::{AntiPattern, Diagnostic, Location, Severity};
6use crate::heredoc_anti_patterns::utils::{
7 build_line_starts, location_from_start, mask_non_code_regions,
8};
9
/// Runs a fixed set of heredoc anti-pattern detectors over source text and
/// converts their findings into [`Diagnostic`]s.
pub struct AntiPatternDetector {
    /// Registered detectors; `detect_all` invokes them in this order.
    patterns: Vec<Box<dyn PatternDetector>>,
}
20
/// A single anti-pattern check: locates occurrences in source text and
/// describes them as diagnostics.
trait PatternDetector: Send + Sync {
    /// Scans `code` and returns every finding paired with its location.
    ///
    /// The implementations in this file pass `line_starts` and `offset`
    /// through to `location_from_start` together with the match position.
    fn detect(
        &self,
        code: &str,
        offset: usize,
        line_starts: &[usize],
    ) -> Vec<(AntiPattern, Location)>;

    /// Builds the diagnostic for `pattern`.
    ///
    /// The implementations in this file return `None` when `pattern` is not
    /// the variant that the detector itself produces.
    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic>;
}
30
/// Flags `format NAME =` declarations whose body contains `<<`.
struct FormatHeredocDetector;
33
34static FORMAT_PATTERN: LazyLock<Regex> =
36 LazyLock::new(|| match Regex::new(r"(?m)^\s*format\s+(\w+)\s*=\s*$") {
37 Ok(re) => re,
38 Err(_) => unreachable!("FORMAT_PATTERN regex failed to compile"),
39 });
40
41static HEREDOC_DELIMITER_PATTERN: LazyLock<Regex> =
43 LazyLock::new(|| match Regex::new(r#"<<\s*['"`]?([A-Za-z_][A-Za-z0-9_]*)['"`]?"#) {
44 Ok(re) => re,
45 Err(_) => unreachable!("HEREDOC_DELIMITER_PATTERN regex failed to compile"),
46 });
47
48impl PatternDetector for FormatHeredocDetector {
49 fn detect(
50 &self,
51 code: &str,
52 offset: usize,
53 line_starts: &[usize],
54 ) -> Vec<(AntiPattern, Location)> {
55 let mut results = Vec::new();
56 let scan_code = mask_non_code_regions(code);
57
58 for cap in FORMAT_PATTERN.captures_iter(&scan_code) {
59 if let (Some(match_pos), Some(name_match)) = (cap.get(0), cap.get(1)) {
60 let format_name = name_match.as_str().to_string();
61 let location = location_from_start(line_starts, offset, match_pos.start());
62
63 let body_start = match_pos.end();
65 let body_end = code[body_start..].find("\n.").unwrap_or(code.len() - body_start);
66 let body = &scan_code[body_start..body_start + body_end];
67 let source_body = &code[body_start..body_start + body_end];
68
69 if body.contains("<<") {
70 results.push((
71 AntiPattern::FormatHeredoc {
72 location: location.clone(),
73 format_name,
74 heredoc_delimiter: extract_heredoc_delimiter(source_body),
75 },
76 location,
77 ));
78 }
79 }
80 }
81
82 results
83 }
84
85 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
86 let AntiPattern::FormatHeredoc { format_name, .. } = pattern else {
87 return None;
88 };
89
90 Some(Diagnostic {
91 severity: Severity::Warning,
92 pattern: pattern.clone(),
93 message: format!("Heredoc declared inside format '{}'", format_name),
94 explanation: "Heredocs inside format declarations are often handled specially by the Perl interpreter and can be difficult to parse statically.".to_string(),
95 suggested_fix: Some("Consider moving the heredoc outside the format or using a simple string if possible.".to_string()),
96 references: vec!["perldoc perlform".to_string()],
97 })
98 }
99}
100
/// Flags `BEGIN { ... }` blocks whose contents contain `<<`.
struct BeginTimeHeredocDetector;
103
104static BEGIN_BLOCK_START_PATTERN: LazyLock<Regex> =
106 LazyLock::new(|| match Regex::new(r"\bBEGIN\s*\{") {
107 Ok(re) => re,
108 Err(_) => unreachable!("BEGIN_BLOCK_START_PATTERN regex failed to compile"),
109 });
110
111fn extract_heredoc_delimiter(body: &str) -> String {
112 HEREDOC_DELIMITER_PATTERN
113 .captures(body)
114 .and_then(|captures| captures.get(1).map(|delimiter| delimiter.as_str().to_string()))
115 .unwrap_or_else(|| "UNKNOWN".to_string())
116}
117
/// Finds the byte index of the `}` that balances the braces opened from
/// `opening_brace_idx` onwards.
///
/// Scanning is byte-wise and starts at `opening_brace_idx`. Braces inside
/// single- or double-quoted spans are ignored, and a backslash inside such a
/// span causes the following byte to be skipped. Returns `None` when a `}`
/// appears before any `{`, when the braces never balance, or when
/// `opening_brace_idx` lies past the end of `code`.
fn find_matching_brace(code: &str, opening_brace_idx: usize) -> Option<usize> {
    /// Lexical context of the scanner.
    #[derive(Clone, Copy)]
    enum Scan {
        Code,
        SingleQuoted,
        DoubleQuoted,
    }

    let mut state = Scan::Code;
    let mut skip_next = false;
    let mut open_braces = 0usize;

    for (pos, byte) in code.bytes().enumerate().skip(opening_brace_idx) {
        // The byte following a backslash inside a quoted span is never interpreted.
        if skip_next {
            skip_next = false;
            continue;
        }

        match (state, byte) {
            (Scan::SingleQuoted | Scan::DoubleQuoted, b'\\') => skip_next = true,
            (Scan::SingleQuoted, b'\'') | (Scan::DoubleQuoted, b'"') => state = Scan::Code,
            (Scan::SingleQuoted | Scan::DoubleQuoted, _) => {}
            (Scan::Code, b'\'') => state = Scan::SingleQuoted,
            (Scan::Code, b'"') => state = Scan::DoubleQuoted,
            (Scan::Code, b'{') => open_braces += 1,
            (Scan::Code, b'}') => {
                // A closing brace with nothing open means the input is unbalanced.
                open_braces = open_braces.checked_sub(1)?;
                if open_braces == 0 {
                    return Some(pos);
                }
            }
            (Scan::Code, _) => {}
        }
    }

    None
}
170
171impl PatternDetector for BeginTimeHeredocDetector {
172 fn detect(
173 &self,
174 code: &str,
175 offset: usize,
176 line_starts: &[usize],
177 ) -> Vec<(AntiPattern, Location)> {
178 let mut results = Vec::new();
179 let scan_code = mask_non_code_regions(code);
180
181 for begin_match in BEGIN_BLOCK_START_PATTERN.find_iter(&scan_code) {
182 let Some(opening_brace_rel) = begin_match.as_str().rfind('{') else {
183 continue;
184 };
185 let opening_brace_idx = begin_match.start() + opening_brace_rel;
186 let Some(closing_brace_idx) = find_matching_brace(&scan_code, opening_brace_idx) else {
187 continue;
188 };
189 let block_content = &scan_code[opening_brace_idx + 1..closing_brace_idx];
190
191 if !block_content.contains("<<") {
192 continue;
193 }
194
195 let location = location_from_start(line_starts, offset, begin_match.start());
196
197 results.push((
198 AntiPattern::BeginTimeHeredoc {
199 location: location.clone(),
200 heredoc_content: block_content.to_string(),
201 side_effects: vec!["Phase-dependent parsing".to_string()],
202 },
203 location,
204 ));
205 }
206
207 results
208 }
209
210 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
211 if let AntiPattern::BeginTimeHeredoc { .. } = pattern {
212 Some(Diagnostic {
213 severity: Severity::Error,
214 pattern: pattern.clone(),
215 message: "Heredoc declared during BEGIN-time".to_string(),
216 explanation: "Heredocs declared inside BEGIN blocks are evaluated during the compilation phase. This can lead to complex side effects that are difficult to track statically.".to_string(),
217 suggested_fix: Some("Move the heredoc declaration out of the BEGIN block if it doesn't need to be evaluated during compilation.".to_string()),
218 references: vec!["perldoc perlmod".to_string()],
219 })
220 } else {
221 None
222 }
223 }
224}
225
/// Flags `<<` followed by `${...}`, `$name`, or a backtick-quoted string.
struct DynamicDelimiterDetector;
228
229static DYNAMIC_DELIMITER_PATTERN: LazyLock<Regex> =
231 LazyLock::new(|| match Regex::new(r"<<\s*\$\{[^}]+\}|<<\s*\$\w+|<<\s*`[^`]+`") {
232 Ok(re) => re,
233 Err(_) => unreachable!("DYNAMIC_DELIMITER_PATTERN regex failed to compile"),
234 });
235
236impl PatternDetector for DynamicDelimiterDetector {
237 fn detect(
238 &self,
239 code: &str,
240 offset: usize,
241 line_starts: &[usize],
242 ) -> Vec<(AntiPattern, Location)> {
243 let mut results = Vec::new();
244 let scan_code = mask_non_code_regions(code);
245
246 for cap in DYNAMIC_DELIMITER_PATTERN.captures_iter(&scan_code) {
247 if let Some(match_pos) = cap.get(0) {
248 let expression = match_pos.as_str().to_string();
249 let location = location_from_start(line_starts, offset, match_pos.start());
250
251 results.push((
252 AntiPattern::DynamicHeredocDelimiter { location: location.clone(), expression },
253 location,
254 ));
255 }
256 }
257
258 results
259 }
260
261 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
262 let AntiPattern::DynamicHeredocDelimiter { expression, .. } = pattern else {
263 return None;
264 };
265
266 Some(Diagnostic {
267 severity: Severity::Warning,
268 pattern: pattern.clone(),
269 message: format!("Dynamic heredoc delimiter: {}", expression),
270 explanation: "Using variables or expressions as heredoc delimiters makes it impossible to know the terminator without executing the code.".to_string(),
271 suggested_fix: Some("Use a literal string as the heredoc terminator.".to_string()),
272 references: vec!["perldoc perlop".to_string()],
273 })
274 }
275}
276
/// Flags `use Filter::...` statements for a fixed list of source-filter modules.
struct SourceFilterDetector;
279
280static SOURCE_FILTER_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
282 match Regex::new(r"use\s+Filter::(Simple|Util::Call|cpp|exec|sh|decrypt|tee)") {
283 Ok(re) => re,
284 Err(_) => unreachable!("SOURCE_FILTER_PATTERN regex failed to compile"),
285 }
286});
287
288impl PatternDetector for SourceFilterDetector {
289 fn detect(
290 &self,
291 code: &str,
292 offset: usize,
293 line_starts: &[usize],
294 ) -> Vec<(AntiPattern, Location)> {
295 let mut results = Vec::new();
296 let scan_code = mask_non_code_regions(code);
297
298 for cap in SOURCE_FILTER_PATTERN.captures_iter(&scan_code) {
299 if let (Some(match_pos), Some(module_match)) = (cap.get(0), cap.get(1)) {
300 let filter_module = module_match.as_str().to_string();
301 let location = location_from_start(line_starts, offset, match_pos.start());
302
303 results.push((
304 AntiPattern::SourceFilterHeredoc {
305 location: location.clone(),
306 module: filter_module,
307 },
308 location,
309 ));
310 }
311 }
312
313 results
314 }
315
316 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
317 let AntiPattern::SourceFilterHeredoc { module, .. } = pattern else {
318 return None;
319 };
320
321 Some(Diagnostic {
322 severity: Severity::Error,
323 pattern: pattern.clone(),
324 message: format!("Source filter detected: Filter::{}", module),
325 explanation: "Source filters rewrite the source code before it's parsed. Static analysis cannot reliably predict the state of the code after filtering.".to_string(),
326 suggested_fix: Some("Avoid using source filters. They are considered problematic and often replaced by better alternatives like Devel::Declare or modern Perl features.".to_string()),
327 references: vec!["perldoc Filter::Simple".to_string()],
328 })
329 }
330}
331
/// Flags regex code blocks that contain `<<`.
struct RegexHeredocDetector;
334
335static REGEX_HEREDOC_PATTERN: LazyLock<Regex> =
337 LazyLock::new(|| match Regex::new(r"\(\?\{[^}]*<<[^}]*\}") {
338 Ok(re) => re,
339 Err(_) => unreachable!("REGEX_HEREDOC_PATTERN regex failed to compile"),
340 });
341
342impl PatternDetector for RegexHeredocDetector {
343 fn detect(
344 &self,
345 code: &str,
346 offset: usize,
347 line_starts: &[usize],
348 ) -> Vec<(AntiPattern, Location)> {
349 let mut results = Vec::new();
350 let scan_code = mask_non_code_regions(code);
351
352 for cap in REGEX_HEREDOC_PATTERN.captures_iter(&scan_code) {
353 if let Some(match_pos) = cap.get(0) {
354 let location = location_from_start(line_starts, offset, match_pos.start());
355
356 results.push((
357 AntiPattern::RegexCodeBlockHeredoc { location: location.clone() },
358 location,
359 ));
360 }
361 }
362
363 results
364 }
365
366 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
367 if let AntiPattern::RegexCodeBlockHeredoc { .. } = pattern {
368 Some(Diagnostic {
369 severity: Severity::Warning,
370 pattern: pattern.clone(),
371 message: "Heredoc inside regex code block".to_string(),
372 explanation: "Declaring heredocs inside (?{ ... }) or (??{ ... }) blocks is extremely rare and difficult to parse correctly.".to_string(),
373 suggested_fix: None,
374 references: vec!["perldoc perlre".to_string()],
375 })
376 } else {
377 None
378 }
379 }
380}
381
/// Flags `eval` of a single- or double-quoted string literal that contains `<<`.
struct EvalHeredocDetector;
384
385static EVAL_HEREDOC_PATTERN: LazyLock<Regex> =
387 LazyLock::new(|| match Regex::new(r#"eval\s+(?:'[^']*<<[^']*'|"[^"]*<<[^"]*")"#) {
388 Ok(re) => re,
389 Err(_) => unreachable!("EVAL_HEREDOC_PATTERN regex failed to compile"),
390 });
391
392impl PatternDetector for EvalHeredocDetector {
393 fn detect(
394 &self,
395 code: &str,
396 offset: usize,
397 line_starts: &[usize],
398 ) -> Vec<(AntiPattern, Location)> {
399 let mut results = Vec::new();
400
401 for cap in EVAL_HEREDOC_PATTERN.captures_iter(code) {
402 if let Some(match_pos) = cap.get(0) {
403 let location = location_from_start(line_starts, offset, match_pos.start());
404
405 results.push((
406 AntiPattern::EvalStringHeredoc { location: location.clone() },
407 location,
408 ));
409 }
410 }
411
412 results
413 }
414
415 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
416 if let AntiPattern::EvalStringHeredoc { .. } = pattern {
417 Some(Diagnostic {
418 severity: Severity::Warning,
419 pattern: pattern.clone(),
420 message: "Heredoc inside eval string".to_string(),
421 explanation: "Heredocs declared inside strings passed to eval require double parsing and can hide malicious or complex code.".to_string(),
422 suggested_fix: Some("Consider using a block eval or moving the heredoc outside the eval string.".to_string()),
423 references: vec!["perldoc -f eval".to_string()],
424 })
425 } else {
426 None
427 }
428 }
429}
430
/// Flags `print HANDLE <<...` where `HANDLE` also appears as the target of a
/// `tie` in the same source text.
struct TiedHandleDetector;
433
434static TIE_PATTERN: LazyLock<Regex> = LazyLock::new(|| match Regex::new(r"tie\s+([*$]\w+)") {
436 Ok(re) => re,
437 Err(_) => unreachable!("TIE_PATTERN regex failed to compile"),
438});
439
440static PRINT_HEREDOC_PATTERN: LazyLock<Regex> =
442 LazyLock::new(|| match Regex::new(r"print\s+([*$]?\w+)\s+<<") {
443 Ok(re) => re,
444 Err(_) => unreachable!("PRINT_HEREDOC_PATTERN regex failed to compile"),
445 });
446
447impl PatternDetector for TiedHandleDetector {
448 fn detect(
449 &self,
450 code: &str,
451 offset: usize,
452 line_starts: &[usize],
453 ) -> Vec<(AntiPattern, Location)> {
454 let mut results = Vec::new();
455 let scan_code = mask_non_code_regions(code);
456
457 let mut tied_handles = HashSet::new();
460 for cap in TIE_PATTERN.captures_iter(&scan_code) {
461 if let Some(handle_match) = cap.get(1) {
462 let raw_handle = handle_match.as_str();
463 let normalized = raw_handle.strip_prefix('*').unwrap_or(raw_handle);
464 tied_handles.insert(normalized.to_string());
465 }
466 }
467
468 for cap in PRINT_HEREDOC_PATTERN.captures_iter(&scan_code) {
472 let (Some(match_pos), Some(handle_match)) = (cap.get(0), cap.get(1)) else {
473 continue;
474 };
475
476 let raw_print_handle = handle_match.as_str();
477 let normalized_print_handle =
478 raw_print_handle.strip_prefix('*').unwrap_or(raw_print_handle);
479
480 if tied_handles.contains(normalized_print_handle) {
481 let location = location_from_start(line_starts, offset, match_pos.start());
482 results.push((
483 AntiPattern::TiedHandleHeredoc {
484 location: location.clone(),
485 handle_name: normalized_print_handle.to_string(),
486 },
487 location,
488 ));
489 }
490 }
491
492 results
493 }
494
495 fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
496 let AntiPattern::TiedHandleHeredoc { handle_name, .. } = pattern else {
497 return None;
498 };
499
500 Some(Diagnostic {
501 severity: Severity::Info,
502 pattern: pattern.clone(),
503 message: format!("Heredoc written to tied handle '{}'", handle_name),
504 explanation: "Writing to a tied handle invokes custom code. The behavior of heredoc output depends on the tied class implementation.".to_string(),
505 suggested_fix: None,
506 references: vec!["perldoc -f tie".to_string()],
507 })
508 }
509}
510
impl Default for AntiPatternDetector {
    /// Equivalent to [`AntiPatternDetector::new`].
    fn default() -> Self {
        Self::new()
    }
}
516
517impl AntiPatternDetector {
518 pub fn new() -> Self {
520 Self {
521 patterns: vec![
522 Box::new(FormatHeredocDetector),
523 Box::new(BeginTimeHeredocDetector),
524 Box::new(DynamicDelimiterDetector),
525 Box::new(SourceFilterDetector),
526 Box::new(RegexHeredocDetector),
527 Box::new(EvalHeredocDetector),
528 Box::new(TiedHandleDetector),
529 ],
530 }
531 }
532
533 pub fn detect_all(&self, code: &str) -> Vec<Diagnostic> {
535 let mut diagnostics = Vec::new();
536 let line_starts = build_line_starts(code);
537
538 for detector in &self.patterns {
539 let patterns = detector.detect(code, 0, &line_starts);
540 for (pattern, _) in patterns {
541 if let Some(diagnostic) = detector.diagnose(&pattern) {
542 diagnostics.push(diagnostic);
543 }
544 }
545 }
546
547 diagnostics.sort_by_key(|d| match &d.pattern {
548 AntiPattern::FormatHeredoc { location, .. }
549 | AntiPattern::BeginTimeHeredoc { location, .. }
550 | AntiPattern::DynamicHeredocDelimiter { location, .. }
551 | AntiPattern::SourceFilterHeredoc { location, .. }
552 | AntiPattern::RegexCodeBlockHeredoc { location, .. }
553 | AntiPattern::EvalStringHeredoc { location, .. }
554 | AntiPattern::TiedHandleHeredoc { location, .. } => location.offset,
555 });
556
557 diagnostics
558 }
559
560 pub fn format_report(&self, diagnostics: &[Diagnostic]) -> String {
565 let mut report = String::from("Anti-Pattern Analysis Report\n");
566 report.push_str("============================\n\n");
567
568 if diagnostics.is_empty() {
569 report.push_str("No problematic patterns detected.\n");
570 return report;
571 }
572
573 report.push_str(&format!("Found {} problematic patterns:\n\n", diagnostics.len()));
574
575 for (i, diag) in diagnostics.iter().enumerate() {
576 report.push_str(&format!(
577 "{}. {} ({})\n",
578 i + 1,
579 diag.message,
580 match diag.severity {
581 Severity::Error => "ERROR",
582 Severity::Warning => "WARNING",
583 Severity::Info => "INFO",
584 }
585 ));
586
587 report.push_str(&format!(
588 " Location: {}\n",
589 match &diag.pattern {
590 AntiPattern::FormatHeredoc { location, .. }
591 | AntiPattern::BeginTimeHeredoc { location, .. }
592 | AntiPattern::DynamicHeredocDelimiter { location, .. }
593 | AntiPattern::SourceFilterHeredoc { location, .. }
594 | AntiPattern::RegexCodeBlockHeredoc { location, .. }
595 | AntiPattern::EvalStringHeredoc { location, .. }
596 | AntiPattern::TiedHandleHeredoc { location, .. } =>
597 format!("line {}, column {}", location.line, location.column),
598 }
599 ));
600
601 report.push_str(&format!(" Explanation: {}\n", diag.explanation));
602
603 if let Some(fix) = &diag.suggested_fix {
604 report.push_str(&format!(
605 " Suggested fix:\n {}\n",
606 fix.lines().collect::<Vec<_>>().join("\n ")
607 ));
608 }
609
610 if !diag.references.is_empty() {
611 report.push_str(&format!(" References: {}\n", diag.references.join(", ")));
612 }
613
614 report.push('\n');
615 }
616
617 report
618 }
619}
620
// Unit tests live in the separate `tests` submodule, compiled only for test builds.
#[cfg(test)]
mod tests;