Skip to main content

perl_parser/
heredoc_anti_patterns.rs

1//! Anti-pattern detection for heredoc edge cases
2//!
3//! This crate provides detection and analysis of problematic Perl patterns
4//! that make static parsing difficult or impossible, particularly around heredocs.
5//!
6//! The [`AntiPatternDetector`] scans Perl source for seven categories of
7//! heredoc-related anti-patterns and produces [`Diagnostic`]s describing each
8//! finding, with severity, explanation, suggested fix, and documentation
9//! references.
10
11use regex::Regex;
12use std::collections::HashSet;
13use std::sync::LazyLock;
14
/// Source location of a detected anti-pattern.
///
/// All three coordinates are provided so callers can serve both LSP (line/column)
/// and byte-level (offset) consumers without re-computing positions.
///
/// Every field is a plain `usize`, so besides `PartialEq` the type also
/// implements `Eq` and `Hash`; locations can therefore be used as keys in
/// hash maps and sets (for example to de-duplicate findings).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Location {
    /// Zero-based line number within the scanned source fragment.
    pub line: usize,
    /// Zero-based column (byte offset from the start of the line).
    pub column: usize,
    /// Absolute byte offset from the start of the scanned source fragment.
    pub offset: usize,
}
28
/// Diagnostic severity level for a detected anti-pattern.
///
/// The enum has no payload, so it implements `Eq` and `Hash` in addition to
/// `PartialEq`; severities can be grouped or counted with hash-based
/// collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Severity {
    /// The construct will likely cause a runtime or parse failure.
    Error,
    /// The construct works but is fragile or difficult to analyze statically.
    Warning,
    /// The construct is valid but could be improved for readability or tooling support.
    Info,
}
39
40/// A specific category of heredoc-related anti-pattern found in Perl source.
41///
42/// Each variant captures the [`Location`] of the offending construct plus any
43/// context needed to produce a useful diagnostic message.
44#[derive(Debug, Clone, PartialEq)]
45pub enum AntiPattern {
46    /// A heredoc declared inside a `format` body.
47    FormatHeredoc { location: Location, format_name: String, heredoc_delimiter: String },
48    /// A heredoc declared inside a `BEGIN { ... }` block, evaluated at compile time.
49    BeginTimeHeredoc { location: Location, heredoc_content: String, side_effects: Vec<String> },
50    /// A heredoc whose terminator is determined by a variable or expression at runtime.
51    DynamicHeredocDelimiter { location: Location, expression: String },
52    /// A `use Filter::*` statement that may rewrite source before static analysis runs.
53    SourceFilterHeredoc { location: Location, module: String },
54    /// A heredoc embedded inside a `(?{ ... })` regex code block.
55    RegexCodeBlockHeredoc { location: Location },
56    /// A heredoc embedded inside a string argument to `eval`.
57    EvalStringHeredoc { location: Location },
58    /// A heredoc written to a filehandle that has been `tie`d to a custom class.
59    TiedHandleHeredoc { location: Location, handle_name: String },
60}
61
62/// A fully-formed diagnostic produced by the anti-pattern detector.
63///
64/// Contains everything needed to display a problem in an IDE or report:
65/// the severity, the matched pattern (with location), a human-readable message,
66/// a longer explanation, an optional suggested fix, and `perldoc` references.
67#[derive(Debug, Clone, PartialEq)]
68pub struct Diagnostic {
69    /// How serious the problem is.
70    pub severity: Severity,
71    /// The specific anti-pattern that triggered this diagnostic.
72    pub pattern: AntiPattern,
73    /// Short one-line summary suitable for an IDE problem marker.
74    pub message: String,
75    /// Longer explanation of why the construct is problematic.
76    pub explanation: String,
77    /// Optional concrete suggestion for fixing the problem.
78    pub suggested_fix: Option<String>,
79    /// Relevant `perldoc` pages or documentation references.
80    pub references: Vec<String>,
81}
82
83/// Scans Perl source for heredoc-related anti-patterns and produces [`Diagnostic`]s.
84///
85/// Construct with [`AntiPatternDetector::new`], then call [`detect_all`] with the
86/// source text.  The detector runs all seven built-in pattern checkers and returns
87/// the results sorted by byte offset so callers receive problems in source order.
88///
89/// [`detect_all`]: AntiPatternDetector::detect_all
90pub struct AntiPatternDetector {
91    patterns: Vec<Box<dyn PatternDetector>>,
92}
93
94trait PatternDetector: Send + Sync {
95    fn detect(
96        &self,
97        code: &str,
98        offset: usize,
99        line_starts: &[usize],
100    ) -> Vec<(AntiPattern, Location)>;
101    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic>;
102}
103
/// Returns the byte offset at which each line of `code` begins.
///
/// The first entry is always `0`; every `\n` contributes the offset of the
/// byte that follows it, so a trailing newline yields a final entry equal to
/// `code.len()`.
fn build_line_starts(code: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(code.match_indices('\n').map(|(newline_at, _)| newline_at + 1))
        .collect()
}
116
117fn location_from_start(line_starts: &[usize], offset: usize, start: usize) -> Location {
118    let insertion = line_starts.partition_point(|&line_start| line_start <= start);
119    let line = insertion.saturating_sub(1);
120    let line_start = line_starts.get(line).copied().unwrap_or(0);
121    let column = start.saturating_sub(line_start);
122
123    Location { line, column, offset: offset + start }
124}
125
126fn mask_non_code_regions(code: &str) -> String {
127    fn push_masked_char(masked: &mut String, ch: char) {
128        for _ in 0..ch.len_utf8() {
129            masked.push(' ');
130        }
131    }
132
133    let mut masked = String::with_capacity(code.len());
134    let mut in_single_quote = false;
135    let mut in_double_quote = false;
136    let mut in_line_comment = false;
137    let mut escaped = false;
138
139    for ch in code.chars() {
140        if in_line_comment {
141            if ch == '\n' {
142                in_line_comment = false;
143                masked.push('\n');
144            } else {
145                push_masked_char(&mut masked, ch);
146            }
147            continue;
148        }
149
150        if in_single_quote {
151            if escaped {
152                escaped = false;
153            } else if ch == '\\' {
154                escaped = true;
155            } else if ch == '\'' {
156                in_single_quote = false;
157            }
158            push_masked_char(&mut masked, ch);
159            continue;
160        }
161
162        if in_double_quote {
163            if escaped {
164                escaped = false;
165            } else if ch == '\\' {
166                escaped = true;
167            } else if ch == '"' {
168                in_double_quote = false;
169            }
170            push_masked_char(&mut masked, ch);
171            continue;
172        }
173
174        match ch {
175            '#' => {
176                in_line_comment = true;
177                push_masked_char(&mut masked, ch);
178            }
179            '\'' => {
180                in_single_quote = true;
181                push_masked_char(&mut masked, ch);
182            }
183            '"' => {
184                in_double_quote = true;
185                push_masked_char(&mut masked, ch);
186            }
187            _ => masked.push(ch),
188        }
189    }
190
191    masked
192}
193
194// Format heredoc detector
195struct FormatHeredocDetector;
196
197/// Pattern for identifying format declarations
198static FORMAT_PATTERN: LazyLock<Regex> =
199    LazyLock::new(|| match Regex::new(r"(?m)^\s*format\s+(\w+)\s*=\s*$") {
200        Ok(re) => re,
201        Err(_) => unreachable!("FORMAT_PATTERN regex failed to compile"),
202    });
203
204impl PatternDetector for FormatHeredocDetector {
205    fn detect(
206        &self,
207        code: &str,
208        offset: usize,
209        line_starts: &[usize],
210    ) -> Vec<(AntiPattern, Location)> {
211        let mut results = Vec::new();
212        let scan_code = mask_non_code_regions(code);
213
214        for cap in FORMAT_PATTERN.captures_iter(&scan_code) {
215            if let (Some(match_pos), Some(name_match)) = (cap.get(0), cap.get(1)) {
216                let format_name = name_match.as_str().to_string();
217                let location = location_from_start(line_starts, offset, match_pos.start());
218
219                // Look for heredoc marker inside format body (simplified)
220                let body_start = match_pos.end();
221                let body_end = code[body_start..].find("\n.").unwrap_or(code.len() - body_start);
222                let body = &scan_code[body_start..body_start + body_end];
223
224                if body.contains("<<") {
225                    results.push((
226                        AntiPattern::FormatHeredoc {
227                            location: location.clone(),
228                            format_name,
229                            heredoc_delimiter: "UNKNOWN".to_string(), // Would need better extraction
230                        },
231                        location,
232                    ));
233                }
234            }
235        }
236
237        results
238    }
239
240    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
241        let AntiPattern::FormatHeredoc { format_name, .. } = pattern else {
242            return None;
243        };
244
245        Some(Diagnostic {
246            severity: Severity::Warning,
247            pattern: pattern.clone(),
248            message: format!("Heredoc declared inside format '{}'", format_name),
249            explanation: "Heredocs inside format declarations are often handled specially by the Perl interpreter and can be difficult to parse statically.".to_string(),
250            suggested_fix: Some("Consider moving the heredoc outside the format or using a simple string if possible.".to_string()),
251            references: vec!["perldoc perlform".to_string()],
252        })
253    }
254}
255
256// BEGIN-time heredoc detector
257struct BeginTimeHeredocDetector;
258
259/// Pattern for identifying BEGIN block openings
260static BEGIN_BLOCK_START_PATTERN: LazyLock<Regex> =
261    LazyLock::new(|| match Regex::new(r"\bBEGIN\s*\{") {
262        Ok(re) => re,
263        Err(_) => unreachable!("BEGIN_BLOCK_START_PATTERN regex failed to compile"),
264    });
265
/// Returns the byte index of the `}` that closes the first `{` found at or
/// after `opening_brace_idx`.
///
/// Braces inside single- or double-quoted strings are ignored, and a
/// backslash inside a string escapes the following byte.  `None` is returned
/// when a `}` is seen before any `{`, when the braces never balance, or when
/// `opening_brace_idx` lies beyond the end of `code`.
fn find_matching_brace(code: &str, opening_brace_idx: usize) -> Option<usize> {
    let tail = code.as_bytes().get(opening_brace_idx..)?;

    let mut nesting = 0usize;
    let mut open_quote: Option<u8> = None;
    let mut skip_next = false;

    for (rel, &byte) in tail.iter().enumerate() {
        if skip_next {
            skip_next = false;
            continue;
        }

        if let Some(quote) = open_quote {
            if byte == b'\\' {
                skip_next = true;
            } else if byte == quote {
                open_quote = None;
            }
            continue;
        }

        match byte {
            b'\'' | b'"' => open_quote = Some(byte),
            b'{' => nesting += 1,
            b'}' => {
                // A closer with nothing open means the input is unbalanced.
                nesting = nesting.checked_sub(1)?;
                if nesting == 0 {
                    return Some(opening_brace_idx + rel);
                }
            }
            _ => {}
        }
    }

    None
}
318
319impl PatternDetector for BeginTimeHeredocDetector {
320    fn detect(
321        &self,
322        code: &str,
323        offset: usize,
324        line_starts: &[usize],
325    ) -> Vec<(AntiPattern, Location)> {
326        let mut results = Vec::new();
327        let scan_code = mask_non_code_regions(code);
328
329        for begin_match in BEGIN_BLOCK_START_PATTERN.find_iter(&scan_code) {
330            let Some(opening_brace_rel) = begin_match.as_str().rfind('{') else {
331                continue;
332            };
333            let opening_brace_idx = begin_match.start() + opening_brace_rel;
334            let Some(closing_brace_idx) = find_matching_brace(&scan_code, opening_brace_idx) else {
335                continue;
336            };
337            let block_content = &scan_code[opening_brace_idx + 1..closing_brace_idx];
338
339            if !block_content.contains("<<") {
340                continue;
341            }
342
343            let location = location_from_start(line_starts, offset, begin_match.start());
344
345            results.push((
346                AntiPattern::BeginTimeHeredoc {
347                    location: location.clone(),
348                    heredoc_content: block_content.to_string(),
349                    side_effects: vec!["Phase-dependent parsing".to_string()],
350                },
351                location,
352            ));
353        }
354
355        results
356    }
357
358    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
359        if let AntiPattern::BeginTimeHeredoc { .. } = pattern {
360            Some(Diagnostic {
361                severity: Severity::Error,
362                pattern: pattern.clone(),
363                message: "Heredoc declared during BEGIN-time".to_string(),
364                explanation: "Heredocs declared inside BEGIN blocks are evaluated during the compilation phase. This can lead to complex side effects that are difficult to track statically.".to_string(),
365                suggested_fix: Some("Move the heredoc declaration out of the BEGIN block if it doesn't need to be evaluated during compilation.".to_string()),
366                references: vec!["perldoc perlmod".to_string()],
367            })
368        } else {
369            None
370        }
371    }
372}
373
374// Dynamic delimiter detector
375struct DynamicDelimiterDetector;
376
377/// Pattern for identifying dynamic heredoc delimiters
378static DYNAMIC_DELIMITER_PATTERN: LazyLock<Regex> =
379    LazyLock::new(|| match Regex::new(r"<<\s*\$\{[^}]+\}|<<\s*\$\w+|<<\s*`[^`]+`") {
380        Ok(re) => re,
381        Err(_) => unreachable!("DYNAMIC_DELIMITER_PATTERN regex failed to compile"),
382    });
383
384impl PatternDetector for DynamicDelimiterDetector {
385    fn detect(
386        &self,
387        code: &str,
388        offset: usize,
389        line_starts: &[usize],
390    ) -> Vec<(AntiPattern, Location)> {
391        let mut results = Vec::new();
392        let scan_code = mask_non_code_regions(code);
393
394        for cap in DYNAMIC_DELIMITER_PATTERN.captures_iter(&scan_code) {
395            if let Some(match_pos) = cap.get(0) {
396                let expression = match_pos.as_str().to_string();
397                let location = location_from_start(line_starts, offset, match_pos.start());
398
399                results.push((
400                    AntiPattern::DynamicHeredocDelimiter { location: location.clone(), expression },
401                    location,
402                ));
403            }
404        }
405
406        results
407    }
408
409    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
410        let AntiPattern::DynamicHeredocDelimiter { expression, .. } = pattern else {
411            return None;
412        };
413
414        Some(Diagnostic {
415            severity: Severity::Warning,
416            pattern: pattern.clone(),
417            message: format!("Dynamic heredoc delimiter: {}", expression),
418            explanation: "Using variables or expressions as heredoc delimiters makes it impossible to know the terminator without executing the code.".to_string(),
419            suggested_fix: Some("Use a literal string as the heredoc terminator.".to_string()),
420            references: vec!["perldoc perlop".to_string()],
421        })
422    }
423}
424
425// Source filter detector
426struct SourceFilterDetector;
427
428/// Pattern for identifying common source filter modules
429static SOURCE_FILTER_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
430    match Regex::new(r"use\s+Filter::(Simple|Util::Call|cpp|exec|sh|decrypt|tee)") {
431        Ok(re) => re,
432        Err(_) => unreachable!("SOURCE_FILTER_PATTERN regex failed to compile"),
433    }
434});
435
436impl PatternDetector for SourceFilterDetector {
437    fn detect(
438        &self,
439        code: &str,
440        offset: usize,
441        line_starts: &[usize],
442    ) -> Vec<(AntiPattern, Location)> {
443        let mut results = Vec::new();
444        let scan_code = mask_non_code_regions(code);
445
446        for cap in SOURCE_FILTER_PATTERN.captures_iter(&scan_code) {
447            if let (Some(match_pos), Some(module_match)) = (cap.get(0), cap.get(1)) {
448                let filter_module = module_match.as_str().to_string();
449                let location = location_from_start(line_starts, offset, match_pos.start());
450
451                results.push((
452                    AntiPattern::SourceFilterHeredoc {
453                        location: location.clone(),
454                        module: filter_module,
455                    },
456                    location,
457                ));
458            }
459        }
460
461        results
462    }
463
464    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
465        let AntiPattern::SourceFilterHeredoc { module, .. } = pattern else {
466            return None;
467        };
468
469        Some(Diagnostic {
470            severity: Severity::Error,
471            pattern: pattern.clone(),
472            message: format!("Source filter detected: Filter::{}", module),
473            explanation: "Source filters rewrite the source code before it's parsed. Static analysis cannot reliably predict the state of the code after filtering.".to_string(),
474            suggested_fix: Some("Avoid using source filters. They are considered problematic and often replaced by better alternatives like Devel::Declare or modern Perl features.".to_string()),
475            references: vec!["perldoc Filter::Simple".to_string()],
476        })
477    }
478}
479
480// Regex heredoc detector
481struct RegexHeredocDetector;
482
483/// Pattern for identifying heredocs inside regex code blocks
484static REGEX_HEREDOC_PATTERN: LazyLock<Regex> =
485    LazyLock::new(|| match Regex::new(r"\(\?\{[^}]*<<[^}]*\}") {
486        Ok(re) => re,
487        Err(_) => unreachable!("REGEX_HEREDOC_PATTERN regex failed to compile"),
488    });
489
490impl PatternDetector for RegexHeredocDetector {
491    fn detect(
492        &self,
493        code: &str,
494        offset: usize,
495        line_starts: &[usize],
496    ) -> Vec<(AntiPattern, Location)> {
497        let mut results = Vec::new();
498        let scan_code = mask_non_code_regions(code);
499
500        for cap in REGEX_HEREDOC_PATTERN.captures_iter(&scan_code) {
501            if let Some(match_pos) = cap.get(0) {
502                let location = location_from_start(line_starts, offset, match_pos.start());
503
504                results.push((
505                    AntiPattern::RegexCodeBlockHeredoc { location: location.clone() },
506                    location,
507                ));
508            }
509        }
510
511        results
512    }
513
514    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
515        if let AntiPattern::RegexCodeBlockHeredoc { .. } = pattern {
516            Some(Diagnostic {
517                severity: Severity::Warning,
518                pattern: pattern.clone(),
519                message: "Heredoc inside regex code block".to_string(),
520                explanation: "Declaring heredocs inside (?{ ... }) or (??{ ... }) blocks is extremely rare and difficult to parse correctly.".to_string(),
521                suggested_fix: None,
522                references: vec!["perldoc perlre".to_string()],
523            })
524        } else {
525            None
526        }
527    }
528}
529
530// Eval heredoc detector
531struct EvalHeredocDetector;
532
533/// Pattern for identifying heredocs inside eval strings
534static EVAL_HEREDOC_PATTERN: LazyLock<Regex> =
535    LazyLock::new(|| match Regex::new(r#"eval\s+(?:'[^']*<<[^']*'|"[^"]*<<[^"]*")"#) {
536        Ok(re) => re,
537        Err(_) => unreachable!("EVAL_HEREDOC_PATTERN regex failed to compile"),
538    });
539
540impl PatternDetector for EvalHeredocDetector {
541    fn detect(
542        &self,
543        code: &str,
544        offset: usize,
545        line_starts: &[usize],
546    ) -> Vec<(AntiPattern, Location)> {
547        let mut results = Vec::new();
548
549        for cap in EVAL_HEREDOC_PATTERN.captures_iter(code) {
550            if let Some(match_pos) = cap.get(0) {
551                let location = location_from_start(line_starts, offset, match_pos.start());
552
553                results.push((
554                    AntiPattern::EvalStringHeredoc { location: location.clone() },
555                    location,
556                ));
557            }
558        }
559
560        results
561    }
562
563    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
564        if let AntiPattern::EvalStringHeredoc { .. } = pattern {
565            Some(Diagnostic {
566                severity: Severity::Warning,
567                pattern: pattern.clone(),
568                message: "Heredoc inside eval string".to_string(),
569                explanation: "Heredocs declared inside strings passed to eval require double parsing and can hide malicious or complex code.".to_string(),
570                suggested_fix: Some("Consider using a block eval or moving the heredoc outside the eval string.".to_string()),
571                references: vec!["perldoc -f eval".to_string()],
572            })
573        } else {
574            None
575        }
576    }
577}
578
579// Tied handle detector
580struct TiedHandleDetector;
581
582/// Pattern for identifying tie statements
583static TIE_PATTERN: LazyLock<Regex> = LazyLock::new(|| match Regex::new(r"tie\s+([*$]\w+)") {
584    Ok(re) => re,
585    Err(_) => unreachable!("TIE_PATTERN regex failed to compile"),
586});
587
588/// Pattern for identifying print statements that write heredocs to a handle.
589static PRINT_HEREDOC_PATTERN: LazyLock<Regex> =
590    LazyLock::new(|| match Regex::new(r"print\s+([*$]?\w+)\s+<<") {
591        Ok(re) => re,
592        Err(_) => unreachable!("PRINT_HEREDOC_PATTERN regex failed to compile"),
593    });
594
595impl PatternDetector for TiedHandleDetector {
596    fn detect(
597        &self,
598        code: &str,
599        offset: usize,
600        line_starts: &[usize],
601    ) -> Vec<(AntiPattern, Location)> {
602        let mut results = Vec::new();
603        let scan_code = mask_non_code_regions(code);
604
605        // First collect tied handles in normalized form:
606        // *FH -> FH, $fh -> $fh.
607        let mut tied_handles = HashSet::new();
608        for cap in TIE_PATTERN.captures_iter(&scan_code) {
609            if let Some(handle_match) = cap.get(1) {
610                let raw_handle = handle_match.as_str();
611                let normalized = raw_handle.strip_prefix('*').unwrap_or(raw_handle);
612                tied_handles.insert(normalized.to_string());
613            }
614        }
615
616        // Use a single static regex for all print-heredoc matches, then filter
617        // by whether the handle is in the tied set. This avoids O(n) Regex
618        // compilations (one per tied handle) and is faster for large files.
619        for cap in PRINT_HEREDOC_PATTERN.captures_iter(&scan_code) {
620            let (Some(match_pos), Some(handle_match)) = (cap.get(0), cap.get(1)) else {
621                continue;
622            };
623
624            let raw_print_handle = handle_match.as_str();
625            let normalized_print_handle =
626                raw_print_handle.strip_prefix('*').unwrap_or(raw_print_handle);
627
628            if tied_handles.contains(normalized_print_handle) {
629                let location = location_from_start(line_starts, offset, match_pos.start());
630                results.push((
631                    AntiPattern::TiedHandleHeredoc {
632                        location: location.clone(),
633                        handle_name: normalized_print_handle.to_string(),
634                    },
635                    location,
636                ));
637            }
638        }
639
640        results
641    }
642
643    fn diagnose(&self, pattern: &AntiPattern) -> Option<Diagnostic> {
644        let AntiPattern::TiedHandleHeredoc { handle_name, .. } = pattern else {
645            return None;
646        };
647
648        Some(Diagnostic {
649            severity: Severity::Info,
650            pattern: pattern.clone(),
651            message: format!("Heredoc written to tied handle '{}'", handle_name),
652            explanation: "Writing to a tied handle invokes custom code. The behavior of heredoc output depends on the tied class implementation.".to_string(),
653            suggested_fix: None,
654            references: vec!["perldoc -f tie".to_string()],
655        })
656    }
657}
658
659impl Default for AntiPatternDetector {
660    fn default() -> Self {
661        Self::new()
662    }
663}
664
665impl AntiPatternDetector {
666    /// Create a detector pre-loaded with all seven built-in pattern checkers.
667    pub fn new() -> Self {
668        Self {
669            patterns: vec![
670                Box::new(FormatHeredocDetector),
671                Box::new(BeginTimeHeredocDetector),
672                Box::new(DynamicDelimiterDetector),
673                Box::new(SourceFilterDetector),
674                Box::new(RegexHeredocDetector),
675                Box::new(EvalHeredocDetector),
676                Box::new(TiedHandleDetector),
677            ],
678        }
679    }
680
681    /// Run all pattern checkers against `code` and return diagnostics sorted by offset.
682    pub fn detect_all(&self, code: &str) -> Vec<Diagnostic> {
683        let mut diagnostics = Vec::new();
684        let line_starts = build_line_starts(code);
685
686        for detector in &self.patterns {
687            let patterns = detector.detect(code, 0, &line_starts);
688            for (pattern, _) in patterns {
689                if let Some(diagnostic) = detector.diagnose(&pattern) {
690                    diagnostics.push(diagnostic);
691                }
692            }
693        }
694
695        diagnostics.sort_by_key(|d| match &d.pattern {
696            AntiPattern::FormatHeredoc { location, .. }
697            | AntiPattern::BeginTimeHeredoc { location, .. }
698            | AntiPattern::DynamicHeredocDelimiter { location, .. }
699            | AntiPattern::SourceFilterHeredoc { location, .. }
700            | AntiPattern::RegexCodeBlockHeredoc { location, .. }
701            | AntiPattern::EvalStringHeredoc { location, .. }
702            | AntiPattern::TiedHandleHeredoc { location, .. } => location.offset,
703        });
704
705        diagnostics
706    }
707
708    /// Format a list of diagnostics as a human-readable plain-text report.
709    ///
710    /// Prints a header, a count, and one entry per diagnostic including its
711    /// severity, location, explanation, optional suggested fix, and references.
712    pub fn format_report(&self, diagnostics: &[Diagnostic]) -> String {
713        let mut report = String::from("Anti-Pattern Analysis Report\n");
714        report.push_str("============================\n\n");
715
716        if diagnostics.is_empty() {
717            report.push_str("No problematic patterns detected.\n");
718            return report;
719        }
720
721        report.push_str(&format!("Found {} problematic patterns:\n\n", diagnostics.len()));
722
723        for (i, diag) in diagnostics.iter().enumerate() {
724            report.push_str(&format!(
725                "{}. {} ({})\n",
726                i + 1,
727                diag.message,
728                match diag.severity {
729                    Severity::Error => "ERROR",
730                    Severity::Warning => "WARNING",
731                    Severity::Info => "INFO",
732                }
733            ));
734
735            report.push_str(&format!(
736                "   Location: {}\n",
737                match &diag.pattern {
738                    AntiPattern::FormatHeredoc { location, .. }
739                    | AntiPattern::BeginTimeHeredoc { location, .. }
740                    | AntiPattern::DynamicHeredocDelimiter { location, .. }
741                    | AntiPattern::SourceFilterHeredoc { location, .. }
742                    | AntiPattern::RegexCodeBlockHeredoc { location, .. }
743                    | AntiPattern::EvalStringHeredoc { location, .. }
744                    | AntiPattern::TiedHandleHeredoc { location, .. } =>
745                        format!("line {}, column {}", location.line, location.column),
746                }
747            ));
748
749            report.push_str(&format!("   Explanation: {}\n", diag.explanation));
750
751            if let Some(fix) = &diag.suggested_fix {
752                report.push_str(&format!(
753                    "   Suggested fix:\n     {}\n",
754                    fix.lines().collect::<Vec<_>>().join("\n     ")
755                ));
756            }
757
758            if !diag.references.is_empty() {
759                report.push_str(&format!("   References: {}\n", diag.references.join(", ")));
760            }
761
762            report.push('\n');
763        }
764
765        report
766    }
767}
768
769#[cfg(test)]
770mod tests {
771    use super::*;
772
773    #[test]
774    fn test_format_heredoc_detection() {
775        let detector = AntiPatternDetector::new();
776        let code = r#"
777format REPORT =
778<<'END'
779Name: @<<<<<<<<<<<<
780$name
781END
782.
783"#;
784
785        let diagnostics = detector.detect_all(code);
786        // Note: DynamicDelimiterDetector might also flag the << inside the format body as a false positive.
787        // But FormatHeredoc should appear first because it starts at 'format'.
788        // So diagnostics[0] should be FormatHeredoc.
789        assert!(!diagnostics.is_empty());
790        assert!(matches!(diagnostics[0].pattern, AntiPattern::FormatHeredoc { .. }));
791    }
792
793    #[test]
794    fn test_begin_heredoc_detection() {
795        let detector = AntiPatternDetector::new();
796        let code = r###"
797BEGIN {
798    $config = <<'END';
799    server = localhost
800END
801}
802"###;
803
804        let diagnostics = detector.detect_all(code);
805        assert_eq!(diagnostics.len(), 1);
806        assert!(matches!(diagnostics[0].pattern, AntiPattern::BeginTimeHeredoc { .. }));
807    }
808
809    #[test]
810    fn test_begin_heredoc_detection_with_nested_braces() {
811        let detector = AntiPatternDetector::new();
812        let code = r###"
813BEGIN {
814    if ($ENV{DEV}) {
815        $config = <<'END';
816        server = localhost
817END
818    }
819}
820"###;
821
822        let diagnostics = detector.detect_all(code);
823        let begin_count = diagnostics
824            .iter()
825            .filter(|diag| matches!(diag.pattern, AntiPattern::BeginTimeHeredoc { .. }))
826            .count();
827        assert_eq!(begin_count, 1);
828    }
829
830    #[test]
831    fn test_dynamic_delimiter_detection() {
832        let detector = AntiPatternDetector::new();
833        let code = r###"
834my $delimiter = "EOF";
835my $content = <<$delimiter;
836This is dynamic
837EOF
838"###;
839
840        let diagnostics = detector.detect_all(code);
841        assert_eq!(diagnostics.len(), 1);
842        assert!(matches!(diagnostics[0].pattern, AntiPattern::DynamicHeredocDelimiter { .. }));
843    }
844
845    #[test]
846    fn test_source_filter_detection() {
847        let detector = AntiPatternDetector::new();
848        let code = r###"
849use Filter::Simple;
850print <<EOF;
851Filtered content
852EOF
853"###;
854        let diagnostics = detector.detect_all(code);
855        assert_eq!(diagnostics.len(), 1);
856        assert!(matches!(diagnostics[0].pattern, AntiPattern::SourceFilterHeredoc { .. }));
857    }
858
859    #[test]
860    fn test_regex_heredoc_detection() {
861        let detector = AntiPatternDetector::new();
862        let code = r###"
863m/pattern(?{
864    print <<'MATCH';
865    Match text
866MATCH
867})/
868"###;
869        let diagnostics = detector.detect_all(code);
870        assert_eq!(diagnostics.len(), 1);
871        assert!(matches!(diagnostics[0].pattern, AntiPattern::RegexCodeBlockHeredoc { .. }));
872    }
873
874    #[test]
875    fn test_eval_heredoc_detection() {
876        let detector = AntiPatternDetector::new();
877        let code = r###"
878eval 'print <<"EVAL";
879Eval content
880EVAL';
881"###;
882        let diagnostics = detector.detect_all(code);
883        assert_eq!(diagnostics.len(), 1);
884        assert!(matches!(diagnostics[0].pattern, AntiPattern::EvalStringHeredoc { .. }));
885    }
886
887    #[test]
888    fn test_tied_handle_detection() {
889        let detector = AntiPatternDetector::new();
890        let code = r###"
891tie *FH, 'Tie::Handle';
892print FH <<'DATA';
893Tied output
894DATA
895"###;
896        let diagnostics = detector.detect_all(code);
897        assert_eq!(diagnostics.len(), 1);
898        assert!(matches!(diagnostics[0].pattern, AntiPattern::TiedHandleHeredoc { .. }));
899    }
900
901    #[test]
902    fn test_tied_scalar_handle_detection() {
903        let detector = AntiPatternDetector::new();
904        let code = r###"
905tie $fh, 'Tie::Handle';
906print $fh <<'DATA';
907Tied output
908DATA
909"###;
910        let diagnostics = detector.detect_all(code);
911        assert_eq!(diagnostics.len(), 1);
912        assert!(matches!(diagnostics[0].pattern, AntiPattern::TiedHandleHeredoc { .. }));
913    }
914
915    #[test]
916    fn test_tied_handle_reports_multiple_writes() {
917        let detector = AntiPatternDetector::new();
918        let code = r###"
919tie *FH, 'Tie::Handle';
920print FH <<'FIRST';
921One
922FIRST
923print FH <<'SECOND';
924Two
925SECOND
926"###;
927
928        let diagnostics = detector.detect_all(code);
929        let tied_handle_count = diagnostics
930            .iter()
931            .filter(|diag| matches!(diag.pattern, AntiPattern::TiedHandleHeredoc { .. }))
932            .count();
933        assert_eq!(tied_handle_count, 2);
934    }
935
936    #[test]
937    fn test_tied_handle_does_not_report_other_handles() {
938        // Regression: PRINT_HEREDOC_PATTERN must only flag handles in the tied set.
939        // Writing a heredoc to an *untied* handle (OTHER) must not produce a diagnostic.
940        let detector = AntiPatternDetector::new();
941        let code = r###"
942tie *FH, 'Tie::Handle';
943print OTHER <<'DATA';
944Not tied
945DATA
946"###;
947
948        let diagnostics = detector.detect_all(code);
949        let tied_handle_count = diagnostics
950            .iter()
951            .filter(|diag| matches!(diag.pattern, AntiPattern::TiedHandleHeredoc { .. }))
952            .count();
953        assert_eq!(tied_handle_count, 0);
954    }
955
956    #[test]
957    fn test_location_column_is_zero_based_for_new_line_matches() {
958        let detector = AntiPatternDetector::new();
959        let code = "my $x = 1;\nuse Filter::Simple;\n";
960
961        let diagnostics = detector.detect_all(code);
962        assert_eq!(diagnostics.len(), 1);
963
964        assert!(
965            matches!(diagnostics[0].pattern, AntiPattern::SourceFilterHeredoc { .. }),
966            "expected SourceFilterHeredoc pattern, got: {:?}",
967            diagnostics[0].pattern
968        );
969        let AntiPattern::SourceFilterHeredoc { location, .. } = &diagnostics[0].pattern else {
970            return;
971        };
972
973        assert_eq!(location.line, 1);
974        assert_eq!(location.column, 0);
975        assert_eq!(location.offset, 11);
976    }
977
978    #[test]
979    fn test_location_first_byte_is_line_zero_column_zero() {
980        // A match at byte offset 0 must report line=0, column=0.
981        let detector = AntiPatternDetector::new();
982        let code = "use Filter::Simple;\n";
983
984        let diagnostics = detector.detect_all(code);
985        assert_eq!(diagnostics.len(), 1);
986        let AntiPattern::SourceFilterHeredoc { location, .. } = &diagnostics[0].pattern else {
987            unreachable!("expected SourceFilterHeredoc");
988        };
989        assert_eq!(location.line, 0, "first-byte match must be on line 0");
990        assert_eq!(location.column, 0, "first-byte match must be at column 0");
991        assert_eq!(location.offset, 0);
992    }
993
994    #[test]
995    fn test_location_third_line_accurate() {
996        // Three-line file — match on line 2, column 0.
997        let detector = AntiPatternDetector::new();
998        // Line 0: "my $a = 1;\n"  (11 bytes, \n at index 10)
999        // Line 1: "my $b = 2;\n"  (11 bytes, \n at index 21)
1000        // Line 2: "use Filter::Simple;\n"
1001        let code = "my $a = 1;\nmy $b = 2;\nuse Filter::Simple;\n";
1002
1003        let diagnostics = detector.detect_all(code);
1004        assert_eq!(diagnostics.len(), 1);
1005        let AntiPattern::SourceFilterHeredoc { location, .. } = &diagnostics[0].pattern else {
1006            unreachable!("expected SourceFilterHeredoc");
1007        };
1008        assert_eq!(location.line, 2, "match on third line must report line 2");
1009        assert_eq!(location.column, 0, "match at start of line must report column 0");
1010        assert_eq!(location.offset, 22, "byte offset of third-line start");
1011    }
1012
1013    #[test]
1014    fn test_location_mid_line_column_nonzero() {
1015        // Match that does not start at column 0 must report the correct column.
1016        // Line 0: "# comment\n"      (10 bytes, \n at index 9)
1017        // Line 1: "    use Filter::Simple;\n"  — 4 leading spaces, match at column 4
1018        let detector = AntiPatternDetector::new();
1019        let code = "# comment\n    use Filter::Simple;\n";
1020
1021        let diagnostics = detector.detect_all(code);
1022        // The comment is masked; only SourceFilterHeredoc on line 1 should fire.
1023        assert_eq!(diagnostics.len(), 1);
1024        let AntiPattern::SourceFilterHeredoc { location, .. } = &diagnostics[0].pattern else {
1025            unreachable!("expected SourceFilterHeredoc");
1026        };
1027        assert_eq!(location.line, 1);
1028        assert_eq!(location.column, 4, "mid-line match must report correct column");
1029        assert_eq!(location.offset, 14, "byte offset = 10 (first line) + 4 spaces");
1030    }
1031
1032    #[test]
1033    fn test_source_filter_detection_ignores_comments_and_strings() {
1034        let detector = AntiPatternDetector::new();
1035        let code = r#"
1036# use Filter::Simple;
1037my $s = "use Filter::Simple";
1038"#;
1039
1040        let diagnostics = detector.detect_all(code);
1041        assert!(diagnostics.is_empty());
1042    }
1043
1044    #[test]
1045    fn test_begin_detection_ignores_comments_and_strings() {
1046        let detector = AntiPatternDetector::new();
1047        let code = r#"
1048# BEGIN { my $x = <<'END'; END }
1049my $s = "BEGIN { my $x = <<'END'; END }";
1050"#;
1051
1052        let diagnostics = detector.detect_all(code);
1053        assert!(diagnostics.is_empty());
1054    }
1055
1056    #[test]
1057    fn test_format_detection_handles_utf8_in_masked_regions() {
1058        let detector = AntiPatternDetector::new();
1059        let code = r#"# comment with emoji 😀
1060format REPORT =
1061<<'END'
1062Body
1063END
1064.
1065"#;
1066
1067        let diagnostics = detector.detect_all(code);
1068        assert!(
1069            diagnostics
1070                .iter()
1071                .any(|diag| matches!(diag.pattern, AntiPattern::FormatHeredoc { .. }))
1072        );
1073    }
1074}