Skip to main content

perl_lsp_critic_parser/
lib.rs

//! Parse Perl::Critic verbose output into structured records.
//!
//! Expected line format:
//! `file:line:column:severity:policy:message`
5
6#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9#![warn(clippy::all)]
10
/// One violation record extracted from a single line of Perl::Critic output.
///
/// The fields mirror the `file:line:column:severity:policy:message` layout,
/// in that order.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParsedCriticLine {
    /// Path of the file the violation was reported against.
    pub file: String,
    /// Line of the violation (1-indexed).
    pub line: u32,
    /// Column of the violation (1-indexed).
    pub column: u32,
    /// Severity number as printed by Perl::Critic.
    pub severity: u8,
    /// Name of the Perl::Critic policy that was violated.
    pub policy: String,
    /// Free-form description of the violation.
    pub message: String,
}
27
28/// Parse all valid Perl::Critic lines from a UTF-8 string.
29pub fn parse_perlcritic_output(output: &str) -> Vec<ParsedCriticLine> {
30    output.lines().filter_map(parse_perlcritic_line).collect()
31}
32
33/// Parse one Perl::Critic verbose output line.
34pub fn parse_perlcritic_line(line: &str) -> Option<ParsedCriticLine> {
35    if line.trim().is_empty() {
36        return None;
37    }
38
39    let parts: Vec<&str> = line.split(':').collect();
40
41    let mut numeric_idx = None;
42    let max_start = parts.len().saturating_sub(4);
43    for idx in 1..=max_start {
44        if parts.get(idx).and_then(|v| v.parse::<u32>().ok()).is_some()
45            && parts.get(idx + 1).and_then(|v| v.parse::<u32>().ok()).is_some()
46            && parts.get(idx + 2).and_then(|v| v.parse::<u8>().ok()).is_some()
47        {
48            numeric_idx = Some(idx);
49            break;
50        }
51    }
52
53    let start = numeric_idx?;
54    let file = parts[..start].join(":");
55    if file.is_empty() {
56        return None;
57    }
58
59    let line_num = parts[start].parse::<u32>().ok()?;
60    let column = parts[start + 1].parse::<u32>().ok()?;
61    let severity = parts[start + 2].parse::<u8>().ok()?;
62
63    let tail = parts[start + 3..].join(":");
64    let boundary = find_policy_message_boundary(&tail)?;
65
66    let policy = tail[..boundary].to_string();
67    let message = tail[boundary + 1..].to_string();
68
69    if policy.is_empty() || message.is_empty() {
70        return None;
71    }
72
73    Some(ParsedCriticLine { file, line: line_num, column, severity, policy, message })
74}
75
76fn find_policy_message_boundary(tail: &str) -> Option<usize> {
77    let bytes = tail.as_bytes();
78    for (idx, byte) in bytes.iter().enumerate() {
79        if *byte != b':' {
80            continue;
81        }
82
83        let prev_is_colon = idx > 0 && bytes[idx - 1] == b':';
84        let next_is_colon = idx + 1 < bytes.len() && bytes[idx + 1] == b':';
85        if prev_is_colon || next_is_colon {
86            continue;
87        }
88
89        let policy_candidate = &tail[..idx];
90        if is_valid_policy(policy_candidate) {
91            return Some(idx);
92        }
93    }
94
95    None
96}
97
/// Check whether `policy` looks like a `::`-separated identifier path.
///
/// The string must be non-empty, and every segment between `::` separators
/// must start with an ASCII letter or `_` and continue with ASCII letters,
/// digits or `_` only. Empty segments (leading, trailing or doubled
/// separators) make the whole name invalid.
fn is_valid_policy(policy: &str) -> bool {
    let is_ident_start = |c: char| c.is_ascii_alphabetic() || c == '_';
    let is_ident_rest = |c: char| c.is_ascii_alphanumeric() || c == '_';

    !policy.is_empty()
        && policy.split("::").all(|segment| {
            let mut rest = segment.chars();
            matches!(rest.next(), Some(head) if is_ident_start(head)) && rest.all(is_ident_rest)
        })
}