Skip to main content

perl_dap_variables/
parser.rs

//! Parser for Perl debugger variable output.
//!
//! This module provides utilities for parsing variable output from the Perl debugger
//! into structured [`PerlValue`] representations.
5
6use crate::PerlValue;
7use once_cell::sync::Lazy;
8use regex::Regex;
9use thiserror::Error;
10
11/// Errors that can occur during variable parsing.
12#[derive(Debug, Error)]
13pub enum VariableParseError {
14    /// The input format was not recognized.
15    #[error("unrecognized variable format: {0}")]
16    UnrecognizedFormat(String),
17
18    /// A nested structure was too deep.
19    #[error("maximum nesting depth exceeded ({0})")]
20    MaxDepthExceeded(usize),
21
22    /// A string literal was not properly terminated.
23    #[error("unterminated string literal")]
24    UnterminatedString,
25
26    /// An array or hash was not properly closed.
27    #[error("unterminated collection")]
28    UnterminatedCollection,
29
30    /// A regex pattern failed to compile.
31    #[error("regex error: {0}")]
32    RegexError(#[from] regex::Error),
33}
34
35// Compiled regex patterns for variable parsing.
36// Stored as Results to avoid panics. All patterns are compile-time constants,
37// so failure is not expected, but this provides graceful degradation.
38
39static SCALAR_VAR_RE: Lazy<Result<Regex, regex::Error>> =
40    Lazy::new(|| Regex::new(r"^\s*(?P<name>[\$\@\%][\w:]+)\s*=\s*(?P<value>.*?)$"));
41
42static UNDEF_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| Regex::new(r"^undef$"));
43
44static INTEGER_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| Regex::new(r"^-?\d+$"));
45
46static NUMBER_RE: Lazy<Result<Regex, regex::Error>> =
47    Lazy::new(|| Regex::new(r"^-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$"));
48
49static QUOTED_STRING_RE: Lazy<Result<Regex, regex::Error>> =
50    Lazy::new(|| Regex::new(r#"^'(?:[^'\\]|\\.)*'|^"(?:[^"\\]|\\.)*""#));
51
52static ARRAY_REF_RE: Lazy<Result<Regex, regex::Error>> =
53    Lazy::new(|| Regex::new(r"^ARRAY\(0x[0-9a-fA-F]+\)$"));
54
55static HASH_REF_RE: Lazy<Result<Regex, regex::Error>> =
56    Lazy::new(|| Regex::new(r"^HASH\(0x[0-9a-fA-F]+\)$"));
57
58static CODE_REF_RE: Lazy<Result<Regex, regex::Error>> =
59    Lazy::new(|| Regex::new(r"^CODE\(0x[0-9a-fA-F]+\)$"));
60
61static OBJECT_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| {
62    Regex::new(r"^(?P<class>[\w:]+)=(?P<type>ARRAY|HASH|SCALAR|GLOB)\(0x[0-9a-fA-F]+\)$")
63});
64
65static GLOB_RE: Lazy<Result<Regex, regex::Error>> =
66    Lazy::new(|| Regex::new(r"^\*(?P<name>[\w:]+)$"));
67
68/// Regex for parsing compiled regexp values (reserved for future use)
69#[allow(dead_code)]
70static REGEX_RE: Lazy<Result<Regex, regex::Error>> =
71    Lazy::new(|| Regex::new(r"^(?:\(\?(?P<flags>[xism-]*)(?:-[xism]+)?:)?(?P<pattern>.*?)\)?$"));
72
73// Accessor functions - return Option<&Regex>, treating compile failure as "no match"
74fn scalar_var_re() -> Option<&'static Regex> {
75    SCALAR_VAR_RE.as_ref().ok()
76}
77fn undef_re() -> Option<&'static Regex> {
78    UNDEF_RE.as_ref().ok()
79}
80fn integer_re() -> Option<&'static Regex> {
81    INTEGER_RE.as_ref().ok()
82}
83fn number_re() -> Option<&'static Regex> {
84    NUMBER_RE.as_ref().ok()
85}
86fn quoted_string_re() -> Option<&'static Regex> {
87    QUOTED_STRING_RE.as_ref().ok()
88}
89fn array_ref_re() -> Option<&'static Regex> {
90    ARRAY_REF_RE.as_ref().ok()
91}
92fn hash_ref_re() -> Option<&'static Regex> {
93    HASH_REF_RE.as_ref().ok()
94}
95fn code_ref_re() -> Option<&'static Regex> {
96    CODE_REF_RE.as_ref().ok()
97}
98fn object_re() -> Option<&'static Regex> {
99    OBJECT_RE.as_ref().ok()
100}
101fn glob_re() -> Option<&'static Regex> {
102    GLOB_RE.as_ref().ok()
103}
104
/// Parser for Perl debugger variable output.
///
/// This parser converts text output from the Perl debugger's variable
/// inspection commands into structured [`PerlValue`] representations.
#[derive(Debug)]
pub struct VariableParser {
    /// Maximum nesting depth for recursive parsing
    max_depth: usize,
}

impl Default for VariableParser {
    /// Returns a parser that allows 50 levels of nesting.
    ///
    /// Implemented by hand because a derived `Default` would set `max_depth` to 0, and a
    /// parser with that limit rejects every nested array or hash.
    fn default() -> Self {
        // Same limit that `VariableParser::new` applies.
        Self { max_depth: 50 }
    }
}
114
115impl VariableParser {
116    /// Creates a new variable parser with default settings.
117    #[must_use]
118    pub fn new() -> Self {
119        Self { max_depth: 50 }
120    }
121
122    /// Sets the maximum nesting depth for parsing.
123    #[must_use]
124    pub fn with_max_depth(mut self, depth: usize) -> Self {
125        self.max_depth = depth;
126        self
127    }
128
129    /// Parses a variable assignment line from debugger output.
130    ///
131    /// # Arguments
132    ///
133    /// * `line` - A line like "$var = value" or "@arr = (1, 2, 3)"
134    ///
135    /// # Returns
136    ///
137    /// A tuple of (variable name, parsed value) if successful.
138    ///
139    /// # Errors
140    ///
141    /// Returns a [`VariableParseError`] if the line cannot be parsed.
142    pub fn parse_assignment(&self, line: &str) -> Result<(String, PerlValue), VariableParseError> {
143        let re = scalar_var_re()
144            .ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?;
145        if let Some(caps) = re.captures(line) {
146            let name = caps
147                .name("name")
148                .ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?
149                .as_str()
150                .to_string();
151            let value_str = caps
152                .name("value")
153                .ok_or_else(|| VariableParseError::UnrecognizedFormat(line.to_string()))?
154                .as_str();
155            let value = self.parse_value(value_str, 0)?;
156            Ok((name, value))
157        } else {
158            Err(VariableParseError::UnrecognizedFormat(line.to_string()))
159        }
160    }
161
162    /// Parses a value string from debugger output.
163    ///
164    /// # Arguments
165    ///
166    /// * `text` - The value portion of debugger output
167    ///
168    /// # Returns
169    ///
170    /// The parsed [`PerlValue`].
171    ///
172    /// # Errors
173    ///
174    /// Returns a [`VariableParseError`] if the value cannot be parsed.
175    pub fn parse_value(&self, text: &str, depth: usize) -> Result<PerlValue, VariableParseError> {
176        if depth > self.max_depth {
177            return Err(VariableParseError::MaxDepthExceeded(self.max_depth));
178        }
179
180        let text = text.trim();
181
182        // Check for undef
183        if undef_re().is_some_and(|re| re.is_match(text)) {
184            return Ok(PerlValue::Undef);
185        }
186
187        // Check for integer
188        if integer_re().is_some_and(|re| re.is_match(text)) {
189            if let Ok(i) = text.parse::<i64>() {
190                return Ok(PerlValue::Integer(i));
191            }
192        }
193
194        // Check for number
195        if number_re().is_some_and(|re| re.is_match(text)) {
196            if let Ok(n) = text.parse::<f64>() {
197                return Ok(PerlValue::Number(n));
198            }
199        }
200
201        // Check for quoted string
202        if quoted_string_re().is_some_and(|re| re.is_match(text)) {
203            let unquoted = self.unquote_string(text)?;
204            return Ok(PerlValue::Scalar(unquoted));
205        }
206
207        // Check for array reference notation
208        if array_ref_re().is_some_and(|re| re.is_match(text)) {
209            return Ok(PerlValue::Array(vec![]));
210        }
211
212        // Check for hash reference notation
213        if hash_ref_re().is_some_and(|re| re.is_match(text)) {
214            return Ok(PerlValue::Hash(vec![]));
215        }
216
217        // Check for code reference
218        if code_ref_re().is_some_and(|re| re.is_match(text)) {
219            return Ok(PerlValue::Code { name: None });
220        }
221
222        // Check for blessed object
223        if let Some(caps) = object_re().and_then(|re| re.captures(text)) {
224            let class = caps
225                .name("class")
226                .ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
227                .as_str()
228                .to_string();
229            let type_str = caps
230                .name("type")
231                .ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
232                .as_str();
233            let inner = match type_str {
234                "ARRAY" => PerlValue::Array(vec![]),
235                "HASH" => PerlValue::Hash(vec![]),
236                _ => PerlValue::Scalar(String::new()),
237            };
238            return Ok(PerlValue::Object { class, value: Box::new(inner) });
239        }
240
241        // Check for glob
242        if let Some(caps) = glob_re().and_then(|re| re.captures(text)) {
243            let name = caps
244                .name("name")
245                .ok_or_else(|| VariableParseError::UnrecognizedFormat(text.to_string()))?
246                .as_str()
247                .to_string();
248            return Ok(PerlValue::Glob(name));
249        }
250
251        // Check for array literal
252        if text.starts_with('(') && text.ends_with(')') {
253            return self.parse_array_literal(text, depth);
254        }
255
256        // Check for array bracket literal
257        if text.starts_with('[') && text.ends_with(']') {
258            return self.parse_array_literal(text, depth);
259        }
260
261        // Check for hash literal
262        if text.starts_with('{') && text.ends_with('}') {
263            return self.parse_hash_literal(text, depth);
264        }
265
266        // Treat as unquoted scalar (bareword or other)
267        Ok(PerlValue::Scalar(text.to_string()))
268    }
269
270    /// Parses an array literal like (1, 2, 3) or [1, 2, 3].
271    fn parse_array_literal(
272        &self,
273        text: &str,
274        depth: usize,
275    ) -> Result<PerlValue, VariableParseError> {
276        // Remove outer delimiters (works for both '(' and '[')
277        let inner = &text[1..text.len() - 1];
278
279        if inner.trim().is_empty() {
280            return Ok(PerlValue::Array(vec![]));
281        }
282
283        let elements = self.split_elements(inner)?;
284        let parsed: Result<Vec<PerlValue>, _> =
285            elements.iter().map(|e| self.parse_value(e, depth + 1)).collect();
286
287        Ok(PerlValue::Array(parsed?))
288    }
289
290    /// Parses a hash literal like {key => value, ...}.
291    fn parse_hash_literal(
292        &self,
293        text: &str,
294        depth: usize,
295    ) -> Result<PerlValue, VariableParseError> {
296        // Remove outer braces
297        let inner = &text[1..text.len() - 1];
298
299        if inner.trim().is_empty() {
300            return Ok(PerlValue::Hash(vec![]));
301        }
302
303        let elements = self.split_elements(inner)?;
304        let mut pairs = Vec::new();
305
306        for element in elements {
307            if let Some((key, value)) = element.split_once("=>") {
308                let key = self.unquote_key(key.trim());
309                let value = self.parse_value(value.trim(), depth + 1)?;
310                pairs.push((key, value));
311            } else {
312                // Treat as key with undef value
313                let key = self.unquote_key(element.trim());
314                pairs.push((key, PerlValue::Undef));
315            }
316        }
317
318        Ok(PerlValue::Hash(pairs))
319    }
320
321    /// Splits a comma-separated list while respecting nested structures.
322    fn split_elements(&self, text: &str) -> Result<Vec<String>, VariableParseError> {
323        let mut elements = Vec::new();
324        let mut current = String::new();
325        let mut paren_depth: u32 = 0;
326        let mut bracket_depth: u32 = 0;
327        let mut brace_depth: u32 = 0;
328        let mut in_string = false;
329        let mut string_char = ' ';
330        let mut escape_next = false;
331
332        for ch in text.chars() {
333            if escape_next {
334                current.push(ch);
335                escape_next = false;
336                continue;
337            }
338
339            if ch == '\\' {
340                current.push(ch);
341                escape_next = true;
342                continue;
343            }
344
345            if in_string {
346                current.push(ch);
347                if ch == string_char {
348                    in_string = false;
349                }
350                continue;
351            }
352
353            match ch {
354                '"' | '\'' => {
355                    current.push(ch);
356                    in_string = true;
357                    string_char = ch;
358                }
359                '(' => {
360                    current.push(ch);
361                    paren_depth += 1;
362                }
363                ')' => {
364                    current.push(ch);
365                    paren_depth = paren_depth.saturating_sub(1);
366                }
367                '[' => {
368                    current.push(ch);
369                    bracket_depth += 1;
370                }
371                ']' => {
372                    current.push(ch);
373                    bracket_depth = bracket_depth.saturating_sub(1);
374                }
375                '{' => {
376                    current.push(ch);
377                    brace_depth += 1;
378                }
379                '}' => {
380                    current.push(ch);
381                    brace_depth = brace_depth.saturating_sub(1);
382                }
383                ',' if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => {
384                    let trimmed = current.trim().to_string();
385                    if !trimmed.is_empty() {
386                        elements.push(trimmed);
387                    }
388                    current = String::new();
389                }
390                _ => {
391                    current.push(ch);
392                }
393            }
394        }
395
396        // Add the last element
397        let trimmed = current.trim().to_string();
398        if !trimmed.is_empty() {
399            elements.push(trimmed);
400        }
401
402        Ok(elements)
403    }
404
405    /// Removes quotes from a string value.
406    fn unquote_string(&self, text: &str) -> Result<String, VariableParseError> {
407        if text.len() < 2 {
408            return Err(VariableParseError::UnterminatedString);
409        }
410
411        let first = text.chars().next();
412        let last = text.chars().next_back();
413
414        match (first, last) {
415            (Some('"'), Some('"')) | (Some('\''), Some('\'')) => {
416                let inner = &text[1..text.len() - 1];
417                Ok(self.unescape_string(inner))
418            }
419            _ => Ok(text.to_string()),
420        }
421    }
422
423    /// Removes quotes from a hash key (or returns as-is if not quoted).
424    fn unquote_key(&self, text: &str) -> String {
425        if text.len() >= 2 {
426            let first = text.chars().next();
427            let last = text.chars().next_back();
428
429            match (first, last) {
430                (Some('"'), Some('"')) | (Some('\''), Some('\'')) => {
431                    return self.unescape_string(&text[1..text.len() - 1]);
432                }
433                _ => {}
434            }
435        }
436        text.to_string()
437    }
438
439    /// Unescapes common escape sequences in a string.
440    fn unescape_string(&self, text: &str) -> String {
441        let mut result = String::with_capacity(text.len());
442        let mut chars = text.chars().peekable();
443
444        while let Some(ch) = chars.next() {
445            if ch == '\\' {
446                match chars.next() {
447                    Some('n') => result.push('\n'),
448                    Some('r') => result.push('\r'),
449                    Some('t') => result.push('\t'),
450                    Some('\\') => result.push('\\'),
451                    Some('"') => result.push('"'),
452                    Some('\'') => result.push('\''),
453                    Some(other) => {
454                        result.push('\\');
455                        result.push(other);
456                    }
457                    None => result.push('\\'),
458                }
459            } else {
460                result.push(ch);
461            }
462        }
463
464        result
465    }
466
467    /// Parses multiple variable lines (e.g., from 'V' command output).
468    ///
469    /// # Arguments
470    ///
471    /// * `output` - Multi-line debugger output
472    ///
473    /// # Returns
474    ///
475    /// A vector of (name, value) pairs for successfully parsed variables.
476    pub fn parse_variables(&self, output: &str) -> Vec<(String, PerlValue)> {
477        output.lines().filter_map(|line| self.parse_assignment(line).ok()).collect()
478    }
479}
480
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `text` as a top-level value using a parser with default settings.
    fn parse(text: &str) -> Result<PerlValue, VariableParseError> {
        VariableParser::new().parse_value(text, 0)
    }

    #[test]
    fn test_parse_undef() {
        assert!(matches!(parse("undef"), Ok(PerlValue::Undef)));
    }

    #[test]
    fn test_parse_integer() {
        assert!(matches!(parse("42"), Ok(PerlValue::Integer(42))));
        assert!(matches!(parse("-17"), Ok(PerlValue::Integer(-17))));
    }

    #[test]
    fn test_parse_number() {
        let decimal = parse("3.25");
        assert!(matches!(decimal, Ok(PerlValue::Number(n)) if (n - 3.25).abs() < 0.001));

        let exponent = parse("1.5e10");
        assert!(matches!(exponent, Ok(PerlValue::Number(_))));
    }

    #[test]
    fn test_parse_quoted_string() {
        let double_quoted = parse(r#""hello""#);
        assert!(matches!(double_quoted, Ok(PerlValue::Scalar(s)) if s == "hello"));

        let single_quoted = parse("'world'");
        assert!(matches!(single_quoted, Ok(PerlValue::Scalar(s)) if s == "world"));
    }

    #[test]
    fn test_parse_string_with_escapes() {
        // The raw literal holds a backslash followed by `n`, not a newline.
        let escaped = parse(r#""line1\nline2""#);
        assert!(matches!(escaped, Ok(PerlValue::Scalar(s)) if s.contains('\n')));
    }

    #[test]
    fn test_parse_array_reference() {
        assert!(matches!(parse("ARRAY(0x1234abcd)"), Ok(PerlValue::Array(_))));
    }

    #[test]
    fn test_parse_hash_reference() {
        assert!(matches!(parse("HASH(0x5678abcd)"), Ok(PerlValue::Hash(_))));
    }

    #[test]
    fn test_parse_code_reference() {
        assert!(matches!(parse("CODE(0xdeadbeef)"), Ok(PerlValue::Code { name: None })));
    }

    #[test]
    fn test_parse_object() {
        let object = parse("My::Class=HASH(0x1234)");
        assert!(matches!(object, Ok(PerlValue::Object { class, .. }) if class == "My::Class"));
    }

    #[test]
    fn test_parse_glob() {
        let glob = parse("*main::foo");
        assert!(matches!(glob, Ok(PerlValue::Glob(name)) if name == "main::foo"));
    }

    #[test]
    fn test_parse_array_literal() {
        let list = parse("(1, 2, 3)");
        assert!(matches!(list, Ok(PerlValue::Array(arr)) if arr.len() == 3));

        let bracketed = parse("[1, 2, 3]");
        assert!(matches!(bracketed, Ok(PerlValue::Array(arr)) if arr.len() == 3));

        let empty = parse("()");
        assert!(matches!(empty, Ok(PerlValue::Array(arr)) if arr.is_empty()));
    }

    #[test]
    fn test_parse_hash_literal() {
        let populated = parse("{foo => 1, bar => 2}");
        assert!(matches!(populated, Ok(PerlValue::Hash(pairs)) if pairs.len() == 2));

        let empty = parse("{}");
        assert!(matches!(empty, Ok(PerlValue::Hash(pairs)) if pairs.is_empty()));
    }

    #[test]
    fn test_parse_assignment() {
        let parser = VariableParser::new();

        let scalar = parser.parse_assignment("$x = 42");
        assert!(matches!(scalar, Ok((name, PerlValue::Integer(42))) if name == "$x"));

        let array = parser.parse_assignment("@arr = (1, 2, 3)");
        assert!(matches!(array, Ok((name, PerlValue::Array(_))) if name == "@arr"));

        let hash = parser.parse_assignment("%hash = {a => 1}");
        assert!(matches!(hash, Ok((name, PerlValue::Hash(_))) if name == "%hash"));
    }

    #[test]
    fn test_parse_variables_multi_line() {
        let vars = VariableParser::new().parse_variables("$x = 1\n$y = 2\n$z = \"hello\"");

        let names: Vec<&str> = vars.iter().map(|(name, _)| name.as_str()).collect();
        assert_eq!(names, ["$x", "$y", "$z"]);
    }

    #[test]
    fn test_max_depth_exceeded() {
        // Three wrappers put the innermost value at depth 3, one past the limit of 2.
        let shallow = VariableParser::new().with_max_depth(2);

        let result = shallow.parse_value("(((1)))", 0);
        assert!(matches!(result, Err(VariableParseError::MaxDepthExceeded(_))));
    }

    #[test]
    fn test_parse_nested_structure() {
        let nested = parse("{arr => [1, 2], hash => {a => 1}}");
        assert!(matches!(nested, Ok(PerlValue::Hash(pairs)) if pairs.len() == 2));
    }
}