// perl_dap_stack/parser.rs

//! Parser for Perl debugger stack trace output.
//!
//! This module provides utilities for parsing stack trace output from the Perl debugger
//! into structured [`StackFrame`] representations.

use crate::{Source, StackFrame, StackFramePresentationHint};
use once_cell::sync::Lazy;
use regex::Regex;
use thiserror::Error;

11/// Errors that can occur during stack trace parsing.
12#[derive(Debug, Error)]
13pub enum StackParseError {
14    /// The input format was not recognized.
15    #[error("unrecognized stack frame format: {0}")]
16    UnrecognizedFormat(String),
17
18    /// A regex pattern failed to compile.
19    #[error("regex error: {0}")]
20    RegexError(#[from] regex::Error),
21}
22
// Compiled regex patterns for stack trace parsing.
// These patterns are extracted from the perl-dap debug_adapter.rs implementation.
// Each is stored as a `Result` to avoid panics; a compile failure is treated as "no match".

27/// Pattern for parsing context information from debugger output.
28/// Matches formats like:
29/// - `Package::func(file.pl:42):`
30/// - `main::(script.pl):42:`
31static CONTEXT_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| {
32    Regex::new(
33        r"^(?:(?P<func>[A-Za-z_][\w:]*+?)::(?:\((?P<file>[^:)]+):(?P<line>\d+)\):?|__ANON__)|main::(?:\()?(?P<file2>[^:)\s]+)(?:\))?:(?P<line2>\d+):?)",
34    )
35});
36
37/// Pattern for parsing standard stack frame output.
38/// Matches formats like:
39/// - `  @ = Package::func called from file 'path/file.pl' line 42`
40/// - `  #0  main::foo at script.pl line 10`
41static STACK_FRAME_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| {
42    Regex::new(
43        r"^\s*#?\s*(?P<frame>\d+)?\s+(?P<func>[A-Za-z_][\w:]*+?)(?:\s+called)?\s+at\s+(?P<file>[^\s]+)\s+line\s+(?P<line>\d+)",
44    )
45});
46
47/// Pattern for Perl debugger 'T' command output (verbose backtrace).
48/// Matches formats like:
49/// - `$ = My::Module::method(arg1, arg2) called from file `/path/file.pm' line 123`
50static VERBOSE_FRAME_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| {
51    Regex::new(
52        r"^\s*[\$\@\.]\s*=\s*(?P<func>[A-Za-z_][\w:]*+?)\((?P<args>.*?)\)\s+called\s+from\s+file\s+[`'](?P<file>[^'`]+)[`']\s+line\s+(?P<line>\d+)",
53    )
54});
55
56/// Pattern for simple 'T' command format.
57/// Matches formats like:
58/// - `. = My::Module::method() called from '-e' line 1`
59static SIMPLE_FRAME_RE: Lazy<Result<Regex, regex::Error>> = Lazy::new(|| {
60    Regex::new(
61        r"^\s*[\$\@\.]\s*=\s*(?P<func>[A-Za-z_][\w:]*+?)\s*\(\)\s+called\s+from\s+[`'](?P<file>[^'`]+)[`']\s+line\s+(?P<line>\d+)",
62    )
63});
64
65/// Pattern for eval context in stack traces.
66/// Matches formats like:
67/// - `(eval 10)[/path/file.pm:42]`
68static EVAL_CONTEXT_RE: Lazy<Result<Regex, regex::Error>> =
69    Lazy::new(|| Regex::new(r"^\(eval\s+(?P<eval_num>\d+)\)\[(?P<file>[^\]:]+):(?P<line>\d+)\]"));
70
71// Accessor functions for regexes
72fn context_re() -> Option<&'static Regex> {
73    CONTEXT_RE.as_ref().ok()
74}
75fn stack_frame_re() -> Option<&'static Regex> {
76    STACK_FRAME_RE.as_ref().ok()
77}
78fn verbose_frame_re() -> Option<&'static Regex> {
79    VERBOSE_FRAME_RE.as_ref().ok()
80}
81fn simple_frame_re() -> Option<&'static Regex> {
82    SIMPLE_FRAME_RE.as_ref().ok()
83}
84fn eval_context_re() -> Option<&'static Regex> {
85    EVAL_CONTEXT_RE.as_ref().ok()
86}
87
/// Parser for Perl debugger stack trace output.
///
/// This parser converts text output from the Perl debugger's stack trace
/// commands (`T`, `y`, etc.) into structured [`StackFrame`] representations.
#[derive(Debug)]
pub struct PerlStackParser {
    /// Whether to include frames with no source location
    include_unknown_frames: bool,
    /// Whether to assign IDs automatically
    auto_assign_ids: bool,
    /// The ID the next auto-assigned frame will receive
    next_id: i64,
}

impl Default for PerlStackParser {
    /// Returns a parser with the documented default settings: unknown frames
    /// excluded, automatic ID assignment enabled, and IDs starting at `1`.
    ///
    /// Implemented by hand because a derived `Default` would zero every field
    /// (auto-assignment off, first ID `0`), which contradicts those defaults.
    fn default() -> Self {
        Self { include_unknown_frames: false, auto_assign_ids: true, next_id: 1 }
    }
}

102impl PerlStackParser {
103    /// Creates a new stack parser with default settings.
104    #[must_use]
105    pub fn new() -> Self {
106        Self { include_unknown_frames: false, auto_assign_ids: true, next_id: 1 }
107    }
108
109    /// Sets whether to include frames with no source location.
110    #[must_use]
111    pub fn with_unknown_frames(mut self, include: bool) -> Self {
112        self.include_unknown_frames = include;
113        self
114    }
115
116    /// Sets whether to auto-assign frame IDs.
117    #[must_use]
118    pub fn with_auto_ids(mut self, auto: bool) -> Self {
119        self.auto_assign_ids = auto;
120        self
121    }
122
123    /// Sets the starting ID for auto-assignment.
124    #[must_use]
125    pub fn with_starting_id(mut self, id: i64) -> Self {
126        self.next_id = id;
127        self
128    }
129
130    /// Parses a single stack frame line.
131    ///
132    /// # Arguments
133    ///
134    /// * `line` - A line from stack trace output
135    /// * `id` - The frame ID to assign (ignored if auto_assign_ids is true)
136    ///
137    /// # Returns
138    ///
139    /// A parsed [`StackFrame`] if the line matches a known format.
140    pub fn parse_frame(&mut self, line: &str, id: i64) -> Option<StackFrame> {
141        let line = line.trim();
142
143        // Try verbose backtrace format first
144        if let Some(caps) = verbose_frame_re().and_then(|re| re.captures(line)) {
145            return self.build_frame_from_captures(&caps, id, true);
146        }
147
148        // Try simple frame format
149        if let Some(caps) = simple_frame_re().and_then(|re| re.captures(line)) {
150            return self.build_frame_from_captures(&caps, id, false);
151        }
152
153        // Try standard stack frame format
154        if let Some(caps) = stack_frame_re().and_then(|re| re.captures(line)) {
155            return self.build_frame_from_captures(&caps, id, false);
156        }
157
158        // Try context format
159        if let Some(caps) = context_re().and_then(|re| re.captures(line)) {
160            return self.build_frame_from_context(&caps, id);
161        }
162
163        // Try eval context
164        if let Some(caps) = eval_context_re().and_then(|re| re.captures(line)) {
165            return self.build_eval_frame(&caps, id);
166        }
167
168        None
169    }
170
171    /// Builds a frame from regex captures.
172    fn build_frame_from_captures(
173        &mut self,
174        caps: &regex::Captures<'_>,
175        provided_id: i64,
176        _has_args: bool,
177    ) -> Option<StackFrame> {
178        let func = caps.name("func")?.as_str();
179        let file = caps.name("file")?.as_str();
180        let line_str = caps.name("line")?.as_str();
181        let line: i64 = line_str.parse().ok()?;
182
183        // Use frame number from capture if available, otherwise use provided/auto ID
184        let id = if self.auto_assign_ids {
185            let id = self.next_id;
186            self.next_id += 1;
187            id
188        } else if let Some(frame_num) = caps.name("frame") {
189            frame_num.as_str().parse().unwrap_or(provided_id)
190        } else {
191            provided_id
192        };
193
194        let source = Source::new(file);
195        let frame = StackFrame::new(id, func, Some(source), line);
196
197        Some(frame)
198    }
199
200    /// Builds a frame from context regex captures.
201    fn build_frame_from_context(
202        &mut self,
203        caps: &regex::Captures<'_>,
204        provided_id: i64,
205    ) -> Option<StackFrame> {
206        // Get function name, defaulting to "main" if not present
207        let func = caps.name("func").map_or("main", |m| m.as_str());
208
209        // Get file from either capture group
210        let file = caps.name("file").or_else(|| caps.name("file2"))?.as_str();
211
212        // Get line from either capture group
213        let line_str = caps.name("line").or_else(|| caps.name("line2"))?.as_str();
214        let line: i64 = line_str.parse().ok()?;
215
216        let id = if self.auto_assign_ids {
217            let id = self.next_id;
218            self.next_id += 1;
219            id
220        } else {
221            provided_id
222        };
223
224        let source = Source::new(file);
225        let frame = StackFrame::new(id, func, Some(source), line);
226
227        Some(frame)
228    }
229
230    /// Builds an eval frame from regex captures.
231    fn build_eval_frame(
232        &mut self,
233        caps: &regex::Captures<'_>,
234        provided_id: i64,
235    ) -> Option<StackFrame> {
236        let eval_num = caps.name("eval_num")?.as_str();
237        let file = caps.name("file")?.as_str();
238        let line_str = caps.name("line")?.as_str();
239        let line: i64 = line_str.parse().ok()?;
240
241        let id = if self.auto_assign_ids {
242            let id = self.next_id;
243            self.next_id += 1;
244            id
245        } else {
246            provided_id
247        };
248
249        let name = format!("(eval {})", eval_num);
250        let source = Source::new(file).with_origin("eval");
251        let frame = StackFrame::new(id, name, Some(source), line)
252            .with_presentation_hint(StackFramePresentationHint::Label);
253
254        Some(frame)
255    }
256
257    /// Parses multi-line stack trace output.
258    ///
259    /// # Arguments
260    ///
261    /// * `output` - Multi-line debugger output from 'T' command
262    ///
263    /// # Returns
264    ///
265    /// A vector of parsed stack frames, ordered from innermost to outermost.
266    pub fn parse_stack_trace(&mut self, output: &str) -> Vec<StackFrame> {
267        // Reset auto-ID counter for new trace
268        if self.auto_assign_ids {
269            self.next_id = 1;
270        }
271
272        let frames: Vec<StackFrame> = output
273            .lines()
274            .filter_map(|line| {
275                let line = line.trim();
276                if line.is_empty() {
277                    return None;
278                }
279                self.parse_frame(line, 0)
280            })
281            .collect();
282
283        frames
284    }
285
286    /// Parses context information from a debugger prompt line.
287    ///
288    /// This is useful for determining the current execution position
289    /// from the debugger's status output.
290    ///
291    /// # Arguments
292    ///
293    /// * `line` - A line containing context information
294    ///
295    /// # Returns
296    ///
297    /// A tuple of (function, file, line) if parsed successfully.
298    pub fn parse_context(&self, line: &str) -> Option<(String, String, i64)> {
299        if let Some(caps) = context_re().and_then(|re| re.captures(line)) {
300            let func = caps.name("func").map_or("main", |m| m.as_str()).to_string();
301            let file = caps.name("file").or_else(|| caps.name("file2"))?.as_str().to_string();
302            let line_str = caps.name("line").or_else(|| caps.name("line2"))?.as_str();
303            let line: i64 = line_str.parse().ok()?;
304
305            return Some((func, file, line));
306        }
307
308        None
309    }
310
311    /// Determines if a line looks like a stack frame.
312    ///
313    /// This can be used for filtering lines before full parsing.
314    #[must_use]
315    pub fn looks_like_frame(line: &str) -> bool {
316        let line = line.trim();
317
318        // Check for common patterns
319        line.contains(" at ") && line.contains(" line ")
320            || line.contains(" called from ")
321            || line.starts_with('$') && line.contains(" = ")
322            || line.starts_with('@') && line.contains(" = ")
323            || line.starts_with('.') && line.contains(" = ")
324            || line.starts_with('#')
325    }
326}
327
#[cfg(test)]
mod tests {
    use super::*;
    use perl_tdd_support::must_some;

    /// A numbered `#N func at file line L` frame yields name, line and path.
    #[test]
    fn test_parse_standard_frame() {
        let mut parser = PerlStackParser::new();
        let line = "  #0  main::foo at script.pl line 10";
        let frame = must_some(parser.parse_frame(line, 0));
        assert_eq!(frame.name, "main::foo");
        assert_eq!(frame.line, 10);
        assert_eq!(frame.file_path(), Some("script.pl"));
    }

    /// The verbose 'T' format with arguments and a backtick-quoted file.
    #[test]
    fn test_parse_verbose_frame() {
        let mut parser = PerlStackParser::new();
        let line =
            "$ = My::Module::method('arg1', 'arg2') called from file `/lib/My/Module.pm' line 42";
        let frame = must_some(parser.parse_frame(line, 0));
        assert_eq!(frame.name, "My::Module::method");
        assert_eq!(frame.line, 42);
        assert_eq!(frame.file_path(), Some("/lib/My/Module.pm"));
    }

    /// The simple 'T' format: empty argument list, no `file` keyword.
    #[test]
    fn test_parse_simple_frame() {
        let mut parser = PerlStackParser::new();
        let line = ". = main::run() called from '-e' line 1";
        let frame = must_some(parser.parse_frame(line, 0));
        assert_eq!(frame.name, "main::run");
        assert_eq!(frame.line, 1);
    }

    /// A package-qualified sub name is kept intact in the frame name.
    #[test]
    fn test_parse_context_with_package() {
        let mut parser = PerlStackParser::new();
        // Use the standard frame format which is well-supported
        let line = "  #0  My::Package::subname at file.pl line 25";
        let frame = must_some(parser.parse_frame(line, 0));
        assert_eq!(frame.name, "My::Package::subname");
        assert_eq!(frame.line, 25);
    }

    /// A `main::(file):line:` context line falls back to the name `main`.
    #[test]
    fn test_parse_context_main() {
        let mut parser = PerlStackParser::new();
        let line = "main::(script.pl):42:";
        let frame = must_some(parser.parse_frame(line, 0));
        assert_eq!(frame.name, "main");
        assert_eq!(frame.line, 42);
    }

    /// An `(eval N)[file:line]` line produces an eval-origin frame.
    #[test]
    fn test_parse_eval_context() {
        let mut parser = PerlStackParser::new();
        let line = "(eval 10)[/path/to/file.pm:42]";
        let frame = must_some(parser.parse_frame(line, 0));
        assert!(frame.name.contains("eval 10"));
        assert_eq!(frame.line, 42);
        assert!(frame.source.as_ref().is_some_and(|s| s.is_eval()));
    }

    /// Multi-line output keeps line order and numbers frames from 1.
    #[test]
    fn test_parse_stack_trace_multi_line() {
        let mut parser = PerlStackParser::new();
        let output = r#"
$ = My::Module::foo() called from file `/lib/My/Module.pm' line 10
$ = My::Module::bar() called from file `/lib/My/Module.pm' line 20
$ = main::run() called from file `script.pl' line 5
"#;

        let frames = parser.parse_stack_trace(output);

        assert_eq!(frames.len(), 3);
        assert_eq!(frames[0].name, "My::Module::foo");
        assert_eq!(frames[1].name, "My::Module::bar");
        assert_eq!(frames[2].name, "main::run");

        // Check IDs are sequential
        assert_eq!(frames[0].id, 1);
        assert_eq!(frames[1].id, 2);
        assert_eq!(frames[2].id, 3);
    }

    /// `parse_context` returns the (function, file, line) triple.
    #[test]
    fn test_parse_context_method() {
        let parser = PerlStackParser::new();

        // The context regex accepts `Package::func::(file.pm:100):` as well as
        // `main::(file.pm):100:`; the latter carries no explicit function name.
        let result = must_some(parser.parse_context("main::(file.pm):100:"));

        let (func, file, line) = result;
        assert_eq!(func, "main");
        assert_eq!(file, "file.pm");
        assert_eq!(line, 100);
    }

    /// The cheap pre-filter accepts frame-like lines and rejects plain text.
    #[test]
    fn test_looks_like_frame() {
        assert!(PerlStackParser::looks_like_frame("  #0  main::foo at script.pl line 10"));
        assert!(PerlStackParser::looks_like_frame("$ = foo() called from file 'x' line 1"));
        assert!(!PerlStackParser::looks_like_frame("some random text"));
        assert!(!PerlStackParser::looks_like_frame(""));
    }

    /// Auto-assigned IDs start at the configured value and increase by one.
    #[test]
    fn test_auto_id_assignment() {
        let mut parser = PerlStackParser::new().with_starting_id(100);

        let frame1 = parser.parse_frame("  #0  main::foo at a.pl line 1", 0);
        let frame2 = parser.parse_frame("  #1  main::bar at b.pl line 2", 0);

        assert_eq!(frame1.map(|f| f.id), Some(100));
        assert_eq!(frame2.map(|f| f.id), Some(101));
    }

    /// With auto-assignment off, the frame number in the line becomes the ID.
    #[test]
    fn test_manual_id_assignment() {
        let mut parser = PerlStackParser::new().with_auto_ids(false);

        let frame = parser.parse_frame("  #5  main::foo at a.pl line 1", 0);

        // Should use the frame number from the capture
        assert_eq!(frame.map(|f| f.id), Some(5));
    }

    /// Text that matches no pattern yields `None`.
    #[test]
    fn test_parse_unrecognized() {
        let mut parser = PerlStackParser::new();

        let frame = parser.parse_frame("this is not a stack frame", 0);
        assert!(frame.is_none());
    }
}