Skip to main content

pest_debugger/
lib.rs

1// pest. The Elegant Parser
// Copyright (c) 2018-2022 Dragoș Tiselice, Tomas Tauber
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9//! # pest debugger
10//!
11//! This crate contains definitions for the debugger.
12//! A sample CLI-based debugger is available in `main.rs`.
13//! Other debugger frontends can be implemented using this
14//! crate's `DebuggerContext`:
15//!
16//! ```
17//! use pest_debugger::DebuggerContext;
18//! use std::sync::mpsc::sync_channel;
19//! let mut context = DebuggerContext::default();
20//!
21//! context
22//! .load_grammar_direct(
23//!     "testgrammar",
24//!     r#"alpha = { 'a'..'z' | 'A'..'Z' }
25//! digit = { '0'..'9' }
26//!
27//! ident = { !digit ~ (alpha | digit)+ }
28//!
29//! ident_list = _{ ident ~ (" " ~ ident)* }"#,
30//! ).expect("Error: failed to load grammar");
31//! context.load_input_direct("test test2".to_owned());
32//!
33//! let (sender, receiver) = sync_channel(1);
34//!
35//! context.add_breakpoint("ident".to_owned());
36//! for b in context.list_breakpoints().iter() {
37//!     println!("Breakpoint: {}", b);
38//! }
39//! context
40//! .run("ident_list", sender)
41//! .expect("Error: failed to run rule");
42//!
43//! let event = receiver.recv().expect("Error: failed to receive event");
44//! println!("Received a debugger event: {:?}", event);
45//!
46//! context.cont().expect("Error: failed to continue");
47//!
48//! let event = receiver.recv().expect("Error: failed to receive event");
49//! println!("Received a debugger event: {:?}", event);
50//! ```
51//! ## Current Limitations
52//! - relies on OS threads instead of stack-full generators
53//! - only shows position from the `ParserState` when it reaches a breakpoint
54//! - no way to run another rule from a breakpoint, only from the start
55#![doc(
56    html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg",
57    html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg"
58)]
59#![warn(missing_docs, rust_2018_idioms, unused_qualifications)]
60use std::{
61    collections::HashSet,
62    fs::File,
63    io::{self, Read},
64    path::Path,
65    sync::{
66        atomic::{AtomicBool, Ordering},
67        mpsc::SyncSender as Sender,
68        Arc, Mutex,
69    },
70    thread::{self, JoinHandle},
71};
72
73use pest::{error::Error, Position};
74use pest_meta::{
75    optimizer::OptimizedRule,
76    parse_and_optimize,
77    parser::{rename_meta_rule, Rule},
78};
79use pest_vm::Vm;
80
/// Possible errors that can occur in the debugger context.
///
/// The `Display` text of each variant is the message given in its
/// `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum DebuggerError {
    /// An I/O error, e.g. from opening or reading a grammar or input file.
    #[error("I/O error: {0}")]
    Io(#[from] io::Error),
    /// A file name could not be extracted from a grammar path.
    #[error("Missing filename")]
    MissingFilename,
    /// Running a debugger requires a grammar to be provided.
    #[error("Open grammar first")]
    GrammarNotOpened,
    /// Running a debugger requires a parsing input to be provided.
    #[error("Open input first")]
    InputNotOpened,
    /// Continuing a debugger session requires starting a session by running a rule.
    #[error("Run rule first")]
    RunRuleFirst,
    /// Parsing finished (i.e. the session cannot be continued).
    #[error("End-of-input reached")]
    EofReached,
    /// A `Position` can't be created at the given offset in the loaded input.
    #[error("Invalid position: {0}")]
    InvalidPosition(usize),
    /// The provided grammar is invalid.
    /// The first element contains a formatted error message.
    /// The second element (`Vec`) contains the individual errors.
    #[error("Grammar error: {0}")]
    IncorrectGrammar(String, Vec<Error<Rule>>),
    /// When restarting a session, the previous session
    /// seems to have panicked.
    /// The element contains the debug-formatted panic payload.
    #[error("Previous parsing execution panic: {0}")]
    PreviousRunPanic(String),
}
115
/// Events that are sent from the debugger.
///
/// Events are delivered through the channel whose sending half is passed to
/// `DebuggerContext::run`. The type is `Clone` so that frontends can keep or
/// forward a received event without giving up ownership of it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DebuggerEvent {
    /// A breakpoint encountered.
    /// The first element is the rule name.
    /// The second element is the position in the input at which the rule
    /// was encountered.
    Breakpoint(String, usize),
    /// The end of the input has been reached, i.e. parsing finished successfully.
    Eof,
    /// A parsing error encountered.
    /// The element is the formatted error message.
    Error(String),
}
128
/// Debugger for pest grammars.
///
/// Holds the loaded grammar and input, the set of breakpoints and the
/// state of the currently running parsing session (if any).
pub struct DebuggerContext {
    /// Join handle of the parsing thread spawned by `run`;
    /// `None` until a session has been started.
    handle: Option<JoinHandle<()>>,
    /// Flag shared with the parsing thread: set once parsing has finished,
    /// and also set by `run` to make a previous session stop.
    is_done: Arc<AtomicBool>,
    /// Optimized rules of the loaded grammar, if a grammar was loaded.
    grammar: Option<Vec<OptimizedRule>>,
    /// The loaded parsing input, if any.
    input: Option<String>,
    /// Names of the rules to stop at; shared with the parsing thread.
    breakpoints: Arc<Mutex<HashSet<String>>>,
}
137
// Panic message used when the breakpoints mutex cannot be locked.
const POISONED_LOCK_PANIC: &str = "poisoned lock";
// Panic message used when a debugger event cannot be sent to the channel.
const CHANNEL_CLOSED_PANIC: &str = "channel closed";
140
141impl DebuggerContext {
142    fn file_to_string(path: impl AsRef<Path>) -> Result<String, DebuggerError> {
143        let mut file = File::open(path)?;
144
145        let mut string = String::new();
146        file.read_to_string(&mut string)?;
147
148        Ok(string)
149    }
150
151    /// Loads a grammar from a file.
152    pub fn load_grammar(&mut self, path: impl AsRef<Path>) -> Result<(), DebuggerError> {
153        let grammar = DebuggerContext::file_to_string(&path)?;
154
155        let file_name = path
156            .as_ref()
157            .file_name()
158            .map(|string| string.to_string_lossy().into_owned())
159            .ok_or(DebuggerError::MissingFilename)?;
160
161        self.grammar = Some(DebuggerContext::parse_grammar(&file_name, &grammar)?);
162
163        Ok(())
164    }
165
166    /// Loads a grammar from a string.
167    pub fn load_grammar_direct(
168        &mut self,
169        grammar_name: &str,
170        grammar: &str,
171    ) -> Result<(), DebuggerError> {
172        self.grammar = Some(DebuggerContext::parse_grammar(grammar_name, grammar)?);
173
174        Ok(())
175    }
176
177    /// Loads a parsing input from a file.
178    pub fn load_input(&mut self, path: impl AsRef<Path>) -> Result<(), DebuggerError> {
179        let input = DebuggerContext::file_to_string(path)?;
180
181        self.input = Some(input);
182
183        Ok(())
184    }
185
186    /// Loads a parsing input from a string.
187    pub fn load_input_direct(&mut self, input: String) {
188        self.input = Some(input);
189    }
190
191    /// Adds all grammar rules as breakpoints.
192    /// This is useful for stepping through the entire parsing process.
193    /// It returns an error if the grammar hasn't been loaded yet.
194    pub fn add_all_rules_breakpoints(&mut self) -> Result<(), DebuggerError> {
195        let ast = self
196            .grammar
197            .as_ref()
198            .ok_or(DebuggerError::GrammarNotOpened)?;
199        let mut breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC);
200        for rule in ast {
201            breakpoints.insert(rule.name.clone());
202        }
203
204        Ok(())
205    }
206
207    /// Adds a rule to breakpoints.
208    pub fn add_breakpoint(&mut self, rule: String) {
209        let mut breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC);
210
211        breakpoints.insert(rule);
212    }
213
214    /// Removes a rule from breakpoints.
215    pub fn delete_breakpoint(&mut self, rule: &str) {
216        let mut breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC);
217
218        breakpoints.remove(rule);
219    }
220
221    /// Removes all breakpoints.
222    pub fn delete_all_breakpoints(&mut self) {
223        let mut breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC);
224
225        breakpoints.clear();
226    }
227
228    /// Returns a list of all breakpoints.
229    pub fn list_breakpoints(&self) -> Vec<String> {
230        let breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC);
231        let mut breakpoints: Vec<_> = breakpoints.iter().map(ToOwned::to_owned).collect();
232        breakpoints.sort();
233        breakpoints
234    }
235
236    fn handle(
237        &self,
238        ast: Vec<OptimizedRule>,
239        rule: String,
240        input: String,
241        sender: Sender<DebuggerEvent>,
242    ) -> JoinHandle<()> {
243        let breakpoints = Arc::clone(&self.breakpoints);
244        let is_done = Arc::clone(&self.is_done);
245        let is_done_signal = Arc::clone(&self.is_done);
246
247        let rsender = sender.clone();
248        thread::spawn(move || {
249            let vm = Vm::new_with_listener(
250                ast,
251                Box::new(move |rule, pos| {
252                    if is_done_signal.load(Ordering::SeqCst) {
253                        return true;
254                    }
255
256                    let contains_rule = {
257                        let lock = breakpoints.lock().expect(POISONED_LOCK_PANIC);
258                        lock.contains(&rule)
259                    };
260
261                    if contains_rule {
262                        rsender
263                            .send(DebuggerEvent::Breakpoint(rule, pos.pos()))
264                            .expect(CHANNEL_CLOSED_PANIC);
265
266                        thread::park();
267                    }
268                    false
269                }),
270            );
271
272            match vm.parse(&rule, &input) {
273                Ok(_) => sender.send(DebuggerEvent::Eof).expect(CHANNEL_CLOSED_PANIC),
274                Err(error) => sender
275                    .send(DebuggerEvent::Error(error.to_string()))
276                    .expect(CHANNEL_CLOSED_PANIC),
277            };
278
279            is_done.store(true, Ordering::SeqCst);
280        })
281    }
282
283    fn parse_grammar(file_name: &str, grammar: &str) -> Result<Vec<OptimizedRule>, DebuggerError> {
284        match parse_and_optimize(grammar) {
285            Ok((_, ast)) => Ok(ast),
286            Err(errors) => {
287                let msg = format!(
288                    "error parsing {:?}\n\n{}",
289                    file_name,
290                    errors
291                        .iter()
292                        .cloned()
293                        .map(|error| format!("{}", error.renamed_rules(rename_meta_rule)))
294                        .collect::<Vec<_>>()
295                        .join("\n")
296                );
297                Err(DebuggerError::IncorrectGrammar(msg, errors))
298            }
299        }
300    }
301
302    /// Starts a debugger session: runs a rule on an input and stops at breakpoints.
303    /// When the debugger is stopped, an event is sent to the channel using `sender`.
304    /// The debugger can be resumed by calling `cont`.
305    /// This naturally returns errors if the grammar or input haven't been loaded yet etc.
306    pub fn run(&mut self, rule: &str, sender: Sender<DebuggerEvent>) -> Result<(), DebuggerError> {
307        if let Some(handle) = self.handle.take() {
308            if !(self.is_done.load(Ordering::Relaxed)) {
309                self.is_done.store(true, Ordering::SeqCst);
310                handle.thread().unpark();
311            }
312            handle
313                .join()
314                .map_err(|e| DebuggerError::PreviousRunPanic(format!("{:?}", e)))?;
315        }
316
317        self.is_done.store(false, Ordering::SeqCst);
318        let ast = self
319            .grammar
320            .as_ref()
321            .ok_or(DebuggerError::GrammarNotOpened)?;
322        match self.input {
323            Some(ref input) => {
324                let rule = rule.to_owned();
325                let input = input.clone();
326
327                self.handle = Some(self.handle(ast.clone(), rule, input, sender));
328                Ok(())
329            }
330            None => Err(DebuggerError::InputNotOpened),
331        }
332    }
333
334    /// Continue the debugger session from the breakpoint.
335    /// It returns an error if the session finished or wasn't started yet.
336    pub fn cont(&self) -> Result<(), DebuggerError> {
337        if self.is_done.load(Ordering::SeqCst) {
338            return Err(DebuggerError::EofReached);
339        }
340
341        match self.handle {
342            Some(ref handle) => {
343                handle.thread().unpark();
344                Ok(())
345            }
346            None => Err(DebuggerError::RunRuleFirst),
347        }
348    }
349
350    /// Returns a `Position` from the loaded input.
351    pub fn get_position(&self, pos: usize) -> Result<Position<'_>, DebuggerError> {
352        match self.input {
353            Some(ref input) => Position::new(input, pos).ok_or(DebuggerError::InvalidPosition(pos)),
354            None => Err(DebuggerError::InputNotOpened),
355        }
356    }
357}
358
359impl Default for DebuggerContext {
360    fn default() -> Self {
361        Self {
362            handle: None,
363            is_done: Arc::new(AtomicBool::new(false)),
364            grammar: None,
365            input: None,
366            breakpoints: Arc::new(Mutex::new(HashSet::new())),
367        }
368    }
369}
370
371#[cfg(test)]
372mod test {
373    use super::*;
374    use std::sync::mpsc::sync_channel;
375
    // Builds a context with a small grammar (rules `alpha`, `digit`, `ident`
    // and `ident_list`) and the input "test test2" already loaded.
    fn get_test_context() -> DebuggerContext {
        let mut context = DebuggerContext::default();

        context
            .load_grammar_direct(
                "testgrammar",
                r#"alpha = { 'a'..'z' | 'A'..'Z' }
            digit = { '0'..'9' }
            
            ident = { !digit ~ (alpha | digit)+ }
            
            ident_list = _{ ident ~ (" " ~ ident)* }"#,
            )
            .expect("Error: failed to load grammar");
        context.load_input_direct("test test2".to_owned());
        context
    }
393
    // Runs a whole session with a breakpoint on `ident`: expects a stop at
    // positions 0 and 5, then `Eof`, and finally exercises the breakpoint
    // management methods.
    #[test]
    fn test_full_flow() {
        let mut context = get_test_context();

        let (sender, receiver) = sync_channel(1);

        assert_eq!(context.list_breakpoints().len(), 0);
        context.add_breakpoint("ident".to_owned());
        assert_eq!(context.list_breakpoints().len(), 1);
        context
            .run("ident_list", sender)
            .expect("Error: failed to run rule");

        // First `ident` starts at the beginning of "test test2".
        let event = receiver.recv().expect("Error: failed to receive event");
        assert_eq!(event, DebuggerEvent::Breakpoint("ident".to_owned(), 0));

        context.cont().expect("Error: failed to continue");

        // Second `ident` starts right after the space.
        let event = receiver.recv().expect("Error: failed to receive event");
        assert_eq!(event, DebuggerEvent::Breakpoint("ident".to_owned(), 5));
        context.cont().expect("Error: failed to continue");
        let event = receiver.recv().expect("Error: failed to receive event");

        assert_eq!(event, DebuggerEvent::Eof);
        // The test grammar defines four rules.
        context
            .add_all_rules_breakpoints()
            .expect("grammar is loaded");
        assert_eq!(context.list_breakpoints().len(), 4);
        context.delete_breakpoint("ident");
        assert_eq!(context.list_breakpoints().len(), 3);
        context.delete_all_breakpoints();
        assert_eq!(context.list_breakpoints().len(), 0);
    }
427
    // Starts a second session while the first one is still stopped at its
    // first breakpoint; the new session must start over from position 0.
    #[test]
    fn test_restart() {
        let mut context = get_test_context();

        let (sender, receiver) = sync_channel(1);

        assert_eq!(context.list_breakpoints().len(), 0);
        context.add_breakpoint("ident".to_owned());
        assert_eq!(context.list_breakpoints().len(), 1);
        context
            .run("ident_list", sender)
            .expect("Error: failed to run rule");

        let event = receiver.recv().expect("Error: failed to receive event");
        assert_eq!(event, DebuggerEvent::Breakpoint("ident".to_owned(), 0));
        // A fresh channel for the restarted session.
        let (sender2, receiver2) = sync_channel(1);

        context
            .run("ident_list", sender2)
            .expect("Error: failed to run rule");
        let event = receiver2.recv().expect("Error: failed to receive event");
        assert_eq!(event, DebuggerEvent::Breakpoint("ident".to_owned(), 0));
    }
451
    // Walks through the error paths of an initially empty context.
    #[test]
    pub fn test_errors() {
        let mut context = DebuggerContext::default();

        // "." is presumably a directory, so it can't be read as an input file.
        assert!(context.load_input(".").is_err());
        let pest_readme = concat!(env!("CARGO_MANIFEST_DIR"), "/../README.md");
        let pest_grammar = concat!(env!("CARGO_MANIFEST_DIR"), "/../meta/src/grammar.pest");

        // The README is not expected to be a valid grammar.
        assert!(context.load_grammar(pest_readme).is_err());
        // No grammar, no session and no input yet.
        assert!(context.add_all_rules_breakpoints().is_err());
        assert!(context.cont().is_err());
        assert!(context.run("rule", sync_channel(1).0).is_err());
        assert!(context.load_grammar(pest_grammar).is_ok());
        // The grammar is loaded now, but the input is still missing.
        assert!(context.run("rule", sync_channel(1).0).is_err());
        assert!(context.get_position(0).is_err());
        // In an empty input only position 0 is valid.
        context.load_input_direct("".to_owned());
        assert!(context.get_position(0).is_ok());
        assert!(context.get_position(1).is_err());
        assert!(context.load_input(pest_grammar).is_ok());
        let (sender, _receiver) = sync_channel(1);
        assert!(context.run("ANY", sender).is_ok());
        // Keep continuing until the session reports that it has finished.
        while context.cont().is_ok() {}
        assert!(context.cont().is_err());
    }
476}