Skip to main content

pest_vm/
lib.rs

// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
//! # pest vm
//!
//! This crate runs ASTs on-the-fly and is used by the fiddle and debugger.
12
13#![doc(
14    html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg",
15    html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg"
16)]
17#![warn(missing_docs, rust_2018_idioms, unused_qualifications)]
18
19use pest::error::Error;
20use pest::iterators::Pairs;
21use pest::{unicode, Position};
22use pest::{Atomicity, MatchDir, ParseResult, ParserState};
23use pest_meta::ast::RuleType;
24use pest_meta::optimizer::{OptimizedExpr, OptimizedRule};
25
26use std::collections::HashMap;
27use std::panic::{RefUnwindSafe, UnwindSafe};
28
29mod macros;
30
31/// A callback function that is called when a rule is matched.
32/// The first argument is the name of the rule and the second is the span of the rule.
33/// The function should return `true` if parsing should be terminated
34/// (if the new parsing session was started) or `false` otherwise.
35type ListenerFn =
36    Box<dyn Fn(String, &Position<'_>) -> bool + Sync + Send + RefUnwindSafe + UnwindSafe>;
37
38/// A virtual machine-like construct that runs an AST on-the-fly
39pub struct Vm {
40    rules: HashMap<String, OptimizedRule>,
41    listener: Option<ListenerFn>,
42}
43
44impl Vm {
45    /// Creates a new `Vm` from optimized rules
46    pub fn new(rules: Vec<OptimizedRule>) -> Vm {
47        let rules = rules.into_iter().map(|r| (r.name.clone(), r)).collect();
48        Vm {
49            rules,
50            listener: None,
51        }
52    }
53
54    /// Creates a new `Vm` from optimized rules
55    /// and a listener function that is called when a rule is matched.
56    /// (used by the `pest_debugger` crate)
57    pub fn new_with_listener(rules: Vec<OptimizedRule>, listener: ListenerFn) -> Vm {
58        let rules = rules.into_iter().map(|r| (r.name.clone(), r)).collect();
59        Vm {
60            rules,
61            listener: Some(listener),
62        }
63    }
64
65    /// Runs a parser rule on an input
66    #[allow(clippy::perf)]
67    pub fn parse<'a>(
68        &'a self,
69        rule: &'a str,
70        input: &'a str,
71    ) -> Result<Pairs<'a, &'a str>, Error<&'a str>> {
72        pest::state(input, |state| self.parse_rule(rule, state))
73    }
74
75    #[allow(clippy::suspicious)]
76    fn parse_rule<'a>(
77        &'a self,
78        rule: &'a str,
79        state: Box<ParserState<'a, &'a str>>,
80    ) -> ParseResult<Box<ParserState<'a, &'a str>>> {
81        if let Some(ref listener) = self.listener {
82            if listener(rule.to_owned(), state.position()) {
83                return Err(ParserState::new(state.position().line_of()));
84            }
85        }
86        match rule {
87            "ANY" => return state.skip(1),
88            "EOI" => return state.rule("EOI", |state| state.end_of_input()),
89            "SOI" => return state.start_of_input(),
90            "PEEK" => return state.stack_peek(),
91            "PEEK_ALL" => return state.stack_match_peek(),
92            "POP" => return state.stack_pop(),
93            "POP_ALL" => return state.stack_match_pop(),
94            "DROP" => return state.stack_drop(),
95            "ASCII_DIGIT" => return state.match_range('0'..'9'),
96            "ASCII_NONZERO_DIGIT" => return state.match_range('1'..'9'),
97            "ASCII_BIN_DIGIT" => return state.match_range('0'..'1'),
98            "ASCII_OCT_DIGIT" => return state.match_range('0'..'7'),
99            "ASCII_HEX_DIGIT" => {
100                return state
101                    .match_range('0'..'9')
102                    .or_else(|state| state.match_range('a'..'f'))
103                    .or_else(|state| state.match_range('A'..'F'));
104            }
105            "ASCII_ALPHA_LOWER" => return state.match_range('a'..'z'),
106            "ASCII_ALPHA_UPPER" => return state.match_range('A'..'Z'),
107            "ASCII_ALPHA" => {
108                return state
109                    .match_range('a'..'z')
110                    .or_else(|state| state.match_range('A'..'Z'));
111            }
112            "ASCII_ALPHANUMERIC" => {
113                return state
114                    .match_range('a'..'z')
115                    .or_else(|state| state.match_range('A'..'Z'))
116                    .or_else(|state| state.match_range('0'..'9'));
117            }
118            "ASCII" => return state.match_range('\x00'..'\x7f'),
119            "NEWLINE" => {
120                return state
121                    .match_string("\n")
122                    .or_else(|state| state.match_string("\r\n"))
123                    .or_else(|state| state.match_string("\r"));
124            }
125            _ => (),
126        };
127
128        if let Some(rule) = self.rules.get(rule) {
129            if rule.name == "WHITESPACE" || rule.name == "COMMENT" {
130                match rule.ty {
131                    RuleType::Normal => state.rule(&rule.name, |state| {
132                        state.atomic(Atomicity::Atomic, |state| {
133                            self.parse_expr(&rule.expr, state)
134                        })
135                    }),
136                    RuleType::Silent => state.atomic(Atomicity::Atomic, |state| {
137                        self.parse_expr(&rule.expr, state)
138                    }),
139                    RuleType::Atomic => state.rule(&rule.name, |state| {
140                        state.atomic(Atomicity::Atomic, |state| {
141                            self.parse_expr(&rule.expr, state)
142                        })
143                    }),
144                    RuleType::CompoundAtomic => state.atomic(Atomicity::CompoundAtomic, |state| {
145                        state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
146                    }),
147                    RuleType::NonAtomic => state.atomic(Atomicity::Atomic, |state| {
148                        state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
149                    }),
150                }
151            } else {
152                match rule.ty {
153                    RuleType::Normal => {
154                        state.rule(&rule.name, move |state| self.parse_expr(&rule.expr, state))
155                    }
156                    RuleType::Silent => self.parse_expr(&rule.expr, state),
157                    RuleType::Atomic => state.rule(&rule.name, move |state| {
158                        state.atomic(Atomicity::Atomic, move |state| {
159                            self.parse_expr(&rule.expr, state)
160                        })
161                    }),
162                    RuleType::CompoundAtomic => {
163                        state.atomic(Atomicity::CompoundAtomic, move |state| {
164                            state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
165                        })
166                    }
167                    RuleType::NonAtomic => state.atomic(Atomicity::NonAtomic, move |state| {
168                        state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
169                    }),
170                }
171            }
172        } else {
173            if let Some(property) = unicode::by_name(rule) {
174                // std::boxed::Box<dyn std::ops::Fn(char) -> bool> is not FnOnce(char)->bool
175                return state.match_char_by(property);
176            }
177
178            panic!("undefined rule {}", rule);
179        }
180    }
181
182    fn parse_expr<'a>(
183        &'a self,
184        expr: &'a OptimizedExpr,
185        state: Box<ParserState<'a, &'a str>>,
186    ) -> ParseResult<Box<ParserState<'a, &'a str>>> {
187        match *expr {
188            OptimizedExpr::Str(ref string) => state.match_string(string),
189            OptimizedExpr::Insens(ref string) => state.match_insensitive(string),
190            OptimizedExpr::Range(ref start, ref end) => {
191                let start = start.chars().next().expect("empty char literal");
192                let end = end.chars().next().expect("empty char literal");
193
194                state.match_range(start..end)
195            }
196            OptimizedExpr::Ident(ref name) => self.parse_rule(name, state),
197            OptimizedExpr::PeekSlice(start, end) => {
198                state.stack_match_peek_slice(start, end, MatchDir::BottomToTop)
199            }
200            OptimizedExpr::PosPred(ref expr) => {
201                state.lookahead(true, |state| self.parse_expr(expr, state))
202            }
203            OptimizedExpr::NegPred(ref expr) => {
204                state.lookahead(false, |state| self.parse_expr(expr, state))
205            }
206            OptimizedExpr::Seq(ref lhs, ref rhs) => state.sequence(|state| {
207                self.parse_expr(lhs, state)
208                    .and_then(|state| self.skip(state))
209                    .and_then(|state| self.parse_expr(rhs, state))
210            }),
211            OptimizedExpr::Choice(ref lhs, ref rhs) => self
212                .parse_expr(lhs, state)
213                .or_else(|state| self.parse_expr(rhs, state)),
214            OptimizedExpr::Opt(ref expr) => state.optional(|state| self.parse_expr(expr, state)),
215            OptimizedExpr::Rep(ref expr) => state.sequence(|state| {
216                state.optional(|state| {
217                    self.parse_expr(expr, state).and_then(|state| {
218                        state.repeat(|state| {
219                            state.sequence(|state| {
220                                self.skip(state)
221                                    .and_then(|state| self.parse_expr(expr, state))
222                            })
223                        })
224                    })
225                })
226            }),
227            #[cfg(feature = "grammar-extras")]
228            OptimizedExpr::RepOnce(ref expr) => state.sequence(|state| {
229                self.parse_expr(expr, state).and_then(|state| {
230                    state.repeat(|state| {
231                        state.sequence(|state| {
232                            self.skip(state)
233                                .and_then(|state| self.parse_expr(expr, state))
234                        })
235                    })
236                })
237            }),
238            OptimizedExpr::Push(ref expr) => state.stack_push(|state| self.parse_expr(expr, state)),
239            #[cfg(feature = "grammar-extras")]
240            OptimizedExpr::PushLiteral(ref string) => state.stack_push_literal(string.to_owned()),
241            OptimizedExpr::Skip(ref strings) => state.skip_until(
242                &strings
243                    .iter()
244                    .map(|state| state.as_str())
245                    .collect::<Vec<&str>>(),
246            ),
247            #[cfg(feature = "grammar-extras")]
248            OptimizedExpr::NodeTag(ref expr, ref tag) => self
249                .parse_expr(expr, state)
250                .and_then(|state| state.tag_node(tag)),
251            OptimizedExpr::RestoreOnErr(ref expr) => {
252                state.restore_on_err(|state| self.parse_expr(expr, state))
253            }
254        }
255    }
256
257    fn skip<'a>(
258        &'a self,
259        state: Box<ParserState<'a, &'a str>>,
260    ) -> ParseResult<Box<ParserState<'a, &'a str>>> {
261        match (
262            self.rules.contains_key("WHITESPACE"),
263            self.rules.contains_key("COMMENT"),
264        ) {
265            (false, false) => Ok(state),
266            (true, false) => {
267                if state.atomicity() == Atomicity::NonAtomic {
268                    state.repeat(|state| self.parse_rule("WHITESPACE", state))
269                } else {
270                    Ok(state)
271                }
272            }
273            (false, true) => {
274                if state.atomicity() == Atomicity::NonAtomic {
275                    state.repeat(|state| self.parse_rule("COMMENT", state))
276                } else {
277                    Ok(state)
278                }
279            }
280            (true, true) => {
281                if state.atomicity() == Atomicity::NonAtomic {
282                    state.sequence(|state| {
283                        state
284                            .repeat(|state| self.parse_rule("WHITESPACE", state))
285                            .and_then(|state| {
286                                state.repeat(|state| {
287                                    state.sequence(|state| {
288                                        self.parse_rule("COMMENT", state).and_then(|state| {
289                                            state.repeat(|state| {
290                                                self.parse_rule("WHITESPACE", state)
291                                            })
292                                        })
293                                    })
294                                })
295                            })
296                    })
297                } else {
298                    Ok(state)
299                }
300            }
301        }
302    }
303}