1#![doc(
14 html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg",
15 html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg"
16)]
17#![warn(missing_docs, rust_2018_idioms, unused_qualifications)]
18
19use pest::error::Error;
20use pest::iterators::Pairs;
21use pest::{unicode, Position};
22use pest::{Atomicity, MatchDir, ParseResult, ParserState};
23use pest_meta::ast::RuleType;
24use pest_meta::optimizer::{OptimizedExpr, OptimizedRule};
25
26use std::collections::HashMap;
27use std::panic::{RefUnwindSafe, UnwindSafe};
28
29mod macros;
30
/// Callback consulted by [`Vm`] each time it is about to evaluate a rule.
///
/// It is called with the name of the rule and the parser's current position.
/// Returning `true` makes the VM stop evaluating that rule and report an
/// error; returning `false` lets parsing continue normally.
type ListenerFn =
    Box<dyn Fn(String, &Position<'_>) -> bool + Sync + Send + RefUnwindSafe + UnwindSafe>;
37
/// A grammar interpreter: it owns a set of optimized rules and evaluates them
/// against input at run time through [`Vm::parse`].
pub struct Vm {
    /// The grammar's rules, keyed by rule name.
    rules: HashMap<String, OptimizedRule>,
    /// Optional callback invoked with the rule name and current position
    /// whenever a rule is about to be evaluated; `None` disables it.
    listener: Option<ListenerFn>,
}
43
44impl Vm {
45 pub fn new(rules: Vec<OptimizedRule>) -> Vm {
47 let rules = rules.into_iter().map(|r| (r.name.clone(), r)).collect();
48 Vm {
49 rules,
50 listener: None,
51 }
52 }
53
54 pub fn new_with_listener(rules: Vec<OptimizedRule>, listener: ListenerFn) -> Vm {
58 let rules = rules.into_iter().map(|r| (r.name.clone(), r)).collect();
59 Vm {
60 rules,
61 listener: Some(listener),
62 }
63 }
64
65 #[allow(clippy::perf)]
67 pub fn parse<'a>(
68 &'a self,
69 rule: &'a str,
70 input: &'a str,
71 ) -> Result<Pairs<'a, &'a str>, Error<&'a str>> {
72 pest::state(input, |state| self.parse_rule(rule, state))
73 }
74
75 #[allow(clippy::suspicious)]
76 fn parse_rule<'a>(
77 &'a self,
78 rule: &'a str,
79 state: Box<ParserState<'a, &'a str>>,
80 ) -> ParseResult<Box<ParserState<'a, &'a str>>> {
81 if let Some(ref listener) = self.listener {
82 if listener(rule.to_owned(), state.position()) {
83 return Err(ParserState::new(state.position().line_of()));
84 }
85 }
86 match rule {
87 "ANY" => return state.skip(1),
88 "EOI" => return state.rule("EOI", |state| state.end_of_input()),
89 "SOI" => return state.start_of_input(),
90 "PEEK" => return state.stack_peek(),
91 "PEEK_ALL" => return state.stack_match_peek(),
92 "POP" => return state.stack_pop(),
93 "POP_ALL" => return state.stack_match_pop(),
94 "DROP" => return state.stack_drop(),
95 "ASCII_DIGIT" => return state.match_range('0'..'9'),
96 "ASCII_NONZERO_DIGIT" => return state.match_range('1'..'9'),
97 "ASCII_BIN_DIGIT" => return state.match_range('0'..'1'),
98 "ASCII_OCT_DIGIT" => return state.match_range('0'..'7'),
99 "ASCII_HEX_DIGIT" => {
100 return state
101 .match_range('0'..'9')
102 .or_else(|state| state.match_range('a'..'f'))
103 .or_else(|state| state.match_range('A'..'F'));
104 }
105 "ASCII_ALPHA_LOWER" => return state.match_range('a'..'z'),
106 "ASCII_ALPHA_UPPER" => return state.match_range('A'..'Z'),
107 "ASCII_ALPHA" => {
108 return state
109 .match_range('a'..'z')
110 .or_else(|state| state.match_range('A'..'Z'));
111 }
112 "ASCII_ALPHANUMERIC" => {
113 return state
114 .match_range('a'..'z')
115 .or_else(|state| state.match_range('A'..'Z'))
116 .or_else(|state| state.match_range('0'..'9'));
117 }
118 "ASCII" => return state.match_range('\x00'..'\x7f'),
119 "NEWLINE" => {
120 return state
121 .match_string("\n")
122 .or_else(|state| state.match_string("\r\n"))
123 .or_else(|state| state.match_string("\r"));
124 }
125 _ => (),
126 };
127
128 if let Some(rule) = self.rules.get(rule) {
129 if rule.name == "WHITESPACE" || rule.name == "COMMENT" {
130 match rule.ty {
131 RuleType::Normal => state.rule(&rule.name, |state| {
132 state.atomic(Atomicity::Atomic, |state| {
133 self.parse_expr(&rule.expr, state)
134 })
135 }),
136 RuleType::Silent => state.atomic(Atomicity::Atomic, |state| {
137 self.parse_expr(&rule.expr, state)
138 }),
139 RuleType::Atomic => state.rule(&rule.name, |state| {
140 state.atomic(Atomicity::Atomic, |state| {
141 self.parse_expr(&rule.expr, state)
142 })
143 }),
144 RuleType::CompoundAtomic => state.atomic(Atomicity::CompoundAtomic, |state| {
145 state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
146 }),
147 RuleType::NonAtomic => state.atomic(Atomicity::Atomic, |state| {
148 state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
149 }),
150 }
151 } else {
152 match rule.ty {
153 RuleType::Normal => {
154 state.rule(&rule.name, move |state| self.parse_expr(&rule.expr, state))
155 }
156 RuleType::Silent => self.parse_expr(&rule.expr, state),
157 RuleType::Atomic => state.rule(&rule.name, move |state| {
158 state.atomic(Atomicity::Atomic, move |state| {
159 self.parse_expr(&rule.expr, state)
160 })
161 }),
162 RuleType::CompoundAtomic => {
163 state.atomic(Atomicity::CompoundAtomic, move |state| {
164 state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
165 })
166 }
167 RuleType::NonAtomic => state.atomic(Atomicity::NonAtomic, move |state| {
168 state.rule(&rule.name, |state| self.parse_expr(&rule.expr, state))
169 }),
170 }
171 }
172 } else {
173 if let Some(property) = unicode::by_name(rule) {
174 return state.match_char_by(property);
176 }
177
178 panic!("undefined rule {}", rule);
179 }
180 }
181
182 fn parse_expr<'a>(
183 &'a self,
184 expr: &'a OptimizedExpr,
185 state: Box<ParserState<'a, &'a str>>,
186 ) -> ParseResult<Box<ParserState<'a, &'a str>>> {
187 match *expr {
188 OptimizedExpr::Str(ref string) => state.match_string(string),
189 OptimizedExpr::Insens(ref string) => state.match_insensitive(string),
190 OptimizedExpr::Range(ref start, ref end) => {
191 let start = start.chars().next().expect("empty char literal");
192 let end = end.chars().next().expect("empty char literal");
193
194 state.match_range(start..end)
195 }
196 OptimizedExpr::Ident(ref name) => self.parse_rule(name, state),
197 OptimizedExpr::PeekSlice(start, end) => {
198 state.stack_match_peek_slice(start, end, MatchDir::BottomToTop)
199 }
200 OptimizedExpr::PosPred(ref expr) => {
201 state.lookahead(true, |state| self.parse_expr(expr, state))
202 }
203 OptimizedExpr::NegPred(ref expr) => {
204 state.lookahead(false, |state| self.parse_expr(expr, state))
205 }
206 OptimizedExpr::Seq(ref lhs, ref rhs) => state.sequence(|state| {
207 self.parse_expr(lhs, state)
208 .and_then(|state| self.skip(state))
209 .and_then(|state| self.parse_expr(rhs, state))
210 }),
211 OptimizedExpr::Choice(ref lhs, ref rhs) => self
212 .parse_expr(lhs, state)
213 .or_else(|state| self.parse_expr(rhs, state)),
214 OptimizedExpr::Opt(ref expr) => state.optional(|state| self.parse_expr(expr, state)),
215 OptimizedExpr::Rep(ref expr) => state.sequence(|state| {
216 state.optional(|state| {
217 self.parse_expr(expr, state).and_then(|state| {
218 state.repeat(|state| {
219 state.sequence(|state| {
220 self.skip(state)
221 .and_then(|state| self.parse_expr(expr, state))
222 })
223 })
224 })
225 })
226 }),
227 #[cfg(feature = "grammar-extras")]
228 OptimizedExpr::RepOnce(ref expr) => state.sequence(|state| {
229 self.parse_expr(expr, state).and_then(|state| {
230 state.repeat(|state| {
231 state.sequence(|state| {
232 self.skip(state)
233 .and_then(|state| self.parse_expr(expr, state))
234 })
235 })
236 })
237 }),
238 OptimizedExpr::Push(ref expr) => state.stack_push(|state| self.parse_expr(expr, state)),
239 #[cfg(feature = "grammar-extras")]
240 OptimizedExpr::PushLiteral(ref string) => state.stack_push_literal(string.to_owned()),
241 OptimizedExpr::Skip(ref strings) => state.skip_until(
242 &strings
243 .iter()
244 .map(|state| state.as_str())
245 .collect::<Vec<&str>>(),
246 ),
247 #[cfg(feature = "grammar-extras")]
248 OptimizedExpr::NodeTag(ref expr, ref tag) => self
249 .parse_expr(expr, state)
250 .and_then(|state| state.tag_node(tag)),
251 OptimizedExpr::RestoreOnErr(ref expr) => {
252 state.restore_on_err(|state| self.parse_expr(expr, state))
253 }
254 }
255 }
256
257 fn skip<'a>(
258 &'a self,
259 state: Box<ParserState<'a, &'a str>>,
260 ) -> ParseResult<Box<ParserState<'a, &'a str>>> {
261 match (
262 self.rules.contains_key("WHITESPACE"),
263 self.rules.contains_key("COMMENT"),
264 ) {
265 (false, false) => Ok(state),
266 (true, false) => {
267 if state.atomicity() == Atomicity::NonAtomic {
268 state.repeat(|state| self.parse_rule("WHITESPACE", state))
269 } else {
270 Ok(state)
271 }
272 }
273 (false, true) => {
274 if state.atomicity() == Atomicity::NonAtomic {
275 state.repeat(|state| self.parse_rule("COMMENT", state))
276 } else {
277 Ok(state)
278 }
279 }
280 (true, true) => {
281 if state.atomicity() == Atomicity::NonAtomic {
282 state.sequence(|state| {
283 state
284 .repeat(|state| self.parse_rule("WHITESPACE", state))
285 .and_then(|state| {
286 state.repeat(|state| {
287 state.sequence(|state| {
288 self.parse_rule("COMMENT", state).and_then(|state| {
289 state.repeat(|state| {
290 self.parse_rule("WHITESPACE", state)
291 })
292 })
293 })
294 })
295 })
296 })
297 } else {
298 Ok(state)
299 }
300 }
301 }
302 }
303}