1use std::rc::Rc;
4
// Module-local alias: a std HashMap pinned to the crate's own hash builder
// (see `crate::HashBuilder`), so every map in this file shares one hasher.
type HashMap<K, V> = std::collections::HashMap::<K, V, crate::HashBuilder>;
6
7use crate::bnf::*;
8
// One node of the parse tree produced by `pred_recdec_parse_impl_recursive`.
#[derive(Clone, Debug, Default)]
pub struct ASTNode {
    // `None` for terminal (leaf) tokens; `Some` for rule nodes, even when the
    // rule matched zero children (e.g. nodes synthesized during recovery).
    pub children : Option<Vec<ASTNode>>,
    // `token_count`: number of source tokens this node spans. If the top bit
    // is set the node is "poisoned" (built via error recovery) and the field
    // holds the bitwise complement of the real count — see `is_poisoned` /
    // `get_real_token_count`.
    // `text`: interned string id — the rule's name id for interior nodes, the
    // token text id for leaves (resolve via the grammar's `string_cache_inv`).
    pub token_count : u32, pub text : u32,
}
27
28impl ASTNode {
29 pub fn is_poisoned(&self) -> bool { self.token_count >= 0x80000000 }
30 pub fn get_real_token_count(&self) -> u32 { if self.token_count >= 0x80000000 { self.token_count ^ !0u32 } else { self.token_count } }
31}
32
// Outcome of a user-supplied guard callback attached to a rule alternative.
pub enum GuardResult {
    // The guarded alternative may be used.
    #[allow(unused)] Accept,
    // The guarded alternative is skipped; the parser tries the next one.
    #[allow(unused)] Reject,
    // Abort the entire parse immediately with this error message.
    #[allow(unused)] HardError(String)
}
65
// Heterogeneous, type-keyed map: stores at most one value per concrete type,
// keyed by its `TypeId`. Used for user data threaded through the parser.
#[derive(Default)]
pub struct AnyMap { map: HashMap<std::any::TypeId, Box<dyn std::any::Any>> }
69
70impl AnyMap {
71 #[allow(unused)] pub fn insert<T: 'static>(&mut self, value: T) { self.map.insert(std::any::TypeId::of::<T>(), Box::new(value)); }
72 #[allow(unused)] pub fn get<T: 'static>(&self) -> Option<&T> { self.map.get(&std::any::TypeId::of::<T>())?.downcast_ref::<T>() }
73 #[allow(unused)] pub fn get_mut<T: 'static>(&mut self) -> Option<&mut T> { self.map.get_mut(&std::any::TypeId::of::<T>())?.downcast_mut::<T>() }
74}
75
// Shared mutable state threaded through the whole recursive parse.
pub struct PrdGlobal<'a> {
    // User-supplied guard callbacks, looked up by name; a guard decides
    // whether a guarded alternative applies at the current position.
    pub (crate) guards : Rc<HashMap<String, Rc<dyn Fn(&mut PrdGlobal, &[Token], usize) -> GuardResult>>>,
    // User-supplied hook callbacks: may push AST nodes into the current
    // children list and return how many tokens they consumed.
    pub (crate) hooks : Rc<HashMap<String, Rc<dyn Fn(&mut PrdGlobal, &[Token], usize, &mut Vec<ASTNode>) -> Result<usize, String>>>>,

    // Arbitrary per-parse user data, available to guards and hooks.
    #[allow(unused)] pub udata : AnyMap,
    // NOTE(review): presumably a user-side regex cache keyed by integer id
    // (see `RegexCacher`) — not used in this file; confirm against callers.
    #[allow(unused)] pub udata_r : HashMap<usize, RegexCacher>,

    // The grammar being parsed against (rules, interned strings, reserved words).
    #[allow(unused)] pub (crate) g : &'a Grammar,
}
88
// Recursive worker of the predictive recursive-descent parser.
//
// Matches the grammar point (rule) `gp_id` against `tokens` starting at
// `token_start`; `depth` guards against runaway recursion. On success the
// returned node's `token_count` records how many tokens were consumed —
// stored as the bitwise complement ("poisoned") if error recovery was used
// anywhere in the subtree. On failure a human-readable error is returned.
//
// This is predictive parsing: alternatives are tried in order, and once an
// alternative's leading predicate accepts, the parser commits to it — any
// later mismatch is a hard error, never a backtrack.
pub (crate) fn pred_recdec_parse_impl_recursive(
    global : &mut PrdGlobal,
    gp_id : usize, tokens : &[Token], token_start : usize,
    depth : usize
) -> Result<ASTNode, String>
{
    // Hard cap that turns infinite/left recursion in the grammar into an error.
    const DEPTH_LIMIT : usize = 1000;
    if depth > DEPTH_LIMIT { return Err(format!("Exceeded recursion depth limit of {DEPTH_LIMIT}.")); }
    // Both are `mut` because a Become/BecomeAs directive can redirect matching
    // to a different rule mid-alternative (and optionally relabel the node).
    let mut g_item = &global.g.points[gp_id];
    let mut chosen_name_id = g_item.name_id;

    // Accumulated child nodes; survives a Become redirect.
    let mut children = vec!();
    // Cursor into `tokens`.
    let mut i = token_start;

    // Set when any child was synthesized via error recovery; propagated to the result.
    let mut poisoned = false;

    #[cfg(feature = "parse_trace")] { println!("entered {} at {i}, depth {depth}", global.g.string_cache_inv[chosen_name_id as usize]); }

    let mut alt_id = 0;
    // Scan the rule's alternatives in order. The label exists so a Become
    // directive can restart the scan against a different rule.
    'top: while alt_id < g_item.forms.len()
    {
        let alt = &g_item.forms[alt_id];
        // Pre-increment so error messages below report a 1-based alt id.
        alt_id += 1;

        // Empty alternative (epsilon): succeed immediately with whatever
        // children have accumulated so far (non-empty only after a Become).
        if alt.matching_terms.len() == 0
        {
            return Ok(ASTNode {
                text : chosen_name_id,
                children : Some(children),
                token_count : (i - token_start) as u32,
            });
        }

        let mut term_idx = 0;

        // The first term may be a predicate that decides whether this
        // alternative applies; any other term kind (the `_` arm) means the
        // alternative is accepted unconditionally.
        let mut accepted = true;
        let first_term = alt.matching_terms.get(0);
        let first_term = first_term.as_ref().unwrap();
        match &first_term.t
        {
            // User guard callback; HardError aborts the whole parse.
            MatchingTermE::Guard(guard) =>
            {
                accepted = false;
                if let Some(f) = global.guards.get(&**guard)
                {
                    // Clone the Rc so `global` can be passed mutably to the call.
                    let f = Rc::clone(&f);
                    match f(global, tokens, i)
                    {
                        GuardResult::Accept => accepted = true,
                        GuardResult::HardError(e) => { return Err(e); }
                        _ => {}
                    }
                }
                else
                {
                    return Err(format!("Unknown guard {guard}"));
                }
                term_idx += 1;
            }
            // Lookahead: accept iff the token at signed offset `loc` from the
            // cursor has exactly the interned text `tester`.
            MatchingTermE::Peek(loc, tester) =>
            {
                accepted = false;
                let loc = (i as isize + loc) as usize;
                if loc < tokens.len() && tokens[loc].text == *tester
                {
                    accepted = true;
                }
                term_idx += 1;
            }
            // Lookahead by regex over the interned token text.
            MatchingTermE::PeekR(loc, tester) =>
            {
                accepted = false;
                let loc = (i as isize + loc) as usize;
                if loc < tokens.len() && tester.is_match_interned(tokens[loc].text, &global.g.string_cache_inv)
                {
                    accepted = true;
                }
                term_idx += 1;
            }
            // Like PeekR, but the match is vetoed if the token also matches
            // the grammar's reserved-word regex.
            MatchingTermE::PeekRes(loc, tester) =>
            {
                accepted = false;
                let loc = (i as isize + loc) as usize;
                if loc < tokens.len() && tester.is_match_interned(tokens[loc].text, &global.g.string_cache_inv)
                {
                    accepted = true;
                    if let Some(r) = &global.g.reserved
                    {
                        if regex_is_match(r, &global.g.string_cache_inv[tokens[loc].text as usize]) { accepted = false; }
                    }
                }
                term_idx += 1;
            }
            // Accept only at end of input.
            MatchingTermE::Eof =>
            {
                accepted = i == tokens.len();
                term_idx += 1;
            }
            _ => {}
        }

        // Predicate rejected: try the next alternative.
        if !accepted { continue; }

        #[cfg(feature = "parse_trace")] { println!("chose variant {}", alt_id-1); }

        // First committed alternative: size the children list once up front.
        if children.capacity() == 0
        {
            children.reserve_exact(alt.matching_terms.len());
        }

        // Committed to this alternative: every remaining term must match, or
        // the parse of this rule fails hard (no backtracking).
        while term_idx < alt.matching_terms.len()
        {
            let term = &alt.matching_terms[term_idx];
            let mut matched = false;
            match &term.t
            {
                // Nonterminal: recurse into the referenced rule.
                MatchingTermE::Rule(id) =>
                {
                    let mut child = pred_recdec_parse_impl_recursive(global, *id, tokens, i, depth + 1);
                    // NOTE(review): presumably works around an optimizer issue
                    // with the deep recursion — confirm before removing.
                    child = std::hint::black_box(child);
                    // Error recovery: if the failed rule declares a recovery
                    // regex, skip forward (at least one token) to the first
                    // sync token and synthesize an empty, poisoned node
                    // spanning the skipped range.
                    if child.is_err() && global.g.points[*id].recover.is_some()
                    {
                        if let Some((r, after)) = &global.g.points[*id].recover
                        {
                            let mut j = i + 1;
                            while j < tokens.len() && !r.is_match(&global.g.string_cache_inv[tokens[j].text as usize])
                            {
                                j += 1;
                            }
                            if j < tokens.len()
                            {
                                // `after` means the sync token itself is consumed too.
                                if *after { j += 1; }
                                child = Ok(ASTNode {
                                    text : global.g.points[*id].name_id,
                                    children : Some(vec!()),
                                    // Poison encoding: bitwise complement of the span length.
                                    token_count : (j - i) as u32 ^ !0u32,
                                });
                            }
                        }
                    }
                    // Still an error (no recovery possible): wrap with rule context.
                    let child = child.map_err(|e| format!("In {}: {e}", g_item.name))?;
                    // Poison propagates upward through every ancestor.
                    if child.is_poisoned()
                    {
                        poisoned = true;
                    }
                    i += child.get_real_token_count() as usize;
                    children.push(child);
                    matched = true;
                }
                // Literal terminal, compared by interned text id.
                MatchingTermE::TermLit(lit) =>
                {
                    if i < tokens.len() && tokens[i].text == *lit
                    {
                        // Pruned alternatives drop terminal leaves from the tree.
                        if !alt.pruned
                        {
                            children.push(ASTNode {
                                text : tokens[i].text.clone(), children : None,
                                token_count : 1,
                            });
                        }
                        i += 1;
                        matched = true;
                    }
                }
                // Regex terminal over the interned token text.
                MatchingTermE::TermRegex(regex) => if i < tokens.len() && regex.is_match_interned(tokens[i].text, &global.g.string_cache_inv)
                {
                    if !alt.pruned
                    {
                        children.push(ASTNode {
                            text : tokens[i].text.clone(), children : None,
                            token_count : 1,
                        });
                    }
                    i += 1;
                    matched = true;
                }
                MatchingTermE::Directive(d) =>
                {
                    match d
                    {
                        // Become: redirect matching into the rule named by the
                        // NEXT term, restarting alternative selection there.
                        // BecomeAs additionally relabels the resulting node.
                        // If the next term is not a Rule, nothing matches and
                        // the generic mismatch error below fires.
                        MatchDirective::Become | MatchDirective::BecomeAs =>
                        {
                            if let Some(MatchingTermE::Rule(id)) = alt.matching_terms.get(term_idx + 1).map(|x| &x.t)
                            {
                                g_item = &global.g.points[*id];
                                #[cfg(feature = "parse_trace")] { println!("became {} at {i}, depth {depth}", g_item.name); }
                                alt_id = 0;
                                if matches!(d, MatchDirective::BecomeAs)
                                {
                                    chosen_name_id = g_item.name_id;
                                }
                                continue 'top;
                            }
                        }
                        // Any: consume exactly one token of any kind (if one remains).
                        MatchDirective::Any => if i < tokens.len()
                        {
                            children.push(ASTNode {
                                text : tokens[i].text.clone(), children : None,
                                token_count : 1,
                            });
                            matched = true;
                            i += 1;
                        }
                        // Remaining directives are not implemented yet.
                        _ => panic!("TODO: {:?}", d), }
                }
                // User hook: may push children and consume tokens; an unknown
                // hook name or a hook error aborts this rule.
                MatchingTermE::Hook(name) =>
                {
                    if let Some(f) = global.hooks.get(&**name)
                    {
                        // Clone the Rc so `global` can be passed mutably to the call.
                        let f = Rc::clone(&f);
                        match f(global, tokens, i, &mut children)
                        {
                            Ok(consumed) => { i += consumed; }
                            Err(e) => Err(e)?
                        }
                    }
                    else
                    {
                        Err(format!("Unknown custom hook {:?} inside of {}", name, global.g.string_cache_inv[chosen_name_id as usize]))?
                    }
                    matched = true;
                }
                // Guards/Peeks etc. are only legal as the first term of an alternative.
                _ => Err(format!("Term type {:?} not supported in this position in a rule (context: {})", term, global.g.string_cache_inv[chosen_name_id as usize]))?
            }
            // Committed alternative failed mid-way: hard error with a window
            // of up to 15 tokens of context.
            if !matched
            {
                Err(format!("Failed to match token at {i} in rule {} alt {alt_id}. Token is `{:?}`.\n{:?}",
                    global.g.string_cache_inv[chosen_name_id as usize],
                    tokens.get(i).map(|x| global.g.string_cache_inv[x.text as usize].clone()),
                    tokens[token_start..tokens.len().min(token_start+15)].iter().map(|x| global.g.string_cache_inv[x.text as usize].clone()).collect::<Vec<_>>()))?
            }
            term_idx += 1;
        }

        #[cfg(feature = "parse_trace")] { println!("accepted {} from {token_start} to {i}, depth {depth}", global.g.string_cache_inv[chosen_name_id as usize]); }
        let mut token_count = (i - token_start) as u32;
        // Re-apply the poison encoding on the way out.
        if poisoned
        {
            token_count = token_count ^ !0u32;
        }
        return Ok(ASTNode {
            text : chosen_name_id,
            children : Some(children),
            token_count,
        });
    }

    // No alternative's predicate accepted at this position.
    Err(format!("Failed to match rule {} at token position {token_start}\n{:?}", global.g.string_cache_inv[chosen_name_id as usize],
        tokens[token_start..tokens.len().min(token_start+15)].iter().map(|x| global.g.string_cache_inv[x.text as usize].clone()).collect::<Vec<_>>()))
}
347
348#[allow(unused)]
352pub fn visit_ast(n : &ASTNode, f : &mut dyn FnMut(&ASTNode) -> bool)
353{
354 let flag = f(n);
355 if flag && let Some(c) = &n.children
356 {
357 for c in c.iter()
358 {
359 visit_ast(c, f);
360 }
361 }
362}
363
// Guard callback: inspects `(global, tokens, position)` and decides whether a
// guarded alternative applies at that position.
pub type Guard = Rc<dyn Fn(&mut PrdGlobal, &[Token], usize) -> GuardResult>;
// Hook callback: may push AST nodes into the current children list and
// returns how many tokens it consumed, or an error message.
pub type Hook = Rc<dyn Fn(&mut PrdGlobal, &[Token], usize, &mut Vec<ASTNode>) -> Result<usize, String>>;
375
376#[allow(unused)]
377pub fn parse(
383 g : &Grammar, root_rule_name : &str, tokens : &[Token],
384 guards : Rc<HashMap<String, Guard>>,
385 hooks : Rc<HashMap<String, Hook>>,
386) -> Result<ASTNode, String>
387{
388 let gp_id = g.by_name.get(root_rule_name).unwrap();
389 let mut global = PrdGlobal {
390 guards,
391 hooks,
392 udata : <_>::default(),
393 udata_r : <_>::default(),
394 g,
395 };
396
397 if let Some(f) = global.hooks.get("init")
398 {
399 let f = Rc::clone(&f);
400 let _ = f(&mut global, tokens, 0, &mut vec!());
401 }
402
403 pred_recdec_parse_impl_recursive(&mut global, *gp_id, tokens, 0, 0)
404}
405
406
407#[allow(unused)]
408pub fn print_ast_pred_recdec(ast : &ASTNode, string_cache_inv : &Vec<Rc<String>>, indent : usize)
410{
411 print!("{}", " ".repeat(indent));
412 if let Some(c) = &ast.children
413 {
414 if ast.is_poisoned() { println!("{} (POISONED) {{", ast.text); } else
415 { println!("{} {{", string_cache_inv[ast.text as usize]); }
416 for c in c.iter()
417 {
418 print_ast_pred_recdec(c, string_cache_inv, indent+1);
419 }
420 print!("{}", " ".repeat(indent));
421 println!("}};");
422 }
423 else
424 {
425 println!("{}", string_cache_inv[ast.text as usize]);
426 }
427}