1use std::rc::Rc;
4
5type HashMap<K, V> = std::collections::HashMap::<K, V, crate::HashBuilder>;
6
7use crate::bnf::*;
8
9#[derive(Clone, Debug, Default)]
10#[non_exhaustive]
18pub struct ASTNode {
19 pub children : Option<Vec<ASTNode>>,
21 pub token_count : u32, pub text : u32,
31}
32
33impl ASTNode {
34 pub fn new(children : Option<Vec<ASTNode>>, token_count : u32, text : u32) -> Self { Self { children, token_count, text } }
35 pub fn is_poisoned(&self) -> bool { self.token_count >= 0x80000000 }
36 pub fn get_real_token_count(&self) -> u32 { if self.token_count >= 0x80000000 { self.token_count ^ !0u32 } else { self.token_count } }
37}
38
39pub enum GuardResult {
64 #[allow(unused)] Accept,
66 #[allow(unused)] Reject,
68 #[allow(unused)] HardError(String)
70}
71
72#[derive(Default)]
74pub struct AnyMap { map: HashMap<std::any::TypeId, Box<dyn std::any::Any>> }
75
76impl AnyMap {
77 #[allow(unused)] pub fn insert<T: 'static>(&mut self, value: T) { self.map.insert(std::any::TypeId::of::<T>(), Box::new(value)); }
78 #[allow(unused)] pub fn get<T: 'static>(&self) -> Option<&T> { self.map.get(&std::any::TypeId::of::<T>())?.downcast_ref::<T>() }
79 #[allow(unused)] pub fn get_mut<T: 'static>(&mut self) -> Option<&mut T> { self.map.get_mut(&std::any::TypeId::of::<T>())?.downcast_mut::<T>() }
80}
81
82pub struct PrdGlobal<'a> {
84 pub (crate) guards : Rc<HashMap<String, Rc<dyn Fn(&mut PrdGlobal, &[Token], usize) -> GuardResult>>>,
85 pub (crate) hooks : Rc<HashMap<String, Rc<dyn Fn(&mut PrdGlobal, &[Token], usize, &mut Vec<ASTNode>) -> Result<usize, String>>>>,
86
87 #[allow(unused)] pub udata : AnyMap,
89 #[allow(unused)] pub udata_r : HashMap<usize, RegexCacher>,
91
92 #[allow(unused)] pub g : &'a Grammar,
93}
94
95#[derive(Clone, Debug)]
97pub struct PrdError {
98 #[allow(unused)] pub err_message : String,
100 #[allow(unused)] pub token_index : usize,
102 #[allow(unused)] pub rule : u32,
104 #[allow(unused)] pub in_alt : u16,
110 #[allow(unused)] pub alt_progress : Option<u16>,
114 #[allow(unused)] pub on_behalf_of_rule : u32,
116}
117
118pub (crate) fn pred_recdec_parse_impl_recursive(
119 global : &mut PrdGlobal,
120 gp_id : usize, tokens : &[Token], token_start : usize,
121 depth : usize
122) -> Result<ASTNode, PrdError>
123{
124 const DEPTH_LIMIT : usize = if cfg!(debug_assertions) { 300 } else { 1200 };
125 let mut g_item = &global.g.points[gp_id];
126 let mut chosen_name_id = g_item.name_id;
127
128 let mut children = vec!();
129 let mut i = token_start;
130
131 let mut poisoned = false;
132 let mut alt_id : usize = 0;
133
134 macro_rules! build_err { ($x:expr, $prog:expr) => {
135 Err(PrdError {
136 err_message : $x, token_index : i, rule : g_item.name_id, on_behalf_of_rule : chosen_name_id,
137 in_alt : alt_id.wrapping_sub(1) as u16, alt_progress : $prog
138 })
139 } }
140
141 if depth > DEPTH_LIMIT { return build_err!(format!("Exceeded recursion depth limit of {DEPTH_LIMIT}."), None); }
142
143 #[cfg(feature = "parse_trace")] { println!("entered {} at {i}, depth {depth}", global.g.string_cache_inv[chosen_name_id as usize]); }
144
145 'top: while alt_id < g_item.forms.len()
149 {
150 let alt = &g_item.forms[alt_id];
151 alt_id += 1;
152
153 if alt.matching_terms.len() == 0
154 {
155 return Ok(ASTNode::new(Some(children), (i - token_start) as u32, chosen_name_id));
156 }
157
158 let mut term_idx = 0;
159
160 let mut accepted = true;
161 let first_term = alt.matching_terms.get(0);
162 let first_term = first_term.as_ref().unwrap();
163 match &first_term.t
164 {
165 MatchingTermE::Guard(guard) =>
166 {
167 accepted = false;
168 if let Some(f) = global.guards.get(&**guard)
169 {
170 let f = Rc::clone(&f);
171 match f(global, tokens, i)
172 {
173 GuardResult::Accept => accepted = true,
174 GuardResult::HardError(e) => build_err!(e, None)?,
175 _ => {}
176 }
177 }
178 else
179 {
180 return build_err!(format!("Unknown guard {guard}"), None);
181 }
182 term_idx += 1;
183 }
184 MatchingTermE::Peek(loc, tester) =>
185 {
186 accepted = false;
187 let loc = (i as isize + loc) as usize;
188 if loc < tokens.len() && tokens[loc].text == *tester
189 {
190 accepted = true;
191 }
192 term_idx += 1;
193 }
194 MatchingTermE::PeekR(loc, tester) =>
195 {
196 accepted = false;
197 let loc = (i as isize + loc) as usize;
198 if loc < tokens.len() && tester.is_match_interned(tokens[loc].text, &global.g.string_cache_inv)
200 {
201 accepted = true;
202 }
203 term_idx += 1;
204 }
205 MatchingTermE::PeekRes(loc, tester) =>
206 {
207 accepted = false;
208 let loc = (i as isize + loc) as usize;
209 if loc < tokens.len() && tester.is_match_interned(tokens[loc].text, &global.g.string_cache_inv)
211 {
212 accepted = true;
213 if let Some(r) = &global.g.reserved
214 {
215 if regex_is_match(r, &global.g.string_cache_inv[tokens[loc].text as usize]) { accepted = false; }
216 }
217 }
218 term_idx += 1;
219 }
220 MatchingTermE::Eof =>
221 {
222 accepted = i == tokens.len();
223 term_idx += 1;
224 }
225 _ => {}
226 }
227
228 if !accepted { continue; }
229
230 #[cfg(feature = "parse_trace")] { println!("chose variant {}", alt_id-1); }
231
232 if children.capacity() == 0
233 {
234 children.reserve_exact(alt.matching_terms.len());
235 }
236
237 while term_idx < alt.matching_terms.len()
238 {
239 let term = &alt.matching_terms[term_idx];
240 let mut matched = false;
241 match &term.t
242 {
243 MatchingTermE::Rule(id) =>
244 {
245 let mut child = pred_recdec_parse_impl_recursive(global, *id, tokens, i, depth + 1);
246 child = std::hint::black_box(child);
247 if child.is_err() && global.g.points[*id].recover.is_some()
248 {
249 if let Some((r, after)) = &global.g.points[*id].recover
250 {
251 let mut j = i + 1;
252 while j < tokens.len() && !r.is_match(&global.g.string_cache_inv[tokens[j].text as usize])
253 {
254 j += 1;
255 }
256 if j < tokens.len()
257 {
258 if *after { j += 1; }
259 child = Ok(ASTNode::new(Some(vec!()), (j - i) as u32 ^ !0u32, global.g.points[*id].name_id));
260 }
261 }
262 }
263 const FAT_ERRS : bool = false;
264 if FAT_ERRS
265 {
266 child = child.map_err(|e| {
267 let mut e2 = e.clone(); e2.err_message = format!("In {}: {}", g_item.name, e2.err_message); e2
268 });
269 }
270 let child = child?;
271 if child.is_poisoned()
272 {
273 poisoned = true;
274 }
275 i += child.get_real_token_count() as usize;
276 children.push(child);
277 matched = true;
278 }
279 MatchingTermE::TermLit(lit) =>
280 {
281 if i < tokens.len() && tokens[i].text == *lit
282 {
283 if !alt.pruned
284 {
285 children.push(ASTNode::new(None, 1, tokens[i].text));
286 }
287 i += 1;
289 matched = true;
290 }
291 }
292 MatchingTermE::TermRegex(regex) => if i < tokens.len() && regex.is_match_interned(tokens[i].text, &global.g.string_cache_inv)
293 {
294 if !alt.pruned
295 {
296 children.push(ASTNode::new(None, 1, tokens[i].text));
297 }
298 i += 1;
300 matched = true;
301 }
302 MatchingTermE::Directive(d) =>
303 {
304 match d
305 {
306 MatchDirective::Become | MatchDirective::BecomeAs =>
307 {
308 if let Some(MatchingTermE::Rule(id)) = alt.matching_terms.get(term_idx + 1).map(|x| &x.t)
309 {
310 g_item = &global.g.points[*id];
311 #[cfg(feature = "parse_trace")] { println!("became {} at {i}, depth {depth}", g_item.name); }
312 alt_id = 0;
313 if matches!(d, MatchDirective::BecomeAs)
314 {
315 chosen_name_id = g_item.name_id;
316 }
317 continue 'top;
318 }
319 }
320 MatchDirective::Rename => if let Some(MatchingTermE::Rule(id)) = alt.matching_terms.get(term_idx + 1).map(|x| &x.t)
321 {
322 chosen_name_id = *id as u32;
323 term_idx += 1;
324 matched = true;
325 }
326 MatchDirective::Drop => if children.len() > 0
327 {
328 children.pop();
329 matched = true;
330 }
331 MatchDirective::DropIfEmpty => if children.len() > 0
332 {
333 if matches!(children.last().unwrap().children, Some(_))
334 {
335 children.pop();
336 }
337 matched = true;
338 }
339 MatchDirective::Hoist => if children.len() > 0
340 {
341 let x = children.pop().unwrap();
342 if let Some(mut c) = x.children
343 {
344 children.append(&mut c);
345 }
346 matched = true;
347 }
348 MatchDirective::HoistIfUnit => if children.len() > 0
349 {
350 let x = children.pop().unwrap();
351 if let Some(mut c) = x.children && c.len() == 1
352 {
353 children.append(&mut c);
354 }
355 matched = true;
356 }
357 MatchDirective::Any => if i < tokens.len()
358 {
359 children.push(ASTNode::new(None, 1, tokens[i].text));
360 matched = true;
361 i += 1;
362 }
363 }
364 }
365 MatchingTermE::Hook(name) =>
366 {
367 if let Some(f) = global.hooks.get(&**name)
368 {
369 let f = Rc::clone(&f);
370 match f(global, tokens, i, &mut children)
371 {
372 Ok(consumed) => { i += consumed; }
373 Err(e) => build_err!(e, Some(term_idx as u16))?
374 }
375 }
376 else
377 {
378 build_err!(
379 format!("Unknown custom hook {:?} inside of {}",
380 name, global.g.string_cache_inv[chosen_name_id as usize]
381 ),
382 Some(term_idx as u16)
383 )?
384 }
385 matched = true;
386 }
387 _ => build_err!(
388 format!("Term type {:?} not supported in this position in a rule (context: {})",
389 term, global.g.string_cache_inv[chosen_name_id as usize]
390 ),
391 Some(term_idx as u16)
392 )?
393 }
394 if !matched
395 {
396 build_err!(
397 format!("Failed to match token at {i} in rule {} alt {alt_id}. Token is `{:?}`.\n{:?}",
398 global.g.string_cache_inv[chosen_name_id as usize],
399 tokens.get(i).map(|x| global.g.string_cache_inv[x.text as usize].clone()),
400 tokens[token_start..tokens.len().min(token_start+15)].iter()
401 .map(|x| global.g.string_cache_inv[x.text as usize].clone()).collect::<Vec<_>>()
402 ),
403 Some(term_idx as u16)
404 )?
405 }
406 term_idx += 1;
407 }
408
409 #[cfg(feature = "parse_trace")] { println!("accepted {} from {token_start} to {i}, depth {depth}", global.g.string_cache_inv[chosen_name_id as usize]); }
410 let mut token_count = (i - token_start) as u32;
411 if poisoned
412 {
413 token_count = token_count ^ !0u32;
414 }
415 return Ok(ASTNode::new(Some(children), token_count, chosen_name_id));
416 }
417
418 build_err!(
419 format!("Failed to match rule {} at token position {token_start}\n{:?}", global.g.string_cache_inv[chosen_name_id as usize],
420 tokens[token_start..tokens.len().min(token_start+15)].iter().map(|x| global.g.string_cache_inv[x.text as usize].clone()).collect::<Vec<_>>()
421 ),
422 Some(g_item.forms.len() as u16)
423 )
424}
425
426#[allow(unused)]
430pub fn visit_ast(n : &ASTNode, f : &mut dyn FnMut(&ASTNode) -> bool)
431{
432 let flag = f(n);
433 if flag && let Some(c) = &n.children
434 {
435 for c in c.iter()
436 {
437 visit_ast(c, f);
438 }
439 }
440}
441
442pub type Guard = Rc<dyn Fn(&mut PrdGlobal, &[Token], usize) -> GuardResult>;
447pub type Hook = Rc<dyn Fn(&mut PrdGlobal, &[Token], usize, &mut Vec<ASTNode>) -> Result<usize, String>>;
453
454#[allow(unused)]
455pub fn parse(
461 g : &Grammar, root_rule_name : &str, tokens : &[Token],
462 guards : Rc<HashMap<String, Guard>>,
463 hooks : Rc<HashMap<String, Hook>>,
464) -> Result<ASTNode, PrdError>
465{
466 let gp_id = g.by_name.get(root_rule_name).unwrap();
467 let mut global = PrdGlobal {
468 guards,
469 hooks,
470 udata : <_>::default(),
471 udata_r : <_>::default(),
472 g,
473 };
474
475 if let Some(f) = global.hooks.get("init")
476 {
477 let f = Rc::clone(&f);
478 let _ = f(&mut global, tokens, 0, &mut vec!());
479 }
480
481 pred_recdec_parse_impl_recursive(&mut global, *gp_id, tokens, 0, 0)
482}
483
484
485#[allow(unused)]
486pub fn print_ast_pred_recdec(ast : &ASTNode, string_cache_inv : &Vec<Rc<String>>, indent : usize)
488{
489 print!("{}", " ".repeat(indent));
490 if let Some(c) = &ast.children
491 {
492 if ast.is_poisoned() { println!("{} (POISONED) {{", ast.text); } else
493 { println!("{} {{", string_cache_inv[ast.text as usize]); }
494 for c in c.iter()
495 {
496 print_ast_pred_recdec(c, string_cache_inv, indent+1);
497 }
498 print!("{}", " ".repeat(indent));
499 println!("}};");
500 }
501 else
502 {
503 println!("{}", string_cache_inv[ast.text as usize]);
504 }
505}
506
507#[allow(unused)]
508pub fn ast_to_shape_string(ast : &ASTNode) -> String
514{
515 let mut s = "".to_string();
516 if let Some(c) = &ast.children
517 {
518 if ast.is_poisoned() { s += "p"; }
519 s += "+";
520 for c in c.iter()
521 {
522 s += &ast_to_shape_string(c);
523 }
524 s += "-";
525 }
526 else
527 {
528 s += ".";
529 }
530 s
531}