pred_recdec/ast.rs

// Predicated Recursive Descent

use std::rc::Rc;

type HashMap<K, V> = std::collections::HashMap::<K, V, crate::HashBuilder>;

use crate::bnf::*;

#[derive(Clone, Debug, Default)]
/// AST node. Grammar rules have children, tokens do not.
///
/// The total structure of the AST is defined by the grammar that it was parsed with.
///
/// Why isn't this an enum? `Option<Vec<ASTNode>>` and other two-variant enums containing a Vec undergo niche optimization. If this were `enum ASTNode { Rule{...}, Token(u32) }` then it would look like you can just add a third variant (e.g. poisoned) without issue. However, doing that would actually increase the size of the ASTNode from 32 bytes to 40 bytes.
///
/// If the size is ever forced above 32 bytes (e.g. increasing token count from u32 to u64) then I'll probably change it to an enum.
#[non_exhaustive]
pub struct ASTNode {
    /// If `Some`, this node is a parent/nonterminal. If `None`, this node is a token/leaf/terminal.
    pub children : Option<Vec<ASTNode>>,
    /// Due to error recovery, AST nodes can be marked as "poisoned".
    ///
    /// When a node is poisoned, its token count is XOR'd with !0u32 (all one-bits).
    pub token_count : u32, // IF POISONED: xor with !0u32 (all one-bits)
    /// Index into grammar.string_cache_inv, giving an `Rc<String>`.
    ///
    /// For parents, it's the name of the associated grammar rule.
    ///
    /// For tokens, it's the token content (the *actual* token text, not the pattern it was matched against, i.e. it's not a regex).
    pub text : u32,
}

impl ASTNode {
    pub fn new(children : Option<Vec<ASTNode>>, token_count : u32, text : u32) -> Self { Self { children, token_count, text } }
    pub fn is_poisoned(&self) -> bool { self.token_count >= 0x80000000 }
    pub fn get_real_token_count(&self) -> u32 { if self.token_count >= 0x80000000 { self.token_count ^ !0u32 } else { self.token_count } }
}
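
// A minimal sketch (not in the original source) exercising the poisoning encoding described
// above, plus the 32-byte size claim from the doc comment. The size check assumes a 64-bit
// target and the usual `Option<Vec<_>>` niche layout; treat it as illustrative, not a guarantee.
#[cfg(test)]
mod astnode_encoding_sketch {
    use super::*;

    #[test]
    fn poison_roundtrip() {
        // An unpoisoned parent node spanning 3 tokens.
        let clean = ASTNode::new(Some(vec!()), 3, 0);
        assert!(!clean.is_poisoned());
        assert_eq!(clean.get_real_token_count(), 3);

        // Poisoning XORs the token count with !0u32; decoding XORs it back.
        let poisoned = ASTNode::new(Some(vec!()), 3 ^ !0u32, 0);
        assert!(poisoned.is_poisoned());
        assert_eq!(poisoned.get_real_token_count(), 3);
    }

    #[test]
    #[cfg(target_pointer_width = "64")]
    fn node_size_is_32_bytes() {
        // Relies on the niche optimization discussed in the ASTNode doc comment.
        assert_eq!(std::mem::size_of::<ASTNode>(), 32);
    }
}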

// ASTs can be deeply recursive, so we need to avoid destroying them recursively.
// Collect all transitive children into self.
// This is slower than normal dropping (around 2x the runtime) but much safer.
/*
impl Drop for ASTNode {
    fn drop(&mut self)
    {
        if let Some(collected) = self.children.as_mut()
        {
            let mut i = 0;
            while i < collected.len()
            {
                if let Some(mut c) = collected[i].children.take()
                {
                    collected.append(&mut c);
                }
                i += 1;
            }
        }
    }
}
*/

/// Result of a [`Guard`] checking if a given alternation should be taken or not.
pub enum GuardResult {
    /// The alternation is chosen.
    #[allow(unused)] Accept,
    /// The alternation is rejected.
    #[allow(unused)] Reject,
    /// The guard has preemptively decided that the parse is invalid at this state and position.
    #[allow(unused)] HardError(String)
}

/// Standard "AnyMap" for putting whatever you want in it.
#[derive(Default)]
pub struct AnyMap { map: HashMap<std::any::TypeId, Box<dyn std::any::Any>> }

impl AnyMap {
    #[allow(unused)] pub fn insert<T: 'static>(&mut self, value: T) { self.map.insert(std::any::TypeId::of::<T>(), Box::new(value)); }
    #[allow(unused)] pub fn get<T: 'static>(&self) -> Option<&T> { self.map.get(&std::any::TypeId::of::<T>())?.downcast_ref::<T>() }
    #[allow(unused)] pub fn get_mut<T: 'static>(&mut self) -> Option<&mut T> { self.map.get_mut(&std::any::TypeId::of::<T>())?.downcast_mut::<T>() }
}
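
// An illustrative sketch (not part of the parser) of how AnyMap is meant to be used:
// values are keyed by their concrete type, one value per type. `CommentCount` is a
// made-up user type, not something the crate provides.
#[cfg(test)]
mod anymap_sketch {
    use super::*;

    #[test]
    fn stores_one_value_per_type() {
        struct CommentCount(u32);

        let mut map = AnyMap::default();
        map.insert(CommentCount(0));

        // Retrieval is keyed by the type, so no string or enum key is needed.
        map.get_mut::<CommentCount>().unwrap().0 += 1;
        assert_eq!(map.get::<CommentCount>().unwrap().0, 1);
        assert!(map.get::<String>().is_none());
    }
}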

/// Exposable parts of current parser state.
pub struct PrdGlobal<'a> {
    pub (crate) guards : Rc<HashMap<String, Rc<dyn Fn(&mut PrdGlobal, &[Token], usize) -> GuardResult>>>,
    pub (crate) hooks : Rc<HashMap<String, Rc<dyn Fn(&mut PrdGlobal, &[Token], usize, &mut Vec<ASTNode>) -> Result<usize, String>>>>,
    
    /// Put your impure data here.
    #[allow(unused)] pub udata : AnyMap,
    /// Put your impure data here (simple path for cached regexes).
    #[allow(unused)] pub udata_r : HashMap<usize, RegexCacher>,
    
    #[allow(unused)] pub g : &'a Grammar,
}

/// Parser error state.
#[derive(Clone, Debug)]
pub struct PrdError {
    /// Human-readable error message. Format is not guaranteed and may change arbitrarily.
    #[allow(unused)] pub err_message : String,
    /// How far into the token stream did we successfully parse? Note: this is NOT
    #[allow(unused)] pub token_index : usize,
    /// Which rule's alternations were we inside of? (Index into [`bnf::Grammar::points`](`super::bnf::Grammar::points`))
    #[allow(unused)] pub rule : u32,
    /// Which alternation was it?
    ///
    /// If `u16::MAX`, we errored out before entering an alternation.
    ///
    /// If past the end of the grammar point's list of alternations, then every alternation was checked and failed.
    #[allow(unused)] pub in_alt : u16,
    /// Where were we inside of that alternation?
    ///
    /// If `None`, we errored on a guard/lookahead test.
    #[allow(unused)] pub alt_progress : Option<u16>,
    /// On behalf of which rule were we parsing? (e.g. the parent of a `$become`)
    #[allow(unused)] pub on_behalf_of_rule : u32,
}

struct WorkState<'a> {
    pub g_item : &'a GrammarPoint,
    pub chosen_name_id : u32,
    pub children : Vec<ASTNode>,
    pub i : usize,
    pub token_start : usize,
    pub poisoned : bool,
    pub alt_id : usize,
    pub term_idx : u16,
}

impl<'a> std::fmt::Debug for WorkState<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("WorkState")
        .field("g_item.name_id", &self.g_item.name_id)
        .field("chosen_name_id", &self.chosen_name_id)
        .field("children.len()", &self.children.len())
        .field("i", &self.i)
        .field("token_start", &self.token_start)
        .field("poisoned", &self.poisoned)
        .field("alt_id", &self.alt_id)
        .field("term_idx", &self.term_idx)
        .finish()
    }
}

fn error_builder(msg : String, i : usize, id : u32, behalf : u32, in_alt : u16, prog : Option<u16>) -> Box<PrdError>
{
    Box::new(PrdError {
        err_message : msg, token_index : i, rule : id, on_behalf_of_rule : behalf,
        in_alt : in_alt, alt_progress : prog
    })
}
macro_rules! build_err { ($prog:expr, $ws:expr, $($tts:tt)*) => { {
    #[inline(never)]
    fn ___build_err_temp(a : std::fmt::Arguments) -> String {
        format!("{a}")
    }
    Err(error_builder(___build_err_temp(format_args!($($tts)*)), $ws.i, $ws.g_item.name_id, $ws.chosen_name_id, $ws.alt_id.wrapping_sub(1) as u16, $prog))
} } }
#[inline(never)]
fn token_name(cache : &Vec<Rc<String>>, t : Option<&Token>) -> String
{
    if let Some(t) = t { (*cache[t.text as usize]).clone() }
    else { "<no token>".to_string() }
}

#[inline(always)]
fn handle_matchterm(
    global : &mut PrdGlobal, tokens : &[Token],
    ws : &mut WorkState, term: &MatchingTermE, matched : &mut bool)
    -> Result<(), Box<PrdError>>
{
    let alt = &ws.g_item.forms[ws.alt_id as usize];
    match &term
    {
        MatchingTermE::Rule(_) => {} // already handled
        MatchingTermE::TermLit(lit) =>
        {
            if ws.i < tokens.len() && tokens[ws.i].text == *lit
            {
                if !alt.pruned
                {
                    ws.children.push(ASTNode::new(None, 1, tokens[ws.i].text));
                }
                ws.i += 1;
                *matched = true;
            }
        }
        MatchingTermE::TermRegex(regex) => if ws.i < tokens.len() && regex.is_match_interned(tokens[ws.i].text, &global.g.string_cache_inv)
        {
            if !alt.pruned
            {
                ws.children.push(ASTNode::new(None, 1, tokens[ws.i].text));
            }
            ws.i += 1;
            *matched = true;
        }
        MatchingTermE::Directive(d) =>
        {
            match d
            {
                MatchDirective::Become | MatchDirective::BecomeAs => {} // handled externally
                MatchDirective::Rename =>
                {
                    if let Some(qx) = alt.matching_terms.get(ws.term_idx as usize + 1) && let MatchingTermE::Rule(id) = &qx.t
                    {
                        ws.chosen_name_id = *id as u32;
                        ws.term_idx += 1;
                        *matched = true;
                    }
                }
                MatchDirective::Drop => if ws.children.len() > 0
                {
                    ws.children.pop();
                    *matched = true;
                }
                MatchDirective::DropIfEmpty => if ws.children.len() > 0
                {
                    if ws.children.last().unwrap().children.is_some()
                    {
                        ws.children.pop();
                    }
                    *matched = true;
                }
                MatchDirective::Hoist => if ws.children.len() > 0
                {
                    let x = ws.children.pop().unwrap();
                    if let Some(mut c) = x.children
                    {
                        ws.children.append(&mut c);
                    }
                    *matched = true;
                }
                MatchDirective::HoistIfUnit => if ws.children.len() > 0
                {
                    let x = ws.children.pop().unwrap();
                    if let Some(mut c) = x.children && c.len() == 1
                    {
                        ws.children.append(&mut c);
                    }
                    *matched = true;
                }
                MatchDirective::Any => if ws.i < tokens.len()
                {
                    ws.children.push(ASTNode::new(None, 1, tokens[ws.i].text));
                    *matched = true;
                    ws.i += 1;
                }
            }
        }
        MatchingTermE::Hook(name) =>
        {
            if let Some(f) = global.hooks.get(&**name)
            {
                let f = Rc::clone(&f);
                match f(global, tokens, ws.i, &mut ws.children)
                {
                    Ok(consumed) => { ws.i += consumed; }
                    Err(e) => build_err!(Some(ws.term_idx as u16), ws, "{}", e)?
                }
            }
            else
            {
                build_err!(
                    Some(ws.term_idx as u16), ws,
                    "Unknown custom hook {:?} inside of {}",
                    name,
                    global.g.string_cache_inv[ws.chosen_name_id as usize],
                )?
            }
            *matched = true;
        }
        _ => build_err!(
                Some(ws.term_idx as u16), ws,
                "Term type {:?} not supported in this position in a rule (context: {})",
                term, global.g.string_cache_inv[ws.chosen_name_id as usize]
            )?
    }
    Ok(())
}

#[inline(always)]
fn handle_acceptance(
    global : &mut PrdGlobal, tokens : &[Token],
    alt : &Alternation,
    ws : &WorkState, accepted : &mut bool)
    -> Result<usize, Box<PrdError>>
{
    match &alt.matching_terms.get(0).as_ref().unwrap().t
    {
        MatchingTermE::Guard(guard) =>
        {
            *accepted = false;
            if let Some(f) = global.guards.get(&**guard)
            {
                let f = Rc::clone(&f);
                match f(global, tokens, ws.i)
                {
                    GuardResult::Accept => *accepted = true,
                    GuardResult::HardError(e) => build_err!(None, ws, "{}", e)?,
                    _ => {}
                }
            }
            else
            {
                return build_err!(None, ws, "Unknown guard {guard}");
            }
            return Ok(1);
        }
        MatchingTermE::Peek(loc, tester) =>
        {
            *accepted = false;
            let loc = (ws.i as isize + loc) as usize;
            if loc < tokens.len() && tokens[loc].text == *tester
            {
                *accepted = true;
            }
            return Ok(1);
        }
        MatchingTermE::PeekR(loc, tester) =>
        {
            *accepted = false;
            let loc = (ws.i as isize + loc) as usize;
            if loc < tokens.len() && tester.is_match_interned(tokens[loc].text, &global.g.string_cache_inv)
            {
                *accepted = true;
            }
            return Ok(1);
        }
        MatchingTermE::PeekRes(loc, tester) =>
        {
            *accepted = false;
            let loc = (ws.i as isize + loc) as usize;
            if loc < tokens.len() && tester.is_match_interned(tokens[loc].text, &global.g.string_cache_inv)
            {
                *accepted = true;
                if let Some(r) = &global.g.reserved
                {
                    if regex_is_match(r, &global.g.string_cache_inv[tokens[loc].text as usize]) { *accepted = false; }
                }
            }
            return Ok(1);
        }
        MatchingTermE::Eof =>
        {
            *accepted = ws.i == tokens.len();
            return Ok(1);
        }
        _ => {}
    }
    Ok(0)
}

#[inline(always)]
fn check_recovery(
    global : &mut PrdGlobal, tokens : &[Token],
    ws : &mut WorkState, child : &mut Result<ASTNode, Box<PrdError>>, id : usize)
    -> Result<(), Box<PrdError>>
{
    if child.is_err() && global.g.points[id].recover.is_some()
    {
        if let Some((r, after)) = &global.g.points[id].recover
        {
            let mut j = ws.i + 1;
            while j < tokens.len() && !r.is_match(&global.g.string_cache_inv[tokens[j].text as usize])
            {
                j += 1;
            }
            if j < tokens.len()
            {
                if *after { j += 1; }
                *child = Ok(ASTNode::new(Some(vec!()), (j - ws.i) as u32 ^ !0u32, global.g.points[id].name_id));
            }
        }
    }
    Ok(())
}

fn make_workstate<'a>(
    g_item : &'a GrammarPoint, token_start : usize,
) -> WorkState<'a>
{
    let ws = WorkState {
        g_item, chosen_name_id : g_item.name_id, children : vec!(),
        i : token_start, token_start : token_start, poisoned : false, alt_id : 0, term_idx : 0
    };
    ws
}

#[inline(never)]
pub (crate) fn pred_recdec_parse_impl_recursive(
    global : &mut PrdGlobal,
    gp_id : usize, tokens : &[Token], _token_start : usize,
    depth : usize
) -> Result<ASTNode, Box<PrdError>>
{
    // BE WARNED: There are a bunch of very strange patterns in this function that look like they have no purpose,
    //   but exist to trick LLVM into spilling less data onto the stack.
    // A non-recursive implementation (`pred_recdec_parse_impl_lifo`, below) also exists; it's a bit slower.
    const DEPTH_LIMIT : usize = if cfg!(debug_assertions) { 300 } else { 1500 };
    
    let mut ws = make_workstate(&global.g.points[gp_id], _token_start);
    
    if depth > DEPTH_LIMIT { return build_err!(None, ws, "Exceeded recursion depth limit of {DEPTH_LIMIT}."); }
    
    #[cfg(feature = "parse_trace")] { println!("entered {} at {}, depth {depth}", global.g.string_cache_inv[ws.chosen_name_id as usize], ws.i); }
    
    // Structured this way for the sake of $become
    // 1) We can't use an iterator because then we can't go back to alternation 0.
    // 2) We can't have a "find alt" loop followed by a non-loop process block because then we can't go back to the "find alt" loop during $BECOME.
    'top: while (ws.alt_id as usize) < ws.g_item.forms.len()
    {
        let alt = &ws.g_item.forms[ws.alt_id as usize];
        ws.term_idx = 0;
        
        if alt.matching_terms.len() == 0
        {
            return Ok(ASTNode::new(Some(ws.children), (ws.i - ws.token_start) as u32, ws.chosen_name_id));
        }
        else
        {
            let mut accepted = true;
            let a = handle_acceptance(
                global, tokens,
                &ws.g_item.forms[ws.alt_id as usize],
                &mut ws, &mut accepted
            )?;
            if !accepted { ws.alt_id += 1; ws.term_idx = 0; continue; }
            ws.term_idx += a as u16;
        }
        
        #[cfg(feature = "parse_trace")] { println!("chose variant {}", ws.alt_id); }
        
        if ws.children.capacity() == 0
        {
            ws.children.reserve_exact(alt.matching_terms.len());
        }
        
        while (ws.term_idx as usize) < alt.matching_terms.len()
        {
            let term = &alt.matching_terms[ws.term_idx as usize].t;
            let mut matched = false;
            match &term
            {
                MatchingTermE::Rule(id) =>
                {
                    let mut child = pred_recdec_parse_impl_recursive(global, *id, tokens, ws.i, depth + 1);
                    check_recovery(global, tokens, &mut ws, &mut child, *id)?;
                    #[cfg(feature = "deep_errors")]
                    {
                        if let Err(e) = child
                        {
                            let mut e2 = e.clone();
                            e2.err_message = format!("In {}: {}", ws.g_item.name, e2.err_message);
                            child = Err(e2);
                        }
                    }
                    let child = child?;
                    if child.is_poisoned()
                    {
                        ws.poisoned = true;
                    }
                    ws.i += child.get_real_token_count() as usize;
                    #[cfg(feature = "parse_trace")]
                    { println!("added {} to i via child", child.get_real_token_count()); }
                    ws.children.push(child);
                    matched = true;
                }
                MatchingTermE::Directive(MatchDirective::Become | MatchDirective::BecomeAs) =>
                {
                    if let Some(qx) = alt.matching_terms.get(ws.term_idx as usize + 1) && let MatchingTermE::Rule(id) = &qx.t
                    {
                        ws.g_item = &global.g.points[*id];
                        ws.alt_id = 0;
                        ws.term_idx = 0;
                        #[cfg(feature = "parse_trace")] { println!("became {} at {}, depth {depth}", ws.g_item.name, ws.i); }
                        if matches!(term, MatchingTermE::Directive(MatchDirective::BecomeAs))
                        {
                            ws.chosen_name_id = ws.g_item.name_id;
                        }
                        continue 'top;
                    }
                }
                _ => {}
            }
            handle_matchterm(global, tokens, &mut ws, term, &mut matched)?;
            
            if !matched
            {
                let token_text = token_name(&global.g.string_cache_inv, tokens.get(ws.i));
                build_err!(
                    Some(ws.term_idx as u16), &ws,
                    "Failed to match token at {} in rule {} alt {}. Token is `{}`.",
                    ws.i, global.g.string_cache_inv[ws.g_item.name_id as usize], ws.alt_id, token_text,
                )?
            }
            ws.term_idx += 1;
        }
        
        //println!("Validating {:?}", ws);
        
        #[cfg(feature = "parse_trace")] { println!("accepted {} from {} to {}, depth {depth}", global.g.string_cache_inv[ws.g_item.name_id as usize], ws.token_start, ws.i); }
        let mut token_count = (ws.i - ws.token_start) as u32;
        if ws.poisoned
        {
            token_count = token_count ^ !0u32;
        }
        return Ok(ASTNode::new(Some(ws.children), token_count, ws.chosen_name_id));
    }
    
    build_err!(
        Some(ws.g_item.forms.len() as u16),
        &ws,
        "Failed to match rule {} at token position {}",
        global.g.string_cache_inv[ws.g_item.name_id as usize],
        ws.token_start
    )
}

#[inline(never)]
pub (crate) fn pred_recdec_parse_impl_lifo(
    global : &mut PrdGlobal,
    gp_id : usize, tokens : &[Token], _token_start : usize,
) -> Result<ASTNode, Box<PrdError>>
{
    let mut stack : Vec<WorkState> = vec!();
    stack.reserve(1024);
    let mut ready_child : Option<(Result<_, _>, u32)> = None;
    let mut ws = make_workstate(&global.g.points[gp_id], _token_start);
    
    #[cfg(feature = "parse_trace")]
    { println!("entered {} at {}", global.g.string_cache_inv[ws.g_item.name_id as usize], ws.i); }
    
    'top: while (ws.alt_id as usize) < ws.g_item.forms.len()
    {
        macro_rules! set_ready_child { ($ex:expr) => { {
            ready_child = Some($ex);
            ws = stack.pop().unwrap();
        } } }
        macro_rules! engage_ready_child { ($ex:expr) => { {
            ready_child = Some($ex);
            ws = stack.pop().unwrap();
            
            while let Some((child, _)) = &ready_child && child.is_ok()
            {
                let child = ready_child.take().unwrap().0.unwrap();
                if child.is_poisoned()
                {
                    ws.poisoned = true;
                }
                ws.i += child.get_real_token_count() as usize;
                #[cfg(feature = "parse_trace")]
                { println!("added {} to i via child", child.get_real_token_count()); }
                ws.children.push(child);
                ws.term_idx += 1;
                
                let alt = &ws.g_item.forms[ws.alt_id as usize];
                
                if ws.term_idx as usize >= alt.matching_terms.len()
                {
                    // Apply the same poison encoding as the other node-completion paths.
                    let mut token_count = (ws.i - ws.token_start) as u32;
                    if ws.poisoned { token_count = token_count ^ !0u32; }
                    if stack.len() == 0
                    {
                        return Ok(ASTNode::new(Some(ws.children), token_count, ws.chosen_name_id));
                    }
                    else
                    {
                        set_ready_child!((Ok(ASTNode::new(Some(ws.children), token_count, ws.chosen_name_id)), ws.g_item.name_id));
                    }
                }
            }
            
            continue 'top;
        } } }
        
        macro_rules! errify { ($x:expr) => { {
            let _e = $x;
            if stack.len() > 0 {
                if let Err(e) = _e {
                    engage_ready_child!((Err(e), 0));
                } else { _e.unwrap() }
            } else { _e? }
        }}}
        
        #[cfg(feature = "parse_trace")]
        let depth = stack.len() + 1;
        
        let mut alt = &ws.g_item.forms[ws.alt_id as usize];
        
        if let Some((mut child, id)) = ready_child.take()
        {
            errify!(check_recovery(global, tokens, &mut ws, &mut child, id as usize));
            
            let child = errify!(child);
            if child.is_poisoned()
            {
                ws.poisoned = true;
            }
            ws.i += child.get_real_token_count() as usize;
            #[cfg(feature = "parse_trace")]
            { println!("added {} to i via child", child.get_real_token_count()); }
            ws.children.push(child);
            ws.term_idx += 1;
            
            if ws.term_idx as usize >= alt.matching_terms.len()
            {
                // Apply the same poison encoding as the other node-completion paths.
                let mut token_count = (ws.i - ws.token_start) as u32;
                if ws.poisoned { token_count = token_count ^ !0u32; }
                if stack.len() == 0
                {
                    return Ok(ASTNode::new(Some(ws.children), token_count, ws.chosen_name_id));
                }
                else
                {
                    engage_ready_child!((Ok(ASTNode::new(Some(ws.children), token_count, ws.chosen_name_id)), ws.g_item.name_id));
                }
            }
            continue 'top;
        }
        if ws.term_idx == 0
        {
            for id in 0..ws.g_item.forms.len()
            {
                ws.alt_id = id;
                ws.term_idx = 0;
                alt = &ws.g_item.forms[ws.alt_id as usize];
                if alt.matching_terms.len() == 0
                {
                    if stack.len() == 0
                    {
                        return Ok(ASTNode::new(Some(ws.children), (ws.i - ws.token_start) as u32, ws.chosen_name_id));
                    }
                    else
                    {
                        engage_ready_child!((Ok(ASTNode::new(Some(ws.children), (ws.i - ws.token_start) as u32, ws.chosen_name_id)), ws.g_item.name_id));
                    }
                }
                else
                {
                    let mut accepted = true;
                    let a = errify!(handle_acceptance(global, tokens, alt, &mut ws, &mut accepted));
                    if !accepted
                    {
                        if ws.alt_id as usize + 1 >= ws.g_item.forms.len()
                        {
                            ws.alt_id += 1;
                            break 'top;
                        }
                        continue;
                    }
                    ws.term_idx += a as u16;
                    break;
                }
            }
            #[cfg(feature = "parse_trace")] { println!("chose variant {}", ws.alt_id); }
            
            if ws.children.capacity() == 0
            {
                ws.children.reserve_exact(alt.matching_terms.len());
            }
        }
        
        while (ws.term_idx as usize) < alt.matching_terms.len()
        {
            let term = &alt.matching_terms[ws.term_idx as usize].t;
            match &term
            {
                MatchingTermE::Rule(id) =>
                {
                    let next_ws = make_workstate(&global.g.points[*id], ws.i);
                    stack.push(ws);
                    ws = next_ws;
                    
                    #[cfg(feature = "parse_trace")]
                    { println!("entered {} at {}, depth {depth}", global.g.string_cache_inv[ws.g_item.name_id as usize], ws.i); }
                    
                    continue 'top;
                }
                MatchingTermE::Directive(MatchDirective::Become | MatchDirective::BecomeAs) =>
                {
                    if let Some(qx) = alt.matching_terms.get(ws.term_idx as usize + 1) && let MatchingTermE::Rule(id) = &qx.t
                    {
                        ws.g_item = &global.g.points[*id];
                        ws.alt_id = 0;
                        ws.term_idx = 0;
                        #[cfg(feature = "parse_trace")] { println!("became {} at {}, depth {depth}", ws.g_item.name, ws.i); }
                        if matches!(term, MatchingTermE::Directive(MatchDirective::BecomeAs))
                        {
                            ws.chosen_name_id = ws.g_item.name_id;
                        }
                        continue 'top;
                    }
                }
                _ => {}
            }
            let mut matched = false;
            errify!(handle_matchterm(global, tokens, &mut ws, term, &mut matched));
            
            if !matched
            {
                let token_text = token_name(&global.g.string_cache_inv, tokens.get(ws.i));
                errify!(build_err!(
                    Some(ws.term_idx as u16), &ws,
                    "Failed to match token at {} in rule {} alt {}. Token is `{}`.",
                    ws.i, global.g.string_cache_inv[ws.g_item.name_id as usize], ws.alt_id, token_text,
                ))
            }
            ws.term_idx += 1;
        }
        
        #[cfg(feature = "parse_trace")] { println!("accepted {} from {} to {}, depth {depth}", global.g.string_cache_inv[ws.g_item.name_id as usize], ws.token_start, ws.i); }
        let mut token_count = (ws.i - ws.token_start) as u32;
        if ws.poisoned
        {
            token_count = token_count ^ !0u32;
        }
        
        if stack.len() == 0
        {
            return Ok(ASTNode::new(Some(ws.children), token_count, ws.chosen_name_id));
        }
        else
        {
            engage_ready_child!((Ok(ASTNode::new(Some(ws.children), token_count, ws.chosen_name_id)), ws.g_item.name_id));
        }
    }
    
    build_err!(
        Some(ws.g_item.forms.len() as u16),
        &ws,
        "Failed to match rule {} at token position {}",
        global.g.string_cache_inv[ws.g_item.name_id as usize],
        ws.token_start,
    )
}

/// Visit the AST with a possibly-impure callback. The AST itself cannot be modified this way.
///
/// Takes a function that returns whether it's OK to also visit the children of this node.
#[allow(unused)]
pub fn visit_ast(n : &ASTNode, f : &mut dyn FnMut(&ASTNode) -> bool)
{
    let flag = f(n);
    if flag && let Some(c) = &n.children
    {
        for c in c.iter()
        {
            visit_ast(c, f);
        }
    }
}
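
// A small sketch of visit_ast usage (not from the original source): counting leaf (token)
// nodes in a hand-built AST. Returning `false` from the callback would skip a node's
// children, which is what the "OK to also visit the children" flag is for.
#[cfg(test)]
mod visit_ast_sketch {
    use super::*;

    #[test]
    fn counts_leaves() {
        // A parent spanning two tokens, each represented as a leaf node.
        let ast = ASTNode::new(
            Some(vec!(ASTNode::new(None, 1, 0), ASTNode::new(None, 1, 1))),
            2, 0);

        let mut leaves = 0;
        visit_ast(&ast, &mut |n : &ASTNode| {
            if n.children.is_none() { leaves += 1; }
            true // keep descending into children
        });
        assert_eq!(leaves, 2);
    }
}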

/// Arguments:
/// - `&mut PrdGlobal` - context
/// - `&[Token]` - tokenstream
/// - `usize` - position in tokenstream.
pub type Guard = Rc<dyn Fn(&mut PrdGlobal, &[Token], usize) -> GuardResult>;
/// Arguments:
/// - `&mut PrdGlobal` - context
/// - `&[Token]` - tokenstream
/// - `usize` - position in tokenstream.
/// - `&mut Vec<ASTNode>` - current partially-produced AST item.
pub type Hook = Rc<dyn Fn(&mut PrdGlobal, &[Token], usize, &mut Vec<ASTNode>) -> Result<usize, String>>;
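
// An illustrative sketch (not part of the parser) of how a Guard and a Hook can be written
// and registered. The names "nonempty" and "take_one" are invented for this example; real
// guards/hooks are looked up by whatever names the grammar actually references.
#[allow(unused)]
fn example_guard_hook_tables() -> (Rc<HashMap<String, Guard>>, Rc<HashMap<String, Hook>>)
{
    // Guard: accept the alternation only if at least one token remains.
    fn nonempty(_global : &mut PrdGlobal, tokens : &[Token], i : usize) -> GuardResult
    {
        if i < tokens.len() { GuardResult::Accept } else { GuardResult::Reject }
    }
    // Hook: consume one token verbatim, push it as a leaf node, and report 1 token consumed.
    fn take_one(_global : &mut PrdGlobal, tokens : &[Token], i : usize, children : &mut Vec<ASTNode>) -> Result<usize, String>
    {
        match tokens.get(i)
        {
            Some(t) => { children.push(ASTNode::new(None, 1, t.text)); Ok(1) }
            None => Err("expected a token, found end of stream".to_string())
        }
    }
    
    let mut guards : HashMap<String, Guard> = <_>::default();
    guards.insert("nonempty".to_string(), Rc::new(nonempty));
    let mut hooks : HashMap<String, Hook> = <_>::default();
    hooks.insert("take_one".to_string(), Rc::new(take_one));
    (Rc::new(guards), Rc::new(hooks))
}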

#[allow(unused)]
/// *Recursive implementation:* Parse the given token stream (produced by [`bnf::tokenize`](`super::bnf::tokenize`)) into an AST, using the given [`bnf::Grammar`](`super::bnf::Grammar`), and taking the given root rule name as the starting point.
/// 
/// Guards and hooks are for advanced usage (e.g. parsing C).
/// 
/// This implementation is vulnerable to deep recursion issues. Depending on compiler settings, the recursion depth limit may vary from 300 to 3000, with very little control. For this reason, there's a hard depth limit of about 1500 in release mode and 300 in debug mode.
///
/// However, this version is about 4% faster than [`parse`].
/// 
/// See also: [`ASTNode`], [`parse`]
pub fn parse_recursive(
    g : &Grammar, root_rule_name : &str, tokens : &[Token],
    guards : Rc<HashMap<String, Guard>>,
    hooks : Rc<HashMap<String, Hook>>,
) -> Result<ASTNode, Box<PrdError>>
{
    let gp_id = g.by_name.get(root_rule_name).unwrap();
    let mut global = PrdGlobal { guards, hooks, udata : <_>::default(), udata_r : <_>::default(), g };
    
    if let Some(f) = global.hooks.get("init")
    {
        let f = Rc::clone(&f);
        let _ = f(&mut global, tokens, 0, &mut vec!());
    }
    
    pred_recdec_parse_impl_recursive(&mut global, *gp_id, tokens, 0, 0)
}

#[allow(unused)]
/// *Worklist implementation:* Parse the given token stream (produced by [`bnf::tokenize`](`super::bnf::tokenize`)) into an AST, using the given [`bnf::Grammar`](`super::bnf::Grammar`), and taking the given root rule name as the starting point.
/// 
/// Guards and hooks are for advanced usage (e.g. parsing C).
/// 
/// This implementation is not vulnerable to deep recursion issues. Each level of nesting depth consumes only 64 more bytes of memory.
/// 
/// However, this version is about 4% slower than [`parse_recursive`]. I'm going to keep working on optimizing it.
/// 
/// This implementation does not yet support the "deep_errors" feature. When it does, adding support for it will not be considered a breaking change.
/// 
/// See also: [`ASTNode`], [`parse_recursive`]
pub fn parse(
    g : &Grammar, root_rule_name : &str, tokens : &[Token],
    guards : Rc<HashMap<String, Guard>>,
    hooks : Rc<HashMap<String, Hook>>,
) -> Result<ASTNode, Box<PrdError>>
{
    let gp_id = g.by_name.get(root_rule_name).unwrap();
    let mut global = PrdGlobal { guards, hooks, udata : <_>::default(), udata_r : <_>::default(), g };
    
    if let Some(f) = global.hooks.get("init")
    {
        let f = Rc::clone(&f);
        let _ = f(&mut global, tokens, 0, &mut vec!());
    }
    
    pred_recdec_parse_impl_lifo(&mut global, *gp_id, tokens, 0)
}
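
// A hedged end-to-end sketch: given an already-built `Grammar` and a token stream produced
// by `bnf::tokenize` (their construction is out of scope here), run the worklist parser with
// no guards or hooks. The root rule name "program" is an assumption; note that `parse` panics
// if the root rule name is not present in the grammar.
#[allow(unused)]
fn example_parse_no_hooks(g : &Grammar, tokens : &[Token]) -> Result<ASTNode, Box<PrdError>>
{
    let guards : Rc<HashMap<String, Guard>> = Rc::new(<_>::default());
    let hooks : Rc<HashMap<String, Hook>> = Rc::new(<_>::default());
    // Nothing here forces the whole token stream to be consumed; grammars typically use an
    // end-of-input check (see `MatchingTermE::Eof`) if they want that guarantee.
    parse(g, "program", tokens, guards, hooks)
}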


#[allow(unused)]
/// For debugging only: print out the given AST.
pub fn print_ast_pred_recdec(ast : &ASTNode, string_cache_inv : &Vec<Rc<String>>, indent : usize)
{
    print!("{}", " ".repeat(indent));
    if let Some(c) = &ast.children
    {
        if ast.is_poisoned() { println!("{} (POISONED) {{", string_cache_inv[ast.text as usize]); } else
        { println!("{} {{", string_cache_inv[ast.text as usize]); }
        for c in c.iter()
        {
            print_ast_pred_recdec(c, string_cache_inv, indent+1);
        }
        print!("{}", " ".repeat(indent));
        println!("}};");
    }
    else
    {
        println!("{}", string_cache_inv[ast.text as usize]);
    }
}

#[allow(unused)]
/// For testing only: convert the AST's shape to a string.
///
/// Parents turn into `+<contents>-`. If poisoned (i.e. containing any error recovery), they're prefixed with `p`.
///
/// Leaves turn into `.`
pub fn ast_to_shape_string(ast : &ASTNode) -> String
{
    let mut s = "".to_string();
    if let Some(c) = &ast.children
    {
        if ast.is_poisoned() { s += "p"; }
        s += "+";
        for c in c.iter()
        {
            s += &ast_to_shape_string(c);
        }
        s += "-";
    }
    else
    {
        s += ".";
    }
    s
}
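
// A small illustration of the shape encoding (not from the original source): a clean parent
// with two leaves becomes "+..-", and a poisoned parent (token count XOR'd with !0u32) gains
// a "p" prefix.
#[cfg(test)]
mod shape_string_sketch {
    use super::*;

    #[test]
    fn shapes() {
        let clean = ASTNode::new(Some(vec!(ASTNode::new(None, 1, 0), ASTNode::new(None, 1, 1))), 2, 2);
        assert_eq!(ast_to_shape_string(&clean), "+..-");

        let poisoned = ASTNode::new(Some(vec!(ASTNode::new(None, 1, 0))), 1 ^ !0u32, 2);
        assert_eq!(ast_to_shape_string(&poisoned), "p+.-");
    }
}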