// rustemo/glr/parser.rs

//! Right Nulled GLR parser implementation
//!
//! The implementation is based on this paper:
//! Elizabeth Scott and Adrian Johnstone. 2006. Right nulled GLR parsers. ACM
//! Trans. Program. Lang. Syst. 28, 4 (July 2006), 577–618.
//! <https://doi.org/10.1145/1146809.1146810>

use crate::{
    context::Context,
    error::error_expected,
    glr::gss::Parent,
    input::Input,
    lexer::{Lexer, Token},
    location::Location,
    log,
    lr::{
        builder::SliceBuilder,
        parser::{Action, LRParser, ParserDefinition},
    },
    parser::{Parser, State},
    utils::Dedup,
    Error, Result,
};
#[cfg(debug_assertions)]
use colored::*;
use petgraph::prelude::*;
use std::{
    borrow::Borrow,
    cell::RefCell,
    collections::{BTreeMap, VecDeque},
    fmt::{Debug, Display},
    marker::PhantomData,
    ops::Range,
    rc::Rc,
};

use super::gss::{Forest, GssGraph, GssHead, SPPFTree, TreeData};

/// The start of a reduction. For length 0 it carries the node of the
/// reduction (an empty reduction, thus the path is empty), while for
/// length > 0 it is the first edge along the reduction path.
#[derive(Debug)]
enum ReductionStart {
    Edge(EdgeIndex),
    Node(NodeIndex),
}
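
// A minimal standalone sketch (not used by the parser) of the distinction
// above: empty reductions are anchored at a node, while non-empty reductions
// commit to a concrete first back-edge. Plain `usize` indices stand in for
// petgraph's NodeIndex/EdgeIndex.
#[cfg(test)]
mod reduction_start_sketch {
    enum Start {
        Edge(usize),
        Node(usize),
    }

    fn start_for(length: usize, head: usize, first_backedge: usize) -> Start {
        if length == 0 {
            Start::Node(head)
        } else {
            Start::Edge(first_backedge)
        }
    }

    #[test]
    fn empty_reductions_are_node_based() {
        assert!(matches!(start_for(0, 1, 7), Start::Node(1)));
        assert!(matches!(start_for(2, 1, 7), Start::Edge(7)));
    }
}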

/// Used by the GLR algorithm to keep track of pending reductions.
#[derive(Debug)]
struct Reduction<P> {
    start: ReductionStart,

    /// The production to reduce by
    production: P,

    /// The length of the reduction path. Determined by the RHS of the grammar
    /// production for non right-nulled productions. For right-nulled
    /// productions it is the number of non-nullable symbol references on the
    /// left side of the production RHS.
    length: usize,
}
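
// A standalone sketch of one plausible way such a right-nulled length could
// be derived (an illustration, not the table generator's actual code):
// count the RHS symbols to the left of the maximal nullable suffix.
#[cfg(test)]
mod rn_length_sketch {
    fn rn_length(rhs_nullable: &[bool]) -> usize {
        rhs_nullable.len()
            - rhs_nullable
                .iter()
                .rev()
                .take_while(|&&nullable| nullable)
                .count()
    }

    #[test]
    fn nullable_suffix_is_excluded() {
        // A -> B C D where C and D are nullable: reduce over a path of length 1.
        assert_eq!(rn_length(&[false, true, true]), 1);
        // No nullable suffix: the full RHS length.
        assert_eq!(rn_length(&[false, false]), 2);
    }
}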

/// A reduction path is determined by the root node of the reduction together
/// with the parent links along the path containing sub-results (sub-trees).
#[derive(Debug)]
struct ReductionPath<'i, I, P, TK>
where
    I: Input + ?Sized,
    TK: Copy,
{
    /// Parents along the path
    parents: VecDeque<Rc<Parent<'i, I, P, TK>>>,

    /// The root of the reduction
    root_head: NodeIndex,
}

impl<I, P, TK> Display for ReductionPath<'_, I, P, TK>
where
    I: Input + ?Sized,
    TK: Copy,
{
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut pariter = self.parents.iter().rev();
        if let Some(parent) = pariter.next() {
            write!(f, "{}", parent.head_node.index())?;
            for parent in pariter {
                write!(f, " -> ")?;
                write!(f, "{}", parent.head_node.index())?;
            }
            write!(f, " -> ")?;
        }
        write!(f, "{}", self.root_head.index())
    }
}
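
// A standalone analogue (plain indices instead of GSS nodes) of the Display
// impl above: parents are stored root-to-head, so the path is printed in
// reverse, from the reduced head down to the reduction root.
#[cfg(test)]
mod path_display_sketch {
    use std::collections::VecDeque;

    #[test]
    fn path_prints_head_first_root_last() {
        // Front of the deque is nearest to the root.
        let parents: VecDeque<u32> = VecDeque::from([2, 5]);
        let root = 0u32;
        let mut parts: Vec<String> = parents.iter().rev().map(u32::to_string).collect();
        parts.push(root.to_string());
        assert_eq!(parts.join(" -> "), "5 -> 2 -> 0");
    }
}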

type Content<'i, L, I, S, TK> =
    <<L as Lexer<'i, GssHead<'i, I, S, TK>, S, TK>>::Input as ToOwned>::Owned;

type LayoutParser<'i, I, S, P, TK, NTK, D, L> =
    Option<LRParser<'i, GssHead<'i, I, S, TK>, S, P, TK, NTK, D, L, SliceBuilder<'i, I>, I>>;

/// An implementation of Right-Nulled GLR parsing (RNGLR)
pub struct GlrParser<
    'i,
    S: State,
    L: Lexer<'i, GssHead<'i, I, S, TK>, S, TK, Input = I>,
    P,
    TK: Default,
    NTK,
    D: ParserDefinition<S, P, TK, NTK> + 'static,
    I: Input + ?Sized,
    B,
> {
    /// Parser definition generated by Rustemo
    definition: &'static D,

    /// The file path, if any, or `<str>` if parsing a string
    file_name: String,

    /// The owned input being parsed
    content: Option<Content<'i, L, I, S, TK>>,

    /// Layout parser, if the grammar defines a Layout rule. The parser is
    /// kept in a RefCell as it must be mutable while finding lookaheads.
    layout_parser: RefCell<LayoutParser<'i, I, S, P, TK, NTK, D, L>>,

    /// Whether partial parse is allowed, i.e. not requiring that the whole
    /// input is consumed. Use with care in GLR as it can lead to a *huge*
    /// number of possible solutions/trees.
    partial_parse: bool,
    start_position: usize,
    has_layout: bool,
    lexer: Rc<L>,

    phantom: PhantomData<(NTK, B)>,
}
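
// A minimal standalone sketch of the interior-mutability pattern used for
// `layout_parser` above: a RefCell lets the parser be installed and mutated
// through a shared `&self`.
#[cfg(test)]
mod refcell_slot_sketch {
    use std::cell::RefCell;

    #[test]
    fn refcell_allows_mutation_through_shared_ref() {
        let slot: RefCell<Option<u32>> = RefCell::new(None);
        let install = |s: &RefCell<Option<u32>>| *s.borrow_mut() = Some(42);
        install(&slot);
        assert_eq!(*slot.borrow(), Some(42));
    }
}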

impl<'i, S, L, P, TK, NTK, D, I, B> GlrParser<'i, S, L, P, TK, NTK, D, I, B>
where
    I: Input + ?Sized + Debug,
    L: Lexer<'i, GssHead<'i, I, S, TK>, S, TK, Input = I>,
    S: State + Ord + Debug,
    D: ParserDefinition<S, P, TK, NTK>,
    TK: Copy + Default + PartialEq + Ord + Debug + 'i,
    P: Copy + Debug + Into<NTK> + PartialEq,
{
    pub fn new(definition: &'static D, partial_parse: bool, has_layout: bool, lexer: L) -> Self {
        Self {
            file_name: "<str>".into(),
            content: None,
            layout_parser: RefCell::new(None),
            definition,
            partial_parse,
            start_position: 0,
            has_layout,
            lexer: Rc::new(lexer),
            phantom: PhantomData,
        }
    }

    /// Create pending shifts and reductions for the initial frontier.
    fn initial_process_frontier(
        &self,
        gss: &mut GssGraph<'i, I, S, P, TK>,
        frontier: &BTreeMap<(usize, TK), BTreeMap<S, NodeIndex>>,
        pending_reductions: &mut BTreeMap<(usize, TK), VecDeque<Reduction<P>>>,
        pending_shifts: &mut Vec<(NodeIndex, S)>,
        accepted_heads: &mut Vec<NodeIndex>,
    ) {
        for ((position, token_kind), subfrontier) in frontier {
            log!(
                "\n{} {:?} {} {}.",
                "Preparing subfrontier for token".red(),
                token_kind,
                "at position".red(),
                position
            );
            for (&state, head) in subfrontier {
                log!("  {}", format!("Processing head {}", head.index()).green());
                for action in self
                    .definition
                    .actions(state, gss.head(*head).token_ahead().as_ref().unwrap().kind)
                {
                    match action {
                        Action::Reduce(prod, length) => {
                            if length == 0 {
                                log!(
                                    "    {} '{:?}' over head {} by len {}",
                                    "Register new reduction".green(),
                                    prod,
                                    head.index(),
                                    length
                                );
                                pending_reductions
                                    .entry((*position, *token_kind))
                                    .or_default()
                                    .push_back(Reduction {
                                        start: ReductionStart::Node(*head),
                                        production: prod,
                                        length,
                                    })
                            } else {
                                for edge in gss.backedges(*head) {
                                    log!(
                                        "    {} '{:?}' over edge {} -> {} by len {}",
                                        "Register new reduction".green(),
                                        prod,
                                        edge.source().index(),
                                        edge.target().index(),
                                        length
                                    );
                                    pending_reductions
                                        .entry((*position, *token_kind))
                                        .or_default()
                                        .push_back(Reduction {
                                            start: ReductionStart::Edge(edge.id()),
                                            production: prod,
                                            length,
                                        })
                                }
                            }
                        }
                        Action::Shift(state) => {
                            log!(
                                "    {}",
                                format!("Adding head {} to pending shifts.", head.index()).green()
                            );
                            pending_shifts.push((*head, state))
                        }
                        Action::Accept => {
                            log!("    {}", format!("Accepting head {}.", head.index()).red());
                            accepted_heads.push(*head)
                        }
                        Action::Error => break,
                    }
                }
            }
        }
    }

    /// From the current frontier base create the full frontier with
    /// per-token-kind sub-frontiers.
    ///
    /// If a head has multiple possible tokens ahead (lexical ambiguity),
    /// split the head and create a head per token, thus handling lexical
    /// ambiguities using the same GLR mechanics.
    fn create_frontier(
        &self,
        gss: &mut GssGraph<'i, I, S, P, TK>,
        frontier_base: &Vec<NodeIndex>,
        input: &'i I,
    ) -> BTreeMap<(usize, TK), BTreeMap<S, NodeIndex>> {
        let mut frontier: BTreeMap<(usize, TK), BTreeMap<S, NodeIndex>> = BTreeMap::new();
        for &head_idx in frontier_base {
            // Multiple heads are possible per state in case of lexical ambiguity.
            let head = gss.head(head_idx);
            if let Some(token) = &head.token_ahead() {
                // The token ahead may already be present after error recovery.
                frontier
                    .entry((head.position(), token.kind))
                    .or_default()
                    .insert(head.state(), head_idx);
            } else {
                // Find lookaheads
                log!(
                    "  {}.",
                    format!("Finding lookaheads for head {}", head_idx.index()).green()
                );
                let mut lookahead_tokens = self.find_lookaheads(gss, head_idx, input).into_iter();
                let head = gss.head_mut(head_idx);
                let position = head.position();
                if let Some(token) = lookahead_tokens.next() {
                    frontier
                        .entry((position, token.kind))
                        .or_default()
                        .insert(head.state(), head_idx);

                    let token_start_pos = token.location.start.position();
                    if token_start_pos > head.position() {
                        head.set_layout_ahead(Some(input.slice(head.position()..token_start_pos)));
                    }
                    head.set_token_ahead(token);
                    log!(
                        "    {} {}: {:?}",
                        "Added lookahead for head".to_string().green(),
                        head_idx.index(),
                        head
                    );

                    let state = head.state();
                    // If more tokens are found we have lexical ambiguity. Make
                    // a new head for each token.
                    for lookahead in lookahead_tokens {
                        frontier
                            .entry((position, lookahead.kind))
                            .or_default()
                            .insert(state, self.head_for_lookahead(gss, head_idx, lookahead));
                    }
                } else {
                    log!("No lookaheads found. Killing head.");
                }
            }
        }
        frontier
    }

    /// Find all possible lookahead tokens for the given head. There can be
    /// more than one token at the current location due to lexical ambiguity.
    ///
    /// If a Layout rule is given in the grammar, the layout parser is used to
    /// skip whitespace/comments before recognizing the next tokens.
    ///
    /// A standalone illustration of the longest-match strategy applied here
    /// is given in the `longest_match_sketch` test module below.
    fn find_lookaheads(
        &self,
        gss: &mut GssGraph<'i, I, S, P, TK>,
        head: NodeIndex,
        input: &'i I,
    ) -> Vec<Token<'i, I, TK>> {
        let head = gss.head_mut(head);
        let expected_tokens = self.definition.expected_token_kinds(head.state());
        let mut layout_parsing = true;
        loop {
            let mut tokens: Vec<_> = self
                .lexer
                .next_tokens(head, input, expected_tokens.clone())
                .collect();

            if !tokens.is_empty() {
                if tokens.len() > 1 {
                    log!(
                        "{} Trying configured disambiguation strategies.",
                        "Lexical ambiguity.".red()
                    );
                    // We still have lexical ambiguity after priorities and most
                    // specific match handled by table construction and string
                    // lexer. Try to disambiguate if additional strategies are
                    // configured.
                    if D::longest_match() {
                        log!(
                            "{}",
                            "Applying longest match disambiguation strategy".green()
                        );
                        // Longest match strategy for lexical disambiguation
                        let longest_len = tokens
                            .iter()
                            .max_by_key(|token| token.value.len())
                            .unwrap()
                            .value
                            .len();
                        tokens.retain(|token| token.value.len() == longest_len);
                        log!("{} {:?}", "Tokens retained:".green(), &tokens);
                    }

                    if D::grammar_order() {
                        // Take the first token by grammar order. This is safe
                        // as at least one token must be in the tokens vector.
                        log!(
                            "{}",
                            "Applying grammar order disambiguation strategy".green()
                        );
                        tokens.truncate(1)
                    }
                }
                return tokens;
            } else if layout_parsing {
                layout_parsing = false;
                if let Some(layout_parser) = self.layout_parser.borrow_mut().as_ref() {
                    log!("\n{}", "*** Parsing layout".red().bold());
                    let current_state = head.state();
                    head.set_state(S::default_layout().unwrap());
                    let p = layout_parser.parse_with_context(head, input);
                    log!("Layout is {p:?}");
                    head.set_state(current_state);
                    if let Ok(Some(layout)) = p {
                        if layout.len() > 0 {
                            log!("Skipping layout: {layout:?}");
                            head.set_layout_ahead(Some(layout));
                            log!("\n{}", "*** Parsing content".red().bold());
                            continue;
                        }
                    }
                }
            }
            break;
        }

        // No tokens were found. For partial parsing return STOP if it is
        // expected, even if we are not at the end of the input.
        let stop_kind = <TK as Default>::default();
        if self.partial_parse && expected_tokens.iter().any(|tk| tk.0 == stop_kind) {
            vec![Token {
                kind: stop_kind,
                value: &input[0..0],
                location: head.location(),
            }]
        } else {
            vec![]
        }
    }

    /// Create a new head based on `head_idx` with the given lookahead.
    /// Used in lexical ambiguity.
    fn head_for_lookahead(
        &self,
        gss: &mut GssGraph<'i, I, S, P, TK>,
        head_idx: NodeIndex,
        lookahead: Token<'i, I, TK>,
    ) -> NodeIndex {
        let head = gss.head(head_idx).with_tok(lookahead);

        #[cfg(debug_assertions)]
        let new_head_str = format!("{:?}", head);
        let new_head = gss.add_head(head);

        log!(
            "    {} {}: {}",
            "Created head for lookahead".green(),
            new_head.index(),
            new_head_str
        );

        let new_parents: Vec<_> = gss
            .backedges(head_idx)
            .map(|e| {
                (
                    e.target(),
                    Rc::new({
                        let p = e.weight();
                        Parent {
                            head_node: new_head,
                            root_node: p.root_node,
                            possibilities: p.possibilities.clone(),
                        }
                    }),
                )
            })
            .collect();

        for (target, parent) in new_parents {
            // Copy all parent edges
            gss.add_parent(new_head, target, parent);
        }

        new_head
    }

    /// Starting from the queue of pending reductions, execute reductions
    /// until no more can be done. For each reduced head register a shift
    /// operation if possible.
    fn reducer(
        &self,
        gss: &mut GssGraph<'i, I, S, P, TK>,
        pending_reductions: &mut VecDeque<Reduction<P>>,
        pending_shifts: &mut Vec<(NodeIndex, S)>,
        accepted_heads: &mut Vec<NodeIndex>,
        subfrontier: &mut BTreeMap<S, NodeIndex>,
    ) {
        log!(
            "\n{}{}",
            "Reducing".red(),
            format!(
                " - {} pending reduction start(s)",
                if pending_reductions.is_empty() {
                    "no".to_owned()
                } else {
                    pending_reductions.len().to_string()
                }
            )
            .green()
        );
        while let Some(reduction) = pending_reductions.pop_front() {
            let production = reduction.production;
            let start_head = match reduction.start {
                ReductionStart::Edge(e) => gss.start(e),
                ReductionStart::Node(n) => n,
            };
            log!(
                "\n{} '{:?}' over {} by len {}",
                "Reducing by production".green(),
                production,
                match reduction.start {
                    ReductionStart::Edge(e) =>
                        format!("edge {} -> {}", gss.start(e).index(), gss.end(e).index()),
                    ReductionStart::Node(n) => format!("head {}", n.index()),
                },
                reduction.length
            );
            for path in self.find_reduction_paths(gss, &reduction) {
                log!("  {} {path}", "Reducing over path:".green());
                let token_kind_ahead = gss.head(start_head).token_ahead().as_ref().unwrap().kind;
                let root_state = gss.head(path.root_head).state();
                let next_state = self.definition.goto(root_state, production.into());

                // Get all non-error actions
                let actions = self.definition.actions(next_state, token_kind_ahead);

                if actions.is_empty() {
                    log!(
                        "    No actions for new state {:?} and lookahead {:?}. Skipping.",
                        next_state,
                        token_kind_ahead
                    );
                } else {
                    // Find a head with the same state or create a new one if
                    // it doesn't exist.
                    let mut head_created = false;
                    let head = if let Some(head) = subfrontier.get(&next_state) {
                        log!(
                            "    {}",
                            format!("Head {} with the same state already exists.", head.index())
                                .green()
                        );
                        *head
                    } else {
                        // Create a new head
                        let shead = gss.head(start_head);
                        let new_head = shead
                            .with_tok_state(shead.token_ahead().cloned().unwrap(), next_state);
                        #[cfg(debug_assertions)]
                        let new_head_str = format!("{:?}", new_head);
                        let new_head_idx = gss.add_head(new_head);
                        subfrontier.insert(next_state, new_head_idx);
                        log!(
                            "    {} {}: {}",
                            "Created reduced head".green(),
                            new_head_idx.index(),
                            new_head_str
                        );
                        head_created = true;
                        new_head_idx
                    };

                    // Find an edge between the head and the root_head or
                    // create a new one if it doesn't exist.
                    let mut edge_created = false;
                    let edge = if let Some(edge) = gss.edge_between(head, path.root_head) {
                        log!(
                            "      {}",
                            format!(
                                "Edge {} -> {} already exists. Not created.",
                                head.index(),
                                path.root_head.index()
                            )
                            .green()
                        );
                        edge
                    } else {
                        // Create a new edge
                        log!(
                            "      {} {} -> {}.",
                            "Created edge".green(),
                            head.index(),
                            path.root_head.index()
                        );
                        edge_created = true;
                        gss.add_parent(
                            head,
                            path.root_head,
                            Rc::new(Parent::new(path.root_head, head, vec![])),
                        )
                    };

                    let is_new_solution = head_created || edge_created
                        // It is not a new solution if we already have a
                        // solution based on the same production.
                        || gss.parent(edge).possibilities.borrow().iter().all(
                            |t| match **t {
                                SPPFTree::Term { .. } | SPPFTree::Empty => false,
                                SPPFTree::NonTerm { prod, ref children, .. } => {
                                    prod != production || (path.parents.len() == children.borrow().len())
                                }
                            },
                        );

                    if !is_new_solution {
                        log!(
                            "    {}",
                            format!(
                                "Solution {} -> {} based on production '{:?}' exists. Extending right-nulled children",
                                head.index(),
                                path.root_head.index(),
                                production
                            )
                            .green()
                        );
                        // Replace children for this solution
                        for possibility in gss.parent(edge).possibilities.borrow_mut().iter_mut() {
                            if let SPPFTree::NonTerm { prod, children, .. } = &**possibility {
                                if *prod == production
                                    && (path.parents.len() > children.borrow().len())
                                {
                                    *children.borrow_mut() = path.parents;
                                    break;
                                }
                            }
                        }
                    } else {
                        log!(
                            "    {}",
                            format!(
                                "Register new solution for {} -> {}.",
                                head.index(),
                                path.root_head.index()
                            )
                            .green()
                        );

                        let root_head = gss.head(path.root_head);
                        let range = if path.parents.is_empty() {
                            Range {
                                start: root_head.position(),
                                end: root_head.position(),
                            }
                        } else {
                            Range {
                                start: <SPPFTree<'_, I, P, TK> as Context<'_, I, S, TK>>::range(
                                    &path.parents[0].possibilities.borrow()[0],
                                )
                                .start,
                                end: <SPPFTree<'_, I, P, TK> as Context<'_, I, S, TK>>::range(
                                    &path.parents[path.parents.len() - 1].possibilities.borrow()
                                        [0],
                                )
                                .end,
                            }
                        };
                        let location = if path.parents.is_empty() {
                            let end = root_head.location().into();
                            Location {
                                start: end,
                                end: Some(end),
                            }
                        } else {
                            Location {
                                start:
                                    <SPPFTree<'_, I, P, TK> as Context<'_, I, S, TK>>::location(
                                        &path.parents[0].possibilities.borrow()[0],
                                    )
                                    .start,
                                end: Some(
                                    <SPPFTree<'_, I, P, TK> as Context<'_, I, S, TK>>::location(
                                        &path.parents[path.parents.len() - 1]
                                            .possibilities
                                            .borrow()[0],
                                    )
                                    .end
                                    .unwrap(),
                                ),
                            }
                        };
                        let solution = Rc::new(SPPFTree::NonTerm {
                            prod: production,
                            data: TreeData {
                                range,
                                location,
                                layout: root_head.layout_ahead(),
                            },
                            children: RefCell::new(path.parents),
                        });
                        gss.parent(edge).possibilities.borrow_mut().push(solution);

                        // Register actions
                        for action in actions {
                            match action {
                                Action::Reduce(production, length) => {
                                    if (edge_created && length > 0) || head_created {
                                        let start = if length > 0 {
                                            ReductionStart::Edge(edge)
                                        } else {
                                            ReductionStart::Node(head)
                                        };
                                        log!(
                                            "      {} '{:?}' {} by len {}",
                                            "Register new reduction".green(),
                                            production,
                                            match start {
                                                ReductionStart::Edge(e) => format!(
                                                    "over edge {} -> {}",
                                                    gss.start(e).index(),
                                                    gss.end(e).index()
                                                ),
                                                ReductionStart::Node(n) =>
                                                    format!("over head {}", n.index()),
                                            },
                                            length
                                        );
                                        pending_reductions.push_back(Reduction {
                                            start,
                                            production,
                                            length,
                                        });
                                    }
                                }
                                Action::Shift(s) => {
                                    if head_created {
                                        log!(
                                            "      {}",
                                            format!(
                                                "Adding head {} to pending shifts.",
                                                head.index()
                                            )
                                            .green()
                                        );
                                        pending_shifts.push((head, s));
                                    }
                                }
                                Action::Accept => {
                                    if head_created {
                                        log!(
                                            "      {}",
                                            format!("Accepting head {}.", head.index()).red()
                                        );
                                        accepted_heads.push(head)
                                    }
                                }
                                Action::Error => panic!("Cannot happen!"),
                            }
                        }
                    }
                }
            }
        }
    }

    /// Do all pending shifts and create the next frontier base.
    fn shifter(
        &self,
        gss: &mut GssGraph<'i, I, S, P, TK>,
        pending_shifts: &mut Vec<(NodeIndex, S)>,
        frontier_idx: usize,
    ) -> Vec<NodeIndex> {
        log!(
            "\n{}{}",
            "Shifting".red(),
            format!(
                " - {} pending shift(s)",
                if pending_shifts.is_empty() {
                    "no".to_owned()
                } else {
                    pending_shifts.len().to_string()
                }
            )
            .green()
        );
        let mut frontier_base = BTreeMap::new();
        while let Some((head_idx, state)) = pending_shifts.pop() {
            let head = gss.head(head_idx);
            let token = head.token_ahead().cloned().unwrap();
            let position = head.position() + token.value.len();
            log!(
                "{}",
                format!(
                    "Shifting head {} by token {:?}.",
                    head_idx.index(),
                    token.value
                )
                .green()
            );
            let (shifted_head_idx, range, location) = match frontier_base.get(&(state, position)) {
                Some(&shifted_head_idx) => {
                    log!("  {}", "Head already exists. Adding new edge.".green());
                    let shifted_head = gss.head(shifted_head_idx);
                    (
                        shifted_head_idx,
                        shifted_head.range(),
                        shifted_head.location(),
                    )
                }
                None => {
                    let new_head = GssHead::new(
                        state,
                        frontier_idx,
                        position,
                        head.position()..position,
                        token.location,
                        None,
                        None,
                    );
                    #[cfg(debug_assertions)]
                    let new_head_str = format!("{new_head:?}");
                    let (new_head_range, new_head_location) =
                        (new_head.range(), new_head.location());
                    let new_head_idx = gss.add_head(new_head);
                    log!(
                        "  {}: {new_head_str}",
                        format!("Creating new shifted head {}", new_head_idx.index()).green()
                    );
                    frontier_base.insert((state, position), new_head_idx);
                    (new_head_idx, new_head_range, new_head_location)
                }
            };
            gss.add_solution(
                shifted_head_idx,
                head_idx,
                Rc::new(SPPFTree::Term {
                    token,
                    data: TreeData {
                        range,
                        location,
                        // FIXME:
                        layout: None,
                    },
                }),
            );
        }
        frontier_base.into_values().collect()
    }

    /// For the given reduction find all possible reduction paths by tracing
    /// back through the GSS for the reduction length. A standalone sketch of
    /// this search is given in the `path_search_sketch` test module below.
    fn find_reduction_paths(
        &self,
        gss: &mut GssGraph<'i, I, S, P, TK>,
        reduction: &Reduction<P>,
    ) -> Vec<ReductionPath<'i, I, P, TK>> {
        log!(
            "  {}",
            format!(
                "Finding reduction paths for length {} from head {}.",
                reduction.length,
                match reduction.start {
                    ReductionStart::Node(head) => head.index(),
                    ReductionStart::Edge(start_edge) => gss.start(start_edge).index(),
                }
            )
            .green()
        );
        let mut paths = vec![];
        match reduction.start {
            ReductionStart::Node(head) => {
                debug_assert!(reduction.length == 0, "Node based reductions must be EMPTY");
                log!(
                    "  {}",
                    format!("Found EMPTY reduction path for head {}", head.index()).green()
                );
                paths.push(ReductionPath {
                    parents: VecDeque::new(),
                    root_head: head,
                });
                return paths;
            }
            ReductionStart::Edge(start_edge) => {
                debug_assert!(
                    reduction.length != 0,
                    "Edge based reduction must not be EMPTY"
                );
                #[derive(Debug)]
                struct PendingPath<'i, I: Input + ?Sized, P, TK: Copy> {
                    current_root: NodeIndex,
                    left_to_go: usize,
                    parents: VecDeque<Rc<Parent<'i, I, P, TK>>>,
                }
                let mut pending_paths: VecDeque<PendingPath<I, P, TK>> = VecDeque::new();
                pending_paths.push_back(PendingPath {
                    current_root: gss.end(start_edge),
                    left_to_go: reduction.length - 1,
                    parents: VecDeque::from([gss.parent(start_edge)]),
                });

                while let Some(path) = pending_paths.pop_front() {
                    if path.left_to_go > 0 {
                        // We still have to traverse the path
                        for edge in gss.backedges(path.current_root) {
                            let mut new_ambiguities = path.parents.clone();
                            new_ambiguities.push_front(edge.weight().clone());
                            pending_paths.push_back(PendingPath {
                                current_root: edge.target(),
                                left_to_go: path.left_to_go - 1,
                                parents: new_ambiguities,
                            })
                        }
                    } else {
                        // The last traversal step
                        let path = ReductionPath {
                            parents: path.parents,
                            root_head: path.current_root,
                        };
                        log!("  {}: {path}", "Found reduction path".green());
                        paths.push(path);
                    }
                }
            }
        }
        log!(
            "  {}",
            format!("Reduction paths found: {}", paths.len()).green()
        );
        paths
    }

    fn create_forest(
        &self,
        gss: GssGraph<'i, I, S, P, TK>,
        accepted_heads: Vec<NodeIndex>,
    ) -> Forest<'i, I, P, TK>
    where
        TK: Copy,
    {
        Forest::new(
            accepted_heads
                .into_iter()
                .flat_map(|head| {
                    gss.backedges(head).flat_map(|p| {
                        p.weight()
                            .possibilities
                            .borrow()
                            .iter()
                            .map(|n| (*n).clone())
                            .collect::<Vec<_>>()
                    })
                })
                .collect::<Vec<_>>(),
        )
    }

    /// Create an error based on the last frontier when no progress can be
    /// made and no heads have been accepted.
    fn make_error(
        &self,
        gss: GssGraph<'i, I, S, P, TK>,
        input: &I,
        last_frontier_base: Vec<NodeIndex>,
    ) -> Error {
        // TODO: Different heads may have progressed differently due to
        // context-aware lexing. Thus, the error report should take into
        // account that it is reporting for multiple possible parses.
        let expected = {
            let mut expected = last_frontier_base
                .iter()
                .flat_map(|&head_idx| {
                    self.definition
                        .expected_token_kinds(gss.head(head_idx).state())
                        .into_iter()
                        .map(|t| t.0)
                        .collect::<Vec<_>>()
                })
                .collect::<Vec<_>>();
            expected.clear_duplicates();
            expected
        };

        let context = gss.head(
            *last_frontier_base
                .first()
                .expect("There must be a head in the last frontier!"),
        );

        let error = error_expected(input, &self.file_name, context, &expected);

        log!(
            "\n{}. {}",
            "Syntax error".red(),
            format!("{:?}", error).green()
        );
        error
    }
}
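
// A standalone sketch of the breadth-first path enumeration performed by
// `find_reduction_paths` above: collect all backward paths of a fixed length,
// forking on every back-edge. Plain adjacency lists stand in for the GSS.
#[cfg(test)]
mod path_search_sketch {
    use std::collections::VecDeque;

    #[test]
    fn all_paths_of_given_length_are_found() {
        // Back-edges: 3 -> {1, 2}, 1 -> {0}, 2 -> {0}.
        let backedges: Vec<Vec<usize>> = vec![vec![], vec![0], vec![0], vec![1, 2]];
        let mut paths = vec![];
        // (current root, edges left to traverse, path so far)
        let mut pending = VecDeque::from([(3usize, 2usize, vec![3usize])]);
        while let Some((node, left, path)) = pending.pop_front() {
            if left == 0 {
                paths.push(path);
                continue;
            }
            for &prev in &backedges[node] {
                let mut path = path.clone();
                path.push(prev);
                pending.push_back((prev, left - 1, path));
            }
        }
        assert_eq!(paths, vec![vec![3, 1, 0], vec![3, 2, 0]]);
    }
}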
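
// A standalone sketch of the longest-match lexical disambiguation applied in
// `find_lookaheads`: when several tokens match, keep only those with the
// longest matched value (string slices stand in for tokens here).
#[cfg(test)]
mod longest_match_sketch {
    #[test]
    fn longest_match_keeps_only_longest_tokens() {
        let mut tokens = vec!["if", "iffy", "i"];
        let longest_len = tokens.iter().map(|t| t.len()).max().unwrap();
        tokens.retain(|t| t.len() == longest_len);
        assert_eq!(tokens, vec!["iffy"]);
    }
}
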
impl<'i, I, S, TK, NTK, L, P, D, B> Parser<'i, I, GssHead<'i, I, S, TK>, S, TK>
    for GlrParser<'i, S, L, P, TK, NTK, D, I, B>
where
    I: Input + ?Sized + Debug,
    L: Lexer<'i, GssHead<'i, I, S, TK>, S, TK, Input = I>,
    S: State + Debug + Ord,
    P: Copy + Debug + Into<NTK> + PartialEq,
    TK: Copy + Debug + Ord + Default + 'i,
    D: ParserDefinition<S, P, TK, NTK>,
{
    type Output = Forest<'i, I, P, TK>;

    fn parse(&self, input: &'i I) -> Result<Self::Output> {
        let mut context = GssHead::default();
        context.set_position(self.start_position);
        self.parse_with_context(&mut context, input)
    }

    fn parse_with_context(
        &self,
        context: &mut GssHead<'i, I, S, TK>,
        input: &'i I,
    ) -> Result<Self::Output> {
        let mut gss: GssGraph<'i, I, S, P, TK> = GssGraph::new();
        let start_head = gss.add_head(context.clone());
        if self.has_layout {
            *self.layout_parser.borrow_mut() = Some(LRParser::new_default(
                self.definition,
                S::default_layout().expect("Layout state not defined."),
                true,
                false,
                Rc::clone(&self.lexer),
                RefCell::new(SliceBuilder::new(input)),
            ))
        }

        log!("{}: {:?}", "Current state".green(), context.state());

        // A frontier represents the current "shift level": starting from the
        // shifted nodes, it also contains all the reduced nodes up to the
        // next shifted nodes, which will form the basis of the next frontier.
        // All nodes in a frontier with the same LR state are considered
        // equal, so a map structure is used for quick access.
        //
        // This is the base of the frontier which is created before lookaheads
        // are found. The full frontier will be created by the
        // `create_frontier` method.
        //
        // The initial frontier base U0 has only the start head, with the
        // state and position taken from the context.
        let mut frontier_idx = 0usize;
        let mut frontier_base: Vec<NodeIndex> = vec![start_head];

        // We keep track of the last base frontier for error reporting.
        let mut last_frontier_base: Vec<NodeIndex> = vec![];

        // Shifts that will be the basis of the next frontier base.
        let mut pending_shifts: Vec<(NodeIndex, S)> = vec![];

        // A queue of reductions that need to be done per subfrontier (the
        // grouping is illustrated by the `frontier_keying_sketch` test module
        // at the end of this file).
        let mut pending_reductions: BTreeMap<(usize, TK), VecDeque<Reduction<P>>> =
            Default::default();

        let mut accepted_heads: Vec<NodeIndex> = vec![];

        while !frontier_base.is_empty() {
            let mut frontier = self.create_frontier(&mut gss, &frontier_base, input);
            // Create initial shifts/reductions for this frontier
            self.initial_process_frontier(
                &mut gss,
                &frontier,
                &mut pending_reductions,
                &mut pending_shifts,
                &mut accepted_heads,
            );
            for ((position, token_kind), subfrontier) in frontier.iter_mut() {
                log!(
                    "\n{} {:?} {} {}.",
                    "Reducing for subfrontier for token".red(),
                    token_kind,
                    "at position".red(),
                    position
                );
                // Reduce everything that is possible for this subfrontier
                self.reducer(
                    &mut gss,
                    pending_reductions
                        .entry((*position, *token_kind))
                        .or_default(),
                    &mut pending_shifts,
                    &mut accepted_heads,
                    subfrontier,
                );
            }
            frontier_idx += 1;
            // Do shifts and create the next base frontier
            let fb = self.shifter(&mut gss, &mut pending_shifts, frontier_idx);
            if fb.is_empty() {
                last_frontier_base = frontier_base;
            }
            frontier_base = fb;
        }

        if !accepted_heads.is_empty() {
            let forest = self.create_forest(gss, accepted_heads);
            log!(
                "\n{}. {}",
                "Finished".red(),
                format!("{} solutions found.", forest.solutions()).green()
            );
            Ok(forest)
        } else {
            Err(self.make_error(gss, input, last_frontier_base))
        }
    }

    fn parse_file<'a, F: AsRef<std::path::Path>>(&'a mut self, file: F) -> Result<Self::Output>
    where
        'a: 'i,
    {
        self.content = Some(I::read_file(file.as_ref())?);
        self.file_name = file.as_ref().to_string_lossy().into();
        self.parse(self.content.as_ref().unwrap().borrow())
    }
}
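
// A standalone sketch of how pending work is grouped per subfrontier in
// `parse_with_context` above: reductions are keyed by (input position, token
// kind), with `u8` standing in for the token-kind type.
#[cfg(test)]
mod frontier_keying_sketch {
    use std::collections::{BTreeMap, VecDeque};

    #[test]
    fn reductions_are_grouped_per_subfrontier() {
        let mut pending: BTreeMap<(usize, u8), VecDeque<&str>> = BTreeMap::new();
        pending.entry((0, 1)).or_default().push_back("A: B C");
        pending.entry((0, 1)).or_default().push_back("A: D");
        pending.entry((3, 2)).or_default().push_back("E: F");
        assert_eq!(pending[&(0, 1)].len(), 2);
        assert_eq!(pending.len(), 2);
    }
}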