use super::ast::{
    Ast, Attribute as AstAttribute, Element as AstElement, Expression, Item, Proxy as AstProxy,
    Rule as AstRule, ToplevelDeclaration,
};
use super::grammar::{
    Attribute, Axioms, Element, ElementType, NonTerminalDescription, NonTerminalName,
    Nullables, Proxy, Rule, RuleId, Rules, ValueTemplate,
};
use super::parser::{NonTerminalId, ParseResult, Parser, Value, AST};
use crate::typed::Spanned;
use crate::{
    build_system,
    builder::{select_format, Buildable, FileResult, Format},
    error::{Error, ErrorKind, Result},
    lexer::{Grammar as LexerGrammar, LexedStream, Lexer, TerminalId, Token},
    list::List,
    regex::Allowed,
    span::Span,
    stream::StringStream,
    typed::Tree,
};
use bincode::deserialize;
use fragile::Fragile;
use itertools::Itertools;
use newty::{newty, nvec};
use serde::{Deserialize, Serialize};
use std::cmp::{Ordering, Reverse};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet, VecDeque};
use std::fmt;
use std::fs::File;
use std::io::Read;
use std::path::{Path, PathBuf};
use std::rc::Rc;

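/// Prints every Earley state set in `sets`, one item per line, in the classic
/// dotted-rule notation (`Sum -> Sum • PM Product (0)`), where the trailing
/// number is the set the item originated from. Debugging helper.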
pub fn print_sets(sets: &[StateSet], parser: &EarleyParser, lexer: &Lexer) {
    for (i, set) in sets.iter().enumerate() {
        println!("=== {} ===", i);
        for item in set.slice() {
            let mut line = String::new();
            let rule = &parser.grammar().rules[item.rule];
            line.push_str(&parser.grammar().name_of[rule.id]);
            line.push_str(" -> ");
            for idx in 0..item.position {
                line.push_str(&rule.elements[idx].name(lexer.grammar(), parser.grammar()));
                line.push(' ');
            }
            line.push_str("• ");
            for idx in item.position..rule.elements.len() {
                line.push_str(&rule.elements[idx].name(lexer.grammar(), parser.grammar()));
                line.push(' ');
            }
            line.push_str(&format!("({})", item.origin));
            println!("{}", line);
        }
        println!();
    }
}

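/// Prints every final (completed-item) set in `sets`; each line shows a fully
/// recognised rule with its attributes and keys, and the set in which it
/// ends. Debugging helper.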
pub fn print_final_sets(sets: &[FinalSet], parser: &EarleyParser, lexer: &Lexer) {
    for (i, set) in sets.iter().enumerate() {
        println!("=== {} ===", i);
        for item in &set.set.0 {
            let rule = &parser.grammar().rules[item.rule];
            print!("{} -> ", parser.grammar().name_of[rule.id]);
            for element in rule.elements.iter() {
                print!("{}", element.name(lexer.grammar(), parser.grammar()));
                match &element.attribute {
                    Attribute::Indexed(index) => print!(".{}", index),
                    Attribute::Named(name) => print!(".{}", name),
                    Attribute::None => {}
                }
                if let Some(key) = &element.key {
                    print!("@{}", key);
                }
                print!(" ");
            }
            println!("({})", item.end);
        }
        println!();
    }
}

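// `Table` holds one state set per input position; `Forest` mirrors it with
// the completed items used to build the syntax tree.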
type Table = Vec<StateSet>;
type Forest = Vec<FinalSet>;

newty! {
    #[derive(serde::Serialize, serde::Deserialize)]
    pub vec RulesMap(Vec<RuleId>)[NonTerminalId]
}

newty! {
    pub vec IsIn(Vec<RuleId>)[NonTerminalId]
}

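/// A single Earley item: a rule, the position of the "fat dot" within it, and
/// the state set in which the item was created.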
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
struct EarleyItem {
    /// The rule being recognised.
    rule: RuleId,
    /// The state set in which this item was first predicted.
    origin: usize,
    /// How many elements of the rule have been matched so far.
    position: usize,
    /// Whether an ancestor non-terminal has already been reported among the
    /// expected alternatives, so that its terminals are not listed as well.
    parent_has_been_shown: bool,
}

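/// A completed Earley item, as stored in the forest: a fully matched rule
/// together with the state set in which the match ends. Its starting position
/// is the index of the `FinalSet` that holds it.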
#[derive(Debug, Clone, PartialEq, Eq)]
struct FinalItem {
    /// The rule that was fully matched.
    rule: RuleId,
    /// The state set in which the match ends.
    end: usize,
}

impl fmt::Display for FinalItem {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "#{}\t\t({})", self.rule, self.end)
    }
}

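/// The grammar of an Earley parser: the rules themselves plus the precomputed
/// tables (axioms, nullable non-terminals, name/description/rule lookups)
/// that the recogniser needs.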
#[derive(Serialize, Deserialize, Debug)]
pub struct EarleyGrammar {
    /// The non-terminals a parse may start from.
    axioms: Axioms,
    /// The rules.
    rules: Rules,
    /// The non-terminals that can derive the empty string.
    nullables: Nullables,
    /// Maps the name of a non-terminal to its identifier.
    id_of: HashMap<Rc<str>, NonTerminalId>,
    /// Maps the identifier of a non-terminal to its name.
    name_of: NonTerminalName,
    /// Maps the identifier of a non-terminal to its description, if any.
    description_of: NonTerminalDescription,
    /// Maps the identifier of a non-terminal to the rules it is the left-hand
    /// side of.
    rules_of: RulesMap,
}

impl EarleyGrammar {
    fn has_rules(&self, id: NonTerminalId) -> &[RuleId] {
        &self.rules_of[id]
    }

    pub fn new(
        rules: Rules,
        axioms: Axioms,
        id_of: HashMap<Rc<str>, NonTerminalId>,
        name_of: NonTerminalName,
        description_of: NonTerminalDescription,
    ) -> Result<Self> {
        let nb_non_terminals = axioms.len_as();
        let mut nullables = Nullables::with_capacity(nb_non_terminals);
        let mut rules_of = nvec![RulesMap Vec::new(); nb_non_terminals];
        let mut is_in = nvec![IsIn Vec::new(); nb_non_terminals];
        let mut stack = VecDeque::with_capacity(is_in.len());
        for (i, rule) in rules.iter().enumerate() {
            let rule_id = RuleId(i);
            let lhs_id = rule.id;
            rules_of[lhs_id].push(rule_id);
            if rule.elements.is_empty() {
                nullables.insert(lhs_id);
                stack.push_front(lhs_id);
            }
            for element in rule.elements.iter() {
                if let ElementType::NonTerminal(rhs_id) = element.element_type {
                    is_in[rhs_id].push(rule_id);
                }
            }
        }

        // Propagate nullability: a non-terminal is nullable if some rule for
        // it has only nullable non-terminals on its right-hand side.
        while let Some(current) = stack.pop_back() {
            for &rule_id in &is_in[current] {
                let lhs_id = rules[rule_id].id;
                if !nullables.contains(lhs_id)
                    && rules[rule_id].elements.iter().all(|element| {
                        match element.element_type {
                            ElementType::NonTerminal(id) => nullables.contains(id),
                            _ => false,
                        }
                    })
                {
                    nullables.insert(lhs_id);
                    stack.push_front(lhs_id);
                }
            }
        }

        Ok(Self {
            axioms,
            rules,
            nullables,
            id_of,
            name_of,
            description_of,
            rules_of,
        })
    }

    pub fn name_of(&self, id: NonTerminalId) -> Rc<str> {
        self.name_of[id].clone()
    }

    pub fn description_of(&self, id: NonTerminalId) -> Option<Rc<str>> {
        self.description_of[id].clone()
    }

    pub fn id_of(&self, name: Rc<str>) -> NonTerminalId {
        self.id_of[&name]
    }
}

impl EarleyGrammar {
    const PLAIN_EXTENSION: &'static str = "gr";
    const COMPILED_EXTENSION: &'static str = "cgr";
    const AST_EXTENSION: &'static str = "gr.ast";

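    /// Deserializes a grammar that was compiled ahead of time with `bincode`.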
    pub fn build_from_compiled(
        blob: &[u8],
        path: impl ToOwned<Owned = PathBuf>,
    ) -> Result<Self> {
        deserialize(blob).map_err(|error| Error::with_file(error, path.to_owned()))
    }

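    /// Builds the grammar from the typed AST of a grammar file: registers
    /// every top-level declaration, expands macro invocations on demand, and
    /// finally assembles the rule and axiom tables.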
    pub fn build_from_ast(ast: AST, lexer_grammar: &LexerGrammar) -> Result<Self> {
        type InvokedMacros = HashMap<(Rc<str>, Rc<[ElementType]>), NonTerminalId>;
        type MacroDeclarations = HashMap<Rc<str>, (Vec<Spanned<Rc<str>>>, Vec<AstRule>, Span)>;
        type FoundNonTerminals = HashMap<Rc<str>, (NonTerminalId, Span)>;

        let typed_ast = Ast::read(ast)?;
        // Maps the name of a macro to its arguments, its rules and the span
        // of its definition.
        let mut macro_declarations = HashMap::new();
        let mut non_terminal_declarations = Vec::new();
        // Maps the name of a non-terminal to its identifier and the span of
        // its definition.
        let mut found_nonterminals = HashMap::new();
        let mut available_id = NonTerminalId(0);
        let mut id_of = HashMap::new();
        let mut name_of = NonTerminalName::new();
        let mut description_of = NonTerminalDescription::new();

        for decl in typed_ast.decls {
            match decl.inner {
                ToplevelDeclaration::Macro(macro_decl) => {
                    if let Some((_, _, old_span)) = macro_declarations.insert(
                        macro_decl.name.inner.clone(),
                        (
                            macro_decl.args,
                            macro_decl.rules,
                            macro_decl.name.span.clone(),
                        ),
                    ) {
                        return ErrorKind::GrammarDuplicateMacroDefinition {
                            span: macro_decl.name.span.into(),
                            old_span: old_span.into(),
                            name: macro_decl.name.inner.to_string(),
                        }
                        .err();
                    }
                }
                ToplevelDeclaration::Decl(decl) => {
                    let id = available_id.next();
                    if let Some((_, old_span)) = found_nonterminals
                        .insert(decl.name.inner.clone(), (id, decl.name.span.clone()))
                    {
                        return ErrorKind::GrammarDuplicateDefinition {
                            name: decl.name.inner.to_string(),
                            span: decl.name.span.into(),
                            old_span: old_span.into(),
                        }
                        .err();
                    }
                    id_of.insert(decl.name.inner.clone(), id);
                    name_of.push(decl.name.inner.clone());
                    description_of.push(decl.comment.as_ref().map(|o| o.inner.clone()));
                    non_terminal_declarations.push((decl, id));
                }
            }
        }

        fn eval_rule(
            rule: &AstRule,
            macro_id: NonTerminalId,
            available_id: &mut NonTerminalId,
            rules: &mut Rules,
            invoked_macros: &mut InvokedMacros,
            name_of: &mut NonTerminalName,
            description_of: &mut NonTerminalDescription,
            id_of: &mut HashMap<Rc<str>, NonTerminalId>,
            found_nonterminals: &FoundNonTerminals,
            macro_declarations: &MacroDeclarations,
            scope: &HashMap<Rc<str>, ElementType>,
            lexer_grammar: &LexerGrammar,
        ) -> Result<Rule> {
            let mut new_elements = Vec::with_capacity(rule.elements.len());
            for element in rule.elements.iter() {
                let el = eval_element(
                    element,
                    macro_id,
                    available_id,
                    rules,
                    invoked_macros,
                    name_of,
                    description_of,
                    id_of,
                    found_nonterminals,
                    macro_declarations,
                    scope,
                    lexer_grammar,
                )?;
                new_elements.push(el);
            }
            let proxy = eval_proxy(&rule.proxy, found_nonterminals)?;
            Ok(Rule::new(
                macro_id,
                new_elements,
                proxy,
                rule.left_associative
                    .as_ref()
                    .map(|Spanned { inner, .. }| (*inner).into())
                    .unwrap_or(true),
            ))
        }

        fn invoke_macro(
            name: Spanned<Rc<str>>,
            args: Rc<[ElementType]>,
            macro_id: NonTerminalId,
            available_id: &mut NonTerminalId,
            rules: &mut Rules,
            invoked_macros: &mut InvokedMacros,
            name_of: &mut NonTerminalName,
            description_of: &mut NonTerminalDescription,
            id_of: &mut HashMap<Rc<str>, NonTerminalId>,
            found_nonterminals: &FoundNonTerminals,
            macro_declarations: &MacroDeclarations,
            lexer_grammar: &LexerGrammar,
        ) -> Result<()> {
            let Some((arg_names, macro_rules, definition_span)) =
                macro_declarations.get(&name.inner)
            else {
                return ErrorKind::GrammarUndefinedMacro {
                    name: name.inner.to_string(),
                    span: name.span.into(),
                }
                .err();
            };

            if args.len() != arg_names.len() {
                return ErrorKind::GrammarArityMismatch {
                    macro_name: name.inner.to_string(),
                    definition_arity: arg_names.len(),
                    call_arity: args.len(),
                    definition_span: definition_span.into(),
                    call_span: name.span.into(),
                }
                .err();
            }

            let scope = arg_names
                .iter()
                .map(|x| x.inner.clone())
                .zip(args.iter().copied())
                .collect();

            for rule in macro_rules {
                let actual_rule = eval_rule(
                    rule,
                    macro_id,
                    available_id,
                    rules,
                    invoked_macros,
                    name_of,
                    description_of,
                    id_of,
                    found_nonterminals,
                    macro_declarations,
                    &scope,
                    lexer_grammar,
                )?;
                rules.push(actual_rule);
            }
            Ok(())
        }

        fn eval_expression(
            expr: &Spanned<Item>,
            self_id: NonTerminalId,
            available_id: &mut NonTerminalId,
            rules: &mut Rules,
            invoked_macros: &mut InvokedMacros,
            name_of: &mut NonTerminalName,
            description_of: &mut NonTerminalDescription,
            id_of: &mut HashMap<Rc<str>, NonTerminalId>,
            found_nonterminals: &FoundNonTerminals,
            macro_declarations: &MacroDeclarations,
            scope: &HashMap<Rc<str>, ElementType>,
            lexer_grammar: &LexerGrammar,
        ) -> Result<ElementType> {
            let res = match &expr.inner {
                Item::SelfNonTerminal => ElementType::NonTerminal(self_id),
                Item::Regular { name } => {
                    if let Some(element) = scope.get(&name.inner) {
                        *element
                    } else if let Some((id, _)) = found_nonterminals.get(&name.inner) {
                        ElementType::NonTerminal(*id)
                    } else if let Some(id) = lexer_grammar.id(&name.inner) {
                        ElementType::Terminal(id)
                    } else {
                        return ErrorKind::GrammarUndefinedNonTerminal {
                            name: name.inner.to_string(),
                            span: name.span.clone().into(),
                        }
                        .err();
                    }
                }
                Item::MacroInvocation { name, arguments } => {
                    let mut args = Vec::new();
                    for arg in arguments {
                        let evaled = eval_expression(
                            arg,
                            self_id,
                            available_id,
                            rules,
                            invoked_macros,
                            name_of,
                            description_of,
                            id_of,
                            found_nonterminals,
                            macro_declarations,
                            scope,
                            lexer_grammar,
                        )?;
                        args.push(evaled);
                    }
                    let args: Rc<[_]> = Rc::from(args);
                    // Only expand a macro the first time it is invoked with a
                    // given list of arguments.
                    if let Entry::Vacant(e) =
                        invoked_macros.entry((name.inner.clone(), args.clone()))
                    {
                        let id = available_id.next();
                        let mut complete_name = name.inner.to_string();
                        complete_name.push('[');
                        complete_name.extend(
                            args.iter()
                                .map(|element| match element {
                                    ElementType::NonTerminal(id) => &*name_of[*id],
                                    ElementType::Terminal(id) => lexer_grammar.name(*id),
                                })
                                .intersperse(", "),
                        );
                        complete_name.push(']');
                        let complete_name: Rc<str> = Rc::from(complete_name);
                        id_of.insert(complete_name.clone(), id);
                        name_of.push(complete_name);
                        description_of.push(None);
                        e.insert(id);
                        invoke_macro(
                            name.clone(),
                            args.clone(),
                            id,
                            available_id,
                            rules,
                            invoked_macros,
                            name_of,
                            description_of,
                            id_of,
                            found_nonterminals,
                            macro_declarations,
                            lexer_grammar,
                        )?;
                    }
                    ElementType::NonTerminal(invoked_macros[&(name.inner.clone(), args)])
                }
            };
            Ok(res)
        }

        fn eval_element(
            element: &AstElement,
            id: NonTerminalId,
            available_id: &mut NonTerminalId,
            rules: &mut Rules,
            invoked_macros: &mut InvokedMacros,
            name_of: &mut NonTerminalName,
            description_of: &mut NonTerminalDescription,
            id_of: &mut HashMap<Rc<str>, NonTerminalId>,
            found_nonterminals: &FoundNonTerminals,
            macro_declarations: &MacroDeclarations,
            scope: &HashMap<Rc<str>, ElementType>,
            lexer_grammar: &LexerGrammar,
        ) -> Result<Element> {
            let attribute = match &element.attribute {
                Some(AstAttribute {
                    attribute,
                    named: Spanned { inner: true, .. },
                    span: _span,
                }) => Attribute::Named(attribute.inner.clone()),
                Some(AstAttribute {
                    attribute,
                    named: Spanned { inner: false, .. },
                    span,
                }) => {
                    let index = attribute
                        .inner
                        .parse()
                        .map_err(|_| ErrorKind::IntegerTooBig {
                            string: attribute.inner.to_string(),
                            span: span.into(),
                        })?;
                    Attribute::Indexed(index)
                }
                None => Attribute::None,
            };
            let key = element.key.as_ref().map(|k| k.0.clone());
            let element_type = eval_expression(
                &element.item,
                id,
                available_id,
                rules,
                invoked_macros,
                name_of,
                description_of,
                id_of,
                found_nonterminals,
                macro_declarations,
                scope,
                lexer_grammar,
            )?;
            Ok(Element::new(attribute, key.map(|o| o.inner), element_type))
        }

        fn eval_proxy(proxy: &AstProxy, found_nonterminals: &FoundNonTerminals) -> Result<Proxy> {
            let mut actual_proxy = HashMap::new();
            if let Some(ref variant) = proxy.variant {
                actual_proxy.insert(
                    "variant".into(),
                    ValueTemplate::String(variant.inner.clone()),
                );
            }
            for (key, (expression, _)) in proxy.items.iter() {
                let value = match &expression.inner {
                    Expression::String(string) => ValueTemplate::String(string.clone()),
                    Expression::Id(id) => ValueTemplate::Variable(id.clone()),
                    Expression::Instanciation {
                        name,
                        children,
                        variant,
                    } => {
                        let Some((nonterminal, _)) = found_nonterminals.get(&name.inner) else {
                            return ErrorKind::GrammarUndefinedNonTerminal {
                                name: name.inner.to_string(),
                                span: name.span.clone().into(),
                            }
                            .err();
                        };

                        let fake_proxy = AstProxy {
                            variant: variant.as_ref().cloned(),
                            items: children.clone(),
                            span: expression.span.clone(),
                        };
                        let attributes = eval_proxy(&fake_proxy, found_nonterminals)?;
                        ValueTemplate::InlineRule {
                            non_terminal: *nonterminal,
                            attributes,
                        }
                    }
                };
                actual_proxy.insert(key.clone(), value);
            }
            Ok(actual_proxy)
        }

        let mut invoked_macros: InvokedMacros = HashMap::new();
        let mut found_axioms = Vec::new();
        let mut rules = Rules::new();
        let empty_scope = HashMap::new();
        for (declaration, id) in non_terminal_declarations {
            if declaration.axiom.inner {
                found_axioms.push(id);
            }
            for rule in declaration.rules {
                let parsed_rule = eval_rule(
                    &rule,
                    id,
                    &mut available_id,
                    &mut rules,
                    &mut invoked_macros,
                    &mut name_of,
                    &mut description_of,
                    &mut id_of,
                    &found_nonterminals,
                    &macro_declarations,
                    &empty_scope,
                    lexer_grammar,
                )?;
                rules.push(parsed_rule);
            }
        }
        let mut axioms = Axioms::with_capacity(available_id.next());
        for axiom in found_axioms {
            axioms.put(axiom);
        }
        let res = Self::new(rules, axioms, id_of, name_of, description_of)?;
        Ok(res)
    }

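    /// Builds the grammar from a plain-text grammar file, by lexing and
    /// parsing it with the bootstrapped `parser.clx`/`parser.cgr` system and
    /// handing the resulting AST to [`Self::build_from_ast`].
    ///
    /// A minimal sketch (the file contents are illustrative, mirroring the
    /// tests at the bottom of this module):
    ///
    /// ```ignore
    /// let lexer = Lexer::build_from_plain(StringStream::new(
    ///     Path::new("<lexer>"),
    ///     r"NUMBER ::= ([0-9]+)",
    /// ))?;
    /// let grammar = EarleyGrammar::build_from_plain(
    ///     StringStream::new(Path::new("<grammar>"), r"@Expr ::= NUMBER.0@value <Literal>;"),
    ///     lexer.grammar(),
    /// )?;
    /// ```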
    pub fn build_from_plain(
        mut source: StringStream,
        lexer_grammar: &LexerGrammar,
    ) -> Result<Self> {
        let (lexer, parser) = build_system!(
            lexer => "parser.clx",
            parser => "parser.cgr",
        )?;
        let mut input = lexer.lex(&mut source);
        let result = parser.parse(&mut input)?;
        let grammar = Self::build_from_ast(result.tree, lexer_grammar)?;
        Ok(grammar)
    }

    /// Builds the grammar from a file, choosing the loader (compiled, AST or
    /// plain text) from the file extension.
    pub fn build_from_path(path: &Path, lexer_grammar: &LexerGrammar) -> Result<Self> {
        let ast: AST = match select_format(
            path,
            &[
                (Self::COMPILED_EXTENSION, Format::Compiled),
                (Self::AST_EXTENSION, Format::Ast),
                (Self::PLAIN_EXTENSION, Format::Plain),
            ],
        ) {
            FileResult::Valid((actual_path, Format::Ast)) => {
                let file = File::open(&actual_path)
                    .map_err(|error| Error::with_file(error, &actual_path))?;
                serde_json::from_reader(file).map_err(|error| ErrorKind::IllformedAst {
                    error,
                    path: actual_path,
                })?
            }
            FileResult::Valid((actual_path, Format::Plain)) => {
                let stream = StringStream::from_file(actual_path)?;
                let result = Self::build_from_plain(stream, lexer_grammar)?;
                return Ok(result);
            }
            FileResult::Valid((actual_path, Format::Compiled)) => {
                let mut file = File::open(&actual_path)
                    .map_err(|err| Error::with_file(err, &actual_path))?;
                let mut buffer = Vec::new();
                file.read_to_end(&mut buffer)
                    .map_err(|err| Error::with_file(err, &actual_path))?;
                let result = Self::build_from_compiled(&buffer, actual_path)?;
                return Ok(result);
            }
            FileResult::WrongExtension(extension) => {
                return ErrorKind::UnrecognisedExtension {
                    extension,
                    path: path.to_owned(),
                }
                .err();
            }
            FileResult::NonExisting => {
                return ErrorKind::GrammarNotFound {
                    path: path.to_owned(),
                }
                .err();
            }
        };
        let parser = Self::build_from_ast(ast, lexer_grammar)?;
        Ok(parser)
    }

    /// Builds the grammar from an in-memory blob, choosing the loader from
    /// the extension of `path` (which is used only for format selection and
    /// error reporting).
    pub fn build_from_blob(
        blob: &[u8],
        path: &Path,
        lexer_grammar: &LexerGrammar,
    ) -> Result<Self> {
        let ast: AST = match select_format(
            path,
            &[
                (Self::COMPILED_EXTENSION, Format::Compiled),
                (Self::AST_EXTENSION, Format::Ast),
                (Self::PLAIN_EXTENSION, Format::Plain),
            ],
        ) {
            FileResult::Valid((actual_path, Format::Compiled)) => {
                let result = Self::build_from_compiled(blob, actual_path)?;
                return Ok(result);
            }
            FileResult::Valid((actual_path, Format::Ast)) => {
                let string = std::str::from_utf8(blob)
                    .map_err(|error| Error::with_file(error, actual_path.clone()))?;
                serde_json::from_str(string)
                    .map_err(|error| Error::with_file(error, actual_path))?
            }
            FileResult::Valid((actual_path, Format::Plain)) => {
                let string = String::from_utf8(blob.to_vec())
                    .map_err(|error| Error::with_file(error, &actual_path))?;
                let stream = StringStream::new(actual_path, string);
                let result = Self::build_from_plain(stream, lexer_grammar)?;
                return Ok(result);
            }
            FileResult::NonExisting => {
                return ErrorKind::GrammarNotFound {
                    path: path.to_owned(),
                }
                .err();
            }
            FileResult::WrongExtension(extension) => {
                return ErrorKind::UnrecognisedExtension {
                    extension,
                    path: path.to_owned(),
                }
                .err();
            }
        };
        let grammar = Self::build_from_ast(ast, lexer_grammar)?;
        Ok(grammar)
    }
}

newty! {
    id FinalItemId
}
newty! {
    #[derive(PartialEq, Eq, Clone)]
    vec FinalSetVec(FinalItem)[FinalItemId]
}
newty! {
    #[derive(Clone)]
    map FinalSetIndex(Vec<FinalItemId>)[NonTerminalId]
}

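/// A set of completed items that all start at the same input position,
/// indexed by the non-terminal they derive so that `find_children` can look
/// them up quickly.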
#[derive(Default, Debug, Clone, Eq)]
pub struct FinalSet {
    /// Maps a non-terminal to the items of this set that derive it.
    index: FinalSetIndex,
    /// The completed items, all starting at `position`.
    set: FinalSetVec,
    /// The input position at which every item of this set starts.
    position: usize,
}

impl PartialEq for FinalSet {
    fn eq(&self, rhs: &FinalSet) -> bool {
        self.set == rhs.set && self.position == rhs.position
    }
}

impl FinalSet {
    fn add(&mut self, item: FinalItem, grammar: &EarleyGrammar) {
        self.index
            .0
            .entry(grammar.rules[item.rule].id)
            .or_default()
            .push(self.set.len_as());
        self.set.push(item);
    }

    fn iter(&self) -> impl Iterator<Item = &FinalItem> + '_ {
        self.set.iter()
    }
}

impl std::fmt::Display for FinalSet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            r"== ({}) ==
{}",
            self.position,
            self.set
                .iter()
                .map(|item| format!("{}", item))
                .collect::<Vec<_>>()
                .join("\n")
        )
    }
}

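/// An Earley state set: the items under consideration at a given input
/// position. `cache` gives O(1) deduplication while `set` preserves insertion
/// order, so the set can be extended while it is being iterated.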
#[derive(Default, Debug)]
pub struct StateSet {
    cache: HashSet<EarleyItem>,
    set: Vec<EarleyItem>,
    position: usize,
}

impl StateSet {
    fn add(&mut self, item: EarleyItem) {
        if self.cache.insert(item) {
            self.set.push(item);
        }
    }

    /// Advances the internal cursor and returns the next unprocessed item,
    /// if any. Not an `Iterator`, because the set may grow between calls.
    fn next(&mut self) -> Option<&EarleyItem> {
        if let Some(e) = self.set.get(self.position) {
            self.position += 1;
            Some(e)
        } else {
            None
        }
    }

    fn is_empty(&self) -> bool {
        self.set.is_empty()
    }

    fn slice(&self) -> &[EarleyItem] {
        &self.set
    }

    fn iter(&self) -> impl Iterator<Item = &EarleyItem> + '_ {
        self.set.iter()
    }
}

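/// A node of the syntax tree under construction: either a rule or a token,
/// spanning the input positions `start..end`.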
#[derive(Clone, Debug)]
struct SyntaxicItem {
    kind: SyntaxicItemKind,
    start: usize,
    end: usize,
}

#[derive(Clone, Debug)]
enum SyntaxicItemKind {
    Rule(RuleId),
    Token(Token),
}

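/// The Earley parser itself. Recognition builds a table of state sets, which
/// is then turned into a forest of completed items, from which a single AST
/// is selected according to rule priority and associativity.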
#[derive(Debug)]
pub struct EarleyParser {
    grammar: EarleyGrammar,
}

impl EarleyParser {
    pub fn build_from_blob(
        blob: &[u8],
        path: &Path,
        lexer_grammar: &LexerGrammar,
    ) -> Result<Self> {
        let grammar = EarleyGrammar::build_from_blob(blob, path, lexer_grammar)?;
        Ok(Self { grammar })
    }

    /// Finds the children of `element` in the forest: scans its rule from
    /// left to right, threading every possible boundary through the final
    /// sets, and keeps the complete derivation preferred by the rule's
    /// associativity, breaking ties by rule priority.
    fn find_children(
        &self,
        element: SyntaxicItem,
        forest: &[FinalSet],
        raw_input: &[Token],
    ) -> Vec<SyntaxicItem> {
        match element.kind {
            SyntaxicItemKind::Rule(rule) => {
                let mut boundary = vec![(List::default(), element.start)];
                for elem in self.grammar.rules[rule].elements.iter() {
                    let mut next_boundary = Vec::new();
                    for (children, curpos) in boundary.drain(..) {
                        match elem.element_type {
                            ElementType::NonTerminal(id) => {
                                if let Some(rules) = forest[curpos].index.get(&id) {
                                    for final_item in rules
                                        .iter()
                                        .map(|&rule| &forest[curpos].set[rule])
                                        .filter(|final_item| final_item.end <= element.end)
                                    {
                                        next_boundary.push((
                                            children.cons(SyntaxicItem {
                                                kind: SyntaxicItemKind::Rule(final_item.rule),
                                                start: curpos,
                                                end: final_item.end,
                                            }),
                                            final_item.end,
                                        ))
                                    }
                                }
                            }
                            ElementType::Terminal(id)
                                if curpos < element.end && raw_input[curpos].id() == id =>
                            {
                                next_boundary.push((
                                    children.cons(SyntaxicItem {
                                        kind: SyntaxicItemKind::Token(raw_input[curpos].clone()),
                                        start: curpos,
                                        end: curpos + 1,
                                    }),
                                    curpos + 1,
                                ))
                            }
                            _ => {}
                        }
                    }
                    boundary.extend(next_boundary.into_iter().rev());
                }
                let children = boundary
                    .into_iter()
                    .filter_map(|(children, pos)| {
                        if pos == element.end {
                            Some(children)
                        } else {
                            None
                        }
                    })
                    .max_by(|left_children, right_children| {
                        for (left, right) in left_children.iter().zip(right_children.iter()) {
                            let SyntaxicItemKind::Rule(left_rule) = left.kind else {
                                continue;
                            };
                            let SyntaxicItemKind::Rule(right_rule) = right.kind else {
                                continue;
                            };
                            let assoc_ord = if self.grammar.rules[rule].left_associative {
                                left.start.cmp(&right.start)
                            } else {
                                right.start.cmp(&left.start)
                            };
                            let ord = match assoc_ord {
                                Ordering::Equal => left_rule.cmp(&right_rule),
                                other => other,
                            };
                            match ord {
                                Ordering::Equal => continue,
                                other => return other,
                            }
                        }
                        Ordering::Equal
                    })
                    .unwrap();
                children
                    .iter()
                    .cloned()
                    .collect::<Vec<_>>()
                    .into_iter()
                    .rev()
                    .collect()
            }
            SyntaxicItemKind::Token(_) => Vec::new(),
        }
    }

    /// Builds the AST for `item`: recursively builds its children, then
    /// evaluates the rule's proxy to assemble the node's attributes.
    fn build_ast(
        &self,
        item: SyntaxicItem,
        forest: &[FinalSet],
        raw_input: &[Token],
        last_span: &Span,
    ) -> AST {
        match item.kind {
            SyntaxicItemKind::Rule(rule) => {
                let span = if raw_input.is_empty() {
                    last_span.clone()
                } else if item.end == item.start {
                    raw_input[item.start].span().clone()
                } else {
                    raw_input[item.start]
                        .span()
                        .sup(raw_input[item.end - 1].span())
                };
                let all_attributes = self
                    .find_children(item, forest, raw_input)
                    .into_iter()
                    .map(|item| self.build_ast(item, forest, raw_input, last_span))
                    .zip(self.grammar.rules[rule].elements.iter())
                    .filter_map(|(item, element)| {
                        element.key.as_ref().map(|key| match &element.attribute {
                            Attribute::Named(attr) => {
                                let AST::Node { attributes, .. } = item else {
                                    unreachable!("{item:?}.{attr}")
                                };
                                (key.clone(), attributes[attr].clone())
                            }
                            Attribute::Indexed(idx) => {
                                let AST::Terminal(token) = item else {
                                    unreachable!("{item:?}.{idx}")
                                };
                                (
                                    key.clone(),
                                    AST::Literal {
                                        value: Value::Str(Rc::from(
                                            token.attributes()[idx].as_str(),
                                        )),
                                        span: Some(token.span().clone()),
                                    },
                                )
                            }
                            Attribute::None => (key.clone(), item),
                        })
                    })
                    .collect::<HashMap<Rc<str>, _>>();
                let mut removed: HashSet<Rc<str>> = HashSet::new();
                let nonterminal = self.grammar.rules[rule].id;
                let mut attributes: HashMap<_, _> = self.grammar.rules[rule]
                    .proxy
                    .iter()
                    .map(|(key, wanted)| {
                        (
                            key.clone(),
                            wanted.evaluate(&all_attributes, &mut removed, &span),
                        )
                    })
                    .collect();
                attributes.extend(
                    all_attributes
                        .into_iter()
                        .filter(|(key, _)| !removed.contains(key)),
                );
                AST::Node {
                    nonterminal,
                    attributes,
                    span,
                }
            }
            SyntaxicItemKind::Token(token) => AST::Terminal(token),
        }
    }

    /// Selects, among the completed axiom items that span the whole input,
    /// the one with the greatest rule identifier, and builds its AST.
    pub fn select_ast(&self, forest: &[FinalSet], raw_input: &[Token], last_span: &Span) -> AST {
        forest[0]
            .iter()
            .filter(|item| {
                item.end == raw_input.len()
                    && self
                        .grammar
                        .axioms
                        .contains(self.grammar.rules[item.rule].id)
            })
            .sorted_unstable_by_key(|item| Reverse(item.rule))
            .map(|item| SyntaxicItem {
                start: 0,
                end: raw_input.len(),
                kind: SyntaxicItemKind::Rule(item.rule),
            })
            .map(|item| self.build_ast(item, forest, raw_input, last_span))
            .next()
            .unwrap()
    }

    /// Converts the recognition table into a forest: every completed item of
    /// set `i` is recorded in the final set of its origin, with `i` as its
    /// end position.
    pub fn to_forest(&self, table: &[StateSet], raw_input: &[Token]) -> Result<Forest> {
        let mut forest = vec![FinalSet::default(); table.len()];
        for (i, set) in table.iter().enumerate() {
            forest[i].position = i;
            if set.is_empty() {
                return ErrorKind::InternalError {
                    message: format!(
                        "While creating the forest, could not find any item in set {}, at {}",
                        i,
                        raw_input[i].span(),
                    ),
                }
                .err();
            }
            set.iter()
                .filter(|item| item.position == self.grammar.rules[item.rule].elements.len())
                .for_each(|item| {
                    forest[item.origin].add(
                        FinalItem {
                            end: i,
                            rule: item.rule,
                        },
                        &self.grammar,
                    )
                });
        }
        Ok(forest)
    }

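    /// Runs Earley recognition over `input`: at each input position, applies
    /// the three classic operations (prediction for non-terminals, scan for
    /// terminals, completion for finished items) until the whole input is
    /// consumed, and returns the resulting table together with the tokens
    /// that were read. Expected-token information is collected along the way
    /// so that syntax errors can report meaningful alternatives.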
    pub fn recognise<'input, 'linput: 'input>(
        &self,
        input: &'input mut LexedStream<'linput, 'linput>,
    ) -> Result<(Table, Vec<Token>)> {
        let mut sets = Vec::new();
        let mut first_state = StateSet::default();
        let mut possible_first_nonterminals = HashSet::new();
        let mut possible_first_terminals = HashSet::new();
        (0..self.grammar().rules.len())
            .map(RuleId)
            .filter(|id| self.grammar.axioms.contains(self.grammar.rules[*id].id))
            .for_each(|id| {
                let parent_has_been_shown = if let Some(description) =
                    self.grammar().description_of(self.grammar().rules[id].id)
                {
                    possible_first_nonterminals.insert(description);
                    true
                } else {
                    false
                };

                first_state.add(EarleyItem {
                    rule: id,
                    origin: 0,
                    position: 0,
                    parent_has_been_shown,
                })
            });
        let mut raw_input = Vec::new();
        sets.push(first_state);
        let mut pos = 0;
        'outer: loop {
            let mut next_state = StateSet::default();
            let mut scans: HashMap<TerminalId, Vec<_>> = HashMap::new();
            '_inner: while let Some(&item) = sets.last_mut().unwrap().next() {
                let mut to_be_added = Vec::new();
                match self.grammar().rules[item.rule].elements.get(item.position) {
                    Some(element) => match element.element_type {
                        // Prediction: expect every rule that can derive the
                        // awaited non-terminal.
                        ElementType::NonTerminal(id) => {
                            for &rule in self.grammar().has_rules(id) {
                                let parent_has_been_shown = item.parent_has_been_shown
                                    || if let Some(description) = self
                                        .grammar
                                        .description_of(self.grammar().rules[rule].id)
                                    {
                                        possible_first_nonterminals.insert(description);
                                        true
                                    } else {
                                        false
                                    };
                                to_be_added.push(EarleyItem {
                                    rule,
                                    origin: pos,
                                    position: 0,
                                    parent_has_been_shown,
                                });
                            }
                            // A nullable non-terminal may also be skipped.
                            if self.grammar.nullables.contains(id) {
                                to_be_added.push(EarleyItem {
                                    rule: item.rule,
                                    origin: item.origin,
                                    position: item.position + 1,
                                    parent_has_been_shown: item.parent_has_been_shown,
                                });
                            }
                        }
                        // Scan: remember to advance this item if the awaited
                        // terminal is read next.
                        ElementType::Terminal(id) => {
                            if !item.parent_has_been_shown {
                                if let Some(message) =
                                    input.lexer().grammar().description_of(id)
                                {
                                    possible_first_terminals.insert(message.to_string());
                                } else {
                                    possible_first_terminals
                                        .insert(input.lexer().grammar().name(id).to_string());
                                };
                            }
                            scans.entry(id).or_default().push(EarleyItem {
                                rule: item.rule,
                                origin: item.origin,
                                position: item.position + 1,
                                parent_has_been_shown: false,
                            })
                        }
                    },
                    // Completion: this item is done; advance every item of
                    // its origin set that was waiting for it.
                    None => {
                        for &parent in sets[item.origin].slice() {
                            if let Some(Element {
                                element_type: ElementType::NonTerminal(nonterminal),
                                ..
                            }) = self.grammar().rules[parent.rule]
                                .elements
                                .get(parent.position)
                            {
                                if *nonterminal == self.grammar().rules[item.rule].id {
                                    to_be_added.push(EarleyItem {
                                        rule: parent.rule,
                                        origin: parent.origin,
                                        position: parent.position + 1,
                                        parent_has_been_shown: item.parent_has_been_shown,
                                    })
                                }
                            }
                        }
                    }
                }
                for item in to_be_added {
                    sets.last_mut().unwrap().add(item);
                }
            }

            let possible_scans = input
                .lexer()
                .grammar()
                .default_allowed()
                .chain(scans.keys().cloned())
                .collect::<Vec<_>>();
            let allowed = Allowed::Some(possible_scans);
            let next_token = match input.next(allowed) {
                Ok(r) => r,
                Err(error) => {
                    if let ErrorKind::LexingError { .. } = *error.kind {
                        // The stream could not be lexed with only the allowed
                        // terminals; retry with all of them to build a
                        // helpful syntax error.
                        let error = if let Some(token) = input.next(Allowed::All)? {
                            let span = token.span().clone();
                            let name = {
                                let id = token.id();
                                let name = token.name().to_string();
                                if let Some(description) =
                                    input.lexer().grammar().description_of(id)
                                {
                                    description.to_string()
                                } else {
                                    name
                                }
                            };
                            ErrorKind::SyntaxError {
                                name,
                                alternatives: possible_first_nonterminals
                                    .drain()
                                    .map(|x| x.to_string())
                                    .chain(possible_first_terminals.drain())
                                    .collect(),
                                span: Fragile::new(span),
                            }
                        } else {
                            ErrorKind::SyntaxErrorValidPrefix {
                                span: input.last_span().into(),
                            }
                        };
                        return error.err();
                    } else {
                        return Err(error);
                    }
                }
            };
            possible_first_nonterminals.clear();
            possible_first_terminals.clear();
            if let Some(token) = next_token {
                for item in scans.entry(token.id()).or_default() {
                    next_state.add(*item);
                }
                raw_input.push(token.clone());
            } else if sets.last().unwrap().set.iter().any(|item| {
                let rule = &self.grammar.rules[item.rule];
                item.origin == 0
                    && self.grammar.axioms.contains(rule.id)
                    && rule.elements.len() == item.position
            }) {
                // End of input, and an axiom spanning the whole input has
                // been completed: recognition succeeded.
                break 'outer Ok((sets, raw_input));
            } else {
                return ErrorKind::SyntaxErrorValidPrefix {
                    span: input.last_span().into(),
                }
                .err();
            };

            sets.push(next_state);
            pos += 1;
        }
    }
}

impl Parser<'_> for EarleyParser {
    type Grammar = EarleyGrammar;

    fn new(grammar: Self::Grammar) -> Self {
        Self { grammar }
    }

    fn grammar(&self) -> &Self::Grammar {
        &self.grammar
    }

    fn is_valid<'input>(&self, input: &'input mut LexedStream<'input, 'input>) -> bool {
        self.recognise(input).is_ok()
    }

    fn parse<'input>(
        &self,
        input: &'input mut LexedStream<'input, 'input>,
    ) -> Result<ParseResult> {
        let (table, raw_input) = self.recognise(input)?;
        let forest = self.to_forest(&table, &raw_input)?;
        let tree = self.select_ast(&forest, &raw_input, input.last_span());
        Ok(ParseResult { tree })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    const GRAMMAR_NUMBERS_LEXER: &str = r#"
NUMBER ::= ([0-9])
PM ::= [-+]
TD ::= [*/]
LPAR ::= \(
RPAR ::= \)
"#;

    const GRAMMAR_NUMBERS: &str = r#"
@Sum ::= Sum@left PM Product@right <>
         Product@self <>;

Product ::= Product@left TD Factor@right <>
            Factor.self@self <>;

Factor ::= LPAR Sum@self RPAR <>
           NUMBER.0@self <>;"#;

    const GRAMMAR_PROXY_LEXER: &str = r#"
NUMBER ::= ([0-9]+)
OP ::= \+
LPAR ::= \(
RPAR ::= \)
"#;

    const GRAMMAR_NUMBERS_IMPROVED: &str = r#"
@Expr ::=
  NUMBER.0@value <Literal>
  (left-assoc) Expr@left TD Expr@right <MulDiv>
  (right-assoc) Expr@left PM Expr@right <AddSub>
  LPAR Expr@value RPAR <Through>;
"#;

    const GRAMMAR_PROXY: &str = r#"
@Expression ::=
  NUMBER.0@value <Literal>
  Expression@left OP Expression@right <Operation>
  OP Expression@right <Operation, left: Expression {Literal, value: "0"}>
  LPAR Expression@value RPAR <Parenthesized>;
"#;

    const GRAMMAR_PROXY_WRONG_1: &str = r#"
@Expression ::=
  NUMBER.0@value <Literal>
  Expression@left OP Expression@right <Operation>
  OP Expression@right <
    Operation,
    left: Expression {variant: Literal value: "0"}
  >
  LPAR Expression@value RPAR <Parenthesized>;
"#;

    const GRAMMAR_PROXY_WRONG_2: &str = r#"
@Expression ::=
  NUMBER.0@value <Literal>
  Expression@left OP Expression@right <Operation>
  OP Expression@right <
    Operation
    left: Expression {Literal value: "0"}
  >
  LPAR Expression@value RPAR <Parenthesized>;
"#;

    const GRAMMAR_C_LEXER: &str = include_str!("gmrs/petitc.lx");
    const GRAMMAR_C: &str = include_str!("gmrs/petitc.gr");

    /// An Earley item specified by names, for comparison against the real
    /// items produced by the recogniser.
    struct TestEarleyItem {
        name: &'static str,
        left_elements: Vec<&'static str>,
        right_elements: Vec<&'static str>,
        origin: usize,
    }

    impl TestEarleyItem {
        fn matches(
            &self,
            other: &EarleyItem,
            parser: &EarleyParser,
            lexer: &Lexer,
            set_id: usize,
            item_id: usize,
        ) {
            let error_message = format!("Set #{}, item #{}: no match:", set_id, item_id);
            let item = &parser.grammar().rules[other.rule];
            assert_eq!(
                self.name,
                &*parser.grammar().name_of[item.id],
                "{} name.",
                error_message
            );
            assert_eq!(
                self.left_elements.len() + self.right_elements.len(),
                item.elements.len(),
                "{} number of elements.\nExpected: [{:?}, {:?}]\nGot: {:?}",
                error_message,
                self.left_elements,
                self.right_elements,
                item.elements,
            );
            assert_eq!(
                self.left_elements.len(),
                other.position,
                "{} fat dot position.",
                error_message,
            );
            assert_eq!(self.origin, other.origin, "{} origin set.", error_message);
            for i in 0..self.left_elements.len() {
                assert_eq!(
                    self.left_elements[i],
                    &*item.elements[i].name(lexer.grammar(), parser.grammar()),
                    "{} element #{}.",
                    error_message,
                    i
                );
            }
            for i in 0..self.right_elements.len() {
                assert_eq!(
                    self.right_elements[i],
                    &*item.elements[i + other.position].name(lexer.grammar(), parser.grammar()),
                    "{} element #{}.",
                    error_message,
                    i + other.position
                );
            }
        }
    }

    /// Builds a `Vec<Vec<TestEarleyItem>>` from a readable, dotted-rule
    /// description of the expected state sets.
    macro_rules! sets {
        (
            $(
                == $(
                    $name: ident -> $($left_element: ident)* . $($right_element: ident)* ($origin: literal)
                )*
            )*
        ) => {
            {
                #[allow(unused_mut)]
                let mut sets = Vec::new();
                $(
                    #[allow(unused_mut)]
                    let mut set = Vec::new();
                    $(
                        set.push(earley_item!($name -> $($left_element)* . $($right_element)* ($origin)));
                    )*
                    sets.push(set);
                )*
                sets
            }
        };
    }

    /// Builds a single `TestEarleyItem` from its dotted-rule description.
    macro_rules! earley_item {
        ($name: ident -> $($left_element: ident)* . $($right_element: ident)* ($origin: literal)) => {
            {
                let left_elements = vec![$(
                    stringify!($left_element)
                ),*];
                let right_elements = vec![$(
                    stringify!($right_element)
                ),*];
                TestEarleyItem {
                    name: stringify!($name),
                    left_elements,
                    right_elements,
                    origin: $origin
                }
            }
        };
    }

    /// Builds the expected forest: looks every described rule up in the
    /// grammar and assembles the corresponding `FinalSet`s.
    macro_rules! final_sets {
        (
            ($grammar:expr)
            ($lexer:expr)
            $(
                == $(
                    $name: ident -> $($element: ident)* ($end: literal)
                )*
            )*
        ) => {{
            #[allow(unused_mut)]
            let mut sets = Vec::new();
            fn find_item(
                grammar: &EarleyGrammar,
                lexer_grammar: &$crate::lexer::Grammar,
                name: &str,
                elements: &[&str],
                end: usize,
            ) -> FinalItem {
                for &rule_identifier in grammar
                    .id_of
                    .get(&Rc::from(name))
                    .map(|&identifier| &grammar.rules_of[identifier])
                    .unwrap_or_else(|| panic!("The non-terminal {} does not exist.", name))
                    .iter()
                {
                    if elements.len() == grammar.rules[rule_identifier].elements.len()
                        && elements
                            .iter()
                            .zip(grammar.rules[rule_identifier].elements.iter())
                            .all(|(&left, right)| left == &*right.name(lexer_grammar, grammar))
                    {
                        return FinalItem {
                            rule: rule_identifier,
                            end,
                        };
                    }
                }
                panic!("The rule {} -> {} is not in the grammar.", name, elements.join(" "));
            }
            $(
                #[allow(unused_mut)]
                let mut set = FinalSet::default();
                set.position = sets.len();
                $(
                    set.add(find_item($grammar, $lexer, stringify!($name), &[$(stringify!($element)),*], $end), $grammar);
                )*
                sets.push(set);
            )*
            sets
        }};
    }

    #[derive(Debug, Clone)]
    struct TestToken {
        name: &'static str,
        attributes: Vec<(usize, &'static str)>,
    }

    impl PartialEq<Token> for TestToken {
        fn eq(&self, other: &Token) -> bool {
            other.name() == self.name
                && other
                    .attributes()
                    .iter()
                    .map(|(&key, value)| (key, value.as_str()))
                    .collect::<Vec<_>>()
                    == self.attributes
        }
    }

    #[derive(Clone)]
    struct MapVec(Vec<(&'static str, TestAST)>);

    impl std::fmt::Debug for MapVec {
        fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            formatter
                .debug_map()
                .entries(self.0.iter().map(|(k, v)| (k, v)))
                .finish()
        }
    }

    impl From<Vec<(&'static str, TestAST)>> for MapVec {
        fn from(o: Vec<(&'static str, TestAST)>) -> Self {
            Self(o)
        }
    }

    #[derive(Debug, Clone)]
    enum TestAST {
        Node {
            id: usize,
            attributes: MapVec,
        },
        #[allow(unused)]
        Terminal(TestToken),
        Literal(super::super::parser::Value),
    }

    impl PartialEq<TestAST> for AST {
        fn eq(&self, other: &TestAST) -> bool {
            other == self
        }
    }

    impl PartialEq<AST> for TestAST {
        fn eq(&self, other: &AST) -> bool {
            match (self, other) {
                (
                    TestAST::Node {
                        id: tid,
                        attributes: tattributes,
                    },
                    AST::Node {
                        nonterminal: id,
                        attributes,
                        ..
                    },
                ) => {
                    NonTerminalId::from(*tid) == *id && {
                        let tattributes = tattributes
                            .0
                            .iter()
                            .map(|(key, value)| (Rc::<str>::from(*key), value))
                            .collect::<HashMap<_, _>>();
                        tattributes.len() == attributes.len()
                            && tattributes.iter().all(|(key, value)| {
                                attributes.get(key).map_or(false, |v| *value == v)
                            })
                    }
                }
                (TestAST::Terminal(ttoken), AST::Terminal(token)) => ttoken == token,
                (TestAST::Literal(tvalue), AST::Literal { value, .. }) => tvalue == value,
                _ => false,
            }
        }
    }

    #[test]
    fn complex_proxy() {
        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<PROXY>"),
            GRAMMAR_PROXY_LEXER,
        ))
        .unwrap();
        EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<PROXY>"), GRAMMAR_PROXY),
            lexer.grammar(),
        )
        .unwrap();
        assert!(EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<PROXY>"), GRAMMAR_PROXY_WRONG_1),
            lexer.grammar()
        )
        .is_err());
        assert!(EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<PROXY>"), GRAMMAR_PROXY_WRONG_2),
            lexer.grammar()
        )
        .is_err());
    }

    #[test]
    fn recognise_handle_empty_rules() {
        let lexer_input = r#""#;
        let grammar_input = r#"
@A ::= <>
       B <>;
B ::= A <>;"#;
        let input = r#""#;
        let lexer =
            Lexer::build_from_plain(StringStream::new(Path::new("<lexer input>"), lexer_input))
                .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<grammar input>"), grammar_input),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let sets = sets!(
            ==
            A -> . (0)
            A -> . B (0)
            B -> . A (0)
            A -> B . (0)
            B -> A . (0)
        );
        let (recognised, _) = parser
            .recognise(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
        print_sets(&recognised, &parser, &lexer);
        verify_sets(sets, recognised, &parser, &lexer);
    }

    #[test]
    fn grammar_c() {
        let input = r#"
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

int main() {
    return sizeof(bool ****);
}
"#;
        let lexer =
            Lexer::build_from_plain(StringStream::new(Path::new("petitc.lx"), GRAMMAR_C_LEXER))
                .unwrap();

        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("petitc.gr"), GRAMMAR_C),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let _ast = parser
            .parse(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
    }

    #[test]
    fn grammar_c_prior_assoc() {
        let input = r#"
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

int main() {
    int a;
    int b;
    a = b = 3+3*2;
    a = a < b > a < b > a;
}
"#;
        let lexer =
            Lexer::build_from_plain(StringStream::new(Path::new("petitc.lx"), GRAMMAR_C_LEXER))
                .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("petitc.gr"), GRAMMAR_C),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let _ast = parser
            .parse(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
    }

    #[test]
    fn valid_prefix() {
        let input = r#"1+2+"#;
        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<NUMBERS LEXER>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<NUMBERS>"), GRAMMAR_NUMBERS),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        assert!(parser
            .parse(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .is_err());
    }

    #[test]
    fn priority_associativity() {
        let input = r"1+2+3*4*5+6+7*8";
        // With `+` declared right-associative, `*` left-associative and `*`
        // binding tighter, this must parse as 1+(2+(((3*4)*5)+(6+(7*8)))).
        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<NUMBERS LEXER>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<NUMBERS IMPROVED>"), GRAMMAR_NUMBERS_IMPROVED),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let ast = parser
            .parse(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
        let test_ast = {
            use super::super::parser::Value::*;
            use TestAST::*;
            let add = Literal(Str("AddSub".into()));
            let literal = Literal(Str("Literal".into()));
            let mul = Literal(Str("MulDiv".into()));
            Node {
                id: 0,
                attributes: vec![
                    ("variant", add.clone()),
                    (
                        "left",
                        Node {
                            id: 0,
                            attributes: vec![
                                ("variant", literal.clone()),
                                ("value", Literal(Str("1".into()))),
                            ]
                            .into(),
                        },
                    ),
                    (
                        "right",
                        Node {
                            id: 0,
                            attributes: vec![
                                ("variant", add.clone()),
                                (
                                    "left",
                                    Node {
                                        id: 0,
                                        attributes: vec![
                                            ("variant", literal.clone()),
                                            ("value", Literal(Str("2".into()))),
                                        ]
                                        .into(),
                                    },
                                ),
                                (
                                    "right",
                                    Node {
                                        id: 0,
                                        attributes: vec![
                                            ("variant", add.clone()),
                                            (
                                                "left",
                                                Node {
                                                    id: 0,
                                                    attributes: vec![
                                                        ("variant", mul.clone()),
                                                        (
                                                            "left",
                                                            Node {
                                                                id: 0,
                                                                attributes: vec![
                                                                    ("variant", mul.clone()),
                                                                    (
                                                                        "left",
                                                                        Node {
                                                                            id: 0,
                                                                            attributes: vec![
                                                                                ("variant", literal.clone()),
                                                                                ("value", Literal(Str("3".into()))),
                                                                            ]
                                                                            .into(),
                                                                        },
                                                                    ),
                                                                    (
                                                                        "right",
                                                                        Node {
                                                                            id: 0,
                                                                            attributes: vec![
                                                                                ("variant", literal.clone()),
                                                                                ("value", Literal(Str("4".into()))),
                                                                            ]
                                                                            .into(),
                                                                        },
                                                                    ),
                                                                ]
                                                                .into(),
                                                            },
                                                        ),
                                                        (
                                                            "right",
                                                            Node {
                                                                id: 0,
                                                                attributes: vec![
                                                                    ("variant", literal.clone()),
                                                                    ("value", Literal(Str("5".into()))),
                                                                ]
                                                                .into(),
                                                            },
                                                        ),
                                                    ]
                                                    .into(),
                                                },
                                            ),
                                            (
                                                "right",
                                                Node {
                                                    id: 0,
                                                    attributes: vec![
                                                        ("variant", add),
                                                        (
                                                            "left",
                                                            Node {
                                                                id: 0,
                                                                attributes: vec![
                                                                    ("variant", literal.clone()),
                                                                    ("value", Literal(Str("6".into()))),
                                                                ]
                                                                .into(),
                                                            },
                                                        ),
                                                        (
                                                            "right",
                                                            Node {
                                                                id: 0,
                                                                attributes: vec![
                                                                    ("variant", mul),
                                                                    (
                                                                        "left",
                                                                        Node {
                                                                            id: 0,
                                                                            attributes: vec![
                                                                                ("variant", literal.clone()),
                                                                                ("value", Literal(Str("7".into()))),
                                                                            ]
                                                                            .into(),
                                                                        },
                                                                    ),
                                                                    (
                                                                        "right",
                                                                        Node {
                                                                            id: 0,
                                                                            attributes: vec![
                                                                                ("variant", literal),
                                                                                ("value", Literal(Str("8".into()))),
                                                                            ]
                                                                            .into(),
                                                                        },
                                                                    ),
                                                                ]
                                                                .into(),
                                                            },
                                                        ),
                                                    ]
                                                    .into(),
                                                },
                                            ),
                                        ]
                                        .into(),
                                    },
                                ),
                            ]
                            .into(),
                        },
                    ),
                ]
                .into(),
            }
        };
        assert_eq!(ast.tree, test_ast);
    }

    #[test]
    fn ast_builder() {
        let input = r#"1+(2*3-4)"#;

        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<lexer input>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<grammar input>"), GRAMMAR_NUMBERS),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let mut input_stream = StringStream::new(Path::new("<input>"), input);
        let mut lexed_input = lexer.lex(&mut input_stream);
        let (table, raw_input) = parser.recognise(&mut lexed_input).unwrap();
        let forest = parser.to_forest(&table, &raw_input).unwrap();
        let ast = parser.select_ast(&forest, &raw_input, lexed_input.last_span());

        let test_ast = {
            use super::super::parser::Value::*;
            use TestAST::*;
            Node {
                id: 0,
                attributes: vec![
                    (
                        "right",
                        Node {
                            id: 1,
                            attributes: vec![(
                                "self",
                                Node {
                                    id: 0,
                                    attributes: vec![
                                        (
                                            "right",
                                            Node {
                                                id: 1,
                                                attributes: vec![(
                                                    "self",
                                                    Literal(Str("4".into())),
                                                )]
                                                .into(),
                                            },
                                        ),
                                        (
                                            "left",
                                            Node {
                                                id: 0,
                                                attributes: vec![(
                                                    "self",
                                                    Node {
                                                        id: 1,
                                                        attributes: vec![
                                                            (
                                                                "right",
                                                                Node {
                                                                    id: 2,
                                                                    attributes: vec![(
                                                                        "self",
                                                                        Literal(Str("3".into())),
                                                                    )]
                                                                    .into(),
                                                                },
                                                            ),
                                                            (
                                                                "left",
                                                                Node {
                                                                    id: 1,
                                                                    attributes: vec![(
                                                                        "self",
                                                                        Literal(Str("2".into())),
                                                                    )]
                                                                    .into(),
                                                                },
                                                            ),
                                                        ]
                                                        .into(),
                                                    },
                                                )]
                                                .into(),
                                            },
                                        ),
                                    ]
                                    .into(),
                                },
                            )]
                            .into(),
                        },
                    ),
                    (
                        "left",
                        Node {
                            id: 0,
                            attributes: vec![(
                                "self",
                                Node {
                                    id: 1,
                                    attributes: vec![("self", Literal(Str("1".into())))].into(),
                                },
                            )]
                            .into(),
                        },
                    ),
                ]
                .into(),
            }
        };

        assert_eq!(ast, test_ast, "Expected\n{:#?}\n\nGot\n{:#?}", test_ast, ast);
    }

    #[test]
    fn forest_builder() {
        let input = r#"1+(2*3-4)"#;

        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<lexer input>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<grammar input>"), GRAMMAR_NUMBERS),
            lexer.grammar(),
        )
        .unwrap();

        let parser = EarleyParser::new(grammar);
        let sets = final_sets!(
            (parser.grammar())
            (lexer.grammar())
            ==
            Factor -> NUMBER (1)
            Product -> Factor (1)
            Sum -> Product (1)
            Sum -> Sum PM Product (9)

            ==

            ==
            Factor -> LPAR Sum RPAR (9)
            Product -> Factor (9)

            ==
            Factor -> NUMBER (4)
            Product -> Factor (4)
            Sum -> Product (4)
            Product -> Product TD Factor (6)
            Sum -> Product (6)
            Sum -> Sum PM Product (8)

            ==

            ==
            Factor -> NUMBER (6)

            ==

            ==
            Factor -> NUMBER (8)
            Product -> Factor (8)

            ==

            ==
        );

        let (table, raw_input) = parser
            .recognise(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
        let forest = parser.to_forest(&table, &raw_input).unwrap();
        assert_eq!(
            forest,
            sets,
            "Parsed forest:\n{}\nExpected forest:\n{}",
            forest
                .iter()
                .map(|set| format!("{}", set))
                .collect::<Vec<_>>()
                .join("\n"),
            sets.iter()
                .map(|set| format!("{}", set))
                .collect::<Vec<_>>()
                .join("\n")
        );
    }

    #[test]
    fn recogniser() {
        let input = r#"1+(2*3-4)"#;

        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<lexer input>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<grammar input>"), GRAMMAR_NUMBERS),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let sets = sets!(
            ==
            Sum -> . Sum PM Product (0)
            Sum -> . Product (0)
            Product -> . Product TD Factor (0)
            Product -> . Factor (0)
            Factor -> . LPAR Sum RPAR (0)
            Factor -> . NUMBER (0)

            ==
            Factor -> NUMBER . (0)
            Product -> Factor . (0)
            Sum -> Product . (0)
            Product -> Product . TD Factor (0)
            Sum -> Sum . PM Product (0)

            ==
            Sum -> Sum PM . Product (0)
            Product -> . Product TD Factor (2)
            Product -> . Factor (2)
            Factor -> . LPAR Sum RPAR (2)
            Factor -> . NUMBER (2)

            ==
            Factor -> LPAR . Sum RPAR (2)
            Sum -> . Sum PM Product (3)
            Sum -> . Product (3)
            Product -> . Product TD Factor (3)
            Product -> . Factor (3)
            Factor -> . LPAR Sum RPAR (3)
            Factor -> . NUMBER (3)

            ==
            Factor -> NUMBER . (3)
            Product -> Factor . (3)
            Sum -> Product . (3)
            Product -> Product . TD Factor (3)
            Factor -> LPAR Sum . RPAR (2)
            Sum -> Sum . PM Product (3)

            ==
            Product -> Product TD . Factor (3)
            Factor -> . LPAR Sum RPAR (5)
            Factor -> . NUMBER (5)

            ==
            Factor -> NUMBER . (5)
            Product -> Product TD Factor . (3)
            Sum -> Product . (3)
            Product -> Product . TD Factor (3)
            Factor -> LPAR Sum . RPAR (2)
            Sum -> Sum . PM Product (3)

            ==
            Sum -> Sum PM . Product (3)
            Product -> . Product TD Factor (7)
            Product -> . Factor (7)
            Factor -> . LPAR Sum RPAR (7)
            Factor -> . NUMBER (7)

            ==
            Factor -> NUMBER . (7)
            Product -> Factor . (7)
            Sum -> Sum PM Product . (3)
            Product -> Product . TD Factor (7)
            Factor -> LPAR Sum . RPAR (2)
            Sum -> Sum . PM Product (3)

            ==
            Factor -> LPAR Sum RPAR . (2)
            Product -> Factor . (2)
            Sum -> Sum PM Product . (0)
            Product -> Product . TD Factor (2)
            Sum -> Sum . PM Product (0)
        );
        let (recognised, _) = parser
            .recognise(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
        verify_sets(sets, recognised, &parser, &lexer);
    }

    fn verify_sets(
        sets: Vec<Vec<TestEarleyItem>>,
        recognised: Vec<StateSet>,
        parser: &EarleyParser,
        lexer: &Lexer,
    ) {
        assert_eq!(recognised.len(), sets.len());
        for (set, (expected, recognised)) in sets.iter().zip(recognised.iter()).enumerate() {
            assert_eq!(
                expected.len(),
                recognised.set.len(),
                "Set #{} length does not match.",
                set
            );
            for (item_nb, (test_item, item)) in
                expected.iter().zip(recognised.set.iter()).enumerate()
            {
                test_item.matches(item, parser, lexer, set, item_nb);
            }
        }
    }
}