use super::ast::{
    Ast, Attribute as AstAttribute, Element as AstElement, Expression, Item, Proxy as AstProxy,
    Rule as AstRule, ToplevelDeclaration,
};
use super::grammar::{
    Attribute, Axioms, Element, ElementType, NonTerminalDescription, NonTerminalName,
    Nullables, Proxy, Rule, RuleId, Rules, ValueTemplate,
};
use super::parser::{NonTerminalId, ParseResult, Parser, Value, AST};
use crate::typed::Spanned;
use crate::{
    build_system,
    builder::{select_format, Buildable, FileResult, Format},
    error::{Error, ErrorKind, Result},
    lexer::{Grammar as LexerGrammar, LexedStream, Lexer, TerminalId, Token},
    list::List,
    regex::Allowed,
    span::Span,
    stream::StringStream,
    typed::Tree,
};
use bincode::deserialize;
use fragile::Fragile;
use itertools::Itertools;
use newty::{newty, nvec};
use serde::{Deserialize, Serialize};
use std::cmp::{Ordering, Reverse};
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet, VecDeque};
use std::fmt;
use std::fs::File;
use std::io::Read;
use std::path::{Path, PathBuf};
use std::rc::Rc;

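/// Prints every Earley state set in `sets`, one item per line, in the classic
/// dotted-rule notation (`Sum -> Sum • PM Product (0)`), where the trailing
/// number is the set the item originated from. Debugging helper.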
pub fn print_sets(sets: &[StateSet], parser: &EarleyParser, lexer: &Lexer) {
    for (i, set) in sets.iter().enumerate() {
        println!("=== {} ===", i);
        for item in set.slice() {
            let mut line = String::new();
            let rule = &parser.grammar().rules[item.rule];
            line.push_str(&parser.grammar().name_of[rule.id]);
            line.push_str(" -> ");
            for idx in 0..item.position {
                line.push_str(&rule.elements[idx].name(lexer.grammar(), parser.grammar()));
                line.push(' ');
            }
            line.push_str("• ");
            for idx in item.position..rule.elements.len() {
                line.push_str(&rule.elements[idx].name(lexer.grammar(), parser.grammar()));
                line.push(' ');
            }
            line.push_str(&format!("({})", item.origin));
            println!("{}", line);
        }
        println!();
    }
}

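/// Prints every final (completed-item) set in `sets`; each line shows a fully
/// recognised rule with its attributes and keys, and the set in which it
/// ends. Debugging helper.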
pub fn print_final_sets(sets: &[FinalSet], parser: &EarleyParser, lexer: &Lexer) {
    for (i, set) in sets.iter().enumerate() {
        println!("=== {} ===", i);
        for item in &set.set.0 {
            let rule = &parser.grammar().rules[item.rule];
            print!("{} -> ", parser.grammar().name_of[rule.id]);
            for element in rule.elements.iter() {
                print!("{}", element.name(lexer.grammar(), parser.grammar()));
                match &element.attribute {
                    Attribute::Indexed(index) => print!(".{}", index),
                    Attribute::Named(name) => print!(".{}", name),
                    Attribute::None => {}
                }
                if let Some(key) = &element.key {
                    print!("@{}", key);
                }
                print!(" ");
            }
            println!("({})", item.end);
        }
        println!();
    }
}

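// `Table` holds one state set per input position; `Forest` mirrors it with
// the completed items used to build the syntax tree.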
type Table = Vec<StateSet>;
type Forest = Vec<FinalSet>;

newty! {
    #[derive(serde::Serialize, serde::Deserialize)]
    pub vec RulesMap(Vec<RuleId>)[NonTerminalId]
}

newty! {
    pub vec IsIn(Vec<RuleId>)[NonTerminalId]
}

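/// A single Earley item: a rule, the position of the "fat dot" within it, and
/// the state set in which the item was created.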
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
struct EarleyItem {
    /// The rule being recognised.
    rule: RuleId,
    /// The state set in which this item was first predicted.
    origin: usize,
    /// How many elements of the rule have been matched so far.
    position: usize,
    /// Whether an ancestor non-terminal has already been reported among the
    /// expected alternatives, so that its terminals are not listed as well.
    parent_has_been_shown: bool,
}

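/// A completed Earley item, as stored in the forest: a fully matched rule
/// together with the state set in which the match ends. Its starting position
/// is the index of the `FinalSet` that holds it.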
#[derive(Debug, Clone, PartialEq, Eq)]
struct FinalItem {
    /// The rule that was fully matched.
    rule: RuleId,
    /// The state set in which the match ends.
    end: usize,
}

impl fmt::Display for FinalItem {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "#{}\t\t({})", self.rule, self.end)
    }
}

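/// The grammar of an Earley parser: the rules themselves plus the precomputed
/// tables (axioms, nullable non-terminals, name/description/rule lookups)
/// that the recogniser needs.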
#[derive(Serialize, Deserialize, Debug)]
pub struct EarleyGrammar {
    /// The non-terminals a parse may start from.
    axioms: Axioms,
    /// The rules.
    rules: Rules,
    /// The non-terminals that can derive the empty string.
    nullables: Nullables,
    /// Maps the name of a non-terminal to its identifier.
    id_of: HashMap<Rc<str>, NonTerminalId>,
    /// Maps the identifier of a non-terminal to its name.
    name_of: NonTerminalName,
    /// Maps the identifier of a non-terminal to its description, if any.
    description_of: NonTerminalDescription,
    /// Maps the identifier of a non-terminal to the rules it is the left-hand
    /// side of.
    rules_of: RulesMap,
}

impl EarleyGrammar {
    fn has_rules(&self, id: NonTerminalId) -> &[RuleId] {
        &self.rules_of[id]
    }

    pub fn new(
        rules: Rules,
        axioms: Axioms,
        id_of: HashMap<Rc<str>, NonTerminalId>,
        name_of: NonTerminalName,
        description_of: NonTerminalDescription,
    ) -> Result<Self> {
        let nb_non_terminals = axioms.len_as();
        let mut nullables = Nullables::with_capacity(nb_non_terminals);
        let mut rules_of = nvec![RulesMap Vec::new(); nb_non_terminals];
        let mut is_in = nvec![IsIn Vec::new(); nb_non_terminals];
        let mut stack = VecDeque::with_capacity(is_in.len());
        for (i, rule) in rules.iter().enumerate() {
            let rule_id = RuleId(i);
            let lhs_id = rule.id;
            rules_of[lhs_id].push(rule_id);
            if rule.elements.is_empty() {
                nullables.insert(lhs_id);
                stack.push_front(lhs_id);
            }
            for element in rule.elements.iter() {
                if let ElementType::NonTerminal(rhs_id) = element.element_type {
                    is_in[rhs_id].push(rule_id);
                }
            }
        }

        // Propagate nullability: a non-terminal is nullable if some rule for
        // it has only nullable non-terminals on its right-hand side.
        while let Some(current) = stack.pop_back() {
            for &rule_id in &is_in[current] {
                let lhs_id = rules[rule_id].id;
                if !nullables.contains(lhs_id)
                    && rules[rule_id].elements.iter().all(|element| {
                        match element.element_type {
                            ElementType::NonTerminal(id) => nullables.contains(id),
                            _ => false,
                        }
                    })
                {
                    nullables.insert(lhs_id);
                    stack.push_front(lhs_id);
                }
            }
        }

        Ok(Self {
            axioms,
            rules,
            nullables,
            id_of,
            name_of,
            description_of,
            rules_of,
        })
    }

    pub fn name_of(&self, id: NonTerminalId) -> Rc<str> {
        self.name_of[id].clone()
    }

    pub fn description_of(&self, id: NonTerminalId) -> Option<Rc<str>> {
        self.description_of[id].clone()
    }

    pub fn id_of(&self, name: Rc<str>) -> NonTerminalId {
        self.id_of[&name]
    }
}

impl EarleyGrammar {
    const PLAIN_EXTENSION: &'static str = "gr";
    const COMPILED_EXTENSION: &'static str = "cgr";
    const AST_EXTENSION: &'static str = "gr.ast";

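    /// Deserializes a grammar that was compiled ahead of time with `bincode`.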
    pub fn build_from_compiled(
        blob: &[u8],
        path: impl ToOwned<Owned = PathBuf>,
    ) -> Result<Self> {
        deserialize(blob).map_err(|error| Error::with_file(error, path.to_owned()))
    }

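    /// Builds the grammar from the typed AST of a grammar file: registers
    /// every top-level declaration, expands macro invocations on demand, and
    /// finally assembles the rule and axiom tables.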
    pub fn build_from_ast(ast: AST, lexer_grammar: &LexerGrammar) -> Result<Self> {
        type InvokedMacros = HashMap<(Rc<str>, Rc<[ElementType]>), NonTerminalId>;
        type MacroDeclarations = HashMap<Rc<str>, (Vec<Spanned<Rc<str>>>, Vec<AstRule>, Span)>;
        type FoundNonTerminals = HashMap<Rc<str>, (NonTerminalId, Span)>;

        let typed_ast = Ast::read(ast)?;
        // Maps the name of a macro to its arguments, its rules and the span
        // of its definition.
        let mut macro_declarations = HashMap::new();
        let mut non_terminal_declarations = Vec::new();
        // Maps the name of a non-terminal to its identifier and the span of
        // its definition.
        let mut found_nonterminals = HashMap::new();
        let mut available_id = NonTerminalId(0);
        let mut id_of = HashMap::new();
        let mut name_of = NonTerminalName::new();
        let mut description_of = NonTerminalDescription::new();

        for decl in typed_ast.decls {
            match decl.inner {
                ToplevelDeclaration::Macro(macro_decl) => {
                    if let Some((_, _, old_span)) = macro_declarations.insert(
                        macro_decl.name.inner.clone(),
                        (
                            macro_decl.args,
                            macro_decl.rules,
                            macro_decl.name.span.clone(),
                        ),
                    ) {
                        return ErrorKind::GrammarDuplicateMacroDefinition {
                            span: macro_decl.name.span.into(),
                            old_span: old_span.into(),
                            name: macro_decl.name.inner.to_string(),
                        }
                        .err();
                    }
                }
                ToplevelDeclaration::Decl(decl) => {
                    let id = available_id.next();
                    if let Some((_, old_span)) = found_nonterminals
                        .insert(decl.name.inner.clone(), (id, decl.name.span.clone()))
                    {
                        return ErrorKind::GrammarDuplicateDefinition {
                            name: decl.name.inner.to_string(),
                            span: decl.name.span.into(),
                            old_span: old_span.into(),
                        }
                        .err();
                    }
                    id_of.insert(decl.name.inner.clone(), id);
                    name_of.push(decl.name.inner.clone());
                    description_of.push(decl.comment.as_ref().map(|o| o.inner.clone()));
                    non_terminal_declarations.push((decl, id));
                }
            }
        }

        fn eval_rule(
            rule: &AstRule,
            macro_id: NonTerminalId,
            available_id: &mut NonTerminalId,
            rules: &mut Rules,
            invoked_macros: &mut InvokedMacros,
            name_of: &mut NonTerminalName,
            description_of: &mut NonTerminalDescription,
            id_of: &mut HashMap<Rc<str>, NonTerminalId>,
            found_nonterminals: &FoundNonTerminals,
            macro_declarations: &MacroDeclarations,
            scope: &HashMap<Rc<str>, ElementType>,
            lexer_grammar: &LexerGrammar,
        ) -> Result<Rule> {
            let mut new_elements = Vec::with_capacity(rule.elements.len());
            for element in rule.elements.iter() {
                let el = eval_element(
                    element,
                    macro_id,
                    available_id,
                    rules,
                    invoked_macros,
                    name_of,
                    description_of,
                    id_of,
                    found_nonterminals,
                    macro_declarations,
                    scope,
                    lexer_grammar,
                )?;
                new_elements.push(el);
            }
            let proxy = eval_proxy(&rule.proxy, found_nonterminals)?;
            Ok(Rule::new(
                macro_id,
                new_elements,
                proxy,
                rule.left_associative
                    .as_ref()
                    .map(|Spanned { inner, .. }| (*inner).into())
                    .unwrap_or(true),
            ))
        }

        fn invoke_macro(
            name: Spanned<Rc<str>>,
            args: Rc<[ElementType]>,
            macro_id: NonTerminalId,
            available_id: &mut NonTerminalId,
            rules: &mut Rules,
            invoked_macros: &mut InvokedMacros,
            name_of: &mut NonTerminalName,
            description_of: &mut NonTerminalDescription,
            id_of: &mut HashMap<Rc<str>, NonTerminalId>,
            found_nonterminals: &FoundNonTerminals,
            macro_declarations: &MacroDeclarations,
            lexer_grammar: &LexerGrammar,
        ) -> Result<()> {
            let Some((arg_names, macro_rules, definition_span)) =
                macro_declarations.get(&name.inner)
            else {
                return ErrorKind::GrammarUndefinedMacro {
                    name: name.inner.to_string(),
                    span: name.span.into(),
                }
                .err();
            };

            if args.len() != arg_names.len() {
                return ErrorKind::GrammarArityMismatch {
                    macro_name: name.inner.to_string(),
                    definition_arity: arg_names.len(),
                    call_arity: args.len(),
                    definition_span: definition_span.into(),
                    call_span: name.span.into(),
                }
                .err();
            }

            let scope = arg_names
                .iter()
                .map(|x| x.inner.clone())
                .zip(args.iter().copied())
                .collect();

            for rule in macro_rules {
                let actual_rule = eval_rule(
                    rule,
                    macro_id,
                    available_id,
                    rules,
                    invoked_macros,
                    name_of,
                    description_of,
                    id_of,
                    found_nonterminals,
                    macro_declarations,
                    &scope,
                    lexer_grammar,
                )?;
                rules.push(actual_rule);
            }
            Ok(())
        }

        fn eval_expression(
            expr: &Spanned<Item>,
            self_id: NonTerminalId,
            available_id: &mut NonTerminalId,
            rules: &mut Rules,
            invoked_macros: &mut InvokedMacros,
            name_of: &mut NonTerminalName,
            description_of: &mut NonTerminalDescription,
            id_of: &mut HashMap<Rc<str>, NonTerminalId>,
            found_nonterminals: &FoundNonTerminals,
            macro_declarations: &MacroDeclarations,
            scope: &HashMap<Rc<str>, ElementType>,
            lexer_grammar: &LexerGrammar,
        ) -> Result<ElementType> {
            let res = match &expr.inner {
                Item::SelfNonTerminal => ElementType::NonTerminal(self_id),
                Item::Regular { name } => {
                    if let Some(element) = scope.get(&name.inner) {
                        *element
                    } else if let Some((id, _)) = found_nonterminals.get(&name.inner) {
                        ElementType::NonTerminal(*id)
                    } else if let Some(id) = lexer_grammar.id(&name.inner) {
                        ElementType::Terminal(id)
                    } else {
                        return ErrorKind::GrammarUndefinedNonTerminal {
                            name: name.inner.to_string(),
                            span: name.span.clone().into(),
                        }
                        .err();
                    }
                }
                Item::MacroInvocation { name, arguments } => {
                    let mut args = Vec::new();
                    for arg in arguments {
                        let evaled = eval_expression(
                            arg,
                            self_id,
                            available_id,
                            rules,
                            invoked_macros,
                            name_of,
                            description_of,
                            id_of,
                            found_nonterminals,
                            macro_declarations,
                            scope,
                            lexer_grammar,
                        )?;
                        args.push(evaled);
                    }
                    let args: Rc<[_]> = Rc::from(args);
                    // Only expand a macro the first time it is invoked with a
                    // given list of arguments.
                    if let Entry::Vacant(e) =
                        invoked_macros.entry((name.inner.clone(), args.clone()))
                    {
                        let id = available_id.next();
                        let mut complete_name = name.inner.to_string();
                        complete_name.push('[');
                        complete_name.extend(
                            args.iter()
                                .map(|element| match element {
                                    ElementType::NonTerminal(id) => &*name_of[*id],
                                    ElementType::Terminal(id) => lexer_grammar.name(*id),
                                })
                                .intersperse(", "),
                        );
                        complete_name.push(']');
                        let complete_name: Rc<str> = Rc::from(complete_name);
                        id_of.insert(complete_name.clone(), id);
                        name_of.push(complete_name);
                        description_of.push(None);
                        e.insert(id);
                        invoke_macro(
                            name.clone(),
                            args.clone(),
                            id,
                            available_id,
                            rules,
                            invoked_macros,
                            name_of,
                            description_of,
                            id_of,
                            found_nonterminals,
                            macro_declarations,
                            lexer_grammar,
                        )?;
                    }
                    ElementType::NonTerminal(invoked_macros[&(name.inner.clone(), args)])
                }
            };
            Ok(res)
        }

        fn eval_element(
            element: &AstElement,
            id: NonTerminalId,
            available_id: &mut NonTerminalId,
            rules: &mut Rules,
            invoked_macros: &mut InvokedMacros,
            name_of: &mut NonTerminalName,
            description_of: &mut NonTerminalDescription,
            id_of: &mut HashMap<Rc<str>, NonTerminalId>,
            found_nonterminals: &FoundNonTerminals,
            macro_declarations: &MacroDeclarations,
            scope: &HashMap<Rc<str>, ElementType>,
            lexer_grammar: &LexerGrammar,
        ) -> Result<Element> {
            let attribute = match &element.attribute {
                Some(AstAttribute {
                    attribute,
                    named: Spanned { inner: true, .. },
                    span: _span,
                }) => Attribute::Named(attribute.inner.clone()),
                Some(AstAttribute {
                    attribute,
                    named: Spanned { inner: false, .. },
                    span,
                }) => {
                    let index = attribute
                        .inner
                        .parse()
                        .map_err(|_| ErrorKind::IntegerTooBig {
                            string: attribute.inner.to_string(),
                            span: span.into(),
                        })?;
                    Attribute::Indexed(index)
                }
                None => Attribute::None,
            };
            let key = element.key.as_ref().map(|k| k.0.clone());
            let element_type = eval_expression(
                &element.item,
                id,
                available_id,
                rules,
                invoked_macros,
                name_of,
                description_of,
                id_of,
                found_nonterminals,
                macro_declarations,
                scope,
                lexer_grammar,
            )?;
            Ok(Element::new(attribute, key.map(|o| o.inner), element_type))
        }

        fn eval_proxy(proxy: &AstProxy, found_nonterminals: &FoundNonTerminals) -> Result<Proxy> {
            let mut actual_proxy = HashMap::new();
            if let Some(ref variant) = proxy.variant {
                actual_proxy.insert(
                    "variant".into(),
                    ValueTemplate::String(variant.inner.clone()),
                );
            }
            for (key, (expression, _)) in proxy.items.iter() {
                let value = match &expression.inner {
                    Expression::String(string) => ValueTemplate::String(string.clone()),
                    Expression::Id(id) => ValueTemplate::Variable(id.clone()),
                    Expression::Instanciation {
                        name,
                        children,
                        variant,
                    } => {
                        let Some((nonterminal, _)) = found_nonterminals.get(&name.inner) else {
                            return ErrorKind::GrammarUndefinedNonTerminal {
                                name: name.inner.to_string(),
                                span: name.span.clone().into(),
                            }
                            .err();
                        };

                        let fake_proxy = AstProxy {
                            variant: variant.as_ref().cloned(),
                            items: children.clone(),
                            span: expression.span.clone(),
                        };
                        let attributes = eval_proxy(&fake_proxy, found_nonterminals)?;
                        ValueTemplate::InlineRule {
                            non_terminal: *nonterminal,
                            attributes,
                        }
                    }
                };
                actual_proxy.insert(key.clone(), value);
            }
            Ok(actual_proxy)
        }

        let mut invoked_macros: InvokedMacros = HashMap::new();
        let mut found_axioms = Vec::new();
        let mut rules = Rules::new();
        let empty_scope = HashMap::new();
        for (declaration, id) in non_terminal_declarations {
            if declaration.axiom.inner {
                found_axioms.push(id);
            }
            for rule in declaration.rules {
                let parsed_rule = eval_rule(
                    &rule,
                    id,
                    &mut available_id,
                    &mut rules,
                    &mut invoked_macros,
                    &mut name_of,
                    &mut description_of,
                    &mut id_of,
                    &found_nonterminals,
                    &macro_declarations,
                    &empty_scope,
                    lexer_grammar,
                )?;
                rules.push(parsed_rule);
            }
        }
        let mut axioms = Axioms::with_capacity(available_id.next());
        for axiom in found_axioms {
            axioms.put(axiom);
        }
        let res = Self::new(rules, axioms, id_of, name_of, description_of)?;
        Ok(res)
    }

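    /// Builds the grammar from a plain-text grammar file, by lexing and
    /// parsing it with the bootstrapped `parser.clx`/`parser.cgr` system and
    /// handing the resulting AST to [`Self::build_from_ast`].
    ///
    /// A minimal sketch (the file contents are illustrative, mirroring the
    /// tests at the bottom of this module):
    ///
    /// ```ignore
    /// let lexer = Lexer::build_from_plain(StringStream::new(
    ///     Path::new("<lexer>"),
    ///     r"NUMBER ::= ([0-9]+)",
    /// ))?;
    /// let grammar = EarleyGrammar::build_from_plain(
    ///     StringStream::new(Path::new("<grammar>"), r"@Expr ::= NUMBER.0@value <Literal>;"),
    ///     lexer.grammar(),
    /// )?;
    /// ```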
    pub fn build_from_plain(
        mut source: StringStream,
        lexer_grammar: &LexerGrammar,
    ) -> Result<Self> {
        let (lexer, parser) = build_system!(
            lexer => "parser.clx",
            parser => "parser.cgr",
        )?;
        let mut input = lexer.lex(&mut source);
        let result = parser.parse(&mut input)?;
        let grammar = Self::build_from_ast(result.tree, lexer_grammar)?;
        Ok(grammar)
    }

    /// Builds the grammar from a file, choosing the loader (compiled, AST or
    /// plain text) from the file extension.
    pub fn build_from_path(path: &Path, lexer_grammar: &LexerGrammar) -> Result<Self> {
        let ast: AST = match select_format(
            path,
            &[
                (Self::COMPILED_EXTENSION, Format::Compiled),
                (Self::AST_EXTENSION, Format::Ast),
                (Self::PLAIN_EXTENSION, Format::Plain),
            ],
        ) {
            FileResult::Valid((actual_path, Format::Ast)) => {
                let file = File::open(&actual_path)
                    .map_err(|error| Error::with_file(error, &actual_path))?;
                serde_json::from_reader(file).map_err(|error| ErrorKind::IllformedAst {
                    error,
                    path: actual_path,
                })?
            }
            FileResult::Valid((actual_path, Format::Plain)) => {
                let stream = StringStream::from_file(actual_path)?;
                let result = Self::build_from_plain(stream, lexer_grammar)?;
                return Ok(result);
            }
            FileResult::Valid((actual_path, Format::Compiled)) => {
                let mut file = File::open(&actual_path)
                    .map_err(|err| Error::with_file(err, &actual_path))?;
                let mut buffer = Vec::new();
                file.read_to_end(&mut buffer)
                    .map_err(|err| Error::with_file(err, &actual_path))?;
                let result = Self::build_from_compiled(&buffer, actual_path)?;
                return Ok(result);
            }
            FileResult::WrongExtension(extension) => {
                return ErrorKind::UnrecognisedExtension {
                    extension,
                    path: path.to_owned(),
                }
                .err();
            }
            FileResult::NonExisting => {
                return ErrorKind::GrammarNotFound {
                    path: path.to_owned(),
                }
                .err();
            }
        };
        let parser = Self::build_from_ast(ast, lexer_grammar)?;
        Ok(parser)
    }

    /// Builds the grammar from an in-memory blob, choosing the loader from
    /// the extension of `path` (which is used only for format selection and
    /// error reporting).
    pub fn build_from_blob(
        blob: &[u8],
        path: &Path,
        lexer_grammar: &LexerGrammar,
    ) -> Result<Self> {
        let ast: AST = match select_format(
            path,
            &[
                (Self::COMPILED_EXTENSION, Format::Compiled),
                (Self::AST_EXTENSION, Format::Ast),
                (Self::PLAIN_EXTENSION, Format::Plain),
            ],
        ) {
            FileResult::Valid((actual_path, Format::Compiled)) => {
                let result = Self::build_from_compiled(blob, actual_path)?;
                return Ok(result);
            }
            FileResult::Valid((actual_path, Format::Ast)) => {
                let string = std::str::from_utf8(blob)
                    .map_err(|error| Error::with_file(error, actual_path.clone()))?;
                serde_json::from_str(string)
                    .map_err(|error| Error::with_file(error, actual_path))?
            }
            FileResult::Valid((actual_path, Format::Plain)) => {
                let string = String::from_utf8(blob.to_vec())
                    .map_err(|error| Error::with_file(error, &actual_path))?;
                let stream = StringStream::new(actual_path, string);
                let result = Self::build_from_plain(stream, lexer_grammar)?;
                return Ok(result);
            }
            FileResult::NonExisting => {
                return ErrorKind::GrammarNotFound {
                    path: path.to_owned(),
                }
                .err();
            }
            FileResult::WrongExtension(extension) => {
                return ErrorKind::UnrecognisedExtension {
                    extension,
                    path: path.to_owned(),
                }
                .err();
            }
        };
        let grammar = Self::build_from_ast(ast, lexer_grammar)?;
        Ok(grammar)
    }
}

newty! {
    id FinalItemId
}
newty! {
    #[derive(PartialEq, Eq, Clone)]
    vec FinalSetVec(FinalItem)[FinalItemId]
}
newty! {
    #[derive(Clone)]
    map FinalSetIndex(Vec<FinalItemId>)[NonTerminalId]
}

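/// A set of completed items that all start at the same input position,
/// indexed by the non-terminal they derive so that `find_children` can look
/// them up quickly.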
#[derive(Default, Debug, Clone, Eq)]
pub struct FinalSet {
    /// Maps a non-terminal to the items of this set that derive it.
    index: FinalSetIndex,
    /// The completed items, all starting at `position`.
    set: FinalSetVec,
    /// The input position at which every item of this set starts.
    position: usize,
}

impl PartialEq for FinalSet {
    fn eq(&self, rhs: &FinalSet) -> bool {
        self.set == rhs.set && self.position == rhs.position
    }
}

impl FinalSet {
    fn add(&mut self, item: FinalItem, grammar: &EarleyGrammar) {
        self.index
            .0
            .entry(grammar.rules[item.rule].id)
            .or_default()
            .push(self.set.len_as());
        self.set.push(item);
    }

    fn iter(&self) -> impl Iterator<Item = &FinalItem> + '_ {
        self.set.iter()
    }
}

impl std::fmt::Display for FinalSet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            r"== ({}) ==
{}",
            self.position,
            self.set
                .iter()
                .map(|item| format!("{}", item))
                .collect::<Vec<_>>()
                .join("\n")
        )
    }
}

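/// An Earley state set: the items under consideration at a given input
/// position. `cache` gives O(1) deduplication while `set` preserves insertion
/// order, so the set can be extended while it is being iterated.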
#[derive(Default, Debug)]
pub struct StateSet {
    cache: HashSet<EarleyItem>,
    set: Vec<EarleyItem>,
    position: usize,
}

impl StateSet {
    fn add(&mut self, item: EarleyItem) {
        if self.cache.insert(item) {
            self.set.push(item);
        }
    }

    /// Advances the internal cursor and returns the next unprocessed item,
    /// if any. Not an `Iterator`, because the set may grow between calls.
    fn next(&mut self) -> Option<&EarleyItem> {
        if let Some(e) = self.set.get(self.position) {
            self.position += 1;
            Some(e)
        } else {
            None
        }
    }

    fn is_empty(&self) -> bool {
        self.set.is_empty()
    }

    fn slice(&self) -> &[EarleyItem] {
        &self.set
    }

    fn iter(&self) -> impl Iterator<Item = &EarleyItem> + '_ {
        self.set.iter()
    }
}

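/// A node of the syntax tree under construction: either a rule or a token,
/// spanning the input positions `start..end`.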
#[derive(Clone, Debug)]
struct SyntaxicItem {
    kind: SyntaxicItemKind,
    start: usize,
    end: usize,
}

#[derive(Clone, Debug)]
enum SyntaxicItemKind {
    Rule(RuleId),
    Token(Token),
}

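/// The Earley parser itself. Recognition builds a table of state sets, which
/// is then turned into a forest of completed items, from which a single AST
/// is selected according to rule priority and associativity.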
#[derive(Debug)]
pub struct EarleyParser {
    grammar: EarleyGrammar,
}

impl EarleyParser {
    pub fn build_from_blob(
        blob: &[u8],
        path: &Path,
        lexer_grammar: &LexerGrammar,
    ) -> Result<Self> {
        let grammar = EarleyGrammar::build_from_blob(blob, path, lexer_grammar)?;
        Ok(Self { grammar })
    }

    /// Finds the children of `element` in the forest: scans its rule from
    /// left to right, threading every possible boundary through the final
    /// sets, and keeps the complete derivation preferred by the rule's
    /// associativity, breaking ties by rule priority.
    fn find_children(
        &self,
        element: SyntaxicItem,
        forest: &[FinalSet],
        raw_input: &[Token],
    ) -> Vec<SyntaxicItem> {
        match element.kind {
            SyntaxicItemKind::Rule(rule) => {
                let mut boundary = vec![(List::default(), element.start)];
                for elem in self.grammar.rules[rule].elements.iter() {
                    let mut next_boundary = Vec::new();
                    for (children, curpos) in boundary.drain(..) {
                        match elem.element_type {
                            ElementType::NonTerminal(id) => {
                                if let Some(rules) = forest[curpos].index.get(&id) {
                                    for final_item in rules
                                        .iter()
                                        .map(|&rule| &forest[curpos].set[rule])
                                        .filter(|final_item| final_item.end <= element.end)
                                    {
                                        next_boundary.push((
                                            children.cons(SyntaxicItem {
                                                kind: SyntaxicItemKind::Rule(final_item.rule),
                                                start: curpos,
                                                end: final_item.end,
                                            }),
                                            final_item.end,
                                        ))
                                    }
                                }
                            }
                            ElementType::Terminal(id)
                                if curpos < element.end && raw_input[curpos].id() == id =>
                            {
                                next_boundary.push((
                                    children.cons(SyntaxicItem {
                                        kind: SyntaxicItemKind::Token(raw_input[curpos].clone()),
                                        start: curpos,
                                        end: curpos + 1,
                                    }),
                                    curpos + 1,
                                ))
                            }
                            _ => {}
                        }
                    }
                    boundary.extend(next_boundary.into_iter().rev());
                }
                let children = boundary
                    .into_iter()
                    .filter_map(|(children, pos)| {
                        if pos == element.end {
                            Some(children)
                        } else {
                            None
                        }
                    })
                    .max_by(|left_children, right_children| {
                        for (left, right) in left_children.iter().zip(right_children.iter()) {
                            let SyntaxicItemKind::Rule(left_rule) = left.kind else {
                                continue;
                            };
                            let SyntaxicItemKind::Rule(right_rule) = right.kind else {
                                continue;
                            };
                            let assoc_ord = if self.grammar.rules[rule].left_associative {
                                left.start.cmp(&right.start)
                            } else {
                                right.start.cmp(&left.start)
                            };
                            let ord = match assoc_ord {
                                Ordering::Equal => left_rule.cmp(&right_rule),
                                other => other,
                            };
                            match ord {
                                Ordering::Equal => continue,
                                other => return other,
                            }
                        }
                        Ordering::Equal
                    })
                    .unwrap();
                children
                    .iter()
                    .cloned()
                    .collect::<Vec<_>>()
                    .into_iter()
                    .rev()
                    .collect()
            }
            SyntaxicItemKind::Token(_) => Vec::new(),
        }
    }

    /// Builds the AST for `item`: recursively builds its children, then
    /// evaluates the rule's proxy to assemble the node's attributes.
    fn build_ast(
        &self,
        item: SyntaxicItem,
        forest: &[FinalSet],
        raw_input: &[Token],
        last_span: &Span,
    ) -> AST {
        match item.kind {
            SyntaxicItemKind::Rule(rule) => {
                let span = if raw_input.is_empty() {
                    last_span.clone()
                } else if item.end == item.start {
                    raw_input[item.start].span().clone()
                } else {
                    raw_input[item.start]
                        .span()
                        .sup(raw_input[item.end - 1].span())
                };
                let all_attributes = self
                    .find_children(item, forest, raw_input)
                    .into_iter()
                    .map(|item| self.build_ast(item, forest, raw_input, last_span))
                    .zip(self.grammar.rules[rule].elements.iter())
                    .filter_map(|(item, element)| {
                        element.key.as_ref().map(|key| match &element.attribute {
                            Attribute::Named(attr) => {
                                let AST::Node { attributes, .. } = item else {
                                    unreachable!("{item:?}.{attr}")
                                };
                                (key.clone(), attributes[attr].clone())
                            }
                            Attribute::Indexed(idx) => {
                                let AST::Terminal(token) = item else {
                                    unreachable!("{item:?}.{idx}")
                                };
                                (
                                    key.clone(),
                                    AST::Literal {
                                        value: Value::Str(Rc::from(
                                            token.attributes()[idx].as_str(),
                                        )),
                                        span: Some(token.span().clone()),
                                    },
                                )
                            }
                            Attribute::None => (key.clone(), item),
                        })
                    })
                    .collect::<HashMap<Rc<str>, _>>();
                let mut removed: HashSet<Rc<str>> = HashSet::new();
                let nonterminal = self.grammar.rules[rule].id;
                let mut attributes: HashMap<_, _> = self.grammar.rules[rule]
                    .proxy
                    .iter()
                    .map(|(key, wanted)| {
                        (
                            key.clone(),
                            wanted.evaluate(&all_attributes, &mut removed, &span),
                        )
                    })
                    .collect();
                attributes.extend(
                    all_attributes
                        .into_iter()
                        .filter(|(key, _)| !removed.contains(key)),
                );
                AST::Node {
                    nonterminal,
                    attributes,
                    span,
                }
            }
            SyntaxicItemKind::Token(token) => AST::Terminal(token),
        }
    }

    /// Selects, among the completed axiom items that span the whole input,
    /// the one with the greatest rule identifier, and builds its AST.
    pub fn select_ast(&self, forest: &[FinalSet], raw_input: &[Token], last_span: &Span) -> AST {
        forest[0]
            .iter()
            .filter(|item| {
                item.end == raw_input.len()
                    && self
                        .grammar
                        .axioms
                        .contains(self.grammar.rules[item.rule].id)
            })
            .sorted_unstable_by_key(|item| Reverse(item.rule))
            .map(|item| SyntaxicItem {
                start: 0,
                end: raw_input.len(),
                kind: SyntaxicItemKind::Rule(item.rule),
            })
            .map(|item| self.build_ast(item, forest, raw_input, last_span))
            .next()
            .unwrap()
    }

    /// Converts the recognition table into a forest: every completed item of
    /// set `i` is recorded in the final set of its origin, with `i` as its
    /// end position.
    pub fn to_forest(&self, table: &[StateSet], raw_input: &[Token]) -> Result<Forest> {
        let mut forest = vec![FinalSet::default(); table.len()];
        for (i, set) in table.iter().enumerate() {
            forest[i].position = i;
            if set.is_empty() {
                return ErrorKind::InternalError {
                    message: format!(
                        "While creating the forest, could not find any item in set {}, at {}",
                        i,
                        raw_input[i].span(),
                    ),
                }
                .err();
            }
            set.iter()
                .filter(|item| item.position == self.grammar.rules[item.rule].elements.len())
                .for_each(|item| {
                    forest[item.origin].add(
                        FinalItem {
                            end: i,
                            rule: item.rule,
                        },
                        &self.grammar,
                    )
                });
        }
        Ok(forest)
    }

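    /// Runs Earley recognition over `input`: at each input position, applies
    /// the three classic operations (prediction for non-terminals, scan for
    /// terminals, completion for finished items) until the whole input is
    /// consumed, and returns the resulting table together with the tokens
    /// that were read. Expected-token information is collected along the way
    /// so that syntax errors can report meaningful alternatives.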
    pub fn recognise<'input, 'linput: 'input>(
        &self,
        input: &'input mut LexedStream<'linput, 'linput>,
    ) -> Result<(Table, Vec<Token>)> {
        let mut sets = Vec::new();
        let mut first_state = StateSet::default();
        let mut possible_first_nonterminals = HashSet::new();
        let mut possible_first_terminals = HashSet::new();
        (0..self.grammar().rules.len())
            .map(RuleId)
            .filter(|id| self.grammar.axioms.contains(self.grammar.rules[*id].id))
            .for_each(|id| {
                let parent_has_been_shown = if let Some(description) =
                    self.grammar().description_of(self.grammar().rules[id].id)
                {
                    possible_first_nonterminals.insert(description);
                    true
                } else {
                    false
                };

                first_state.add(EarleyItem {
                    rule: id,
                    origin: 0,
                    position: 0,
                    parent_has_been_shown,
                })
            });
        let mut raw_input = Vec::new();
        sets.push(first_state);
        let mut pos = 0;
        'outer: loop {
            let mut next_state = StateSet::default();
            let mut scans: HashMap<TerminalId, Vec<_>> = HashMap::new();
            '_inner: while let Some(&item) = sets.last_mut().unwrap().next() {
                let mut to_be_added = Vec::new();
                match self.grammar().rules[item.rule].elements.get(item.position) {
                    Some(element) => match element.element_type {
                        // Prediction: expect every rule that can derive the
                        // awaited non-terminal.
                        ElementType::NonTerminal(id) => {
                            for &rule in self.grammar().has_rules(id) {
                                let parent_has_been_shown = item.parent_has_been_shown
                                    || if let Some(description) = self
                                        .grammar
                                        .description_of(self.grammar().rules[rule].id)
                                    {
                                        possible_first_nonterminals.insert(description);
                                        true
                                    } else {
                                        false
                                    };
                                to_be_added.push(EarleyItem {
                                    rule,
                                    origin: pos,
                                    position: 0,
                                    parent_has_been_shown,
                                });
                            }
                            // A nullable non-terminal may also be skipped.
                            if self.grammar.nullables.contains(id) {
                                to_be_added.push(EarleyItem {
                                    rule: item.rule,
                                    origin: item.origin,
                                    position: item.position + 1,
                                    parent_has_been_shown: item.parent_has_been_shown,
                                });
                            }
                        }
                        // Scan: remember to advance this item if the awaited
                        // terminal is read next.
                        ElementType::Terminal(id) => {
                            if !item.parent_has_been_shown {
                                if let Some(message) =
                                    input.lexer().grammar().description_of(id)
                                {
                                    possible_first_terminals.insert(message.to_string());
                                } else {
                                    possible_first_terminals
                                        .insert(input.lexer().grammar().name(id).to_string());
                                };
                            }
                            scans.entry(id).or_default().push(EarleyItem {
                                rule: item.rule,
                                origin: item.origin,
                                position: item.position + 1,
                                parent_has_been_shown: false,
                            })
                        }
                    },
                    // Completion: this item is done; advance every item of
                    // its origin set that was waiting for it.
                    None => {
                        for &parent in sets[item.origin].slice() {
                            if let Some(Element {
                                element_type: ElementType::NonTerminal(nonterminal),
                                ..
                            }) = self.grammar().rules[parent.rule]
                                .elements
                                .get(parent.position)
                            {
                                if *nonterminal == self.grammar().rules[item.rule].id {
                                    to_be_added.push(EarleyItem {
                                        rule: parent.rule,
                                        origin: parent.origin,
                                        position: parent.position + 1,
                                        parent_has_been_shown: item.parent_has_been_shown,
                                    })
                                }
                            }
                        }
                    }
                }
                for item in to_be_added {
                    sets.last_mut().unwrap().add(item);
                }
            }

            let possible_scans = input
                .lexer()
                .grammar()
                .default_allowed()
                .chain(scans.keys().cloned())
                .collect::<Vec<_>>();
            let allowed = Allowed::Some(possible_scans);
            let next_token = match input.next(allowed) {
                Ok(r) => r,
                Err(error) => {
                    if let ErrorKind::LexingError { .. } = *error.kind {
                        // The stream could not be lexed with only the allowed
                        // terminals; retry with all of them to build a
                        // helpful syntax error.
                        let error = if let Some(token) = input.next(Allowed::All)? {
                            let span = token.span().clone();
                            let name = {
                                let id = token.id();
                                let name = token.name().to_string();
                                if let Some(description) =
                                    input.lexer().grammar().description_of(id)
                                {
                                    description.to_string()
                                } else {
                                    name
                                }
                            };
                            ErrorKind::SyntaxError {
                                name,
                                alternatives: possible_first_nonterminals
                                    .drain()
                                    .map(|x| x.to_string())
                                    .chain(possible_first_terminals.drain())
                                    .collect(),
                                span: Fragile::new(span),
                            }
                        } else {
                            ErrorKind::SyntaxErrorValidPrefix {
                                span: input.last_span().into(),
                            }
                        };
                        return error.err();
                    } else {
                        return Err(error);
                    }
                }
            };
            possible_first_nonterminals.clear();
            possible_first_terminals.clear();
            if let Some(token) = next_token {
                for item in scans.entry(token.id()).or_default() {
                    next_state.add(*item);
                }
                raw_input.push(token.clone());
            } else if sets.last().unwrap().set.iter().any(|item| {
                let rule = &self.grammar.rules[item.rule];
                item.origin == 0
                    && self.grammar.axioms.contains(rule.id)
                    && rule.elements.len() == item.position
            }) {
                // End of input, and an axiom spanning the whole input has
                // been completed: recognition succeeded.
                break 'outer Ok((sets, raw_input));
            } else {
                return ErrorKind::SyntaxErrorValidPrefix {
                    span: input.last_span().into(),
                }
                .err();
            };

            sets.push(next_state);
            pos += 1;
        }
    }
}

impl Parser<'_> for EarleyParser {
    type Grammar = EarleyGrammar;

    fn new(grammar: Self::Grammar) -> Self {
        Self { grammar }
    }

    fn grammar(&self) -> &Self::Grammar {
        &self.grammar
    }

    fn is_valid<'input>(&self, input: &'input mut LexedStream<'input, 'input>) -> bool {
        self.recognise(input).is_ok()
    }

    fn parse<'input>(
        &self,
        input: &'input mut LexedStream<'input, 'input>,
    ) -> Result<ParseResult> {
        let (table, raw_input) = self.recognise(input)?;
        let forest = self.to_forest(&table, &raw_input)?;
        let tree = self.select_ast(&forest, &raw_input, input.last_span());
        Ok(ParseResult { tree })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    const GRAMMAR_NUMBERS_LEXER: &str = r#"
NUMBER ::= ([0-9])
PM ::= [-+]
TD ::= [*/]
LPAR ::= \(
RPAR ::= \)
"#;

    const GRAMMAR_NUMBERS: &str = r#"
@Sum ::= Sum@left PM Product@right <>
         Product@self <>;

Product ::= Product@left TD Factor@right <>
            Factor.self@self <>;

Factor ::= LPAR Sum@self RPAR <>
           NUMBER.0@self <>;"#;

    const GRAMMAR_PROXY_LEXER: &str = r#"
NUMBER ::= ([0-9]+)
OP ::= \+
LPAR ::= \(
RPAR ::= \)
"#;

    const GRAMMAR_NUMBERS_IMPROVED: &str = r#"
@Expr ::=
  NUMBER.0@value <Literal>
  (left-assoc) Expr@left TD Expr@right <MulDiv>
  (right-assoc) Expr@left PM Expr@right <AddSub>
  LPAR Expr@value RPAR <Through>;
"#;

    const GRAMMAR_PROXY: &str = r#"
@Expression ::=
  NUMBER.0@value <Literal>
  Expression@left OP Expression@right <Operation>
  OP Expression@right <Operation, left: Expression {Literal, value: "0"}>
  LPAR Expression@value RPAR <Parenthesized>;
"#;

    const GRAMMAR_PROXY_WRONG_1: &str = r#"
@Expression ::=
  NUMBER.0@value <Literal>
  Expression@left OP Expression@right <Operation>
  OP Expression@right <
    Operation,
    left: Expression {variant: Literal value: "0"}
  >
  LPAR Expression@value RPAR <Parenthesized>;
"#;

    const GRAMMAR_PROXY_WRONG_2: &str = r#"
@Expression ::=
  NUMBER.0@value <Literal>
  Expression@left OP Expression@right <Operation>
  OP Expression@right <
    Operation
    left: Expression {Literal value: "0"}
  >
  LPAR Expression@value RPAR <Parenthesized>;
"#;

    const GRAMMAR_C_LEXER: &str = include_str!("gmrs/petitc.lx");
    const GRAMMAR_C: &str = include_str!("gmrs/petitc.gr");

    /// An Earley item specified by names, for comparison against the real
    /// items produced by the recogniser.
    struct TestEarleyItem {
        name: &'static str,
        left_elements: Vec<&'static str>,
        right_elements: Vec<&'static str>,
        origin: usize,
    }

    impl TestEarleyItem {
        fn matches(
            &self,
            other: &EarleyItem,
            parser: &EarleyParser,
            lexer: &Lexer,
            set_id: usize,
            item_id: usize,
        ) {
            let error_message = format!("Set #{}, item #{}: no match:", set_id, item_id);
            let item = &parser.grammar().rules[other.rule];
            assert_eq!(
                self.name,
                &*parser.grammar().name_of[item.id],
                "{} name.",
                error_message
            );
            assert_eq!(
                self.left_elements.len() + self.right_elements.len(),
                item.elements.len(),
                "{} number of elements.\nExpected: [{:?}, {:?}]\nGot: {:?}",
                error_message,
                self.left_elements,
                self.right_elements,
                item.elements,
            );
            assert_eq!(
                self.left_elements.len(),
                other.position,
                "{} fat dot position.",
                error_message,
            );
            assert_eq!(self.origin, other.origin, "{} origin set.", error_message);
            for i in 0..self.left_elements.len() {
                assert_eq!(
                    self.left_elements[i],
                    &*item.elements[i].name(lexer.grammar(), parser.grammar()),
                    "{} element #{}.",
                    error_message,
                    i
                );
            }
            for i in 0..self.right_elements.len() {
                assert_eq!(
                    self.right_elements[i],
                    &*item.elements[i + other.position].name(lexer.grammar(), parser.grammar()),
                    "{} element #{}.",
                    error_message,
                    i + other.position
                );
            }
        }
    }

    /// Builds a `Vec<Vec<TestEarleyItem>>` from a readable, dotted-rule
    /// description of the expected state sets.
    macro_rules! sets {
        (
            $(
                == $(
                    $name: ident -> $($left_element: ident)* . $($right_element: ident)* ($origin: literal)
                )*
            )*
        ) => {
            {
                #[allow(unused_mut)]
                let mut sets = Vec::new();
                $(
                    #[allow(unused_mut)]
                    let mut set = Vec::new();
                    $(
                        set.push(earley_item!($name -> $($left_element)* . $($right_element)* ($origin)));
                    )*
                    sets.push(set);
                )*
                sets
            }
        };
    }

    /// Builds a single `TestEarleyItem` from its dotted-rule description.
    macro_rules! earley_item {
        ($name: ident -> $($left_element: ident)* . $($right_element: ident)* ($origin: literal)) => {
            {
                let left_elements = vec![$(
                    stringify!($left_element)
                ),*];
                let right_elements = vec![$(
                    stringify!($right_element)
                ),*];
                TestEarleyItem {
                    name: stringify!($name),
                    left_elements,
                    right_elements,
                    origin: $origin
                }
            }
        };
    }

    /// Builds the expected forest: looks every described rule up in the
    /// grammar and assembles the corresponding `FinalSet`s.
    macro_rules! final_sets {
        (
            ($grammar:expr)
            ($lexer:expr)
            $(
                == $(
                    $name: ident -> $($element: ident)* ($end: literal)
                )*
            )*
        ) => {{
            #[allow(unused_mut)]
            let mut sets = Vec::new();
            fn find_item(
                grammar: &EarleyGrammar,
                lexer_grammar: &$crate::lexer::Grammar,
                name: &str,
                elements: &[&str],
                end: usize,
            ) -> FinalItem {
                for &rule_identifier in grammar
                    .id_of
                    .get(&Rc::from(name))
                    .map(|&identifier| &grammar.rules_of[identifier])
                    .unwrap_or_else(|| panic!("The non-terminal {} does not exist.", name))
                    .iter()
                {
                    if elements.len() == grammar.rules[rule_identifier].elements.len()
                        && elements
                            .iter()
                            .zip(grammar.rules[rule_identifier].elements.iter())
                            .all(|(&left, right)| left == &*right.name(lexer_grammar, grammar))
                    {
                        return FinalItem {
                            rule: rule_identifier,
                            end,
                        };
                    }
                }
                panic!("The rule {} -> {} is not in the grammar.", name, elements.join(" "));
            }
            $(
                #[allow(unused_mut)]
                let mut set = FinalSet::default();
                set.position = sets.len();
                $(
                    set.add(find_item($grammar, $lexer, stringify!($name), &[$(stringify!($element)),*], $end), $grammar);
                )*
                sets.push(set);
            )*
            sets
        }};
    }

    #[derive(Debug, Clone)]
    struct TestToken {
        name: &'static str,
        attributes: Vec<(usize, &'static str)>,
    }

    impl PartialEq<Token> for TestToken {
        fn eq(&self, other: &Token) -> bool {
            other.name() == self.name
                && other
                    .attributes()
                    .iter()
                    .map(|(&key, value)| (key, value.as_str()))
                    .collect::<Vec<_>>()
                    == self.attributes
        }
    }

    #[derive(Clone)]
    struct MapVec(Vec<(&'static str, TestAST)>);

    impl std::fmt::Debug for MapVec {
        fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            formatter
                .debug_map()
                .entries(self.0.iter().map(|(k, v)| (k, v)))
                .finish()
        }
    }

    impl From<Vec<(&'static str, TestAST)>> for MapVec {
        fn from(o: Vec<(&'static str, TestAST)>) -> Self {
            Self(o)
        }
    }

    #[derive(Debug, Clone)]
    enum TestAST {
        Node {
            id: usize,
            attributes: MapVec,
        },
        #[allow(unused)]
        Terminal(TestToken),
        Literal(super::super::parser::Value),
    }

    impl PartialEq<TestAST> for AST {
        fn eq(&self, other: &TestAST) -> bool {
            other == self
        }
    }

    impl PartialEq<AST> for TestAST {
        fn eq(&self, other: &AST) -> bool {
            match (self, other) {
                (
                    TestAST::Node {
                        id: tid,
                        attributes: tattributes,
                    },
                    AST::Node {
                        nonterminal: id,
                        attributes,
                        ..
                    },
                ) => {
                    NonTerminalId::from(*tid) == *id && {
                        let tattributes = tattributes
                            .0
                            .iter()
                            .map(|(key, value)| (Rc::<str>::from(*key), value))
                            .collect::<HashMap<_, _>>();
                        tattributes.len() == attributes.len()
                            && tattributes.iter().all(|(key, value)| {
                                attributes.get(key).map_or(false, |v| *value == v)
                            })
                    }
                }
                (TestAST::Terminal(ttoken), AST::Terminal(token)) => ttoken == token,
                (TestAST::Literal(tvalue), AST::Literal { value, .. }) => tvalue == value,
                _ => false,
            }
        }
    }

    #[test]
    fn complex_proxy() {
        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<PROXY>"),
            GRAMMAR_PROXY_LEXER,
        ))
        .unwrap();
        EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<PROXY>"), GRAMMAR_PROXY),
            lexer.grammar(),
        )
        .unwrap();
        assert!(EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<PROXY>"), GRAMMAR_PROXY_WRONG_1),
            lexer.grammar()
        )
        .is_err());
        assert!(EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<PROXY>"), GRAMMAR_PROXY_WRONG_2),
            lexer.grammar()
        )
        .is_err());
    }

    #[test]
    fn recognise_handle_empty_rules() {
        let lexer_input = r#""#;
        let grammar_input = r#"
@A ::= <>
       B <>;
B ::= A <>;"#;
        let input = r#""#;
        let lexer =
            Lexer::build_from_plain(StringStream::new(Path::new("<lexer input>"), lexer_input))
                .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<grammar input>"), grammar_input),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let sets = sets!(
            ==
            A -> . (0)
            A -> . B (0)
            B -> . A (0)
            A -> B . (0)
            B -> A . (0)
        );
        let (recognised, _) = parser
            .recognise(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
        print_sets(&recognised, &parser, &lexer);
        verify_sets(sets, recognised, &parser, &lexer);
    }

    #[test]
    fn grammar_c() {
        let input = r#"
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

int main() {
    return sizeof(bool ****);
}
"#;
        let lexer =
            Lexer::build_from_plain(StringStream::new(Path::new("petitc.lx"), GRAMMAR_C_LEXER))
                .unwrap();

        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("petitc.gr"), GRAMMAR_C),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let _ast = parser
            .parse(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
    }

    #[test]
    fn grammar_c_prior_assoc() {
        let input = r#"
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>

int main() {
    int a;
    int b;
    a = b = 3+3*2;
    a = a < b > a < b > a;
}
"#;
        let lexer =
            Lexer::build_from_plain(StringStream::new(Path::new("petitc.lx"), GRAMMAR_C_LEXER))
                .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("petitc.gr"), GRAMMAR_C),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let _ast = parser
            .parse(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
    }

    #[test]
    fn valid_prefix() {
        let input = r#"1+2+"#;
        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<NUMBERS LEXER>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<NUMBERS>"), GRAMMAR_NUMBERS),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        assert!(parser
            .parse(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .is_err());
    }

    #[test]
    fn priority_associativity() {
        let input = r"1+2+3*4*5+6+7*8";
        // With `+` declared right-associative, `*` left-associative and `*`
        // binding tighter, this must parse as 1+(2+(((3*4)*5)+(6+(7*8)))).
        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<NUMBERS LEXER>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<NUMBERS IMPROVED>"), GRAMMAR_NUMBERS_IMPROVED),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let ast = parser
            .parse(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
        let test_ast = {
            use super::super::parser::Value::*;
            use TestAST::*;
            let add = Literal(Str("AddSub".into()));
            let literal = Literal(Str("Literal".into()));
            let mul = Literal(Str("MulDiv".into()));
            Node {
                id: 0,
                attributes: vec![
                    ("variant", add.clone()),
                    (
                        "left",
                        Node {
                            id: 0,
                            attributes: vec![
                                ("variant", literal.clone()),
                                ("value", Literal(Str("1".into()))),
                            ]
                            .into(),
                        },
                    ),
                    (
                        "right",
                        Node {
                            id: 0,
                            attributes: vec![
                                ("variant", add.clone()),
                                (
                                    "left",
                                    Node {
                                        id: 0,
                                        attributes: vec![
                                            ("variant", literal.clone()),
                                            ("value", Literal(Str("2".into()))),
                                        ]
                                        .into(),
                                    },
                                ),
                                (
                                    "right",
                                    Node {
                                        id: 0,
                                        attributes: vec![
                                            ("variant", add.clone()),
                                            (
                                                "left",
                                                Node {
                                                    id: 0,
                                                    attributes: vec![
                                                        ("variant", mul.clone()),
                                                        (
                                                            "left",
                                                            Node {
                                                                id: 0,
                                                                attributes: vec![
                                                                    ("variant", mul.clone()),
                                                                    (
                                                                        "left",
                                                                        Node {
                                                                            id: 0,
                                                                            attributes: vec![
                                                                                ("variant", literal.clone()),
                                                                                ("value", Literal(Str("3".into()))),
                                                                            ]
                                                                            .into(),
                                                                        },
                                                                    ),
                                                                    (
                                                                        "right",
                                                                        Node {
                                                                            id: 0,
                                                                            attributes: vec![
                                                                                ("variant", literal.clone()),
                                                                                ("value", Literal(Str("4".into()))),
                                                                            ]
                                                                            .into(),
                                                                        },
                                                                    ),
                                                                ]
                                                                .into(),
                                                            },
                                                        ),
                                                        (
                                                            "right",
                                                            Node {
                                                                id: 0,
                                                                attributes: vec![
                                                                    ("variant", literal.clone()),
                                                                    ("value", Literal(Str("5".into()))),
                                                                ]
                                                                .into(),
                                                            },
                                                        ),
                                                    ]
                                                    .into(),
                                                },
                                            ),
                                            (
                                                "right",
                                                Node {
                                                    id: 0,
                                                    attributes: vec![
                                                        ("variant", add),
                                                        (
                                                            "left",
                                                            Node {
                                                                id: 0,
                                                                attributes: vec![
                                                                    ("variant", literal.clone()),
                                                                    ("value", Literal(Str("6".into()))),
                                                                ]
                                                                .into(),
                                                            },
                                                        ),
                                                        (
                                                            "right",
                                                            Node {
                                                                id: 0,
                                                                attributes: vec![
                                                                    ("variant", mul),
                                                                    (
                                                                        "left",
                                                                        Node {
                                                                            id: 0,
                                                                            attributes: vec![
                                                                                ("variant", literal.clone()),
                                                                                ("value", Literal(Str("7".into()))),
                                                                            ]
                                                                            .into(),
                                                                        },
                                                                    ),
                                                                    (
                                                                        "right",
                                                                        Node {
                                                                            id: 0,
                                                                            attributes: vec![
                                                                                ("variant", literal),
                                                                                ("value", Literal(Str("8".into()))),
                                                                            ]
                                                                            .into(),
                                                                        },
                                                                    ),
                                                                ]
                                                                .into(),
                                                            },
                                                        ),
                                                    ]
                                                    .into(),
                                                },
                                            ),
                                        ]
                                        .into(),
                                    },
                                ),
                            ]
                            .into(),
                        },
                    ),
                ]
                .into(),
            }
        };
        assert_eq!(ast.tree, test_ast);
    }

    #[test]
    fn ast_builder() {
        let input = r#"1+(2*3-4)"#;

        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<lexer input>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<grammar input>"), GRAMMAR_NUMBERS),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let mut input_stream = StringStream::new(Path::new("<input>"), input);
        let mut lexed_input = lexer.lex(&mut input_stream);
        let (table, raw_input) = parser.recognise(&mut lexed_input).unwrap();
        let forest = parser.to_forest(&table, &raw_input).unwrap();
        let ast = parser.select_ast(&forest, &raw_input, lexed_input.last_span());

        let test_ast = {
            use super::super::parser::Value::*;
            use TestAST::*;
            Node {
                id: 0,
                attributes: vec![
                    (
                        "right",
                        Node {
                            id: 1,
                            attributes: vec![(
                                "self",
                                Node {
                                    id: 0,
                                    attributes: vec![
                                        (
                                            "right",
                                            Node {
                                                id: 1,
                                                attributes: vec![(
                                                    "self",
                                                    Literal(Str("4".into())),
                                                )]
                                                .into(),
                                            },
                                        ),
                                        (
                                            "left",
                                            Node {
                                                id: 0,
                                                attributes: vec![(
                                                    "self",
                                                    Node {
                                                        id: 1,
                                                        attributes: vec![
                                                            (
                                                                "right",
                                                                Node {
                                                                    id: 2,
                                                                    attributes: vec![(
                                                                        "self",
                                                                        Literal(Str("3".into())),
                                                                    )]
                                                                    .into(),
                                                                },
                                                            ),
                                                            (
                                                                "left",
                                                                Node {
                                                                    id: 1,
                                                                    attributes: vec![(
                                                                        "self",
                                                                        Literal(Str("2".into())),
                                                                    )]
                                                                    .into(),
                                                                },
                                                            ),
                                                        ]
                                                        .into(),
                                                    },
                                                )]
                                                .into(),
                                            },
                                        ),
                                    ]
                                    .into(),
                                },
                            )]
                            .into(),
                        },
                    ),
                    (
                        "left",
                        Node {
                            id: 0,
                            attributes: vec![(
                                "self",
                                Node {
                                    id: 1,
                                    attributes: vec![("self", Literal(Str("1".into())))].into(),
                                },
                            )]
                            .into(),
                        },
                    ),
                ]
                .into(),
            }
        };

        assert_eq!(ast, test_ast, "Expected\n{:#?}\n\nGot\n{:#?}", test_ast, ast);
    }

    #[test]
    fn forest_builder() {
        let input = r#"1+(2*3-4)"#;

        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<lexer input>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<grammar input>"), GRAMMAR_NUMBERS),
            lexer.grammar(),
        )
        .unwrap();

        let parser = EarleyParser::new(grammar);
        let sets = final_sets!(
            (parser.grammar())
            (lexer.grammar())
            ==
            Factor -> NUMBER (1)
            Product -> Factor (1)
            Sum -> Product (1)
            Sum -> Sum PM Product (9)

            ==

            ==
            Factor -> LPAR Sum RPAR (9)
            Product -> Factor (9)

            ==
            Factor -> NUMBER (4)
            Product -> Factor (4)
            Sum -> Product (4)
            Product -> Product TD Factor (6)
            Sum -> Product (6)
            Sum -> Sum PM Product (8)

            ==

            ==
            Factor -> NUMBER (6)

            ==

            ==
            Factor -> NUMBER (8)
            Product -> Factor (8)

            ==

            ==
        );

        let (table, raw_input) = parser
            .recognise(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
        let forest = parser.to_forest(&table, &raw_input).unwrap();
        assert_eq!(
            forest,
            sets,
            "Parsed forest:\n{}\nExpected forest:\n{}",
            forest
                .iter()
                .map(|set| format!("{}", set))
                .collect::<Vec<_>>()
                .join("\n"),
            sets.iter()
                .map(|set| format!("{}", set))
                .collect::<Vec<_>>()
                .join("\n")
        );
    }

    #[test]
    fn recogniser() {
        let input = r#"1+(2*3-4)"#;

        let lexer = Lexer::build_from_plain(StringStream::new(
            Path::new("<lexer input>"),
            GRAMMAR_NUMBERS_LEXER,
        ))
        .unwrap();
        let grammar = EarleyGrammar::build_from_plain(
            StringStream::new(Path::new("<grammar input>"), GRAMMAR_NUMBERS),
            lexer.grammar(),
        )
        .unwrap();
        let parser = EarleyParser::new(grammar);
        let sets = sets!(
            ==
            Sum -> . Sum PM Product (0)
            Sum -> . Product (0)
            Product -> . Product TD Factor (0)
            Product -> . Factor (0)
            Factor -> . LPAR Sum RPAR (0)
            Factor -> . NUMBER (0)

            ==
            Factor -> NUMBER . (0)
            Product -> Factor . (0)
            Sum -> Product . (0)
            Product -> Product . TD Factor (0)
            Sum -> Sum . PM Product (0)

            ==
            Sum -> Sum PM . Product (0)
            Product -> . Product TD Factor (2)
            Product -> . Factor (2)
            Factor -> . LPAR Sum RPAR (2)
            Factor -> . NUMBER (2)

            ==
            Factor -> LPAR . Sum RPAR (2)
            Sum -> . Sum PM Product (3)
            Sum -> . Product (3)
            Product -> . Product TD Factor (3)
            Product -> . Factor (3)
            Factor -> . LPAR Sum RPAR (3)
            Factor -> . NUMBER (3)

            ==
            Factor -> NUMBER . (3)
            Product -> Factor . (3)
            Sum -> Product . (3)
            Product -> Product . TD Factor (3)
            Factor -> LPAR Sum . RPAR (2)
            Sum -> Sum . PM Product (3)

            ==
            Product -> Product TD . Factor (3)
            Factor -> . LPAR Sum RPAR (5)
            Factor -> . NUMBER (5)

            ==
            Factor -> NUMBER . (5)
            Product -> Product TD Factor . (3)
            Sum -> Product . (3)
            Product -> Product . TD Factor (3)
            Factor -> LPAR Sum . RPAR (2)
            Sum -> Sum . PM Product (3)

            ==
            Sum -> Sum PM . Product (3)
            Product -> . Product TD Factor (7)
            Product -> . Factor (7)
            Factor -> . LPAR Sum RPAR (7)
            Factor -> . NUMBER (7)

            ==
            Factor -> NUMBER . (7)
            Product -> Factor . (7)
            Sum -> Sum PM Product . (3)
            Product -> Product . TD Factor (7)
            Factor -> LPAR Sum . RPAR (2)
            Sum -> Sum . PM Product (3)

            ==
            Factor -> LPAR Sum RPAR . (2)
            Product -> Factor . (2)
            Sum -> Sum PM Product . (0)
            Product -> Product . TD Factor (2)
            Sum -> Sum . PM Product (0)
        );
        let (recognised, _) = parser
            .recognise(&mut lexer.lex(&mut StringStream::new(Path::new("<input>"), input)))
            .unwrap();
        verify_sets(sets, recognised, &parser, &lexer);
    }

    fn verify_sets(
        sets: Vec<Vec<TestEarleyItem>>,
        recognised: Vec<StateSet>,
        parser: &EarleyParser,
        lexer: &Lexer,
    ) {
        assert_eq!(recognised.len(), sets.len());
        for (set, (expected, recognised)) in sets.iter().zip(recognised.iter()).enumerate() {
            assert_eq!(
                expected.len(),
                recognised.set.len(),
                "Set #{} length does not match.",
                set
            );
            for (item_nb, (test_item, item)) in
                expected.iter().zip(recognised.set.iter()).enumerate()
            {
                test_item.matches(item, parser, lexer, set, item_nb);
            }
        }
    }
}