1use crate::grammar::SymbolId;
2
3pub trait ErrorType {
19 type Error;
20}
21
22pub trait AstNode {
34 type Output;
35}
36
37#[doc(hidden)]
43pub trait FromAstNode<N: AstNode> {
44 fn from(node: N) -> Self;
45}
46
47impl<N: AstNode> FromAstNode<N> for N {
49 fn from(node: N) -> N {
50 node
51 }
52}
53
54#[derive(Debug, Clone, Copy)]
59pub struct Ignore;
60
61impl<N: AstNode> FromAstNode<N> for Ignore {
63 fn from(_: N) -> Self {
64 Ignore
65 }
66}
67
68impl<N: AstNode> FromAstNode<N> for alloc::boxed::Box<N> {
70 fn from(node: N) -> alloc::boxed::Box<N> {
71 alloc::boxed::Box::new(node)
72 }
73}
74
75pub trait Action<N: AstNode>: ErrorType {
80 fn build(&mut self, node: N) -> Result<N::Output, Self::Error>;
81}
82
83impl<N: AstNode, A: ErrorType> Action<N> for A
85where
86 N::Output: FromAstNode<N>,
87{
88 fn build(&mut self, node: N) -> Result<N::Output, Self::Error> {
89 Ok(FromAstNode::from(node))
90 }
91}
92
93#[derive(Debug, Clone, Copy, PartialEq, Eq)]
95pub(crate) enum ParserOp {
96 Shift(usize),
98 Reduce(usize),
100 ShiftOrReduce {
102 shift_state: usize,
103 reduce_rule: usize,
104 },
105 Error,
107}
108
109#[derive(Debug, Clone, Copy, PartialEq, Eq)]
111#[repr(transparent)]
112pub(crate) struct OpEntry(pub(crate) u32);
113
114impl OpEntry {
115 pub fn shift(state: usize) -> Self {
116 debug_assert!(state > 0, "Shift(0) is reserved for Error");
117 debug_assert!(state < 0x80000000, "Shift state too large");
118 OpEntry(state as u32)
119 }
120
121 pub fn reduce(rule: usize) -> Self {
122 debug_assert!(rule < 0x1000, "Reduce rule too large (max 4095)");
123 OpEntry(!(rule as u32))
124 }
125
126 pub fn shift_or_reduce(shift_state: usize, reduce_rule: usize) -> Self {
127 debug_assert!(shift_state > 0, "Shift(0) is reserved for Error");
128 debug_assert!(shift_state < 0x80000, "Shift state too large (max 19 bits)");
129 debug_assert!(reduce_rule < 0x1000, "Reduce rule too large (max 4095)");
130 OpEntry(!((reduce_rule as u32) | ((shift_state as u32) << 12)))
131 }
132
133 pub fn decode(&self) -> ParserOp {
134 let v = self.0 as i32;
135 if v > 0 {
136 ParserOp::Shift(v as usize)
137 } else if v == 0 {
138 ParserOp::Error
139 } else {
140 let payload = !self.0;
141 let r = (payload & 0xFFF) as usize;
142 let s = ((payload >> 12) & 0x7FFFF) as usize;
143 if s == 0 {
144 ParserOp::Reduce(r)
145 } else {
146 ParserOp::ShiftOrReduce {
147 shift_state: s,
148 reduce_rule: r,
149 }
150 }
151 }
152 }
153}
154
155#[doc(hidden)]
161#[derive(Debug, Clone, Copy)]
162pub struct ParseTable<'a> {
163 data: &'a [u32],
164 check: &'a [u32],
165 action_base: &'a [i32],
166 goto_base: &'a [i32],
167 rules: &'a [(u32, u8)],
168 pub(crate) num_terminals: u32,
169 default_reduce: &'a [u32],
170 default_goto: &'a [u32],
171}
172
173impl<'a> ParseTable<'a> {
174 #[allow(clippy::too_many_arguments)]
176 pub const fn new(
177 data: &'a [u32],
178 check: &'a [u32],
179 action_base: &'a [i32],
180 goto_base: &'a [i32],
181 rules: &'a [(u32, u8)],
182 num_terminals: u32,
183 default_reduce: &'a [u32],
184 default_goto: &'a [u32],
185 ) -> Self {
186 ParseTable {
187 data,
188 check,
189 action_base,
190 goto_base,
191 rules,
192 num_terminals,
193 default_reduce,
194 default_goto,
195 }
196 }
197
198 fn lookup(&self, base: &[i32], row: usize, col: u32) -> Option<u32> {
200 let idx = (base[row] + col as i32) as usize;
201 if idx < self.check.len() && self.check[idx] == col {
202 Some(self.data[idx])
203 } else {
204 None
205 }
206 }
207
208 pub(crate) fn action(&self, state: usize, terminal: SymbolId) -> ParserOp {
210 if let Some(v) = self.lookup(self.action_base, state, terminal.0) {
211 OpEntry(v).decode()
212 } else {
213 let rule = self.default_reduce[state];
214 if rule > 0 {
215 ParserOp::Reduce(rule as usize)
216 } else {
217 ParserOp::Error
218 }
219 }
220 }
221
222 pub(crate) fn goto(&self, state: usize, non_terminal: SymbolId) -> Option<usize> {
225 let nt_idx = (non_terminal.0 - self.num_terminals) as usize;
226 if let Some(v) = self.lookup(self.goto_base, nt_idx, state as u32) {
227 Some(v as usize)
228 } else {
229 let default = self.default_goto[nt_idx];
230 if default < u32::MAX {
231 Some(default as usize)
232 } else {
233 None
234 }
235 }
236 }
237
238 pub(crate) fn rule_info(&self, rule: usize) -> (SymbolId, usize) {
240 let (lhs, len) = self.rules[rule];
241 (SymbolId(lhs), len as usize)
242 }
243
244 pub(crate) fn rules(&self) -> &[(u32, u8)] {
246 self.rules
247 }
248}
249
250pub trait ErrorContext {
255 fn symbol_name(&self, id: SymbolId) -> &str;
257 fn state_symbol(&self, state: usize) -> SymbolId;
259 fn state_items(&self, state: usize) -> &[(u16, u8)];
262 fn rule_rhs(&self, rule: usize) -> &[u32];
264}
265
266#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
271pub enum Precedence {
272 Left(u8),
274 Right(u8),
276}
277
278impl Precedence {
279 pub fn level(&self) -> u8 {
281 match self {
282 Precedence::Left(l) | Precedence::Right(l) => *l,
283 }
284 }
285}
286
287#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
289pub enum Resolution {
290 Prec(Precedence),
292 Shift,
294 Reduce,
296}
297
298#[derive(Debug, Clone)]
307pub enum ParseError<E = core::convert::Infallible> {
308 Syntax { terminal: SymbolId },
310 Action(E),
312}
313
314impl ParseError<core::convert::Infallible> {
315 pub fn cast<F>(self) -> ParseError<F> {
317 match self {
318 ParseError::Syntax { terminal } => ParseError::Syntax { terminal },
319 ParseError::Action(e) => match e {},
320 }
321 }
322}
323
324#[derive(Debug, Clone, Copy)]
329pub struct Token {
330 pub terminal: SymbolId,
332 pub resolution: Option<Resolution>,
334}
335
336impl Token {
337 pub fn new(terminal: SymbolId) -> Self {
339 Self {
340 terminal,
341 resolution: None,
342 }
343 }
344
345 pub fn with_prec(terminal: SymbolId, prec: Precedence) -> Self {
347 Self {
348 terminal,
349 resolution: Some(Resolution::Prec(prec)),
350 }
351 }
352
353 pub fn with_resolution(terminal: SymbolId, resolution: Resolution) -> Self {
355 Self {
356 terminal,
357 resolution: Some(resolution),
358 }
359 }
360}
361
362use alloc::collections::BTreeSet;
367use alloc::collections::BinaryHeap;
368use alloc::rc::Rc;
369use alloc::string::{String, ToString};
370use alloc::{format, vec, vec::Vec};
371use core::cmp::Reverse;
372
373fn format_sym(s: &str) -> String {
376 if let Some(base) = s.strip_prefix("__").and_then(|s| s.strip_suffix("_star")) {
377 format!("{}*", base)
378 } else if let Some(base) = s.strip_prefix("__").and_then(|s| s.strip_suffix("_plus")) {
379 format!("{}+", base)
380 } else if let Some(base) = s.strip_prefix("__").and_then(|s| s.strip_suffix("_opt")) {
381 format!("{}?", base)
382 } else if let Some(rest) = s.strip_prefix("__") {
383 if let Some(idx) = rest.find("_sep_") {
384 let base = &rest[..idx];
385 let sep = &rest[idx + 5..];
386 return format!("{} % {}", base, sep);
387 }
388 s.to_string()
389 } else {
390 s.to_string()
391 }
392}
393
394type RecoveryState<'a> = (SimState<'a>, usize, Option<(usize, Repair)>);
395
396fn compute_nullable(table: &ParseTable, ctx: &impl ErrorContext) -> Vec<bool> {
398 let rules = table.rules();
399 let num_terminals = table.num_terminals as usize;
400
401 let mut max_sym = num_terminals;
403 for (rule_idx, &(lhs, _)) in rules.iter().enumerate() {
404 let lhs = lhs as usize;
405 if lhs >= max_sym {
406 max_sym = lhs + 1;
407 }
408 for &sym in ctx.rule_rhs(rule_idx) {
409 let id = sym as usize;
410 if id >= max_sym {
411 max_sym = id + 1;
412 }
413 }
414 }
415
416 let mut nullable: Vec<bool> = vec![false; max_sym];
417
418 let mut changed = true;
420 while changed {
421 changed = false;
422
423 for (rule_idx, &(lhs, _)) in rules.iter().enumerate() {
424 let lhs = lhs as usize;
425 let rhs = ctx.rule_rhs(rule_idx);
426
427 let all_nullable = rhs.iter().all(|&sym| nullable[sym as usize]);
429 if all_nullable && !nullable[lhs] {
430 nullable[lhs] = true;
431 changed = true;
432 }
433 }
434 }
435
436 nullable
437}
438
439fn expected_from_sequence(
442 sequence: &[u32],
443 table: &ParseTable,
444 ctx: &impl ErrorContext,
445 nullable: &[bool],
446 num_terminals: usize,
447) -> BTreeSet<usize> {
448 let mut result = BTreeSet::new();
449 for &sym in sequence {
450 let sym_id = sym as usize;
451 if sym_id < num_terminals || !nullable.get(sym_id).copied().unwrap_or(false) {
452 result.insert(sym_id);
454 break;
455 }
456 expand_nullable(
458 sym_id,
459 table,
460 ctx,
461 nullable,
462 num_terminals,
463 &mut result,
464 &mut BTreeSet::new(),
465 );
466 }
468 result
469}
470
471fn expand_nullable(
473 sym: usize,
474 table: &ParseTable,
475 ctx: &impl ErrorContext,
476 nullable: &[bool],
477 num_terminals: usize,
478 result: &mut BTreeSet<usize>,
479 visited: &mut BTreeSet<usize>,
480) {
481 if !visited.insert(sym) {
482 return;
483 }
484 for (rule_idx, &(lhs, _)) in table.rules().iter().enumerate() {
485 if lhs as usize != sym {
486 continue;
487 }
488 for &s in ctx.rule_rhs(rule_idx) {
489 let s_id = s as usize;
490 if s_id < num_terminals || !nullable.get(s_id).copied().unwrap_or(false) {
491 result.insert(s_id);
492 break;
493 }
494 expand_nullable(s_id, table, ctx, nullable, num_terminals, result, visited);
495 }
496 }
497}
498
499fn is_sequence_nullable(sequence: &[u32], nullable: &[bool]) -> bool {
501 sequence
502 .iter()
503 .all(|&sym| nullable.get(sym as usize).copied().unwrap_or(false))
504}
505
506#[derive(Debug, Clone, Copy)]
509struct StackEntry {
510 state: usize,
511 prec: Option<Precedence>,
512 token_idx: usize,
514}
515
516#[derive(Clone)]
521struct LrStack {
522 entries: Vec<StackEntry>,
523 len: usize,
524}
525
526impl LrStack {
527 fn new() -> Self {
528 Self {
529 entries: Vec::new(),
530 len: 0,
531 }
532 }
533
534 fn len(&self) -> usize {
535 self.len
536 }
537
538 fn push(&mut self, entry: StackEntry) {
539 if self.len < self.entries.len() {
540 self.entries[self.len] = entry;
541 } else {
542 self.entries.push(entry);
543 }
544 self.len += 1;
545 }
546
547 fn truncate(&mut self, new_len: usize) {
548 debug_assert!(new_len <= self.len);
549 self.len = new_len;
550 }
551
552 fn set_len(&mut self, new_len: usize) {
553 debug_assert!(new_len <= self.entries.len());
554 self.len = new_len;
555 }
556
557 fn last(&self) -> Option<&StackEntry> {
558 if self.len > 0 {
559 Some(&self.entries[self.len - 1])
560 } else {
561 None
562 }
563 }
564
565 fn to_vec(&self) -> Vec<StackEntry> {
566 self.entries[..self.len].to_vec()
567 }
568}
569
570impl core::ops::Index<usize> for LrStack {
571 type Output = StackEntry;
572 fn index(&self, idx: usize) -> &StackEntry {
573 debug_assert!(idx < self.len);
574 &self.entries[idx]
575 }
576}
577
578#[derive(Clone)]
582pub struct Parser<'a> {
583 table: ParseTable<'a>,
584 state: StackEntry,
586 stack: LrStack,
588 token_count: usize,
590 checkpoint_state: StackEntry,
593 checkpoint_len: usize,
594 overwrites: Vec<(usize, StackEntry)>,
595}
596
597impl<'a> Parser<'a> {
598 pub fn new(table: ParseTable<'a>) -> Self {
600 let initial = StackEntry {
601 state: 0,
602 prec: None,
603 token_idx: 0,
604 };
605 Self {
606 table,
607 state: initial,
608 stack: LrStack::new(),
609 token_count: 0,
610 checkpoint_state: initial,
611 checkpoint_len: 0,
612 overwrites: Vec::new(),
613 }
614 }
615
616 pub fn maybe_reduce(
623 &mut self,
624 lookahead: Option<Token>,
625 ) -> Result<Option<(usize, usize, usize)>, ParseError> {
626 let terminal = lookahead.map(|t| t.terminal).unwrap_or(SymbolId::EOF);
627 let resolution = lookahead.and_then(|t| t.resolution);
628
629 match self.table.action(self.state.state, terminal) {
630 ParserOp::Reduce(rule) => {
631 if rule == 0 {
632 Ok(Some((0, 0, 0))) } else {
634 let (len, start_idx) = self.do_reduce(rule);
635 Ok(Some((rule, len, start_idx)))
636 }
637 }
638 ParserOp::Shift(_) => Ok(None),
639 ParserOp::ShiftOrReduce { reduce_rule, .. } => {
640 let should_reduce = match (self.state.prec, resolution) {
641 (_, Some(Resolution::Reduce)) => true,
642 (_, Some(Resolution::Shift)) => false,
643 (Some(sp), Some(Resolution::Prec(tp))) => {
644 if tp.level() > sp.level() {
645 false
646 } else if tp.level() < sp.level() {
647 true
648 } else {
649 matches!(sp, Precedence::Left(_))
650 }
651 }
652 _ => false,
653 };
654
655 if should_reduce {
656 let (len, start_idx) = self.do_reduce(reduce_rule);
657 Ok(Some((reduce_rule, len, start_idx)))
658 } else {
659 Ok(None)
660 }
661 }
662 ParserOp::Error => Err(ParseError::Syntax { terminal }),
663 }
664 }
665
666 pub fn shift(&mut self, token: Token) {
671 let next_state = match self.table.action(self.state.state, token.terminal) {
672 ParserOp::Shift(s) => s,
673 ParserOp::ShiftOrReduce { shift_state, .. } => shift_state,
674 _ => panic!("shift called when action is not shift"),
675 };
676
677 let token_prec = match token.resolution {
678 Some(Resolution::Prec(p)) => Some(p),
679 _ => None,
680 };
681 let prec = token_prec.or(self.state.prec);
682 self.stack.push(self.state);
683 self.state = StackEntry {
684 state: next_state,
685 prec,
686 token_idx: self.token_count,
687 };
688 self.token_count += 1;
689 self.save_checkpoint();
690 }
691
692 fn do_reduce(&mut self, rule: usize) -> (usize, usize) {
693 let (lhs, len) = self.table.rule_info(rule);
694
695 let start_idx = match len {
697 0 => self.token_count, 1 => self.state.token_idx, _ => self.stack[self.stack.len() - len + 1].token_idx, };
701
702 if len == 0 {
703 if let Some(next_state) = self.table.goto(self.state.state, lhs) {
705 if self.stack.len() < self.checkpoint_len {
707 self.overwrites
708 .push((self.stack.len(), self.stack.entries[self.stack.len()]));
709 }
710 self.stack.push(self.state);
711 self.state = StackEntry {
712 state: next_state,
713 prec: None,
714 token_idx: start_idx,
715 };
716 }
717 } else {
718 self.stack.truncate(self.stack.len() - (len - 1));
720 let anchor = *self.stack.last().unwrap();
721 let captured_prec = if len == 1 {
725 self.state.prec
726 } else {
727 anchor.prec
728 };
729 if let Some(next_state) = self.table.goto(anchor.state, lhs) {
730 self.state = StackEntry {
731 state: next_state,
732 prec: captured_prec,
733 token_idx: start_idx,
734 };
735 }
736 }
737
738 (len, start_idx)
739 }
740
741 fn save_checkpoint(&mut self) {
742 self.checkpoint_state = self.state;
743 self.checkpoint_len = self.stack.len();
744 self.overwrites.clear();
745 }
746
747 pub fn restore_checkpoint(&mut self) {
753 for &(idx, entry) in self.overwrites.iter().rev() {
754 self.stack.entries[idx] = entry;
755 }
756 self.stack.set_len(self.checkpoint_len);
757 self.state = self.checkpoint_state;
758 self.overwrites.clear();
759 }
760
761 pub fn state(&self) -> usize {
763 self.state.state
764 }
765
766 pub fn token_count(&self) -> usize {
768 self.token_count
769 }
770
771 pub fn state_at(&self, depth: usize) -> usize {
773 let idx = depth + 1;
774 if idx < self.stack.len() {
775 self.stack[idx].state
776 } else {
777 self.state.state
778 }
779 }
780
781 pub fn format_error(
807 &self,
808 terminal: SymbolId,
809 ctx: &impl ErrorContext,
810 display_names: Option<&[(&str, &str)]>,
811 tokens: Option<&[&str]>,
812 ) -> String {
813 let display_names = display_names.unwrap_or(&[]);
814 let tokens = tokens.unwrap_or(&[]);
815 let mut full_stack: Vec<StackEntry> = self.stack.to_vec();
817 full_stack.push(self.state);
818 let error_token_idx = self.token_count;
819
820 let display = |id: SymbolId| -> &str {
821 let name = ctx.symbol_name(id);
822 display_names
823 .iter()
824 .find(|(k, _)| *k == name)
825 .map(|(_, v)| *v)
826 .unwrap_or(name)
827 };
828
829 let stack_spans = || -> Vec<(usize, usize, usize)> {
831 let mut spans = Vec::with_capacity(full_stack.len());
832 for i in 0..full_stack.len() {
833 let start = full_stack[i].token_idx;
834 let end = if i + 1 < full_stack.len() {
835 full_stack[i + 1].token_idx
836 } else {
837 error_token_idx
838 };
839 spans.push((start, end, full_stack[i].state));
840 }
841 spans
842 };
843
844 let nullable = compute_nullable(&self.table, ctx);
845 let num_terminals = self.table.num_terminals as usize;
846
847 let mut relevant_items = Vec::new();
849 self.collect_relevant_items(
850 ctx,
851 self.state.state,
852 self.stack.len() + 1,
853 &mut relevant_items,
854 );
855 let expected_syms = self.compute_expected(ctx, &relevant_items, &nullable, num_terminals);
856
857 let mut expected: Vec<_> = expected_syms
859 .iter()
860 .map(|&sym| format_sym(display(SymbolId(sym as u32))))
861 .collect();
862 expected.sort();
863
864 let found_name = tokens
866 .get(error_token_idx)
867 .copied()
868 .unwrap_or_else(|| display(terminal));
869
870 let mut msg = format!("unexpected '{}'", found_name);
871 if !expected.is_empty() {
872 msg.push_str(&format!(", expected: {}", expected.join(", ")));
873 }
874
875 if !tokens.is_empty() && error_token_idx <= tokens.len() {
877 let spans = stack_spans();
878 let relevant: Vec<_> = spans
880 .into_iter()
881 .skip(1) .filter(|(start, end, _)| end > start) .collect();
884
885 if !relevant.is_empty() {
886 let mut token_line = String::new();
888 let mut label_line = String::new();
889
890 for (start, end, state) in relevant.iter().rev().take(4).rev() {
891 let sym = ctx.state_symbol(*state);
892 let name = format_sym(display(sym));
893
894 let span_text = if end - start == 1 {
896 tokens[*start].to_string()
897 } else if end - start <= 3 {
898 tokens[*start..*end].join(" ")
899 } else {
900 format!("{} ... {}", tokens[*start], tokens[end - 1])
901 };
902
903 let width = span_text.chars().count().max(name.len());
904
905 if !token_line.is_empty() {
906 token_line.push_str(" ");
907 label_line.push_str(" ");
908 }
909 token_line.push_str(&format!("{:^width$}", span_text, width = width));
910 label_line.push_str(&format!("{:^width$}", name, width = width));
911 }
912
913 msg.push_str(&format!("\n {}\n {}", token_line, label_line));
914 }
915 } else if full_stack.len() > 1 {
916 let path: Vec<_> = full_stack[1..]
918 .iter()
919 .map(|e| display(ctx.state_symbol(e.state)))
920 .collect();
921 msg.push_str(&format!("\n after: {}", path.join(" ")));
922 }
923
924 let display_items = &relevant_items;
926 let mut seen = BTreeSet::new();
927
928 for &(rule, dot) in display_items {
929 let rhs = ctx.rule_rhs(rule);
930 let lhs = self.table.rule_info(rule).0;
931 if ctx.symbol_name(lhs) == "__start" {
932 continue;
933 }
934 let lhs_name = format_sym(display(lhs));
935
936 let before: Vec<_> = rhs[..dot]
937 .iter()
938 .map(|&id| format_sym(display(SymbolId(id))))
939 .collect();
940 let after: Vec<_> = rhs[dot..]
941 .iter()
942 .map(|&id| format_sym(display(SymbolId(id))))
943 .collect();
944 let line = format!(
945 "\n in {}: {} \u{2022} {}",
946 lhs_name,
947 before.join(" "),
948 after.join(" ")
949 );
950 if seen.insert(line.clone()) {
951 msg.push_str(&line);
952 }
953 }
954 msg
955 }
956
957 fn collect_relevant_items(
962 &self,
963 ctx: &impl ErrorContext,
964 state: usize,
965 stack_len: usize,
966 result: &mut Vec<(usize, usize)>,
967 ) {
968 let mut visited = Vec::new();
969 self.collect_relevant_items_inner(ctx, state, stack_len, result, &mut visited);
970 }
971
972 fn collect_relevant_items_inner(
973 &self,
974 ctx: &impl ErrorContext,
975 state: usize,
976 stack_len: usize,
977 result: &mut Vec<(usize, usize)>,
978 visited: &mut Vec<(usize, usize)>,
979 ) {
980 if visited.contains(&(state, stack_len)) {
981 return;
982 }
983 visited.push((state, stack_len));
984
985 for &(rule, dot) in ctx.state_items(state) {
986 let rule = rule as usize;
987 let dot = dot as usize;
988 let rhs = ctx.rule_rhs(rule);
989 let lhs = self.table.rule_info(rule).0;
990
991 if ctx.symbol_name(lhs) == "__start" {
992 result.push((rule, dot));
993 continue;
994 }
995
996 if dot == 0 {
997 continue;
998 }
999
1000 if dot < rhs.len() {
1001 result.push((rule, dot));
1002 } else {
1003 let consumed = rhs.len();
1005 if stack_len > consumed {
1006 let caller_state = self.state_at_idx(stack_len - consumed - 1);
1007 if let Some(goto_state) = self.table.goto(caller_state, lhs) {
1008 self.collect_relevant_items_inner(
1009 ctx,
1010 goto_state,
1011 stack_len - consumed + 1,
1012 result,
1013 visited,
1014 );
1015 }
1016 }
1017 }
1018 }
1019 }
1020
1021 fn compute_expected(
1023 &self,
1024 ctx: &impl ErrorContext,
1025 items: &[(usize, usize)],
1026 nullable: &[bool],
1027 num_terminals: usize,
1028 ) -> BTreeSet<usize> {
1029 let mut expected = BTreeSet::new();
1030 let stack_len = self.stack.len() + 1;
1031
1032 for &(rule, dot) in items {
1033 let rhs = ctx.rule_rhs(rule);
1034 let lhs = self.table.rule_info(rule).0;
1035 let suffix = &rhs[dot..];
1036
1037 expected.extend(expected_from_sequence(
1038 suffix,
1039 &self.table,
1040 ctx,
1041 nullable,
1042 num_terminals,
1043 ));
1044
1045 if is_sequence_nullable(suffix, nullable) && stack_len > dot {
1046 expected.extend(self.compute_follow_from_context(
1047 ctx,
1048 lhs,
1049 stack_len - dot,
1050 nullable,
1051 num_terminals,
1052 &mut BTreeSet::new(),
1053 ));
1054 }
1055 }
1056
1057 expected
1058 }
1059
1060 fn state_at_idx(&self, idx: usize) -> usize {
1062 if idx < self.stack.len() {
1063 self.stack[idx].state
1064 } else {
1065 self.state.state
1066 }
1067 }
1068
1069 fn compute_follow_from_context(
1071 &self,
1072 ctx: &impl ErrorContext,
1073 nonterminal: SymbolId,
1074 caller_idx: usize,
1075 nullable: &[bool],
1076 num_terminals: usize,
1077 visited: &mut BTreeSet<(usize, u32)>,
1078 ) -> BTreeSet<usize> {
1079 if nonterminal == self.table.rule_info(0).0 {
1081 let mut result = BTreeSet::new();
1082 result.insert(0); return result;
1084 }
1085
1086 if caller_idx == 0 {
1087 let mut result = BTreeSet::new();
1088 result.insert(0); return result;
1090 }
1091
1092 let caller_state = self.state_at_idx(caller_idx - 1);
1093
1094 if !visited.insert((caller_idx, nonterminal.0)) {
1096 return BTreeSet::new();
1097 }
1098
1099 let mut expected = BTreeSet::new();
1100
1101 for &(rule, dot) in ctx.state_items(caller_state) {
1103 let rule = rule as usize;
1104 let dot = dot as usize;
1105 let rhs = ctx.rule_rhs(rule);
1106 if dot < rhs.len() && rhs[dot] == nonterminal.0 {
1107 let suffix = &rhs[dot + 1..];
1108 let lhs = self.table.rule_info(rule).0;
1109 let consumed = dot;
1110
1111 if suffix.is_empty() {
1112 if caller_idx > consumed {
1114 expected.extend(self.compute_follow_from_context(
1115 ctx,
1116 lhs,
1117 caller_idx - consumed,
1118 nullable,
1119 num_terminals,
1120 visited,
1121 ));
1122 } else {
1123 expected.insert(0);
1124 }
1125 } else {
1126 expected.extend(expected_from_sequence(
1127 suffix,
1128 &self.table,
1129 ctx,
1130 nullable,
1131 num_terminals,
1132 ));
1133
1134 if is_sequence_nullable(suffix, nullable) {
1135 if caller_idx > consumed {
1136 expected.extend(self.compute_follow_from_context(
1137 ctx,
1138 lhs,
1139 caller_idx - consumed,
1140 nullable,
1141 num_terminals,
1142 visited,
1143 ));
1144 } else {
1145 expected.insert(0);
1146 }
1147 }
1148 }
1149 }
1150 }
1151
1152 expected
1153 }
1154
1155 pub(crate) fn try_shift(&self, token: Token) -> Option<Parser<'a>> {
1158 let mut sim = self.clone();
1159 let mut iters = 0;
1160 loop {
1161 iters += 1;
1162 if iters > 1000 {
1163 return None;
1164 }
1165 match sim.maybe_reduce(Some(token)) {
1166 Ok(None) => {
1167 sim.shift(token);
1168 return Some(sim);
1169 }
1170 Ok(Some((0, _, _))) => return Some(sim), Ok(Some(_)) => continue, Err(_) => return None,
1173 }
1174 }
1175 }
1176
1177 pub fn recover(&mut self, buffer: &[Token]) -> Vec<RecoveryInfo> {
1185 let mut start = 0;
1187 while start < buffer.len() {
1188 if let Some(advanced) = self.try_shift(buffer[start]) {
1189 *self = advanced;
1190 start += 1;
1191 } else {
1192 break;
1193 }
1194 }
1195
1196 let buf_len = buffer.len();
1200 let mut pq: BinaryHeap<Reverse<(usize, usize, usize)>> = BinaryHeap::new();
1201 let mut states: Vec<RecoveryState<'a>> = Vec::new();
1203 let mut visited: BTreeSet<(usize, usize, usize)> = BTreeSet::new();
1204
1205 states.push((SimState::from_parser(self), start, None));
1206 pq.push(Reverse((0, buf_len - start, 0)));
1207
1208 while let Some(Reverse((cost, _, state_idx))) = pq.pop() {
1209 if states.len() > 5000 {
1210 break;
1211 }
1212
1213 let sim = states[state_idx].0.clone();
1214 let pos = states[state_idx].1;
1215 let remaining = buf_len - pos;
1216
1217 let key = (sim.state, sim.depth, pos);
1218 if !visited.insert(key) {
1219 continue;
1220 }
1221
1222 if pos >= buf_len {
1224 let mut candidate = sim.clone();
1225 if candidate.try_accept() {
1226 let edits = Self::reconstruct_edits(&states, state_idx);
1227 return Self::edits_to_errors(&edits, start);
1228 }
1229 }
1230
1231 if pos < buf_len
1233 && let Some(sim2) = sim.try_shift(buffer[pos])
1234 {
1235 let idx = states.len();
1236 states.push((sim2, pos + 1, Some((state_idx, Repair::Shift))));
1237 pq.push(Reverse((cost, remaining - 1, idx)));
1238 }
1239
1240 let num_terms = self.table.num_terminals;
1242 for t in 1..num_terms {
1243 let token = Token::new(SymbolId(t));
1244 if let Some(sim2) = sim.try_shift(token) {
1245 let idx = states.len();
1246 states.push((sim2, pos, Some((state_idx, Repair::Insert(SymbolId(t))))));
1247 pq.push(Reverse((cost + 1, remaining, idx)));
1248 }
1249 }
1250
1251 if pos < buf_len {
1253 let idx = states.len();
1254 states.push((
1255 sim,
1256 pos + 1,
1257 Some((state_idx, Repair::Delete(buffer[pos].terminal))),
1258 ));
1259 pq.push(Reverse((cost + 1, remaining - 1, idx)));
1260 }
1261 }
1262
1263 vec![]
1265 }
1266
1267 fn reconstruct_edits(states: &[RecoveryState<'a>], mut idx: usize) -> Vec<Repair> {
1269 let mut edits = Vec::new();
1270 while let Some((parent, ref edit)) = states[idx].2 {
1271 edits.push(edit.clone());
1272 idx = parent;
1273 }
1274 edits.reverse();
1275 edits
1276 }
1277
1278 fn edits_to_errors(edits: &[Repair], start: usize) -> Vec<RecoveryInfo> {
1280 let mut errors = Vec::new();
1281 let mut pos = start;
1282 let mut current_repairs: Vec<Repair> = Vec::new();
1283 let mut error_pos = pos;
1284
1285 for edit in edits {
1286 match edit {
1287 Repair::Shift => {
1288 if !current_repairs.is_empty() {
1289 errors.push(RecoveryInfo {
1290 position: error_pos,
1291 repairs: core::mem::take(&mut current_repairs),
1292 });
1293 }
1294 pos += 1;
1295 error_pos = pos;
1296 }
1297 Repair::Insert(t) => {
1298 if current_repairs.is_empty() {
1299 error_pos = pos;
1300 }
1301 current_repairs.push(Repair::Insert(*t));
1302 }
1303 Repair::Delete(t) => {
1304 if current_repairs.is_empty() {
1305 error_pos = pos;
1306 }
1307 current_repairs.push(Repair::Delete(*t));
1308 pos += 1;
1309 }
1310 }
1311 }
1312 if !current_repairs.is_empty() {
1313 errors.push(RecoveryInfo {
1314 position: error_pos,
1315 repairs: current_repairs,
1316 });
1317 }
1318 errors
1319 }
1320}
1321
1322#[derive(Clone)]
1326struct SimState<'a> {
1327 table: ParseTable<'a>,
1328 state: usize,
1329 prec: Option<Precedence>,
1330 token_idx: usize,
1331 stack: Option<Rc<SimStackNode>>,
1332 depth: usize,
1333}
1334
1335struct SimStackNode {
1336 state: usize,
1337 prec: Option<Precedence>,
1338 token_idx: usize,
1339 parent: Option<Rc<SimStackNode>>,
1340}
1341
1342impl<'a> SimState<'a> {
1343 fn from_parser(parser: &Parser<'a>) -> Self {
1344 let mut node: Option<Rc<SimStackNode>> = None;
1345 for i in 0..parser.stack.len() {
1346 node = Some(Rc::new(SimStackNode {
1347 state: parser.stack[i].state,
1348 prec: parser.stack[i].prec,
1349 token_idx: parser.stack[i].token_idx,
1350 parent: node,
1351 }));
1352 }
1353 SimState {
1354 table: parser.table,
1355 state: parser.state.state,
1356 prec: parser.state.prec,
1357 token_idx: parser.state.token_idx,
1358 stack: node,
1359 depth: parser.stack.len(),
1360 }
1361 }
1362
1363 fn try_shift(&self, token: Token) -> Option<SimState<'a>> {
1364 let mut sim = self.clone();
1365 let mut iters = 0;
1366 loop {
1367 iters += 1;
1368 if iters > 1000 {
1369 return None;
1370 }
1371 match sim.maybe_reduce(Some(token)) {
1372 Ok(true) => return Some(sim), Ok(false) => {
1374 sim.shift(token);
1375 return Some(sim);
1376 }
1377 Err(true) => continue, Err(false) => return None, }
1380 }
1381 }
1382
1383 fn try_accept(&mut self) -> bool {
1385 let mut iters = 0;
1386 loop {
1387 iters += 1;
1388 if iters > 1000 {
1389 return false;
1390 }
1391 match self.maybe_reduce(None) {
1392 Ok(true) => return true, Ok(false) => return false, Err(true) => continue, Err(false) => return false, }
1397 }
1398 }
1399
1400 fn maybe_reduce(&mut self, lookahead: Option<Token>) -> Result<bool, bool> {
1406 let terminal = lookahead.map(|t| t.terminal).unwrap_or(SymbolId::EOF);
1407 let resolution = lookahead.and_then(|t| t.resolution);
1408
1409 match self.table.action(self.state, terminal) {
1410 ParserOp::Reduce(rule) => {
1411 if rule == 0 {
1412 return Ok(true);
1413 }
1414 self.do_reduce(rule);
1415 Err(true)
1416 }
1417 ParserOp::Shift(_) => Ok(false),
1418 ParserOp::ShiftOrReduce { reduce_rule, .. } => {
1419 let should_reduce = match (self.prec, resolution) {
1420 (_, Some(Resolution::Reduce)) => true,
1421 (_, Some(Resolution::Shift)) => false,
1422 (Some(sp), Some(Resolution::Prec(tp))) => {
1423 if tp.level() > sp.level() {
1424 false
1425 } else if tp.level() < sp.level() {
1426 true
1427 } else {
1428 matches!(sp, Precedence::Left(_))
1429 }
1430 }
1431 _ => false,
1432 };
1433 if should_reduce {
1434 self.do_reduce(reduce_rule);
1435 Err(true)
1436 } else {
1437 Ok(false)
1438 }
1439 }
1440 ParserOp::Error => Err(false),
1441 }
1442 }
1443
1444 fn shift(&mut self, token: Token) {
1445 let next_state = match self.table.action(self.state, token.terminal) {
1446 ParserOp::Shift(s) => s,
1447 ParserOp::ShiftOrReduce { shift_state, .. } => shift_state,
1448 _ => panic!("shift called when action is not shift"),
1449 };
1450 let token_prec = match token.resolution {
1451 Some(Resolution::Prec(p)) => Some(p),
1452 _ => None,
1453 };
1454 let prec = token_prec.or(self.prec);
1455 self.stack = Some(Rc::new(SimStackNode {
1456 state: self.state,
1457 prec: self.prec,
1458 token_idx: self.token_idx,
1459 parent: self.stack.take(),
1460 }));
1461 self.depth += 1;
1462 self.state = next_state;
1463 self.prec = prec;
1464 }
1465
1466 fn do_reduce(&mut self, rule: usize) {
1467 let (lhs, len) = self.table.rule_info(rule);
1468 if len == 0 {
1469 let goto_state = match self.table.goto(self.state, lhs) {
1470 Some(s) => s,
1471 None => return,
1472 };
1473 self.stack = Some(Rc::new(SimStackNode {
1474 state: self.state,
1475 prec: self.prec,
1476 token_idx: self.token_idx,
1477 parent: self.stack.take(),
1478 }));
1479 self.depth += 1;
1480 self.state = goto_state;
1481 self.prec = None;
1482 } else {
1483 let mut anchor = self.stack.as_ref().unwrap().clone();
1485 for _ in 0..len - 1 {
1486 let parent = anchor.parent.as_ref().unwrap().clone();
1487 anchor = parent;
1488 }
1489 let captured_prec = if len == 1 { self.prec } else { anchor.prec };
1490 let start_token = if len == 1 {
1492 self.token_idx
1493 } else {
1494 let mut node = self.stack.as_ref().unwrap().clone();
1496 for _ in 0..len - 2 {
1497 node = node.parent.as_ref().unwrap().clone();
1498 }
1499 node.token_idx
1500 };
1501 let goto_state = match self.table.goto(anchor.state, lhs) {
1502 Some(s) => s,
1503 None => return,
1504 };
1505 self.stack = Some(anchor);
1507 self.depth -= len - 1;
1508 self.state = goto_state;
1509 self.prec = captured_prec;
1510 self.token_idx = start_token;
1511 }
1512 }
1513}
1514
1515#[derive(Debug, Clone)]
1518pub struct RecoveryInfo {
1519 pub position: usize,
1521 pub repairs: Vec<Repair>,
1523}
1524
1525#[derive(Debug, Clone, PartialEq, Eq)]
1528pub enum Repair {
1529 Insert(SymbolId),
1531 Delete(SymbolId),
1533 Shift,
1535}
1536
1537#[derive(Debug, Clone, PartialEq, Eq)]
1543pub enum Cst {
1544 Leaf {
1546 symbol: SymbolId,
1548 token_index: usize,
1550 },
1551 Node {
1553 rule: usize,
1555 children: Vec<Cst>,
1557 },
1558}
1559
1560pub struct CstParser<'a> {
1564 parser: Parser<'a>,
1565 stack: Vec<Cst>,
1566}
1567
1568impl<'a> CstParser<'a> {
1569 pub fn new(table: ParseTable<'a>) -> Self {
1571 CstParser {
1572 parser: Parser::new(table),
1573 stack: Vec::new(),
1574 }
1575 }
1576
1577 pub fn push(&mut self, token: Token) -> Result<(), ParseError> {
1579 loop {
1580 match self.parser.maybe_reduce(Some(token)) {
1581 Ok(Some((rule, len, _))) if rule > 0 => {
1582 let children = self.stack.drain(self.stack.len() - len..).collect();
1583 self.stack.push(Cst::Node { rule, children });
1584 }
1585 Ok(_) => break,
1586 Err(e) => {
1587 self.stack.clear();
1588 self.parser.restore_checkpoint();
1589 return Err(e);
1590 }
1591 }
1592 }
1593 let token_idx = self.parser.token_count();
1594 self.stack.push(Cst::Leaf {
1595 symbol: token.terminal,
1596 token_index: token_idx,
1597 });
1598 self.parser.shift(token);
1599 Ok(())
1600 }
1601
1602 #[allow(clippy::result_large_err)]
1604 pub fn finish(mut self) -> Result<Cst, (Self, ParseError)> {
1605 loop {
1606 match self.parser.maybe_reduce(None) {
1607 Ok(Some((0, _, _))) => {
1608 return Ok(self.stack.pop().expect("empty stack after accept"));
1609 }
1610 Ok(Some((rule, len, _))) => {
1611 let children = self.stack.drain(self.stack.len() - len..).collect();
1612 self.stack.push(Cst::Node { rule, children });
1613 }
1614 Ok(None) => unreachable!(),
1615 Err(e) => {
1616 self.stack.clear();
1617 self.parser.restore_checkpoint();
1618 return Err((self, e));
1619 }
1620 }
1621 }
1622 }
1623
1624 pub fn format_error(
1626 &self,
1627 terminal: SymbolId,
1628 ctx: &impl ErrorContext,
1629 display_names: Option<&[(&str, &str)]>,
1630 tokens: Option<&[&str]>,
1631 ) -> String {
1632 self.parser
1633 .format_error(terminal, ctx, display_names, tokens)
1634 }
1635}
1636
1637#[cfg(test)]
1638mod tests {
1639 use super::*;
1640 use crate::grammar::SymbolId;
1641 use crate::table::CompiledTable;
1642
1643 #[test]
1644 fn test_action_entry_encoding() {
1645 let shift = OpEntry::shift(42);
1646 assert_eq!(shift.decode(), ParserOp::Shift(42));
1647
1648 let reduce = OpEntry::reduce(7);
1649 assert_eq!(reduce.decode(), ParserOp::Reduce(7));
1650
1651 let accept = OpEntry::reduce(0);
1653 assert_eq!(accept.decode(), ParserOp::Reduce(0));
1654
1655 let error = OpEntry(0);
1656 assert_eq!(error.decode(), ParserOp::Error);
1657
1658 let sor = OpEntry::shift_or_reduce(10, 5);
1659 match sor.decode() {
1660 ParserOp::ShiftOrReduce {
1661 shift_state,
1662 reduce_rule,
1663 } => {
1664 assert_eq!(shift_state, 10);
1665 assert_eq!(reduce_rule, 5);
1666 }
1667 other => panic!("Expected ShiftOrReduce, got {:?}", other),
1668 }
1669 }
1670 use crate::lr::to_grammar_internal;
1671 use crate::meta::parse_grammar;
1672
1673 #[test]
1674 fn test_parse_single_token() {
1675 let grammar = to_grammar_internal(
1676 &parse_grammar(
1677 r#"
1678 start s; terminals { a } s = a => a;
1679 "#,
1680 )
1681 .unwrap(),
1682 )
1683 .unwrap();
1684
1685 let compiled = CompiledTable::build_from_internal(&grammar);
1686 let mut parser = Parser::new(compiled.table());
1687
1688 let a_id = compiled.symbol_id("a").unwrap();
1689 let token = Token::new(a_id);
1690
1691 assert!(matches!(parser.maybe_reduce(Some(token)), Ok(None)));
1693
1694 parser.shift(token);
1696
1697 let result = parser.maybe_reduce(None);
1699 assert!(matches!(result, Ok(Some((1, 1, 0))))); let result = parser.maybe_reduce(None);
1703 assert!(matches!(result, Ok(Some((0, 0, 0)))));
1704 }
1705
1706 #[test]
1707 fn test_parse_error() {
1708 let grammar = to_grammar_internal(
1709 &parse_grammar(
1710 r#"
1711 start s; terminals { a } s = a => a;
1712 "#,
1713 )
1714 .unwrap(),
1715 )
1716 .unwrap();
1717
1718 let compiled = CompiledTable::build_from_internal(&grammar);
1719 let mut parser = Parser::new(compiled.table());
1720
1721 let wrong_id = SymbolId(99);
1722 let token = Token::new(wrong_id);
1723
1724 let result = parser.maybe_reduce(Some(token));
1725 assert!(result.is_err());
1726 }
1727
1728 #[test]
1729 fn test_format_error() {
1730 let grammar = to_grammar_internal(
1731 &parse_grammar(
1732 r#"
1733 start s; terminals { a, b } s = a => a;
1734 "#,
1735 )
1736 .unwrap(),
1737 )
1738 .unwrap();
1739
1740 let compiled = CompiledTable::build_from_internal(&grammar);
1741 let mut parser = Parser::new(compiled.table());
1742
1743 let b_id = compiled.symbol_id("b").unwrap();
1745 let token = Token::new(b_id);
1746
1747 let ParseError::Syntax { terminal } = parser.maybe_reduce(Some(token)).unwrap_err();
1748 let msg = parser.format_error(terminal, &compiled, None, None);
1749
1750 assert!(msg.contains("unexpected"), "msg: {}", msg);
1751 assert!(msg.contains("'b'"), "msg: {}", msg);
1752 assert!(msg.contains("s"), "msg: {}", msg);
1753 }
1754}