Skip to main content

tree_sitter_generate/build_tables/
build_parse_table.rs

1use std::{
2    cmp::Ordering,
3    collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
4    hash::BuildHasherDefault,
5};
6
7use indexmap::{map::Entry, IndexMap};
8use log::warn;
9use rustc_hash::FxHasher;
10use serde::Serialize;
11use thiserror::Error;
12
13use super::{
14    item::{ParseItem, ParseItemSet, ParseItemSetCore, ParseItemSetEntry},
15    item_set_builder::ParseItemSetBuilder,
16};
17use crate::{
18    grammars::{LexicalGrammar, PrecedenceEntry, ReservedWordSetId, SyntaxGrammar, VariableType},
19    node_types::VariableInfo,
20    rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
21    tables::{
22        FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable,
23        ParseTableEntry, ProductionInfo, ProductionInfoId,
24    },
25};
26
27// For conflict reporting, each parse state is associated with an example
28// sequence of symbols that could lead to that parse state.
29type SymbolSequence = Vec<Symbol>;
30
31type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
32pub type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
33
34#[derive(Clone, PartialEq)]
35struct AuxiliarySymbolInfo {
36    auxiliary_symbol: Symbol,
37    parent_symbols: Vec<Symbol>,
38}
39
40#[derive(Debug, Default)]
41struct ReductionInfo {
42    precedence: Precedence,
43    symbols: Vec<Symbol>,
44    has_left_assoc: bool,
45    has_right_assoc: bool,
46    has_non_assoc: bool,
47}
48
49struct ParseStateQueueEntry {
50    state_id: ParseStateId,
51    preceding_auxiliary_symbols: AuxiliarySymbolSequence,
52}
53
54struct ParseTableBuilder<'a> {
55    item_set_builder: ParseItemSetBuilder<'a>,
56    syntax_grammar: &'a SyntaxGrammar,
57    lexical_grammar: &'a LexicalGrammar,
58    variable_info: &'a [VariableInfo],
59    core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
60    state_ids_by_item_set: IndexMap<ParseItemSet<'a>, ParseStateId, BuildHasherDefault<FxHasher>>,
61    parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
62    parse_state_queue: VecDeque<ParseStateQueueEntry>,
63    non_terminal_extra_states: Vec<(Symbol, usize)>,
64    actual_conflicts: HashSet<Vec<Symbol>>,
65    parse_table: ParseTable,
66}
67
68pub type BuildTableResult<T> = Result<T, ParseTableBuilderError>;
69
70#[derive(Debug, Error, Serialize)]
71pub enum ParseTableBuilderError {
72    #[error("Unresolved conflict for symbol sequence:\n\n{0}")]
73    Conflict(#[from] ConflictError),
74    #[error("Extra rules must have unambiguous endings. Conflicting rules: {0}")]
75    AmbiguousExtra(#[from] AmbiguousExtraError),
76    #[error(
77        "The non-terminal rule `{0}` is used in a non-terminal `extra` rule, which is not allowed."
78    )]
79    ImproperNonTerminalExtra(String),
80    #[error("State count `{0}` exceeds the max value {max}.", max=u16::MAX)]
81    StateCount(usize),
82}
83
84#[derive(Default, Debug, Serialize, Error)]
85pub struct ConflictError {
86    pub symbol_sequence: Vec<String>,
87    pub conflicting_lookahead: String,
88    pub possible_interpretations: Vec<Interpretation>,
89    pub possible_resolutions: Vec<Resolution>,
90}
91
92#[derive(Default, Debug, Serialize, Error)]
93pub struct Interpretation {
94    pub preceding_symbols: Vec<String>,
95    pub variable_name: String,
96    pub production_step_symbols: Vec<String>,
97    pub step_index: u32,
98    pub done: bool,
99    pub conflicting_lookahead: String,
100    pub precedence: Option<String>,
101    pub associativity: Option<String>,
102}
103
104#[derive(Debug, Serialize)]
105pub enum Resolution {
106    Precedence { symbols: Vec<String> },
107    Associativity { symbols: Vec<String> },
108    AddConflict { symbols: Vec<String> },
109}
110
111#[derive(Debug, Serialize, Error)]
112pub struct AmbiguousExtraError {
113    pub parent_symbols: Vec<String>,
114}
115
116impl std::fmt::Display for ConflictError {
117    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
118        for symbol in &self.symbol_sequence {
119            write!(f, "  {symbol}")?;
120        }
121        writeln!(f, "  •  {}  …\n", self.conflicting_lookahead)?;
122
123        writeln!(f, "Possible interpretations:\n")?;
124        let mut interpretations = self
125            .possible_interpretations
126            .iter()
127            .map(|i| {
128                let line = i.to_string();
129                let prec_line = if let (Some(precedence), Some(associativity)) =
130                    (&i.precedence, &i.associativity)
131                {
132                    Some(format!(
133                        "(precedence: {precedence}, associativity: {associativity})",
134                    ))
135                } else {
136                    i.precedence
137                        .as_ref()
138                        .map(|precedence| format!("(precedence: {precedence})"))
139                };
140
141                (line, prec_line)
142            })
143            .collect::<Vec<_>>();
144        let max_interpretation_length = interpretations
145            .iter()
146            .map(|i| i.0.chars().count())
147            .max()
148            .unwrap();
149        interpretations.sort_unstable();
150        for (i, (line, prec_suffix)) in interpretations.into_iter().enumerate() {
151            write!(f, "  {}:", i + 1).unwrap();
152            write!(f, "{line}")?;
153            if let Some(prec_suffix) = prec_suffix {
154                write!(
155                    f,
156                    "{:1$}",
157                    "",
158                    max_interpretation_length.saturating_sub(line.chars().count()) + 2
159                )?;
160                write!(f, "{prec_suffix}")?;
161            }
162            writeln!(f)?;
163        }
164
165        writeln!(f, "\nPossible resolutions:\n")?;
166        for (i, resolution) in self.possible_resolutions.iter().enumerate() {
167            writeln!(f, "  {}:  {resolution}", i + 1)?;
168        }
169        Ok(())
170    }
171}
172
173impl std::fmt::Display for Interpretation {
174    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
175        for symbol in &self.preceding_symbols {
176            write!(f, "  {symbol}")?;
177        }
178        write!(f, "  ({}", self.variable_name)?;
179        for (i, symbol) in self.production_step_symbols.iter().enumerate() {
180            if i == self.step_index as usize {
181                write!(f, "  •")?;
182            }
183            write!(f, "  {symbol}")?;
184        }
185        write!(f, ")")?;
186        if self.done {
187            write!(f, "  •  {}  …", self.conflicting_lookahead)?;
188        }
189        Ok(())
190    }
191}
192
193impl std::fmt::Display for Resolution {
194    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
195        match self {
196            Self::Precedence { symbols } => {
197                write!(f, "Specify a higher precedence in ")?;
198                for (i, symbol) in symbols.iter().enumerate() {
199                    if i > 0 {
200                        write!(f, " and ")?;
201                    }
202                    write!(f, "`{symbol}`")?;
203                }
204                write!(f, " than in the other rules.")?;
205            }
206            Self::Associativity { symbols } => {
207                write!(f, "Specify a left or right associativity in ")?;
208                for (i, symbol) in symbols.iter().enumerate() {
209                    if i > 0 {
210                        write!(f, ", ")?;
211                    }
212                    write!(f, "`{symbol}`")?;
213                }
214            }
215            Self::AddConflict { symbols } => {
216                write!(f, "Add a conflict for these rules: ")?;
217                for (i, symbol) in symbols.iter().enumerate() {
218                    if i > 0 {
219                        write!(f, ", ")?;
220                    }
221                    write!(f, "`{symbol}`")?;
222                }
223            }
224        }
225        Ok(())
226    }
227}
228
229impl std::fmt::Display for AmbiguousExtraError {
230    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
231        for (i, symbol) in self.parent_symbols.iter().enumerate() {
232            if i > 0 {
233                write!(f, ", ")?;
234            }
235            write!(f, "{symbol}")?;
236        }
237        Ok(())
238    }
239}
240
241impl<'a> ParseTableBuilder<'a> {
242    fn new(
243        syntax_grammar: &'a SyntaxGrammar,
244        lexical_grammar: &'a LexicalGrammar,
245        item_set_builder: ParseItemSetBuilder<'a>,
246        variable_info: &'a [VariableInfo],
247    ) -> Self {
248        Self {
249            syntax_grammar,
250            lexical_grammar,
251            item_set_builder,
252            variable_info,
253            non_terminal_extra_states: Vec::new(),
254            state_ids_by_item_set: IndexMap::default(),
255            core_ids_by_core: HashMap::new(),
256            parse_state_info_by_id: Vec::new(),
257            parse_state_queue: VecDeque::new(),
258            actual_conflicts: syntax_grammar.expected_conflicts.iter().cloned().collect(),
259            parse_table: ParseTable {
260                states: Vec::new(),
261                symbols: Vec::new(),
262                external_lex_states: Vec::new(),
263                production_infos: Vec::new(),
264                max_aliased_production_length: 1,
265            },
266        }
267    }
268
269    fn build(mut self) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
270        // Ensure that the empty alias sequence has index 0.
271        self.parse_table
272            .production_infos
273            .push(ProductionInfo::default());
274
275        // Add the error state at index 0.
276        self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
277
278        // Add the starting state at index 1.
279        self.add_parse_state(
280            &Vec::new(),
281            &Vec::new(),
282            ParseItemSet {
283                entries: vec![ParseItemSetEntry {
284                    item: ParseItem::start(),
285                    lookaheads: std::iter::once(Symbol::end()).collect(),
286                    following_reserved_word_set: ReservedWordSetId::default(),
287                }],
288            },
289        );
290
291        // Compute the possible item sets for non-terminal extras.
292        let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
293        for extra_non_terminal in self
294            .syntax_grammar
295            .extra_symbols
296            .iter()
297            .filter(|s| s.is_non_terminal())
298        {
299            let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
300            for production in &variable.productions {
301                non_terminal_extra_item_sets_by_first_terminal
302                    .entry(production.first_symbol().unwrap())
303                    .or_insert_with(ParseItemSet::default)
304                    .insert(ParseItem {
305                        variable_index: extra_non_terminal.index as u32,
306                        production,
307                        step_index: 1,
308                        has_preceding_inherited_fields: false,
309                    })
310                    .lookaheads
311                    .insert(Symbol::end_of_nonterminal_extra());
312            }
313        }
314
315        let non_terminal_sets_len = non_terminal_extra_item_sets_by_first_terminal.len();
316        self.non_terminal_extra_states
317            .reserve(non_terminal_sets_len);
318        self.parse_state_info_by_id.reserve(non_terminal_sets_len);
319        self.parse_table.states.reserve(non_terminal_sets_len);
320        self.parse_state_queue.reserve(non_terminal_sets_len);
321        // Add a state for each starting terminal of a non-terminal extra rule.
322        for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
323            if terminal.is_non_terminal() {
324                Err(ParseTableBuilderError::ImproperNonTerminalExtra(
325                    self.symbol_name(&terminal),
326                ))?;
327            }
328
329            // Add the parse state, and *then* push the terminal and the state id into the
330            // list of nonterminal extra states
331            let state_id = self.add_parse_state(&Vec::new(), &Vec::new(), item_set);
332            self.non_terminal_extra_states.push((terminal, state_id));
333        }
334
335        while let Some(entry) = self.parse_state_queue.pop_front() {
336            let item_set = self
337                .item_set_builder
338                .transitive_closure(&self.parse_state_info_by_id[entry.state_id].1);
339
340            self.add_actions(
341                self.parse_state_info_by_id[entry.state_id].0.clone(),
342                entry.preceding_auxiliary_symbols,
343                entry.state_id,
344                &item_set,
345            )?;
346        }
347
348        if !self.actual_conflicts.is_empty() {
349            warn!(
350                "unnecessary conflicts:\n  {}",
351                &self
352                    .actual_conflicts
353                    .iter()
354                    .map(|conflict| {
355                        conflict
356                            .iter()
357                            .map(|symbol| format!("`{}`", self.symbol_name(symbol)))
358                            .collect::<Vec<_>>()
359                            .join(", ")
360                    })
361                    .collect::<Vec<_>>()
362                    .join("\n  ")
363            );
364        }
365
366        Ok((self.parse_table, self.parse_state_info_by_id))
367    }
368
369    fn add_parse_state(
370        &mut self,
371        preceding_symbols: &SymbolSequence,
372        preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
373        item_set: ParseItemSet<'a>,
374    ) -> ParseStateId {
375        match self.state_ids_by_item_set.entry(item_set) {
376            // If an equivalent item set has already been processed, then return
377            // the existing parse state index.
378            Entry::Occupied(o) => *o.get(),
379
380            // Otherwise, insert a new parse state and add it to the queue of
381            // parse states to populate.
382            Entry::Vacant(v) => {
383                let core = v.key().core();
384                let core_count = self.core_ids_by_core.len();
385                let core_id = *self.core_ids_by_core.entry(core).or_insert(core_count);
386
387                let state_id = self.parse_table.states.len();
388                self.parse_state_info_by_id
389                    .push((preceding_symbols.clone(), v.key().clone()));
390
391                self.parse_table.states.push(ParseState {
392                    id: state_id,
393                    lex_state_id: 0,
394                    external_lex_state_id: 0,
395                    terminal_entries: IndexMap::default(),
396                    nonterminal_entries: IndexMap::default(),
397                    reserved_words: TokenSet::default(),
398                    core_id,
399                });
400                self.parse_state_queue.push_back(ParseStateQueueEntry {
401                    state_id,
402                    preceding_auxiliary_symbols: preceding_auxiliary_symbols.clone(),
403                });
404                v.insert(state_id);
405                state_id
406            }
407        }
408    }
409
410    fn add_actions(
411        &mut self,
412        mut preceding_symbols: SymbolSequence,
413        mut preceding_auxiliary_symbols: AuxiliarySymbolSequence,
414        state_id: ParseStateId,
415        item_set: &ParseItemSet<'a>,
416    ) -> BuildTableResult<()> {
417        let mut terminal_successors = BTreeMap::new();
418        let mut non_terminal_successors = BTreeMap::new();
419        let mut lookaheads_with_conflicts = TokenSet::new();
420        let mut reduction_infos = HashMap::<Symbol, ReductionInfo>::new();
421
422        // Each item in the item set contributes to either or a Shift action or a Reduce
423        // action in this state.
424        for ParseItemSetEntry {
425            item,
426            lookaheads,
427            following_reserved_word_set: reserved_lookaheads,
428        } in &item_set.entries
429        {
430            // If the item is unfinished, then this state has a transition for the item's
431            // next symbol. Advance the item to its next step and insert the resulting
432            // item into the successor item set.
433            if let Some(next_symbol) = item.symbol() {
434                let mut successor = item.successor();
435                let successor_set = if next_symbol.is_non_terminal() {
436                    let variable = &self.syntax_grammar.variables[next_symbol.index];
437
438                    // Keep track of where auxiliary non-terminals (repeat symbols) are
439                    // used within visible symbols. This information may be needed later
440                    // for conflict resolution.
441                    if variable.is_auxiliary() {
442                        preceding_auxiliary_symbols
443                            .push(self.get_auxiliary_node_info(item_set, next_symbol));
444                    }
445
446                    // For most parse items, the symbols associated with the preceding children
447                    // don't matter: they have no effect on the REDUCE action that would be
448                    // performed at the end of the item. But the symbols *do* matter for
449                    // children that are hidden and have fields, because those fields are
450                    // "inherited" by the parent node.
451                    //
452                    // If this item has consumed a hidden child with fields, then the symbols
453                    // of its preceding children need to be taken into account when comparing
454                    // it with other items.
455                    if variable.is_hidden()
456                        && !self.variable_info[next_symbol.index].fields.is_empty()
457                    {
458                        successor.has_preceding_inherited_fields = true;
459                    }
460
461                    non_terminal_successors
462                        .entry(next_symbol)
463                        .or_insert_with(ParseItemSet::default)
464                } else {
465                    terminal_successors
466                        .entry(next_symbol)
467                        .or_insert_with(ParseItemSet::default)
468                };
469                let successor_entry = successor_set.insert(successor);
470                successor_entry.lookaheads.insert_all(lookaheads);
471                successor_entry.following_reserved_word_set = successor_entry
472                    .following_reserved_word_set
473                    .max(*reserved_lookaheads);
474            }
475            // If the item is finished, then add a Reduce action to this state based
476            // on this item.
477            else {
478                let symbol = Symbol::non_terminal(item.variable_index as usize);
479                let action = if item.is_augmented() {
480                    ParseAction::Accept
481                } else {
482                    ParseAction::Reduce {
483                        symbol,
484                        child_count: item.step_index as usize,
485                        dynamic_precedence: item.production.dynamic_precedence,
486                        production_id: self.get_production_id(item),
487                    }
488                };
489
490                let precedence = item.precedence();
491                let associativity = item.associativity();
492                for lookahead in lookaheads.iter() {
493                    let table_entry = self.parse_table.states[state_id]
494                        .terminal_entries
495                        .entry(lookahead)
496                        .or_insert_with(ParseTableEntry::new);
497                    let reduction_info = reduction_infos.entry(lookahead).or_default();
498
499                    // While inserting Reduce actions, eagerly resolve conflicts related
500                    // to precedence: avoid inserting lower-precedence reductions, and
501                    // clear the action list when inserting higher-precedence reductions.
502                    if table_entry.actions.is_empty() {
503                        table_entry.actions.push(action);
504                    } else {
505                        match Self::compare_precedence(
506                            self.syntax_grammar,
507                            precedence,
508                            &[symbol],
509                            &reduction_info.precedence,
510                            &reduction_info.symbols,
511                        ) {
512                            Ordering::Greater => {
513                                table_entry.actions.clear();
514                                table_entry.actions.push(action);
515                                lookaheads_with_conflicts.remove(&lookahead);
516                                *reduction_info = ReductionInfo::default();
517                            }
518                            Ordering::Equal => {
519                                table_entry.actions.push(action);
520                                lookaheads_with_conflicts.insert(lookahead);
521                            }
522                            Ordering::Less => continue,
523                        }
524                    }
525
526                    reduction_info.precedence.clone_from(precedence);
527                    if let Err(i) = reduction_info.symbols.binary_search(&symbol) {
528                        reduction_info.symbols.insert(i, symbol);
529                    }
530                    match associativity {
531                        Some(Associativity::Left) => reduction_info.has_left_assoc = true,
532                        Some(Associativity::Right) => reduction_info.has_right_assoc = true,
533                        None => reduction_info.has_non_assoc = true,
534                    }
535                }
536            }
537        }
538
539        preceding_auxiliary_symbols.dedup();
540
541        // Having computed the successor item sets for each symbol, add a new
542        // parse state for each of these item sets, and add a corresponding Shift
543        // action to this state.
544        for (symbol, next_item_set) in terminal_successors {
545            preceding_symbols.push(symbol);
546            let next_state_id = self.add_parse_state(
547                &preceding_symbols,
548                &preceding_auxiliary_symbols,
549                next_item_set,
550            );
551            preceding_symbols.pop();
552
553            let entry = self.parse_table.states[state_id]
554                .terminal_entries
555                .entry(symbol);
556            if let Entry::Occupied(e) = &entry {
557                if !e.get().actions.is_empty() {
558                    lookaheads_with_conflicts.insert(symbol);
559                }
560            }
561
562            entry
563                .or_insert_with(ParseTableEntry::new)
564                .actions
565                .push(ParseAction::Shift {
566                    state: next_state_id,
567                    is_repetition: false,
568                });
569        }
570
571        for (symbol, next_item_set) in non_terminal_successors {
572            preceding_symbols.push(symbol);
573            let next_state_id = self.add_parse_state(
574                &preceding_symbols,
575                &preceding_auxiliary_symbols,
576                next_item_set,
577            );
578            preceding_symbols.pop();
579            self.parse_table.states[state_id]
580                .nonterminal_entries
581                .insert(symbol, GotoAction::Goto(next_state_id));
582        }
583
584        // For any symbol with multiple actions, perform conflict resolution.
585        // This will either
586        // * choose one action over the others using precedence or associativity
587        // * keep multiple actions if this conflict has been whitelisted in the grammar
588        // * fail, terminating the parser generation process
589        for symbol in lookaheads_with_conflicts.iter() {
590            self.handle_conflict(
591                item_set,
592                state_id,
593                &preceding_symbols,
594                &preceding_auxiliary_symbols,
595                symbol,
596                reduction_infos.get(&symbol).unwrap(),
597            )?;
598        }
599
600        // Add actions for the grammar's `extra` symbols.
601        let state = &mut self.parse_table.states[state_id];
602        let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra();
603
604        // If this state represents the end of a non-terminal extra rule, then make sure that
605        // it doesn't have other successor states. Non-terminal extra rules must have
606        // unambiguous endings.
607        if is_end_of_non_terminal_extra {
608            if state.terminal_entries.len() > 1 {
609                let parent_symbols = item_set
610                    .entries
611                    .iter()
612                    .filter_map(|ParseItemSetEntry { item, .. }| {
613                        if !item.is_augmented() && item.step_index > 0 {
614                            Some(item.variable_index)
615                        } else {
616                            None
617                        }
618                    })
619                    .collect::<HashSet<_>>();
620                let parent_symbol_names = parent_symbols
621                    .iter()
622                    .map(|&variable_index| {
623                        self.syntax_grammar.variables[variable_index as usize]
624                            .name
625                            .clone()
626                    })
627                    .collect::<Vec<_>>();
628
629                Err(AmbiguousExtraError {
630                    parent_symbols: parent_symbol_names,
631                })?;
632            }
633        }
634        // Add actions for the start tokens of each non-terminal extra rule.
635        else {
636            for (terminal, state_id) in &self.non_terminal_extra_states {
637                state
638                    .terminal_entries
639                    .entry(*terminal)
640                    .or_insert(ParseTableEntry {
641                        reusable: true,
642                        actions: vec![ParseAction::Shift {
643                            state: *state_id,
644                            is_repetition: false,
645                        }],
646                    });
647            }
648
649            // Add ShiftExtra actions for the terminal extra tokens. These actions
650            // are added to every state except for those at the ends of non-terminal
651            // extras.
652            for extra_token in &self.syntax_grammar.extra_symbols {
653                if extra_token.is_non_terminal() {
654                    state
655                        .nonterminal_entries
656                        .insert(*extra_token, GotoAction::ShiftExtra);
657                } else {
658                    state
659                        .terminal_entries
660                        .entry(*extra_token)
661                        .or_insert(ParseTableEntry {
662                            reusable: true,
663                            actions: vec![ParseAction::ShiftExtra],
664                        });
665                }
666            }
667        }
668
669        if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
670            let reserved_word_set_id = item_set
671                .entries
672                .iter()
673                .filter_map(|entry| {
674                    if let Some(next_step) = entry.item.step() {
675                        if next_step.symbol == keyword_capture_token {
676                            Some(next_step.reserved_word_set_id)
677                        } else {
678                            None
679                        }
680                    } else if entry.lookaheads.contains(&keyword_capture_token) {
681                        Some(entry.following_reserved_word_set)
682                    } else {
683                        None
684                    }
685                })
686                .max();
687            if let Some(reserved_word_set_id) = reserved_word_set_id {
688                state.reserved_words =
689                    self.syntax_grammar.reserved_word_sets[reserved_word_set_id.0].clone();
690            }
691        }
692
693        Ok(())
694    }
695
696    fn handle_conflict(
697        &mut self,
698        item_set: &ParseItemSet,
699        state_id: ParseStateId,
700        preceding_symbols: &SymbolSequence,
701        preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
702        conflicting_lookahead: Symbol,
703        reduction_info: &ReductionInfo,
704    ) -> BuildTableResult<()> {
705        let entry = self.parse_table.states[state_id]
706            .terminal_entries
707            .get_mut(&conflicting_lookahead)
708            .unwrap();
709
710        // Determine which items in the set conflict with each other, and the
711        // precedences associated with SHIFT vs REDUCE actions. There won't
712        // be multiple REDUCE actions with different precedences; that is
713        // sorted out ahead of time in `add_actions`. But there can still be
714        // REDUCE-REDUCE conflicts where all actions have the *same*
715        // precedence, and there can still be SHIFT/REDUCE conflicts.
716        let mut considered_associativity = false;
717        let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new();
718        let mut conflicting_items = BTreeSet::new();
719        for ParseItemSetEntry {
720            item, lookaheads, ..
721        } in &item_set.entries
722        {
723            if let Some(step) = item.step() {
724                if item.step_index > 0
725                    && self
726                        .item_set_builder
727                        .first_set(&step.symbol)
728                        .contains(&conflicting_lookahead)
729                {
730                    if item.variable_index != u32::MAX {
731                        conflicting_items.insert(item);
732                    }
733
734                    let p = (
735                        item.precedence(),
736                        Symbol::non_terminal(item.variable_index as usize),
737                    );
738                    if let Err(i) = shift_precedence.binary_search(&p) {
739                        shift_precedence.insert(i, p);
740                    }
741                }
742            } else if lookaheads.contains(&conflicting_lookahead) && item.variable_index != u32::MAX
743            {
744                conflicting_items.insert(item);
745            }
746        }
747
748        if let ParseAction::Shift { is_repetition, .. } = entry.actions.last_mut().unwrap() {
749            // If all of the items in the conflict have the same parent symbol,
750            // and that parent symbols is auxiliary, then this is just the intentional
751            // ambiguity associated with a repeat rule. Resolve that class of ambiguity
752            // by leaving it in the parse table, but marking the SHIFT action with
753            // an `is_repetition` flag.
754            let conflicting_variable_index =
755                conflicting_items.iter().next().unwrap().variable_index;
756            if self.syntax_grammar.variables[conflicting_variable_index as usize].is_auxiliary()
757                && conflicting_items
758                    .iter()
759                    .all(|item| item.variable_index == conflicting_variable_index)
760            {
761                *is_repetition = true;
762                return Ok(());
763            }
764
765            // If the SHIFT action has higher precedence, remove all the REDUCE actions.
766            let mut shift_is_less = false;
767            let mut shift_is_more = false;
768            for p in shift_precedence {
769                match Self::compare_precedence(
770                    self.syntax_grammar,
771                    p.0,
772                    &[p.1],
773                    &reduction_info.precedence,
774                    &reduction_info.symbols,
775                ) {
776                    Ordering::Greater => shift_is_more = true,
777                    Ordering::Less => shift_is_less = true,
778                    Ordering::Equal => {}
779                }
780            }
781
782            if shift_is_more && !shift_is_less {
783                entry.actions.drain(0..entry.actions.len() - 1);
784            }
785            // If the REDUCE actions have higher precedence, remove the SHIFT action.
786            else if shift_is_less && !shift_is_more {
787                entry.actions.pop();
788                conflicting_items.retain(|item| item.is_done());
789            }
790            // If the SHIFT and REDUCE actions have the same predence, consider
791            // the REDUCE actions' associativity.
792            else if !shift_is_less && !shift_is_more {
793                considered_associativity = true;
794
795                // If all Reduce actions are left associative, remove the SHIFT action.
796                // If all Reduce actions are right associative, remove the REDUCE actions.
797                match (
798                    reduction_info.has_left_assoc,
799                    reduction_info.has_non_assoc,
800                    reduction_info.has_right_assoc,
801                ) {
802                    (true, false, false) => {
803                        entry.actions.pop();
804                        conflicting_items.retain(|item| item.is_done());
805                    }
806                    (false, false, true) => {
807                        entry.actions.drain(0..entry.actions.len() - 1);
808                    }
809                    _ => {}
810                }
811            }
812        }
813
814        // If all of the actions but one have been eliminated, then there's no problem.
815        let entry = self.parse_table.states[state_id]
816            .terminal_entries
817            .get_mut(&conflicting_lookahead)
818            .unwrap();
819        if entry.actions.len() == 1 {
820            return Ok(());
821        }
822
823        // Determine the set of parent symbols involved in this conflict.
824        let mut actual_conflict = Vec::new();
825        for item in &conflicting_items {
826            let symbol = Symbol::non_terminal(item.variable_index as usize);
827            if self.syntax_grammar.variables[symbol.index].is_auxiliary() {
828                actual_conflict.extend(
829                    preceding_auxiliary_symbols
830                        .iter()
831                        .rev()
832                        .find_map(|info| {
833                            if info.auxiliary_symbol == symbol {
834                                Some(&info.parent_symbols)
835                            } else {
836                                None
837                            }
838                        })
839                        .unwrap()
840                        .iter(),
841                );
842            } else {
843                actual_conflict.push(symbol);
844            }
845        }
846        actual_conflict.sort_unstable();
847        actual_conflict.dedup();
848
849        // If this set of symbols has been whitelisted, then there's no error.
850        if self
851            .syntax_grammar
852            .expected_conflicts
853            .contains(&actual_conflict)
854        {
855            self.actual_conflicts.remove(&actual_conflict);
856            return Ok(());
857        }
858
859        let mut conflict_error = ConflictError::default();
860        for symbol in preceding_symbols {
861            conflict_error
862                .symbol_sequence
863                .push(self.symbol_name(symbol));
864        }
865        conflict_error.conflicting_lookahead = self.symbol_name(&conflicting_lookahead);
866
867        let interpretations = conflicting_items
868            .iter()
869            .map(|item| {
870                let preceding_symbols = preceding_symbols
871                    .iter()
872                    .take(preceding_symbols.len() - item.step_index as usize)
873                    .map(|symbol| self.symbol_name(symbol))
874                    .collect::<Vec<_>>();
875
876                let variable_name = self.syntax_grammar.variables[item.variable_index as usize]
877                    .name
878                    .clone();
879
880                let production_step_symbols = item
881                    .production
882                    .steps
883                    .iter()
884                    .map(|step| self.symbol_name(&step.symbol))
885                    .collect::<Vec<_>>();
886
887                let precedence = match item.precedence() {
888                    Precedence::None => None,
889                    _ => Some(item.precedence().to_string()),
890                };
891
892                let associativity = item.associativity().map(|assoc| format!("{assoc:?}"));
893
894                Interpretation {
895                    preceding_symbols,
896                    variable_name,
897                    production_step_symbols,
898                    step_index: item.step_index,
899                    done: item.is_done(),
900                    conflicting_lookahead: self.symbol_name(&conflicting_lookahead),
901                    precedence,
902                    associativity,
903                }
904            })
905            .collect::<Vec<_>>();
906        conflict_error.possible_interpretations = interpretations;
907
908        let mut shift_items = Vec::new();
909        let mut reduce_items = Vec::new();
910        for item in conflicting_items {
911            if item.is_done() {
912                reduce_items.push(item);
913            } else {
914                shift_items.push(item);
915            }
916        }
917        shift_items.sort_unstable();
918        reduce_items.sort_unstable();
919
920        let get_rule_names = |items: &[&ParseItem]| -> Vec<String> {
921            let mut last_rule_id = None;
922            let mut result = Vec::with_capacity(items.len());
923            for item in items {
924                if last_rule_id == Some(item.variable_index) {
925                    continue;
926                }
927                last_rule_id = Some(item.variable_index);
928                result.push(self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)));
929            }
930
931            result
932        };
933
934        if actual_conflict.len() > 1 {
935            if !shift_items.is_empty() {
936                let names = get_rule_names(&shift_items);
937                conflict_error
938                    .possible_resolutions
939                    .push(Resolution::Precedence { symbols: names });
940            }
941
942            for item in &reduce_items {
943                let name = self.symbol_name(&Symbol::non_terminal(item.variable_index as usize));
944                conflict_error
945                    .possible_resolutions
946                    .push(Resolution::Precedence {
947                        symbols: vec![name],
948                    });
949            }
950        }
951
952        if considered_associativity {
953            let names = get_rule_names(&reduce_items);
954            conflict_error
955                .possible_resolutions
956                .push(Resolution::Associativity { symbols: names });
957        }
958
959        conflict_error
960            .possible_resolutions
961            .push(Resolution::AddConflict {
962                symbols: actual_conflict
963                    .iter()
964                    .map(|s| self.symbol_name(s))
965                    .collect(),
966            });
967
968        self.actual_conflicts.insert(actual_conflict);
969
970        Err(conflict_error)?
971    }
972
973    fn compare_precedence(
974        grammar: &SyntaxGrammar,
975        left: &Precedence,
976        left_symbols: &[Symbol],
977        right: &Precedence,
978        right_symbols: &[Symbol],
979    ) -> Ordering {
980        let precedence_entry_matches =
981            |entry: &PrecedenceEntry, precedence: &Precedence, symbols: &[Symbol]| -> bool {
982                match entry {
983                    PrecedenceEntry::Name(n) => {
984                        if let Precedence::Name(p) = precedence {
985                            n == p
986                        } else {
987                            false
988                        }
989                    }
990                    PrecedenceEntry::Symbol(n) => symbols
991                        .iter()
992                        .any(|s| &grammar.variables[s.index].name == n),
993                }
994            };
995
996        match (left, right) {
997            // Integer precedences can be compared to other integer precedences,
998            // and to the default precedence, which is zero.
999            (Precedence::Integer(l), Precedence::Integer(r)) if *l != 0 || *r != 0 => l.cmp(r),
1000            (Precedence::Integer(l), Precedence::None) if *l != 0 => l.cmp(&0),
1001            (Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(r),
1002
1003            // Named precedences can be compared to other named precedences.
1004            _ => grammar
1005                .precedence_orderings
1006                .iter()
1007                .find_map(|list| {
1008                    let mut saw_left = false;
1009                    let mut saw_right = false;
1010                    for entry in list {
1011                        let matches_left = precedence_entry_matches(entry, left, left_symbols);
1012                        let matches_right = precedence_entry_matches(entry, right, right_symbols);
1013                        if matches_left {
1014                            saw_left = true;
1015                            if saw_right {
1016                                return Some(Ordering::Less);
1017                            }
1018                        } else if matches_right {
1019                            saw_right = true;
1020                            if saw_left {
1021                                return Some(Ordering::Greater);
1022                            }
1023                        }
1024                    }
1025                    None
1026                })
1027                .unwrap_or(Ordering::Equal),
1028        }
1029    }
1030
1031    fn get_auxiliary_node_info(
1032        &self,
1033        item_set: &ParseItemSet,
1034        symbol: Symbol,
1035    ) -> AuxiliarySymbolInfo {
1036        let parent_symbols = item_set
1037            .entries
1038            .iter()
1039            .filter_map(|ParseItemSetEntry { item, .. }| {
1040                let variable_index = item.variable_index as usize;
1041                if item.symbol() == Some(symbol)
1042                    && !self.syntax_grammar.variables[variable_index].is_auxiliary()
1043                {
1044                    Some(Symbol::non_terminal(variable_index))
1045                } else {
1046                    None
1047                }
1048            })
1049            .collect();
1050        AuxiliarySymbolInfo {
1051            auxiliary_symbol: symbol,
1052            parent_symbols,
1053        }
1054    }
1055
1056    fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId {
1057        let mut production_info = ProductionInfo {
1058            alias_sequence: Vec::new(),
1059            field_map: BTreeMap::new(),
1060        };
1061
1062        for (i, step) in item.production.steps.iter().enumerate() {
1063            production_info.alias_sequence.push(step.alias.clone());
1064            if let Some(field_name) = &step.field_name {
1065                production_info
1066                    .field_map
1067                    .entry(field_name.clone())
1068                    .or_default()
1069                    .push(FieldLocation {
1070                        index: i,
1071                        inherited: false,
1072                    });
1073            }
1074
1075            if step.symbol.kind == SymbolType::NonTerminal
1076                && !self.syntax_grammar.variables[step.symbol.index]
1077                    .kind
1078                    .is_visible()
1079            {
1080                let info = &self.variable_info[step.symbol.index];
1081                for field_name in info.fields.keys() {
1082                    production_info
1083                        .field_map
1084                        .entry(field_name.clone())
1085                        .or_default()
1086                        .push(FieldLocation {
1087                            index: i,
1088                            inherited: true,
1089                        });
1090                }
1091            }
1092        }
1093
1094        while production_info.alias_sequence.last() == Some(&None) {
1095            production_info.alias_sequence.pop();
1096        }
1097
1098        if item.production.steps.len() > self.parse_table.max_aliased_production_length {
1099            self.parse_table.max_aliased_production_length = item.production.steps.len();
1100        }
1101
1102        if let Some(index) = self
1103            .parse_table
1104            .production_infos
1105            .iter()
1106            .position(|seq| *seq == production_info)
1107        {
1108            index
1109        } else {
1110            self.parse_table.production_infos.push(production_info);
1111            self.parse_table.production_infos.len() - 1
1112        }
1113    }
1114
1115    fn symbol_name(&self, symbol: &Symbol) -> String {
1116        match symbol.kind {
1117            SymbolType::End | SymbolType::EndOfNonTerminalExtra => "EOF".to_string(),
1118            SymbolType::External => self.syntax_grammar.external_tokens[symbol.index]
1119                .name
1120                .clone(),
1121            SymbolType::NonTerminal => self.syntax_grammar.variables[symbol.index].name.clone(),
1122            SymbolType::Terminal => {
1123                let variable = &self.lexical_grammar.variables[symbol.index];
1124                if variable.kind == VariableType::Named {
1125                    variable.name.clone()
1126                } else {
1127                    format!("'{}'", variable.name)
1128                }
1129            }
1130        }
1131    }
1132}
1133
1134pub fn build_parse_table<'a>(
1135    syntax_grammar: &'a SyntaxGrammar,
1136    lexical_grammar: &'a LexicalGrammar,
1137    item_set_builder: ParseItemSetBuilder<'a>,
1138    variable_info: &'a [VariableInfo],
1139) -> BuildTableResult<(ParseTable, Vec<ParseStateInfo<'a>>)> {
1140    ParseTableBuilder::new(
1141        syntax_grammar,
1142        lexical_grammar,
1143        item_set_builder,
1144        variable_info,
1145    )
1146    .build()
1147}