1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
4use iter_index::IndexerIterator;
5use vectree::VecTree;
6use lexigram_core::alt::Alternative;
7use lexigram_core::log::LogMsg;
8use lexigram_core::{CharLen, TokenId};
9use crate::grammar::{grtree_to_str, GrTreeExt, LLParsingTable, NTConversion, ProdRuleSet};
10use crate::{columns_to_str, indent_source, AltId, NameFixer, NameTransformer, SourceSpacer, StructLibs, SymbolTable, VarId, LL1};
11use crate::fixed_sym_table::{FixedSymTable, SymInfoTable};
12use crate::alt::ruleflag;
13use crate::build::{BuildError, BuildErrorSource, BuildFrom, HasBuildErrorSource, TryBuildFrom};
14use crate::CollectJoin;
15use crate::grammar::origin::{FromPRS, Origin};
16use crate::lexergen::LexigramCrate;
17use crate::log::{BufLog, LogReader, LogStatus, Logger};
18use crate::parser::{OpCode, Parser, Symbol};
19use crate::segments::Segments;
20use crate::segmap::Seg;
21
22pub(crate) mod tests;
23
24pub(crate) fn symbol_to_code(s: &Symbol) -> String {
27 match s {
28 Symbol::Empty => "Symbol::Empty".to_string(),
29 Symbol::T(t) => format!("Symbol::T({t})"),
30 Symbol::NT(nt) => format!("Symbol::NT({nt})"),
31 Symbol::End => "Symbol::End".to_string(),
32 }
33}
34
/// Description of one item; `ItemInfo::to_str` shows how the fields are rendered.
#[derive(Clone, Debug, PartialEq)]
struct ItemInfo {
    /// Name of the item, printed first by `to_str`.
    name: String,
    /// Symbol attached to the item.
    sym: Symbol,
    /// ID of a nonterminal (`to_str` prints it as `Symbol::NT(owner)`).
    owner: VarId,
    /// Optional index, printed as `[n]` when present.
    /// NOTE(review): what the index refers to isn't visible in this part of the file.
    index: Option<usize>
}
44
45#[allow(unused)]
46impl ItemInfo {
47 fn to_str(&self, symbol_table: Option<&SymbolTable>) -> String {
48 format!("{} ({}{}, ◄{})",
49 self.name,
50 self.sym.to_str(symbol_table),
51 if let Some(n) = self.index { format!(", [{n}]") } else { "".to_string() },
52 Symbol::NT(self.owner).to_str(symbol_table))
53 }
54}
55
/// Data required to instantiate a [`Parser`] with [`ParserTables::make_parser`].
pub struct ParserTables {
    /// Number of nonterminals, copied from the parsing table.
    num_nt: usize,
    /// Number of terminals, copied from the parsing table.
    num_t: usize,
    /// Nonterminal of each alternative (first component of the parsing table's `alts` pairs).
    alt_var: Vec<VarId>,
    /// The alternatives themselves (second component of the parsing table's `alts` pairs).
    alts: Vec<Alternative>,
    /// One list of opcodes per alternative.
    opcodes: Vec<Vec<OpCode>>,
    /// Opcodes handed to the parser as its initial opcodes.
    init_opcodes: Vec<OpCode>,
    /// Parsing table content, taken from `LLParsingTable::table`.
    table: Vec<AltId>,
    /// Symbol table given to the parser.
    symbol_table: FixedSymTable,
    /// Start nonterminal; `new` asserts that it is below `num_nt`.
    start: VarId,
    /// When `false`, `make_parser` gives the parser an empty list of alternatives.
    include_alts: bool,
}
78
79impl ParserTables {
80 pub fn new(
81 parsing_table: LLParsingTable,
82 symbol_table: FixedSymTable,
83 opcodes: Vec<Vec<OpCode>>,
84 init_opcodes: Vec<OpCode>,
85 start: VarId,
86 include_alts: bool
87 ) -> Self {
88 assert!(parsing_table.num_nt > start as usize);
89 let num_nt = parsing_table.num_nt;
90 let num_t = parsing_table.num_t;
91 let table = parsing_table.table;
92 let (factor_var, alts): (Vec<_>, Vec<_>) = parsing_table.alts.into_iter().unzip();
93 ParserTables { num_nt, num_t, alt_var: factor_var, alts, opcodes, init_opcodes, table, symbol_table, start, include_alts }
94 }
95
96 pub fn make_parser(&self) -> Parser<'_> {
97 Parser::new(
98 self.num_nt,
99 self.num_t,
100 self.alt_var.as_slice(),
101 if self.include_alts { self.alts.clone() } else { vec![] },
102 self.opcodes.clone(),
103 self.init_opcodes.clone(),
104 self.table.as_slice(),
105 self.symbol_table.clone(),
106 self.start,
107 )
108 }
109}
110
111impl BuildFrom<ParserGen> for ParserTables {
112 fn build_from(parser_gen: ParserGen) -> Self {
115 ParserTables::new(
116 parser_gen.parsing_table,
117 parser_gen.symbol_table.to_fixed_sym_table(),
118 parser_gen.opcodes,
119 parser_gen.init_opcodes,
120 parser_gen.start,
121 parser_gen.options.include_alts
122 )
123 }
124}
125
126impl TryBuildFrom<ParserGen> for ParserTables {
128 type Error = BuildError;
129
130 fn try_build_from(source: ParserGen) -> Result<Self, Self::Error> {
131 if source.get_log().has_no_errors() {
132 Ok(ParserTables::build_from(source))
133 } else {
134 Err(BuildError::new(source.give_log(), BuildErrorSource::ParserGen))
135 }
136 }
137}
138
/// Selects which nonterminals are flagged as having a value
/// (see `ParserGen::apply_nt_value` for the way each variant is applied).
#[derive(Clone, PartialEq, Default, Debug)]
pub enum NTValue {
    /// No nonterminal is flagged.
    None,
    /// Flags the nonterminals that have no parent.
    Parents,
    /// Flags the nonterminals that have no parent, and those that have both the
    /// `CHILD_REPEAT` and `L_FORM` rule flags.
    #[default]
    Default,
    /// Flags the nonterminals whose IDs are listed; an ID that is out of range is
    /// reported as an error in the log.
    SetIds(Vec<VarId>),
    /// Flags the nonterminals whose names are listed. The names `NTValue::DEFAULT` and
    /// `NTValue::PARENTS` apply the matching variant, a name prefixed with `-` clears
    /// the flag after everything else has been applied, and an unknown name is
    /// reported as an error in the log.
    SetNames(Vec<String>),
}
163
164impl NTValue {
165 pub const DEFAULT: &str = "<default>";
167 pub const PARENTS: &str = "<parents>";
169
170 pub fn is_none(&self) -> bool {
171 matches!(self, NTValue::None)
172 }
173
174 pub fn is_parents(&self) -> bool {
175 matches!(self, NTValue::Parents)
176 }
177
178 pub fn is_default(&self) -> bool {
179 matches!(self, NTValue::Default)
180 }
181
182 pub fn is_ids(&self) -> bool {
183 matches!(self, NTValue::SetIds(_))
184 }
185
186 pub fn is_names(&self) -> bool {
187 matches!(self, NTValue::SetNames(_))
188 }
189}
190
/// Default listener name, `"Parser"`.
/// NOTE(review): not referenced in the visible part of this file — confirm where it is consumed.
pub static DEFAULT_LISTENER_NAME: &str = "Parser";
194
/// Integer type used to count the spans of an alternative (see `count_span_nbr`).
pub type SpanNbr = u16;
196
197fn count_span_nbr(opcode: &[OpCode]) -> SpanNbr {
198 let count = opcode.iter().filter(|op| op.has_span()).count();
199 count.try_into().unwrap_or_else(|_| panic!("# span = {count} > {}", SpanNbr::MAX))
200}
201
/// Bundle of read-only data: two plain values and three shared references of lifetime `'a`.
/// NOTE(review): no use of this struct is visible in this part of the file; the role
/// of each field has to be confirmed against the code that builds it.
struct SourceInputContext<'a> {
    parent_has_value : bool,
    parent_nt : usize,
    pinfo : &'a LLParsingTable,
    syns : &'a Vec<VarId>,
    ambig_op_alts : &'a BTreeMap<AltId, Vec<AltId>>,
}
209
/// Bundle of mutable references (lifetime `'a`) to sets, a vector and a name fixer.
/// NOTE(review): no use of this struct is visible in this part of the file; the role
/// of each field has to be confirmed against the code that builds it.
struct SourceState<'a> {
    init_nt_done : &'a mut HashSet<VarId>,
    span_init : &'a mut HashSet<VarId>,
    nt_contexts : &'a mut Vec<Option<Vec<AltId>>>,
    exit_alt_done : &'a mut HashSet<VarId>,
    exit_fixer : &'a mut NameFixer,
}
217
/// Collection of string lists; `src_init` and `src_exit` hold one list of strings per entry.
/// NOTE(review): no use of this struct is visible in this part of the file; presumably
/// each field holds lines of generated source code — confirm.
struct WrapperSources {
    src : Vec<String>,
    src_listener_decl : Vec<String>,
    src_skel : Vec<String>,
    src_types : Vec<String>,
    src_init : Vec<Vec<String>>,
    src_exit : Vec<Vec<String>>,
    src_wrapper_impl : Vec<String>,
}
227
/// Options of a [`ParserGen`]; each field has a matching setter on the generator.
#[derive(Clone, Debug)]
pub struct ParserGenOptions {
    /// Selects which nonterminals are flagged as having a value.
    pub nt_value: NTValue,
    /// Copied into `ParserTables`, where it decides whether the parser receives the alternatives.
    pub include_alts: bool,
    /// Headers added with `add_header` / `extend_headers`.
    pub headers: Vec<String>,
    /// Libraries added with `add_lib` / `extend_libs`.
    pub used_libs: StructLibs,
    /// Set with `set_gen_wrapper`.
    pub gen_wrapper: bool,
    /// Set with `set_gen_span_params`.
    pub gen_span_params: bool,
    /// Set with `set_gen_token_enums`; also forced to `true` by `set_terminal_hooks`
    /// when the list of hooks isn't empty.
    pub gen_token_enums: bool,
    /// Lexigram crate variant (`use_full_lib` selects `Full` or `Core`).
    pub lib_crate: LexigramCrate,
    /// Indentation given as `wrapper` in `set_indents`.
    pub indent: usize,
    /// Indentation given as `types` in `set_indents`.
    pub types_indent: usize,
    /// Indentation given as the listener one in `set_indents`.
    pub listener_indent: usize,
}
247
/// Parser generator built from a set of LL(1) production rules
/// (see `ParserGen::build_from_rules`).
#[derive(Debug)]
pub struct ParserGen {
    /// Parsing table generated from the LL(1) rules.
    parsing_table: LLParsingTable,
    /// Symbol table taken from the rules.
    symbol_table: SymbolTable,
    /// Terminals registered with `set_terminal_hooks`.
    terminal_hooks: Vec<TokenId>,
    /// Name of the generator.
    name: String,
    /// Generation options.
    options: ParserGenOptions,
    /// For each nonterminal, whether it has a value.
    nt_values: Vec<bool>,
    /// Indexed by top-parent nonterminal: all the nonterminals whose top parent is that one
    /// (empty for the nonterminals that aren't a top parent).
    nt_parent: Vec<Vec<VarId>>,
    /// For each nonterminal, the IDs of its alternatives.
    var_alts: Vec<Vec<AltId>>,
    /// Origin of the rules, used to print the original form of an alternative.
    origin: Origin<VarId, FromPRS>,
    /// Filled by `make_item_ops`.
    item_ops: Vec<Vec<Symbol>>,
    /// One list of opcodes per alternative, built by `make_opcodes`.
    opcodes: Vec<Vec<OpCode>>,
    /// Initial opcodes: `[End, NT(start)]`, plus a `Hook` when `add_opcode_hooks` requires it.
    init_opcodes: Vec<OpCode>,
    // NOTE(review): the next four fields are only initialized (empty) in the visible code.
    nt_name: Vec<(String, String, String)>,
    alt_info: Vec<Option<(VarId, String)>>,
    item_info: Vec<Vec<ItemInfo>>,
    child_repeat_endpoints: HashMap<VarId, Vec<AltId>>,
    /// Set with `set_gen_parser`; `true` after construction.
    gen_parser: bool,
    /// Span number of each alternative, computed by `make_span_nbrs`.
    span_nbrs: Vec<SpanNbr>,
    // NOTE(review): only initialized (empty) in the visible code.
    span_nbrs_sep_list: HashMap<AltId, SpanNbr>,
    /// Start nonterminal.
    start: VarId,
    /// Conversion of the original nonterminal IDs (moved or removed), used by `conv_nt`.
    nt_conversion: HashMap<VarId, NTConversion>,
    /// Types registered with `add_nt_type`, by nonterminal.
    nt_type: HashMap<VarId, String>,
    /// Log inherited from the rules, then used by the generator.
    log: BufLog,
}
277
278impl ParserGen {
279 pub fn build_from_rules<T>(mut rules: ProdRuleSet<T>, name: String) -> Self
285 where
286 ProdRuleSet<LL1>: BuildFrom<ProdRuleSet<T>>,
287 {
288 rules.log.add_note("building parser gen from rules...");
289 let mut ll1_rules = ProdRuleSet::<LL1>::build_from(rules);
290 assert_eq!(ll1_rules.get_log().num_errors(), 0);
291 let parsing_table = ll1_rules.make_parsing_table(true);
292 let num_nt = ll1_rules.get_num_nt();
293 let start = ll1_rules.get_start().unwrap();
294 let mut var_alts = vec![vec![]; num_nt];
295 for (alt_id, (var_id, _)) in parsing_table.alts.iter().index() {
296 var_alts[*var_id as usize].push(alt_id);
297 }
298 let mut nt_parent: Vec<Vec<VarId>> = vec![vec![]; num_nt];
299 for var_id in 0..num_nt {
300 let top_var_id = parsing_table.get_top_parent(var_id as VarId) as usize;
301 nt_parent[top_var_id].push(var_id as VarId);
302 }
303 let ProdRuleSet { symbol_table, nt_conversion, origin, .. } = ll1_rules;
304 let mut builder = ParserGen {
305 parsing_table,
306 symbol_table: symbol_table.expect(stringify!("symbol table is required to create a {}", std::any::type_name::<Self>())),
307 name,
308 options: ParserGenOptions::default(),
309 nt_values: vec![false; num_nt],
310 nt_parent,
311 var_alts,
312 origin,
313 terminal_hooks: Vec::new(),
314 item_ops: Vec::new(),
315 opcodes: Vec::new(),
316 init_opcodes: Vec::new(),
317 nt_name: Vec::new(),
318 alt_info: Vec::new(),
319 item_info: Vec::new(),
320 child_repeat_endpoints: HashMap::new(),
321 gen_parser: true,
322 span_nbrs: Vec::new(),
323 span_nbrs_sep_list: HashMap::new(),
324 start,
325 nt_conversion,
326 nt_type: HashMap::new(),
327 log: ll1_rules.log,
328 };
329 builder.apply_options();
330 builder.make_opcodes();
331 builder.make_span_nbrs();
332 builder
333 }
334
335 pub fn set_options(&mut self, options: ParserGenOptions) {
336 self.options = options;
337 self.apply_options();
338 }
339
340 fn apply_options(&mut self) {
342 self.apply_nt_value();
343 }
344
345 pub fn set_name(&mut self, name: String) {
346 self.name = name;
347 }
348
349 pub fn get_name(&self) -> &str {
350 &self.name
351 }
352
353 #[inline]
354 pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
355 Some(&self.symbol_table)
356 }
357
358 #[inline]
359 pub fn get_parsing_table(&self) -> &LLParsingTable {
360 &self.parsing_table
361 }
362
363 #[inline]
364 pub fn set_terminal_hooks(&mut self, terminal_hooks: Vec<TokenId>) {
365 if !terminal_hooks.is_empty() {
366 self.options.gen_token_enums = true;
367 }
368 self.terminal_hooks = terminal_hooks;
369 self.add_opcode_hooks();
370 }
371
372 #[inline]
373 pub fn add_header<T: Into<String>>(&mut self, header: T) {
374 self.options.headers.push(header.into());
375 }
376
377 #[inline]
378 pub fn extend_headers<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, headers: I) {
379 self.options.headers.extend(headers.into_iter().map(|s| s.into()));
380 }
381
382 #[inline]
383 pub fn add_lib<T: Into<String>>(&mut self, lib:T) {
384 self.options.used_libs.add(lib);
385 }
386
387 #[inline]
388 pub fn extend_libs<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, libs: I) {
389 self.options.used_libs.extend(libs);
390 }
391
392 #[inline]
393 pub fn add_nt_type<T: Into<String>>(&mut self, org_var: VarId, var_type: T) {
396 let var = self.conv_nt(org_var).unwrap_or_else(|| panic!("var {org_var} doesn't exist"));
397 self.nt_type.insert(var, var_type.into());
398 }
399
400 #[inline]
401 pub fn get_nt_type(&self, v: VarId) -> &str {
402 self.nt_type.get(&v).unwrap().as_str()
403 }
404
405 pub fn set_nt_value(&mut self, nt_value: NTValue) {
407 self.options.nt_value = nt_value;
408 self.apply_nt_value();
409 }
410
411 fn apply_nt_value(&mut self) {
412 let num_nt = self.get_symbol_table().unwrap().get_num_nt() as VarId;
413 let mut stack = vec![&self.options.nt_value];
414 let mut neg_stack = vec![];
415 self.nt_values.fill(false);
416 while let Some(nt_value) = stack.pop() {
417 match nt_value {
418 NTValue::None => {}
419 NTValue::Parents => {
420 for v in 0..num_nt {
421 if self.get_nt_parent(v).is_none() {
422 self.nt_values[v as usize] = true;
423 }
424 }
425 }
426 NTValue::Default => {
427 for v in 0..num_nt {
428 if self.get_nt_parent(v).is_none() || self.nt_has_all_flags(v, ruleflag::CHILD_REPEAT | ruleflag::L_FORM) {
429 self.nt_values[v as usize] = true;
430 }
431 }
432 }
433 NTValue::SetIds(ids) => {
434 for v in ids {
435 if *v < num_nt {
436 self.nt_values[*v as usize] = true;
437 } else {
438 self.log.add_error(format!("setting value of NT #{v}, which doesn't exist"));
439 }
440 }
441 }
442 NTValue::SetNames(names) => {
443 let name_to_id = self.symbol_table.get_nonterminals().index::<VarId>()
444 .map(|(v, name)| (name.as_str(), v))
445 .collect::<HashMap<&str, VarId>>();
446 for name in names {
447 match name.as_str() {
448 NTValue::DEFAULT => stack.push(&NTValue::Default),
449 NTValue::PARENTS => stack.push(&NTValue::Parents),
450 mut nt_name => {
451 let add = if !nt_name.starts_with('-') {
452 true
453 } else {
454 nt_name = &nt_name[1..];
455 false
456 };
457 if let Some(v) = name_to_id.get(nt_name) {
458 if add {
459 self.nt_values[*v as usize] = true;
460 } else {
461 neg_stack.push(*v);
462 }
463 } else {
464 self.log.add_error(format!("setting value of NT '{name}', which doesn't exist"));
465 }
466 }
467 }
468 }
469 }
470 }
471 }
472 for v in neg_stack {
473 self.nt_values[v as usize] = false;
474 }
475 }
476
477 #[inline]
478 pub fn set_nt_has_value(&mut self, v: VarId, has_value: bool) {
479 self.nt_values[v as usize] = has_value;
480 }
481
482 pub fn set_gen_parser(&mut self, gen_parser: bool) {
484 self.gen_parser = gen_parser;
485 }
486
487 pub fn set_gen_wrapper(&mut self, gen_wrapper: bool) {
489 self.options.gen_wrapper = gen_wrapper;
490 }
491
492 pub fn set_indent(&mut self, indent: usize) {
494 self.options.indent = indent;
495 }
496
497 pub fn set_types_indent(&mut self, indent: usize) {
500 self.options.types_indent = indent;
501 }
502
503 pub fn set_listener_indent(&mut self, indent: usize) {
506 self.options.listener_indent = indent;
507 }
508
509 pub fn set_indents(&mut self, wrapper: usize, types: usize, listner: usize) {
512 self.options.indent = wrapper;
513 self.options.types_indent = types;
514 self.options.listener_indent = listner;
515 }
516
517 pub fn set_gen_span_params(&mut self, gen_span_params: bool) {
519 self.options.gen_span_params = gen_span_params;
520 }
521
522 pub fn set_gen_token_enums(&mut self, gen_token_enums: bool) {
543 self.options.gen_token_enums = gen_token_enums;
544 }
545
546 #[inline]
547 pub fn get_nt_parent(&self, v: VarId) -> Option<VarId> {
548 self.parsing_table.parent[v as usize]
549 }
550
551 pub fn set_include_alts(&mut self, include_alts: bool) {
554 self.options.include_alts = include_alts;
555 }
556
557 #[inline]
558 pub fn use_full_lib(&mut self, use_full_lib: bool) {
559 self.options.lib_crate = if use_full_lib { LexigramCrate::Full } else { LexigramCrate::Core };
560 }
561
562 #[inline]
563 pub fn set_crate(&mut self, lcrate: LexigramCrate) {
564 self.options.lib_crate = lcrate;
565 }
566
567 #[cfg(test)] fn get_original_alt_str(&self, a_id: AltId, symbol_table: Option<&SymbolTable>) -> Option<String> {
569 let (_var, f) = &self.parsing_table.alts[a_id as usize];
570 f.get_origin().and_then(|(o_v, o_id)| {
571 Some(format!(
572 "{} -> {}",
573 Symbol::NT(o_v).to_str(symbol_table),
574 grtree_to_str(self.origin.get_tree(o_v).unwrap(), Some(o_id), None, Some(o_v), symbol_table, false)
575 ))
576 })
577 }
578
579 fn conv_nt(&self, org_var: VarId) -> Option<VarId> {
585 match self.nt_conversion.get(&org_var) {
586 None => if (org_var as usize) < self.parsing_table.num_nt { Some(org_var) } else { None },
587 Some(NTConversion::MovedTo(new)) => Some(*new),
588 Some(NTConversion::Removed) => None
589 }
590 }
591
592 #[allow(unused)]
593 fn nt_has_all_flags(&self, var: VarId, flags: u32) -> bool {
594 self.parsing_table.flags[var as usize] & flags == flags
595 }
596
597 #[allow(unused)]
598 fn nt_has_any_flags(&self, var: VarId, flags: u32) -> bool {
599 self.parsing_table.flags[var as usize] & flags != 0
600 }
601
602 #[allow(unused)]
603 fn sym_has_flags(&self, s: &Symbol, flags: u32) -> bool {
604 if let Symbol::NT(nt) = s { self.nt_has_all_flags(*nt, flags) } else { false }
605 }
606
607 #[allow(unused)]
608 fn sym_has_value(&self, symbol: &Symbol) -> bool {
609 match symbol {
610 Symbol::T(t) => self.symbol_table.is_token_data(*t),
611 Symbol::NT(nt) => self.nt_values[*nt as usize],
612 _ => false
613 }
614 }
615
616 fn full_alt_components(&self, a_id: AltId, emphasis: Option<VarId>) -> (String, String) {
617 const VERBOSE: bool = false;
618 if VERBOSE { println!("full_alt_components(a_id = {a_id}):"); }
619 let &(mut v_a, ref alt) = &self.parsing_table.alts[a_id as usize];
620 while self.parsing_table.flags[v_a as usize] & ruleflag::CHILD_L_FACT != 0 {
621 v_a = *self.parsing_table.parent[v_a as usize].as_ref().unwrap();
622 }
623 let symtab = self.get_symbol_table();
624 if let Some(v_emph) = emphasis {
625 let parent_nt = self.parsing_table.get_top_parent(v_emph);
626 if let Some((t_emph, id_emph)) = self.origin.get(v_emph) {
627 return ((Symbol::NT(parent_nt).to_str(symtab)), grtree_to_str(t_emph, None, Some(id_emph), Some(parent_nt), symtab, true));
628 } else {
629 return (Symbol::NT(parent_nt).to_str(symtab), format!("<VAR {v_emph} NOT FOUND>"));
630 }
631 }
632 if let Some((vo, id)) = alt.get_origin() {
633 let t = self.origin.get_tree(vo).unwrap();
634 let flags = self.parsing_table.flags[v_a as usize];
635 if v_a != vo && flags & ruleflag::CHILD_REPEAT != 0 {
636 (
638 String::new(),
639 format!("`{}` {} in `{} -> {}`",
640 grtree_to_str(t, Some(id), None, Some(vo), symtab, true),
641 if flags & ruleflag::L_FORM != 0 { "iteration" } else { "item" },
642 Symbol::NT(vo).to_str(symtab),
643 grtree_to_str(t, None, Some(id), Some(vo), symtab, true))
644 )
645 } else {
646 let root = Some(id);
647 (Symbol::NT(vo).to_str(symtab), grtree_to_str(t, root, None, Some(vo), symtab, true))
648 }
649 } else {
650 (Symbol::NT(v_a).to_str(symtab), format!("<alt {a_id} NOT FOUND>"))
651 }
652 }
653
654 fn full_alt_str(&self, a_id: AltId, emphasis: Option<VarId>, quote: bool) -> String {
656 let (left, right) = self.full_alt_components(a_id, emphasis);
657 if left.is_empty() {
658 right
659 } else {
660 format!("{q}{left} -> {right}{q}", q = if quote { "`" } else { "" })
661 }
662 }
663
    /// Rebuilds `self.opcodes`, with one list of opcodes per alternative of the parsing
    /// table, and resets `self.init_opcodes` to `[End, NT(start)]`.
    ///
    /// For each alternative, the non-empty symbols are taken in reverse order and
    /// converted to opcodes. Depending on the rule flags of the nonterminal, an
    /// `Exit(alt_id)` opcode is placed in front, some nonterminals are replaced by
    /// `Loop` opcodes, and the first two opcodes may be swapped.
    fn make_opcodes(&mut self) {
        const VERBOSE: bool = false;
        self.log.add_note("- making opcodes...");
        self.opcodes.clear();
        self.init_opcodes = vec![OpCode::End, OpCode::NT(self.start)];
        for (alt_id, (var_id, alt)) in self.parsing_table.alts.iter().index() {
            if VERBOSE {
                println!("{alt_id}: {}", alt.to_rule_str(*var_id, self.get_symbol_table(), 0));
            }
            let flags = self.parsing_table.flags[*var_id as usize];
            let stack_sym = Symbol::NT(*var_id);
            // symbols of the alternative, without the empty ones, in reverse order
            let mut new = self.parsing_table.alts[alt_id as usize].1.iter().filter(|s| !s.is_empty()).rev().cloned().to_vec();
            if VERBOSE { println!(" - {}", new.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
            let mut opcode = Vec::<OpCode>::new();
            let mut parent = self.parsing_table.parent[*var_id as usize];
            if flags & ruleflag::CHILD_L_FACT != 0 {
                // climbs to the first ancestor that isn't itself a CHILD_L_FACT
                while self.nt_has_all_flags(parent.unwrap(), ruleflag::CHILD_L_FACT) {
                    parent = self.parsing_table.parent[parent.unwrap() as usize];
                }
                let parent = parent.unwrap();
                // the ancestor has R_RECURSION and this nonterminal doesn't have L_FORM
                let parent_r_form_right_rec = self.parsing_table.flags[parent as usize] & ruleflag::R_RECURSION != 0 && flags & ruleflag::L_FORM == 0;
                if VERBOSE {
                    println!(" - child lfact, parent: {}, !parent_r_form_right_rec = !{parent_r_form_right_rec}, match = {}",
                             Symbol::NT(parent).to_str(self.get_symbol_table()),
                             new.first() == Some(&Symbol::NT(parent)));
                }
                // the alternative ends with that ancestor (first item of the reversed list):
                // it becomes a Loop opcode placed first
                if new.first() == Some(&Symbol::NT(parent)) && !parent_r_form_right_rec {
                    opcode.push(OpCode::Loop(parent));
                    new.remove(0);
                }
            }
            // PARENT_L_RECURSION is set and PARENT_L_FACTOR isn't
            let parent_lrec_no_lfact = flags & (ruleflag::PARENT_L_RECURSION | ruleflag::PARENT_L_FACTOR) == ruleflag::PARENT_L_RECURSION;
            // the Exit opcode is only omitted when the nonterminal has PARENT_L_FACTOR
            // and the alternative contains a CHILD_L_FACT nonterminal
            if flags & ruleflag::PARENT_L_FACTOR == 0 ||
                parent_lrec_no_lfact ||
                new.iter().all(|s| if let Symbol::NT(ch) = s { !self.nt_has_all_flags(*ch, ruleflag::CHILD_L_FACT) } else { true })
            {
                opcode.push(OpCode::Exit(alt_id));
            }
            opcode.extend(new.into_iter().map(OpCode::from));
            let r_form_right_rec = flags & ruleflag::R_RECURSION != 0 && flags & ruleflag::L_FORM == 0;
            if VERBOSE { println!(" - r_form_right_rec = {r_form_right_rec} = {} || {}",
                                  flags & ruleflag::R_RECURSION != 0 && flags & ruleflag::L_FORM == 0,
                                  flags & ruleflag::CHILD_L_FACT != 0 && self.parsing_table.flags[parent.unwrap() as usize] & ruleflag::R_RECURSION != 0 && flags & ruleflag::L_FORM == 0); }
            // at most one of the three cases below swaps the first two opcodes
            if opcode.get(1).map(|op| op.matches(stack_sym)).unwrap_or(false) && !r_form_right_rec {
                // the second opcode matches the nonterminal of this alternative
                opcode.swap(0, 1);
                if VERBOSE { println!(" - swap 0, 1: {}", opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
            } else if parent_lrec_no_lfact {
                // the second opcode is a CHILD_L_RECURSION nonterminal
                if let Some(OpCode::NT(x)) = opcode.get(1) {
                    if self.nt_has_all_flags(*x, ruleflag::CHILD_L_RECURSION) {
                        opcode.swap(0, 1);
                        if VERBOSE { println!(" - swap 0, 1: {}", opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
                    }
                }
            } else if flags & ruleflag::CHILD_INDEPENDENT_AMBIGUITY != 0 && opcode.len() > 1 {
                // the second opcode is a CHILD_AMBIGUITY nonterminal: it is moved first
                // and turned into a Loop opcode
                if let Some(OpCode::NT(var_prime)) = opcode.get(1) {
                    let vp = *var_prime;
                    if self.nt_has_all_flags(vp, ruleflag::CHILD_AMBIGUITY) {
                        opcode.swap(0, 1);
                        opcode[0] = OpCode::Loop(vp);
                        if VERBOSE { println!(" - child indep ambig: {}", opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
                    }
                }
            }
            if flags & ruleflag::CHILD_L_FACT != 0 && opcode.len() >= 2 {
                // `parent` is the direct parent here (the climbed ancestor above was a shadowing local)
                if self.nt_has_all_flags(parent.unwrap(), ruleflag::R_RECURSION | ruleflag::L_FORM)
                    && opcode[1] == OpCode::NT(parent.unwrap())
                {
                    opcode.swap(0, 1);
                    opcode[0] = OpCode::Loop(parent.unwrap());
                }
                let fact_top = self.parsing_table.get_top_parent(*var_id);
                if VERBOSE {
                    println!(" - check for initial exit swap: opcode = [{}], daddy = {}",
                             opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
                             Symbol::NT(fact_top).to_str(self.get_symbol_table()));
                }
                // the top parent has PARENT_L_RECURSION and the opcodes start with an Exit
                // followed by a CHILD_L_RECURSION nonterminal: both are swapped
                if self.parsing_table.flags[fact_top as usize] & ruleflag::PARENT_L_RECURSION != 0 &&
                    matches!(opcode[0], OpCode::Exit(_)) &&
                    matches!(opcode[1], OpCode::NT(v) if self.parsing_table.flags[v as usize] & ruleflag::CHILD_L_RECURSION != 0)
                {
                    if VERBOSE {
                        println!(" swapping for initial exit_{}: {} <-> {}",
                                 Symbol::NT(fact_top).to_str(self.get_symbol_table()).to_lowercase(),
                                 opcode[0].to_str(self.get_symbol_table()),
                                 opcode[1].to_str(self.get_symbol_table())
                        );
                    }
                    opcode.swap(0, 1);
                }
            }
            // any remaining reference to the nonterminal of this alternative becomes a Loop,
            // unless the nonterminal has R_RECURSION without L_FORM
            opcode.iter_mut().for_each(|o| {
                if let OpCode::NT(v) = o {
                    if v == var_id && !r_form_right_rec {
                        *o = OpCode::Loop(*v)
                    }
                }
            });
            if VERBOSE { println!(" -> {}", opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
            self.opcodes.push(opcode);
        }
    }
782
783 fn add_opcode_hooks(&mut self) {
823 const VERBOSE: bool = false;
824 self.log.add_note("- adding hooks into opcodes...");
825 let hooks: HashSet<TokenId> = self.terminal_hooks.iter().cloned().collect();
826 let num_nt = self.parsing_table.num_nt;
827 let num_t = self.parsing_table.num_t;
828 let err = self.parsing_table.alts.len() as AltId;
829 if VERBOSE {
830 self.parsing_table.print(self.get_symbol_table(), 0);
831 println!("num_nt = {num_nt}\nnum_t = {num_t}\ntable: {}", self.parsing_table.table.len());
832 }
833 if VERBOSE { println!("hooks: {}", self.terminal_hooks.iter().map(|t| self.symbol_table.get_t_name(*t)).join(", ")); }
834 let deps: HashSet<VarId> = (0..num_nt as VarId)
835 .filter(|&nt| hooks.iter().any(|&t| self.parsing_table.table[nt as usize * num_t + t as usize] < err))
836 .collect();
837 if VERBOSE { println!("deps = {deps:?} = {}", deps.iter().map(|nt| self.symbol_table.get_nt_name(*nt)).join(", ")); }
838
839 if deps.contains(&self.start) {
841 self.init_opcodes = vec![OpCode::End, OpCode::NT(self.start), OpCode::Hook];
842 }
843 let mut changed = false;
844 for opcodes in self.opcodes.iter_mut() {
845 let mut new = vec![];
846 let n = opcodes.len();
847 for op in &opcodes[..n - 1] {
848 new.push(*op);
849 match op {
850 OpCode::T(t) if hooks.contains(t) => {
851 new.push(OpCode::Hook);
852 }
853 OpCode::NT(nt) | OpCode::Loop(nt) if deps.contains(nt) => {
854 new.push(OpCode::Hook);
855 }
856 _ => {}
857 }
858 }
859 if new.len() + 1 > n {
860 new.push(opcodes[n - 1]);
861 *opcodes = new;
862 changed = true;
863 }
864 }
865 if VERBOSE && changed {
866 println!("new opcodes:");
867 let mut cols = vec![];
868 let tbl = self.get_symbol_table();
869 for (i, (opcodes, (nt, alt))) in self.opcodes.iter().zip(&self.parsing_table.alts).enumerate() {
870 cols.push(vec![
871 i.to_string(),
872 format!("{} -> ", Symbol::NT(*nt).to_str(tbl)),
873 alt.to_str(tbl),
874 opcodes.iter().map(|op| op.to_str(tbl)).join(" "),
875 ]);
876 }
877 println!("{}", indent_source(vec![columns_to_str(cols, None)], 4))
878 }
879 }
880
881 fn make_span_nbrs(&mut self) {
882 self.log.add_note("- making spans...");
883 let mut span_nbrs = vec![0 as SpanNbr; self.parsing_table.alts.len()];
884 for (alt_id, (var_id, _)) in self.parsing_table.alts.iter().enumerate() {
885 let opcode = &self.opcodes[alt_id];
886 let mut span_nbr = span_nbrs[alt_id] + count_span_nbr(opcode);
887 if self.nt_has_any_flags(*var_id, ruleflag::CHILD_REPEAT | ruleflag::CHILD_L_RECURSION) ||
888 self.nt_has_all_flags(*var_id, ruleflag::R_RECURSION | ruleflag::L_FORM) {
889 span_nbr += 1;
891 }
892 if matches!(opcode.first(), Some(OpCode::NT(nt)) if nt != var_id && self.parsing_table.flags[*nt as usize] & ruleflag::CHILD_L_RECURSION != 0) {
893 span_nbr -= 1;
895 }
896 if self.nt_has_all_flags(*var_id, ruleflag::PARENT_L_FACTOR) {
900 if let Some(OpCode::NT(nt)) = opcode.first() {
901 span_nbr -= 1;
902 for a_id in self.var_alts[*nt as usize].iter() {
903 span_nbrs[*a_id as usize] += span_nbr;
904 }
906 span_nbr = 0;
908 }
909 }
910 span_nbrs[alt_id] = span_nbr;
911 }
912 self.span_nbrs = span_nbrs;
913 }
914
915 fn get_group_alts(&self, g: &[VarId]) -> Vec<(VarId, AltId)> {
916 g.iter().flat_map(|c|
917 self.var_alts[*c as usize].iter().map(|a| (*c, *a))
918 ).collect::<Vec<_>>()
919 }
920
921 fn gather_alts(&self, nt: VarId) -> Vec<AltId> {
925 const VERBOSE: bool = false;
926 let mut alt = vec![];
927 let mut explore = VecDeque::<VarId>::new();
928 explore.push_back(nt);
929 while !explore.is_empty() {
930 let var = explore.pop_front().unwrap();
931 if VERBOSE { println!("{var}: alt = {} | explore = {} | alts: {}",
932 alt.iter().join(", "), explore.iter().join(", "),
933 &self.var_alts[var as usize].iter().join(", ")); }
934 for a in &self.var_alts[var as usize] {
935 let (_, alter) = &self.parsing_table.alts[*a as usize];
936 if let Some(Symbol::NT(last)) = alter.symbols().last() {
937 if self.nt_has_all_flags(*last, ruleflag::CHILD_L_FACT) {
938 explore.push_back(*last);
940 continue;
941 }
942 }
943 alt.push(*a);
944 }
945 if VERBOSE { println!(" => alt = {} | explore = {}", alt.iter().join(", "), explore.iter().join(", ")); }
946 }
947 alt
948 }
949
    /// Propagates the "has a value" flag to some child nonterminals.
    ///
    /// The nonterminals are processed by group (all the nonterminals sharing the same
    /// top parent). An alternative holds a value if one of its opcodes is a terminal
    /// with token data or a nonterminal flagged in `nt_values`. When such an
    /// alternative belongs to a nonterminal that has a parent, the closest nonterminal
    /// with `CHILD_REPEAT` (itself or an ancestor) is looked up; if it has
    /// `CHILD_REPEAT` without `L_FORM`, it is flagged as having a value.
    ///
    /// A group is evaluated again when a nonterminal gets flagged after it was already
    /// seen in another alternative during the same pass.
    fn calc_nt_value(&mut self) {
        const VERBOSE: bool = false;
        self.log.add_note("- calculating nonterminals' value...");
        for g in self.nt_parent.iter().filter(|va| !va.is_empty()) {
            let group = self.get_group_alts(g);
            let mut re_evaluate = true;
            // first nonterminal of the group, used as its top parent
            let g_top = g[0];
            let is_ambig = self.nt_has_all_flags(g_top, ruleflag::PARENT_AMBIGUITY);
            while re_evaluate {
                re_evaluate = false;
                // nonterminals met in the opcodes during this pass
                let mut nt_used = HashSet::<VarId>::new();
                if VERBOSE {
                    let ids = group.iter().map(|(v, _)| *v).collect::<BTreeSet<VarId>>();
                    println!("parent: {}, NT with value: {}",
                             Symbol::NT(g[0]).to_str(self.get_symbol_table()),
                             ids.into_iter().filter_map(|v|
                                 if self.nt_values[v as usize] { Some(Symbol::NT(v as VarId).to_str(self.get_symbol_table())) } else { None }
                             ).join(", "));
                }
                for (var_id, alt_id) in &group {
                    let mut has_value = false;
                    for s in &self.opcodes[*alt_id as usize] {
                        match s {
                            OpCode::T(t) =>
                                has_value |= self.symbol_table.is_token_data(*t),
                            OpCode::NT(nt) => {
                                // in an ambiguous group, a direct child of the top parent that has neither
                                // CHILD_L_RECURSION nor CHILD_REPEAT is counted as the top parent itself
                                let is_ambig_top = is_ambig && self.get_nt_parent(*nt) == Some(g_top)
                                    && !self.nt_has_any_flags(*nt, ruleflag::CHILD_L_RECURSION | ruleflag::CHILD_REPEAT);
                                let var = if is_ambig_top { g_top } else { *nt };
                                nt_used.insert(var);
                                has_value |= self.nt_values[var as usize]
                            },
                            _ => {}
                        }
                    }
                    if has_value && self.parsing_table.parent[*var_id as usize].is_some() {
                        let mut child_nt = *var_id as usize;
                        // climbs until a CHILD_REPEAT nonterminal or the top of the hierarchy is reached
                        while self.parsing_table.flags[child_nt] & ruleflag::CHILD_REPEAT == 0 {
                            if let Some(parent) = self.parsing_table.parent[child_nt] {
                                child_nt = parent as usize;
                            } else {
                                break;
                            }
                        }
                        // CHILD_REPEAT is set and L_FORM isn't
                        if self.parsing_table.flags[child_nt] & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT {
                            if VERBOSE && !self.nt_values[child_nt] {
                                print!(" | {} is now valued {}",
                                       Symbol::NT(child_nt as VarId).to_str(self.get_symbol_table()),
                                       if nt_used.contains(&(child_nt as VarId)) { "and was used before" } else { "but wasn't used before" }
                                );
                            }
                            // another pass is required if the newly valued nonterminal was already seen
                            re_evaluate |= !self.nt_values[child_nt] && nt_used.contains(&(child_nt as VarId));
                            self.nt_values[child_nt] = true;
                        }
                    }
                }
            }
        }
    }
1016
1017 pub(crate) fn make_item_ops(&mut self) {
1018 const VERBOSE: bool = false;
1019 self.calc_nt_value();
1020 self.log.add_note("- making item ops...");
1021 let info = &self.parsing_table;
1022 let mut items = vec![Vec::<Symbol>::new(); self.parsing_table.alts.len()];
1023 if VERBOSE {
1024 println!("Groups:");
1025 for g in self.nt_parent.iter().filter(|va| !va.is_empty()) {
1026 let group = self.get_group_alts(g);
1027 let ids = group.iter().map(|(v, _)| *v).collect::<BTreeSet<VarId>>();
1028 println!("{}: {}, alts {}",
1029 Symbol::NT(g[0]).to_str(self.get_symbol_table()),
1030 ids.iter().map(|v| Symbol::NT(*v).to_str(self.get_symbol_table())).join(", "),
1031 group.iter().map(|(_, a)| a.to_string()).join(", ")
1032 );
1033 }
1034 }
1035 let mut alts_to_revisit = HashSet::<AltId>::new();
1036 for g in self.nt_parent.iter().filter(|va| !va.is_empty()) {
1038 let group = self.get_group_alts(g);
1040 let g_top = g[0];
1041 let is_ambig = self.nt_has_all_flags(g_top, ruleflag::PARENT_AMBIGUITY);
1042 if VERBOSE {
1043 let ids = group.iter().map(|(v, _)| *v).collect::<BTreeSet<VarId>>();
1044 println!("parent: {}, NT with value: {}",
1045 Symbol::NT(g[0]).to_str(self.get_symbol_table()),
1046 ids.into_iter().filter_map(|v|
1047 if self.nt_values[v as usize] { Some(Symbol::NT(v as VarId).to_str(self.get_symbol_table())) } else { None }
1048 ).join(", "));
1049 }
1050 let g_top_has_value = self.nt_values[g_top as usize];
1051 for (var_id, alt_id) in &group {
1052 let ambig_loop_value = g_top_has_value && is_ambig && self.nt_has_all_flags(*var_id, ruleflag::CHILD_L_RECURSION);
1053 items[*alt_id as usize] = if ambig_loop_value { vec![Symbol::NT(g_top)] } else { vec![] };
1054 }
1055 for (var_id, alt_id) in &group {
1056 let opcode = &self.opcodes[*alt_id as usize];
1057 let (_, alt) = &info.alts[*alt_id as usize];
1058 if VERBOSE {
1059 print!("- {alt_id}: {} -> {} [{}]",
1060 Symbol::NT(*var_id).to_str(self.get_symbol_table()),
1061 alt.to_str(self.get_symbol_table()),
1062 opcode.iter().map(|op| op.to_str(self.get_symbol_table())).join(" "));
1063 }
1064 let flags = info.flags[*var_id as usize];
1065
1066 let mut has_sep_list_child_without_value = false;
1070 let mut values = self.opcodes[*alt_id as usize].iter().rev()
1071 .filter_map(|s| {
1072 let sym_maybe = match s {
1073 OpCode::T(t) => Some(Symbol::T(*t)),
1074 OpCode::NT(nt) => {
1075 let is_ambig_top = is_ambig && self.get_nt_parent(*nt) == Some(g_top)
1076 && !self.nt_has_any_flags(*nt, ruleflag::CHILD_L_RECURSION | ruleflag::CHILD_REPEAT);
1077 let var = if is_ambig_top { g_top } else { *nt };
1078 Some(Symbol::NT(var))
1079 },
1080 _ => {
1081 if VERBOSE { print!(" | {} dropped", s.to_str(self.get_symbol_table())); }
1082 None
1083 }
1084 };
1085 sym_maybe.and_then(|s| {
1086 const REP_MASK: u32 = ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM;
1087 const CHILD_STAR: u32 = ruleflag::CHILD_REPEAT | ruleflag::L_FORM;
1088 let has_value = self.sym_has_value(&s);
1089 if has_value
1090 || matches!(s, Symbol::NT(v) if v != *var_id && self.parsing_table.flags[v as usize] & REP_MASK == CHILD_STAR)
1093 {
1094 if !has_value {
1095 has_sep_list_child_without_value = true;
1096 }
1097 Some(s)
1098 } else {
1099 None
1100 }
1101 })
1102 }).to_vec();
1103 if has_sep_list_child_without_value {
1105 alts_to_revisit.insert(*alt_id);
1107 }
1108 let parent_is_rrec_lfact = !is_ambig && self.nt_has_all_flags(g[0], ruleflag::R_RECURSION | ruleflag::PARENT_L_FACTOR);
1110 if parent_is_rrec_lfact {
1111 if flags & ruleflag::CHILD_L_FACT != 0 && self.nt_has_all_flags(g[0], ruleflag::L_FORM) {
1112 assert!(!self.nt_has_all_flags(*var_id, ruleflag::CHILD_L_FACT | ruleflag::L_FORM), "this was useful after all");
1113 if VERBOSE { print!(" child_rrec_lform_lfact"); }
1114 items[*alt_id as usize].insert(0, Symbol::NT(g[0]));
1115 }
1116 } else {
1117 let sym_maybe = if flags & ruleflag::CHILD_REPEAT != 0 && (self.nt_values[*var_id as usize] || flags & ruleflag::L_FORM != 0) {
1118 Some(Symbol::NT(*var_id))
1119 } else if !is_ambig && flags & ruleflag::CHILD_L_RECURSION != 0 {
1120 let parent = info.parent[*var_id as usize].unwrap();
1121 Some(Symbol::NT(parent))
1122 } else if !is_ambig && flags & (ruleflag::R_RECURSION | ruleflag::L_FORM) == ruleflag::R_RECURSION | ruleflag::L_FORM {
1123 Some(Symbol::NT(*var_id))
1124 } else {
1125 None
1126 };
1127 if let Some(s) = sym_maybe {
1128 if self.sym_has_value(&s) {
1129 if VERBOSE { print!(" | loop => {}", s.to_str(self.get_symbol_table())); }
1130 values.insert(0, s);
1131 }
1132 }
1133 }
1134 if VERBOSE {
1135 println!(" ==> [{}] + [{}]",
1136 items[*alt_id as usize].iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
1137 values.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "));
1138 }
1139 if let Some(OpCode::NT(nt)) = opcode.first() {
1140 let backup = if matches!(values.last(), Some(Symbol::NT(x)) if x == nt) {
1142 Some(values.pop().unwrap())
1143 } else {
1144 None
1145 };
1146 if nt != var_id && self.nt_has_all_flags(*nt, ruleflag::CHILD_L_RECURSION) {
1147 if VERBOSE { println!(" CHILD_L_RECURSION"); }
1148 items[*alt_id as usize].extend(values);
1150 continue;
1151 }
1152 if flags & ruleflag::PARENT_L_FACTOR != 0 {
1153 if VERBOSE {
1154 println!(" PARENT_L_FACTOR: moving {} to child {}",
1155 values.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
1156 Symbol::NT(*nt).to_str(self.get_symbol_table()));
1157 }
1158 let pre = &mut items[*alt_id as usize];
1160 if !pre.is_empty() {
1161 values.splice(0..0, std::mem::take(pre));
1163 }
1164 for a_id in self.var_alts[*nt as usize].iter() {
1165 items[*a_id as usize].extend(values.clone());
1166 }
1172 continue;
1173 }
1174 if let Some(sym) = backup {
1175 values.push(sym);
1176 }
1177 }
1178 items[*alt_id as usize].extend(values);
1179 } }
1181
1182 self.check_sep_list(&mut items);
1185
1186 for alt_id in alts_to_revisit {
1188 items[alt_id as usize].retain(|s| self.sym_has_value(s));
1189 }
1190 self.item_ops = items;
1191
1192 self.log.add_note(
1193 format!(
1194 "NT with value: {}",
1195 self.nt_values.iter().index()
1196 .filter(|&(_, val)| *val)
1197 .map(|(var, _)| Symbol::NT(var).to_str(self.get_symbol_table()))
1198 .join(", ")));
1199 }
1200
    /// Looks for repetition children whose iteration content is duplicated just before them
    /// in their parent's items and, when found, marks them with `ruleflag::SEP_LIST`.
    ///
    /// NOTE(review): presumably this detects "separated list" constructs such as
    /// `a (sep a)*` — inferred from the flag name and log message, confirm with the grammar docs.
    ///
    /// For each non-empty group in `self.nt_parent`:
    /// * candidates are the nonterminals flagged `CHILD_REPEAT` without `REPEAT_PLUS` that
    ///   have exactly two alternatives; the first alternative is the one examined
    /// * the candidate's items (minus the first one if the nonterminal has a value) form the
    ///   pattern to look for
    /// * when the pattern is found right before the candidate in the relevant parent
    ///   alternatives, those items are removed from the parent's `items`, `self.span_nbrs`
    ///   is reduced accordingly, and the span count is recorded in `self.span_nbrs_sep_list`
    ///
    /// `items` holds one list of symbols per alternative id and is modified in place.
    ///
    /// # Panics
    ///
    /// Panics if a candidate isn't referenced by any alternative of another nonterminal
    /// of the same group.
    fn check_sep_list(&mut self, items: &mut [Vec<Symbol>]) {
        const VERBOSE: bool = false;
        if VERBOSE {
            // debug only: temporarily installs `items` as item_ops and swaps the log out so
            // that the NT/alt info can be printed without polluting the real log
            let log = std::mem::take(&mut self.log);
            self.item_ops = items.iter().cloned().to_vec();
            self.log_nt_info();
            self.log_alt_info();
            println!("{}", self.log);
            self.item_ops.clear();
            self.log = log;
        }
        self.log.add_note("- determining sep_list nonterminals...");
        if VERBOSE { println!("check_sep_list:"); }
        for (top_nt, g) in self.nt_parent.iter().enumerate().filter(|va| !va.1.is_empty()) {
            // candidates: (nonterminal, id of its first alternative, flags)
            let candidate_children = g.iter()
                .filter_map(|&var| {
                    let alts = &self.var_alts[var as usize];
                    let flags = self.parsing_table.flags[var as usize];
                    if alts.len() == 2 && flags & (ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS) == ruleflag::CHILD_REPEAT {
                        Some((var, alts[0] as usize, flags))
                    } else {
                        None
                    }
                })
                .to_vec();
            for &(c_var, c_alt_id, _c_flags) in &candidate_children {
                let has_value = self.nt_values[c_var as usize];
                // when the child has a value, its first item is skipped
                // (presumably the loop accumulator itself — TODO confirm against make_item_ops)
                let skip_loop_nt = if has_value { 1 } else { 0 };
                let mut pattern = items[c_alt_id].iter().skip(skip_loop_nt).cloned().to_vec();
                if VERBOSE {
                    println!(
                        "? {} {c_alt_id}: pattern = {}",
                        Symbol::NT(c_var).to_str(self.get_symbol_table()),
                        pattern.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "));
                }
                if !pattern.is_empty() {
                    // `pattern` is consumed by the comparison loop below, so its length and
                    // content are saved for the later check on the items
                    let pattern_len = pattern.len();
                    let pattern_copy = pattern.clone();
                    let c_sym = Symbol::NT(c_var);
                    // first alternative in the group, belonging to another nonterminal, that
                    // contains the child; p_pos is the position of the child in that alternative
                    let (p_var, _p_alt_id, p_alt, mut p_pos) = self.nt_parent[top_nt].iter()
                        .flat_map(|&p_var| &self.var_alts[p_var as usize])
                        .filter_map(|&p_alt_id| {
                            let (p_var, p_alt) = &self.parsing_table.alts[p_alt_id as usize];
                            if *p_var != c_var {
                                p_alt.v.iter().position(|s| s == &c_sym).map(|p_pos| (*p_var, p_alt_id as usize, p_alt, p_pos))
                            } else {
                                None
                            }
                        })
                        .next()
                        .unwrap_or_else(|| panic!("NT {c_var} alt {c_alt_id} should have a parent's alt that includes it"));
                    if p_pos > 0 {
                        // compares backwards: the parent's symbols preceding the child against
                        // the child's symbols, starting at the child's second-to-last symbol
                        // (presumably the last one is the loop nonterminal — TODO confirm)
                        p_pos -= 1;
                        let c_alt = &self.parsing_table.alts[c_alt_id].1.v;
                        let mut c_pos = c_alt.len() - 2;
                        let p_pos0 = p_pos;
                        let mut span_nbr = 0;
                        while !pattern.is_empty() {
                            if p_alt[p_pos] == c_alt[c_pos] {
                                // every matching symbol counts as a span, but only symbols
                                // with a value consume an element of the pattern
                                span_nbr += 1;
                                if self.sym_has_value(&c_alt[c_pos]) {
                                    pattern.pop();
                                }
                                if c_pos == 0 || p_pos == 0 {
                                    break;
                                }
                                c_pos -= 1;
                                p_pos -= 1;
                            } else {
                                break;
                            }
                        }
                        // the pattern is empty only if all its valued symbols were matched
                        if pattern.is_empty() {
                            let exit_alts = self.gather_alts(p_var);
                            // (alt id, position of the child in its items) for each match
                            let mut found_pos = vec![];
                            // each gathered alternative must either not hold the child in its
                            // items or hold the whole pattern immediately before it
                            let all_match = exit_alts.into_iter().all(|a| {
                                let a_items = &items[a as usize];
                                if let Some(p) = a_items.iter().position(|s| *s == c_sym) {
                                    if p >= pattern_len && a_items[p - pattern_len..p] == pattern_copy {
                                        found_pos.push((a as usize, p));
                                        true
                                    } else {
                                        false
                                    }
                                } else {
                                    true
                                }
                            });
                            if all_match {
                                if VERBOSE {
                                    println!("- match:");
                                    println!(" c[{c_alt_id}]: {} items: {}",
                                             c_alt.iter().map(|s| s.to_str_quote(self.get_symbol_table())).join(" "),
                                             items[c_alt_id].iter().map(|s| s.to_str_quote(self.get_symbol_table())).join(" "));
                                }
                                for (p_alt_id, pos) in found_pos {
                                    if VERBOSE {
                                        println!(" p[{p_alt_id}]: {} items: {}",
                                                 p_alt.iter().map(|s| s.to_str_quote(self.get_symbol_table())).join(" "),
                                                 items[p_alt_id].iter().map(|s| s.to_str_quote(self.get_symbol_table())).join(" "));
                                        println!(
                                            " c_alt_id = {c_alt_id}, p_alt_id = {p_alt_id}, p_pos0 = {p_pos0}, span_nbr = {span_nbr}, pos = {pos} => remove [{}..{}]",
                                            pos - pattern_len, pos);
                                    }
                                    // the matched spans move from the parent's alternative to
                                    // the child's entry in span_nbrs_sep_list
                                    self.span_nbrs[p_alt_id] -= span_nbr as SpanNbr;
                                    self.span_nbrs_sep_list.insert(c_alt_id as AltId, span_nbr as SpanNbr);
                                    // removes the duplicated items that precede the child
                                    items[p_alt_id].drain(pos - pattern_len..pos);
                                    if VERBOSE {
                                        println!(" => p items: {}", items[p_alt_id].iter().map(|s| s.to_str_quote(self.get_symbol_table())).join(" "));
                                    }
                                    self.parsing_table.flags[c_var as usize] |= ruleflag::SEP_LIST;
                                }
                            }
                        }
                    }
                }
            }
        }
    }
1371
1372 fn sort_alt_ids(&self, top_nt: VarId, alts: &[AltId]) -> Vec<AltId> {
1373 const VERBOSE: bool = false;
1374 if VERBOSE {
1375 println!(" sorting {} alts {alts:?}", Symbol::NT(top_nt).to_str(self.get_symbol_table()));
1376 for &a_id in alts {
1377 let &(_nt, ref alt) = &self.parsing_table.alts[a_id as usize];
1378 if let Some((v, id)) = alt.origin {
1379 let tree = &self.origin.trees[v as usize];
1380 println!(" [{a_id}] id = {},{id} -> {} <-> {}",
1381 Symbol::NT(v).to_str(self.get_symbol_table()),
1382 crate::grammar::grtree_to_str_ansi(tree, None, Some(id), Some(v), self.get_symbol_table(), false),
1383 tree.to_str_index(None, self.get_symbol_table())
1384 );
1385 assert_eq!(v, top_nt, "v = {}, top_nt = {}", Symbol::NT(v).to_str(self.get_symbol_table()), Symbol::NT(top_nt).to_str(self.get_symbol_table()));
1386 }
1387 }
1388 }
1389 let mut sorted = vec![];
1390 let mut ids = alts.iter().filter_map(|&alt_id| self.parsing_table.alts[alt_id as usize].1.origin.map(|(_var, id)| (id, alt_id)))
1391 .collect::<HashMap<_, _>>();
1392 let tree = &self.origin.trees[top_nt as usize];
1393 for node in tree.iter_post_depth() {
1394 if let Some((_, alt_id)) = ids.remove_entry(&node.index) {
1395 sorted.push(alt_id);
1396 }
1397 }
1398 if VERBOSE { println!(" -> {sorted:?}"); }
1399 sorted
1400 }
1401
    /// Computes the naming and item information used to generate the wrapper source, and
    /// stores it into `self.nt_name`, `self.alt_info`, `self.item_info` and
    /// `self.child_repeat_endpoints`:
    /// * `nt_name[v]`: three unique names for nonterminal `v` (camelcase, and two
    ///   lowercase/underscore variants produced by separate name fixers)
    /// * `alt_info[a]`: `Some((owner, "V<n>"))` for each alternative `a` that has a context,
    ///   where `<n>` is its 1-based rank among its owner's alternatives as sorted by
    ///   `sort_alt_ids`
    /// * `item_info[a]`: the named items of alternative `a`, derived from `self.item_ops[a]`
    /// * `child_repeat_endpoints[v]`: sorted endpoint alternatives of each nonterminal `v`
    ///   flagged `CHILD_REPEAT` without `L_FORM`
    ///
    /// Reads `self.item_ops`, so it must have been populated before this call.
    fn get_type_info(&mut self) {
        const VERBOSE: bool = false;

        self.log.add_note("- determining item_info...");
        let pinfo = &self.parsing_table;
        let mut nt_upper_fixer = NameFixer::new();
        let mut nt_lower_fixer = NameFixer::new();
        // NOTE(review): this fixer is created with `new_empty()` instead of `new()`,
        // presumably because it starts without reserved names — confirm in NameFixer
        let mut nt_plower_fixer = NameFixer::new_empty();
        // (camelcase name, lowercase name, lowercase name from the "empty" fixer)
        let nt_name: Vec<(String, String, String)> = (0..pinfo.num_nt).map(|v| {
            let name = self.symbol_table.get_nt_name(v as VarId);
            let nu = nt_upper_fixer.get_unique_name(name.to_camelcase());
            let nl = nt_lower_fixer.get_unique_name(nu.to_underscore_lowercase());
            let npl = nt_plower_fixer.get_unique_name(nu.to_underscore_lowercase());
            (nu, nl, npl)
        }).to_vec();

        let mut alt_info: Vec<Option<(VarId, String)>> = vec![None; pinfo.alts.len()];
        // items of the first non-empty alternative seen for each CHILD_REPEAT owner
        // (only written in this function)
        let mut nt_repeat = HashMap::<VarId, Vec<ItemInfo>>::new();
        let mut item_info: Vec<Vec<ItemInfo>> = vec![vec![]; pinfo.alts.len()];
        let mut child_repeat_endpoints = HashMap::<VarId, Vec<AltId>>::new();
        for group in self.nt_parent.iter().filter(|vf| !vf.is_empty()) {
            let is_ambig = self.nt_has_any_flags(group[0], ruleflag::PARENT_AMBIGUITY);
            // remains true until the first CHILD_L_RECURSION nonterminal of an ambiguous
            // group has been processed
            let mut is_ambig_1st_child = is_ambig;
            // owner -> alternatives that have a context, to be named once the group is done
            let mut alt_info_to_sort = HashMap::<VarId, Vec<AltId>>::new();
            for var in group {
                let nt = *var as usize;
                let nt_flags = pinfo.flags[nt];
                // in an ambiguous group, skips the PARENT_L_RECURSION nonterminals and all
                // the CHILD_L_RECURSION nonterminals but the first
                if is_ambig && (nt_flags & ruleflag::PARENT_L_RECURSION != 0 || (nt_flags & ruleflag::CHILD_L_RECURSION != 0 && !is_ambig_1st_child)) {
                    continue;
                }
                // CHILD_REPEAT without L_FORM: records the endpoint alternatives
                if nt_flags & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT {
                    let is_plus = nt_flags & ruleflag::REPEAT_PLUS != 0;
                    let mut endpoints = self.gather_alts(*var);
                    if VERBOSE { println!("** {} endpoints: {endpoints:?} ", Symbol::NT(*var).to_str(self.get_symbol_table())); }
                    if is_plus {
                        // keeps the first alternative of each pair
                        // (presumably the pairs are the "loop"/"exit" variants — TODO confirm)
                        endpoints = endpoints.chunks(2).map(|slice| slice[0]).to_vec();
                    } else {
                        // discards the empty alternatives
                        endpoints.retain(|e| !pinfo.alts[*e as usize].1.is_sym_empty());
                    }
                    assert!(!endpoints.is_empty());
                    let endpoints = self.sort_alt_ids(group[0], &endpoints);
                    child_repeat_endpoints.insert(*var, endpoints);
                }
                for &alt_id in &self.var_alts[nt] {
                    let i = alt_id as usize;
                    if is_ambig_1st_child && pinfo.alts[i].1.is_sym_empty() {
                        continue;
                    }
                    let item_ops = &self.item_ops[alt_id as usize];
                    // symbol -> (unique name, None if the symbol occurs once in the items,
                    // or Some(next index) if it occurs several times)
                    let mut indices = HashMap::<Symbol, (String, Option<usize>)>::new();
                    let mut fixer = NameFixer::new();
                    // the owner is found by climbing the parents from the alternative's
                    // nonterminal, stopping at the first CHILD_REPEAT or at the top
                    let mut owner = pinfo.alts[i].0;
                    while let Some(parent) = pinfo.parent[owner as usize] {
                        if pinfo.flags[owner as usize] & ruleflag::CHILD_REPEAT != 0 {
                            break;
                        }
                        owner = parent;
                    }
                    let is_nt_child_repeat = pinfo.flags[owner as usize] & ruleflag::CHILD_REPEAT != 0;
                    // first pass: gives a name to each distinct symbol and detects repetitions
                    for s in item_ops {
                        if let Some((_, c)) = indices.get_mut(s) {
                            // seen before: the symbol's items will be indexed, starting at 0
                            *c = Some(0);
                        } else {
                            let name = if let Symbol::NT(vs) = s {
                                let flag = pinfo.flags[*vs as usize];
                                if flag & ruleflag::CHILD_REPEAT != 0 {
                                    let inside_alt_id = self.var_alts[*vs as usize][0];
                                    let inside_alt = &pinfo.alts[inside_alt_id as usize].1;
                                    // NOTE(review): this first branch is disabled by `if false`
                                    // and never runs; it names the item after the first symbol
                                    // of the repetition
                                    if false {
                                        let mut plus_name = inside_alt.symbols()[0].to_str(self.get_symbol_table()).to_underscore_lowercase();
                                        plus_name.push_str(if flag & ruleflag::REPEAT_PLUS != 0 { "_plus" } else { "_star" });
                                        plus_name
                                    } else if is_nt_child_repeat && indices.is_empty() {
                                        // first item of an alternative owned by a repetition
                                        if flag & ruleflag::REPEAT_PLUS != 0 { "plus_acc".to_string() } else { "star_acc".to_string() }
                                    } else {
                                        if flag & ruleflag::REPEAT_PLUS != 0 { "plus".to_string() } else { "star".to_string() }
                                    }
                                } else {
                                    nt_name[*vs as usize].clone().1
                                }
                            } else {
                                s.to_str(self.get_symbol_table()).to_lowercase()
                            };
                            indices.insert(*s, (fixer.get_unique_name(name), None));
                        }
                    }

                    // the alternative contains a nonterminal flagged CHILD_L_FACT
                    let has_lfact_child = nt_flags & ruleflag::PARENT_L_FACTOR != 0 &&
                        pinfo.alts[i].1.symbols().iter().any(|s| matches!(s, &Symbol::NT(c) if pinfo.flags[c as usize] & ruleflag::CHILD_L_FACT != 0));

                    // the owner is a repetition that isn't in L_FORM
                    let is_hidden_repeat_child = pinfo.flags[owner as usize] & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT;

                    let is_alt_sym_empty = self.is_alt_sym_empty(alt_id);

                    // empty alternative of a + repetition in L_FORM (alternative 0 excluded)
                    let is_duplicate = i > 0 && self.nt_has_all_flags(owner, ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM) &&
                        is_alt_sym_empty;

                    // empty alternative of a left-recursion child or of an L_FORM repetition
                    let is_last_empty_iteration = (nt_flags & ruleflag::CHILD_L_RECURSION != 0
                        || self.nt_has_all_flags(*var, ruleflag::CHILD_REPEAT | ruleflag::L_FORM)) && is_alt_sym_empty;

                    // only alternatives with a context get an entry in alt_info
                    let has_context = !has_lfact_child && !is_hidden_repeat_child && !is_duplicate && !is_last_empty_iteration;
                    if VERBOSE {
                        println!("NT {nt}, alt {alt_id}: has_lfact_child = {has_lfact_child}, is_hidden_repeat_child = {is_hidden_repeat_child}, \
                            is_duplicate = {is_duplicate}, is_last_empty_iteration = {is_last_empty_iteration} => has_context = {has_context}");
                    }
                    if has_context {
                        alt_info_to_sort.entry(owner)
                            .and_modify(|v| v.push(alt_id))
                            .or_insert_with(|| vec![alt_id]);
                    }
                    let has_owner_value = self.nt_values[owner as usize];
                    item_info[i] = if item_ops.is_empty() && nt_flags & ruleflag::CHILD_L_RECURSION != 0 {
                        // left-recursion child without items: the only possible item is the
                        // owner's own value
                        if has_owner_value {
                            vec![ItemInfo {
                                name: nt_name[owner as usize].1.clone(),
                                sym: Symbol::NT(owner),
                                owner,
                                index: None,
                            }]
                        } else {
                            vec![]
                        }
                    } else {
                        let is_rrec_lform = self.nt_has_all_flags(owner, ruleflag::R_RECURSION | ruleflag::L_FORM);
                        // the first item is left out for repetitions and L_FORM right
                        // recursions that have a value
                        // (presumably it's the accumulator — TODO confirm)
                        let skip = if (is_nt_child_repeat || is_rrec_lform) && has_owner_value { 1 } else { 0 };
                        // second pass: builds the items, numbering the repeated symbols
                        let mut infos = item_ops.iter()
                            .skip(skip)
                            .map(|s| {
                                let index = if let Some((_, Some(index))) = indices.get_mut(s) {
                                    let idx = *index;
                                    *index += 1;
                                    Some(idx)
                                } else {
                                    None
                                };
                                ItemInfo {
                                    name: indices[s].0.clone(),
                                    sym: *s,
                                    owner,
                                    index,
                                }
                            }).to_vec();
                        // + repetitions in L_FORM get an extra item with the empty symbol
                        if self.nt_has_all_flags(owner, ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM) {
                            let last_name = fixer.get_unique_name("last_iteration".to_string());
                            infos.push(ItemInfo {
                                name: last_name,
                                sym: Symbol::Empty,
                                owner,
                                index: None,
                            });
                        };
                        if is_nt_child_repeat && !infos.is_empty() && !nt_repeat.contains_key(&owner) {
                            nt_repeat.insert(owner, infos.clone());
                        }
                        infos
                    }
                }
                if is_ambig && nt_flags & ruleflag::CHILD_L_RECURSION != 0 {
                    is_ambig_1st_child = false;
                }
            }
            if VERBOSE { println!("alt_info_to_sort = {alt_info_to_sort:?}"); }
            // names the contexts V1, V2, ... following the order given by sort_alt_ids
            for (owner, alts) in alt_info_to_sort {
                for (num, alt) in self.sort_alt_ids(group[0], &alts).into_iter().index_start(1) {
                    alt_info[alt as usize] = Some((owner, format!("V{num}")));
                }
            }
        }
        if VERBOSE {
            println!("NT names: {}", nt_name.iter()
                .map(|(u, l, pl)| format!("{u}/{l}/{pl}"))
                .join(", "));
            println!("alt info:");
            for (alt_id, alt_names) in alt_info.iter().enumerate() {
                if let Some((v, name)) = alt_names {
                    println!("- alt {alt_id}, NT {v} {}, Ctx name: {name}", Symbol::NT(*v).to_str(self.get_symbol_table()));
                }
            }
            println!();
            println!("nt_name: {nt_name:?}");
            println!("alt_info: {alt_info:?}");
            println!("item_info:");
            for (i, item) in item_info.iter().enumerate().filter(|(_, item)| !item.is_empty()) {
                println!("- {i}: {{ {} }}", item.iter()
                    .map(|ii| format!("{}{} ({})", ii.name, ii.index.map(|i| format!("[{i}]")).unwrap_or(String::new()), ii.sym.to_str(self.get_symbol_table())))
                    .join(", "));
            }
            println!("item_info: {item_info:?}");
            println!("child_repeat_endpoints: {child_repeat_endpoints:?}");
        }
        self.nt_name = nt_name;
        self.alt_info = alt_info;
        self.item_info = item_info;
        self.child_repeat_endpoints = child_repeat_endpoints;
    }
1679
1680 pub fn gen_source_code(&mut self) -> (String, String, String) {
1685 self.log.add_note("generating source code...");
1686 if !self.log.has_no_errors() {
1687 return (String::new(), String::new(), String::new());
1688 }
1689 let mut parts = vec![];
1694 if !self.options.headers.is_empty() {
1695 parts.push(self.options.headers.clone());
1696 }
1697 let mut tmp_parts = if self.gen_parser {
1698 vec![self.source_build_parser()]
1699 } else {
1700 vec![]
1701 };
1702 let (src_types, src_listener) = if self.options.gen_wrapper {
1703 self.make_item_ops();
1704 let (src_wrapper, src_types, src_listener) = self.source_wrapper();
1705 tmp_parts.push(src_wrapper);
1706 (
1707 indent_source(vec![src_types], self.options.types_indent),
1708 indent_source(vec![src_listener], self.options.listener_indent)
1709 )
1710 } else {
1711 (String::new(), String::new())
1712 };
1713 self.log_nt_info();
1714 self.log_alt_info();
1715 parts.push(self.source_use());
1716 parts.extend(tmp_parts);
1717 (indent_source(parts, self.options.indent), src_types, src_listener)
1719 }
1720
1721 pub fn try_gen_source_code(mut self) -> Result<(BufLog, String, String, String), BuildError> {
1722 let (src, src_types, src_listener) = self.gen_source_code();
1723 if self.log.has_no_errors() {
1724 Ok((self.give_log(), src, src_types, src_listener))
1725 } else {
1726 Err(BuildError::new(self.give_log(), BuildErrorSource::ParserGen))
1727 }
1728 }
1729
1730 fn source_use(&self) -> Vec<String> {
1731 self.options.used_libs.gen_source_code()
1732 }
1733
1734 fn source_build_parser(&mut self) -> Vec<String> {
1735 static BASE_PARSER_LIBS: [&str; 5] = [
1736 "::VarId",
1737 "::AltId",
1738 "::parser::OpCode",
1739 "::parser::Parser",
1740 "::fixed_sym_table::FixedSymTable",
1741 ];
1742 static ALT_PARSER_LIBS: [&str; 2] = [
1743 "::alt::Alternative",
1744 "::parser::Symbol",
1745 ];
1746
1747 self.log.add_note("generating build_parser source...");
1748 let num_nt = self.symbol_table.get_num_nt();
1749 let num_t = self.symbol_table.get_num_t();
1750 self.options.used_libs.extend(BASE_PARSER_LIBS.into_iter().map(|s| format!("{}{s}", self.options.lib_crate)));
1751 self.log.add_note(format!("- creating symbol tables: {num_t} terminals, {num_nt} nonterminals"));
1752 let mut src = vec![
1753 format!("const PARSER_NUM_T: usize = {num_t};"),
1754 format!("const PARSER_NUM_NT: usize = {num_nt};"),
1755 format!("static SYMBOLS_T: [(&str, Option<&str>); PARSER_NUM_T] = [{}];",
1756 self.symbol_table.get_terminals().map(|(s, os)|
1757 format!("(\"{s}\", {})", os.as_ref().map(|s| format!("Some({s:?})")).unwrap_or("None".to_string()))).join(", ")),
1758 format!("static SYMBOLS_NT: [&str; PARSER_NUM_NT] = [{}];",
1759 self.symbol_table.get_nonterminals().map(|s| format!("{s:?}")).join(", ")),
1760 format!("static ALT_VAR: [VarId; {}] = [{}];",
1761 self.parsing_table.alts.len(),
1762 self.parsing_table.alts.iter().map(|(v, _)| format!("{v}")).join(", ")),
1763 ];
1764 if self.options.include_alts {
1765 self.options.used_libs.extend(ALT_PARSER_LIBS.into_iter().map(|s| format!("{}{s}", self.options.lib_crate)));
1766 src.push(format!("static ALTERNATIVES: [&[Symbol]; {}] = [{}];",
1767 self.parsing_table.alts.len(),
1768 self.parsing_table.alts.iter().map(|(_, f)| format!("&[{}]", f.iter().map(symbol_to_code).join(", "))).join(", ")));
1769 }
1770 self.log.add_note(format!("- creating parsing tables: {} items, {} opcodes", self.parsing_table.table.len(), self.opcodes.len()));
1771 src.extend(vec![
1772 format!(
1773 "static PARSING_TABLE: [AltId; {}] = [{}];",
1774 self.parsing_table.table.len(),
1775 self.parsing_table.table.iter().map(|v| format!("{v}")).join(", ")),
1776 format!(
1777 "static OPCODES: [&[OpCode]; {}] = [{}];",
1778 self.opcodes.len(),
1779 self.opcodes.iter().map(|strip| format!("&[{}]", strip.iter().map(|op| format!("OpCode::{op:?}")).join(", "))).join(", ")),
1780 format!(
1781 "static INIT_OPCODES: [OpCode; {}] = [{}];",
1782 self.init_opcodes.len(),
1783 self.init_opcodes.iter().map(|op| format!("OpCode::{op:?}")).join(", ")),
1784 format!("static START_SYMBOL: VarId = {};\n", self.start),
1785 ]);
1786 if self.options.gen_token_enums {
1787 src.add_space();
1788 src.push("#[derive(Clone, Copy, PartialEq, Debug)]".to_string());
1789 src.push("#[repr(u16)]".to_string());
1790 src.push("pub enum Term {".to_string());
1791 let cols = self.symbol_table.get_terminals().enumerate()
1792 .map(|(t, (s, s_opt))| vec![
1793 format!(" #[doc = \"{}\"]", if let Some(so) = s_opt { format!("'{so}'") } else { "(variable)".to_string() }),
1796 format!("{s} = {t},", )])
1797 .to_vec();
1798 src.extend(columns_to_str(cols, Some(vec![16, 0])));
1799 src.push("}\n".to_string());
1800 src.push("#[derive(Clone, Copy, PartialEq, Debug)]".to_string());
1801 src.push("#[repr(u16)]".to_string());
1802 src.push("pub enum NTerm {".to_string());
1803 let cols = self.symbol_table.get_nonterminals().index()
1804 .map(|(t, s)| vec![
1805 format!(
1806 " #[doc = \"`{s}`{}\"]",
1807 if let Some(p) = self.get_nt_parent(t) {
1808 format!(", parent: `{}`", Symbol::NT(p).to_str(self.get_symbol_table()))
1809 } else {
1810 String::new()
1811 }),
1812 format!("{} = {t},", s.to_camelcase())])
1813 .to_vec();
1814 src.extend(columns_to_str(cols, Some(vec![16, 0])));
1815 src.push("}\n".to_string());
1816 src.push("pub fn get_term_name(t: TokenId) -> (&'static str, Option<&'static str>) {".to_string());
1817 src.push(" SYMBOLS_T[t as usize]".to_string());
1818 src.push("}\n".to_string());
1819 }
1820 src.extend(vec![
1821 "pub fn build_parser() -> Parser<'static> {{".to_string(),
1822 " let symbol_table = FixedSymTable::new(".to_string(),
1823 " SYMBOLS_T.into_iter().map(|(s, os)| (s.to_string(), os.map(|s| s.to_string()))).collect(),".to_string(),
1824 " SYMBOLS_NT.into_iter().map(|s| s.to_string()).collect()".to_string(),
1825 " );".to_string(),
1826 " Parser::new(".to_string(),
1827 " PARSER_NUM_NT, PARSER_NUM_T + 1,".to_string(),
1828 " &ALT_VAR,".to_string(),
1829 if self.options.include_alts {
1830 " ALTERNATIVES.into_iter().map(|s| Alternative::new(s.to_vec())).collect(),".to_string()
1831 } else {
1832 " Vec::new(),".to_string()
1833 },
1834 " OPCODES.into_iter().map(|strip| strip.to_vec()).collect(),".to_string(),
1835 " INIT_OPCODES.to_vec(),".to_string(),
1836 " &PARSING_TABLE,".to_string(),
1837 " symbol_table,".to_string(),
1838 " START_SYMBOL".to_string(),
1839 " )".to_string(),
1840 "}}".to_string(),
1841 ]);
1842 src
1843 }
1844
1845 fn get_info_type(&self, infos: &[ItemInfo], info: &ItemInfo) -> String {
1846 let type_name_base = match info.sym {
1847 Symbol::T(_) => "String".to_string(),
1848 Symbol::NT(vs) => self.get_nt_type(vs).to_string(),
1849 Symbol::Empty => "bool".to_string(),
1850 _ => panic!("unexpected symbol {}", info.sym)
1851 };
1852 if info.index.is_some() {
1853 let nbr = infos.iter()
1854 .map(|nfo| if nfo.sym == info.sym { nfo.index.unwrap() } else { 0 })
1855 .max().unwrap() + 1;
1856 format!("[{type_name_base}; {nbr}]")
1857 } else {
1858 type_name_base
1859 }
1860 }
1861
1862 fn source_infos(&self, infos: &[ItemInfo], add_pub: bool, add_type: bool) -> String {
1864 let pub_str = if add_pub { "pub " } else { "" };
1865 infos.iter()
1866 .filter_map(|info| {
1867 if info.index.is_none() || info.index == Some(0) {
1868 let type_name = if add_type {
1869 format!(": {}", self.get_info_type(infos, info))
1870 } else {
1871 String::new()
1872 };
1873 Some(format!("{pub_str}{}{type_name}", info.name))
1874 } else {
1875 None
1876 }
1877 }).join(", ")
1878 }
1879
1880 fn is_alt_sym_empty(&self, a_id: AltId) -> bool {
1881 self.parsing_table.alts[a_id as usize].1.is_sym_empty()
1882 }
1883
1884 fn make_match_choices(&self, alts: &[AltId], name: &str, flags: u32, no_method: bool, force_id: Option<AltId>) -> (bool, Vec<String>) {
1886 assert!(!alts.is_empty(), "alts cannot be empty");
1887 let discarded = if !no_method && flags & (ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT { 1 } else { 0 };
1892
1893 let is_plus_no_lform = flags & (ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM) == (ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS);
1897 let is_alt_id_threshold = if is_plus_no_lform { 2 } else { 1 };
1898 let is_alt_id = force_id.is_none() && alts.len() - discarded > is_alt_id_threshold;
1899
1900 let mut choices = Vec::<String>::new();
1901 let force_id_str = force_id.map(|f| f.to_string()).unwrap_or_default();
1902 if alts.len() - discarded == 1 {
1903 if no_method {
1904 choices.push(format!(" {} => {{}}", alts[0]));
1905 } else {
1906 choices.push(format!(" {} => self.{name}({force_id_str}),", alts[0]));
1907 }
1908 } else {
1909 let last = alts.len() - 1 - discarded;
1910 choices.extend((0..last).map(|i| format!(" {} |", alts[i])));
1911 if no_method {
1912 choices.push(format!(" {} => {{}}", alts[last]));
1913 } else {
1914 choices.push(format!(" {} => self.{name}({}{force_id_str}),",
1915 alts[last],
1916 if is_alt_id { "alt_id" } else { "" }));
1917 }
1918 }
1919 if discarded == 1 {
1920 choices.push(format!(" {} => {{}}", alts.last().unwrap()));
1921 }
1922 (is_alt_id, choices)
1923 }
1924
1925 fn gen_match_item<F: FnOnce() -> String>(&self, common: String, span_only: F) -> String {
1929 if self.options.gen_span_params {
1930 let span_code = span_only();
1931 format!("({span_code}, {common})")
1932 } else {
1933 common
1934 }
1935 }
1936
1937 fn get_var_param(item: &ItemInfo, indices: &HashMap<Symbol, Vec<String>>, non_indices: &mut Vec<String>) -> Option<String> {
1938 if let Some(index) = item.index {
1939 if index == 0 {
1940 Some(format!("{}: [{}]", item.name, indices[&item.sym].iter().rev().join(", ")))
1941 } else {
1942 None
1943 }
1944 } else {
1945 let name = non_indices.pop().unwrap();
1946 if name == item.name {
1947 Some(name)
1948 } else {
1949 Some(format!("{}: {name}", item.name))
1950 }
1951 }
1952 }
1953
1954 fn get_var_params(item_info: &[ItemInfo], skip: usize, indices: &HashMap<Symbol, Vec<String>>, non_indices: &mut Vec<String>) -> String {
1955 item_info.iter().skip(skip).filter_map(|item| {
1956 Self::get_var_param(item, indices, non_indices)
1957 }).join(", ")
1958 }
1959
1960 fn source_lets(infos: &[ItemInfo], nt_name: &[(String, String, String)], indent: &str, last_alt_id_maybe: Option<AltId>) -> (Vec<String>, String) {
1961 let mut src_let = vec![];
1962 let mut var_fixer = NameFixer::new();
1963 let mut indices = HashMap::<Symbol, Vec<String>>::new();
1964 let mut non_indices = Vec::<String>::new();
1965 for item in infos.iter().rev() {
1966 let varname = if let Some(index) = item.index {
1967 let name = var_fixer.get_unique_name(format!("{}_{}", item.name, index + 1));
1968 indices.entry(item.sym).and_modify(|v| v.push(name.clone())).or_insert(vec![name.clone()]);
1969 name
1970 } else {
1971 let name = item.name.clone();
1972 non_indices.push(name.clone());
1973 name
1974 };
1975 if item.sym.is_empty() {
1976 src_let.push(format!("{indent}let {varname} = alt_id == {};", last_alt_id_maybe.unwrap()));
1977 } else if let Symbol::NT(v) = item.sym {
1978 src_let.push(format!("{indent}let {varname} = self.stack.pop().unwrap().get_{}();", nt_name[v as usize].2));
1979 } else {
1980 src_let.push(format!("{indent}let {varname} = self.stack_t.pop().unwrap();"));
1981 }
1982 }
1983 let src_struct = Self::get_var_params(infos, 0, &indices, &mut non_indices);
1984 (src_let, src_struct)
1985 }
1986
1987 fn source_update_span(n: &str) -> Vec<String> {
1988 vec![
1989 format!(" let spans = self.stack_span.drain(self.stack_span.len() - {n} ..).collect::<Vec<_>>();"),
1990 " self.stack_span.push(spans.iter().fold(PosSpan::empty(), |acc, sp| acc + sp));".to_string(),
1991 ]
1992 }
1993
1994 fn source_child_repeat_lets(
1999 &self,
2000 endpoints: &[AltId],
2001 item_info: &[Vec<ItemInfo>],
2002 is_plus: bool,
2003 nt_name: &[(String, String, String)],
2004 fn_name: &str,
2005 nu: &str,
2006 is_init: bool,
2007 ) -> (Vec<String>, String)
2008 {
2009 let mut src_val = vec![];
2017 let val_name = if endpoints.len() > 1 {
2018 src_val.push(format!(" let {} = match alt_id {{", self.gen_match_item("val".to_string(), || "n".to_string())));
2020 for (i, &a_id) in endpoints.iter().index_start(1) {
2021 let infos = &item_info[a_id as usize];
2022 src_val.push(format!(" {a_id}{} => {{", if is_plus { format!(" | {}", a_id + 1) } else { String::new() }));
2023 let (src_let, src_struct) = Self::source_lets(infos, nt_name, " ", None);
2024 src_val.extend(src_let);
2025 let return_value = self.gen_match_item(
2026 format!("Syn{nu}Item::V{i} {{ {} }}", src_struct),
2027 || self.span_nbrs[a_id as usize].to_string());
2028 src_val.push(format!(" {return_value}"));
2029 src_val.push(" }".to_string());
2030 }
2031 src_val.push(format!(" _ => panic!(\"unexpected alt id {{alt_id}} in fn {fn_name}\"),"));
2032 src_val.push(" };".to_string());
2033 if self.options.gen_span_params {
2034 src_val.extend(Self::source_update_span("n"));
2035 }
2036 "val".to_string()
2037 } else {
2038 let a_id = endpoints[0];
2040 if self.options.gen_span_params {
2041 let span_nbr = if is_init {
2042 *self.span_nbrs_sep_list.get(&a_id).unwrap()
2043 } else {
2044 self.span_nbrs[a_id as usize]
2045 };
2046 src_val.extend(Self::source_update_span(&span_nbr.to_string()));
2047 }
2048 let infos = &item_info[a_id as usize];
2049 let (src_let, src_struct) = Self::source_lets(infos, nt_name, " ", None);
2050 src_val.extend(src_let);
2051 if infos.len() == 1 {
2052 infos[0].name.clone()
2054 } else {
2055 src_val.push(format!(" let val = Syn{nu}Item {{ {} }};", src_struct));
2057 "val".to_string()
2058 }
2059 };
2060 (src_val, val_name)
2061 }
2062
2063 fn source_wrapper(&mut self) -> (Vec<String>, Vec<String>, Vec<String>) {
2068 const VERBOSE: bool = false;
2069 const MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS: bool = false;
2070
2071 static PARSER_LIBS: [&str; 9] = [
2072 "::VarId", "::parser::Call", "::parser::ListenerWrapper",
2073 "::AltId", "::log::Logger", "::TokenId", "::lexer::PosSpan",
2074 "::parser::Terminate", "::log::LogMsg"
2075 ];
2076
2077 self.log.add_note("generating wrapper source...");
2078 self.options.used_libs.extend(PARSER_LIBS.into_iter().map(|s| format!("{}{s}", self.options.lib_crate)));
2079
2080 self.get_type_info();
2081 let pinfo = &self.parsing_table;
2082
2083 for (v, name) in self.nt_name.iter().enumerate().filter(|(v, _)| self.nt_values[*v]) {
2085 let v = v as VarId;
2086 self.nt_type.entry(v).or_insert_with(|| format!("Syn{}", name.0));
2087 }
2088
2089 let mut src = vec![];
2090
2091 let mut nt_contexts = self.source_wrapper_ctx::<VERBOSE>(&mut src);
2093
2094 let (src_types, syns) = self.source_wrapper_types::<VERBOSE>(&mut src);
2096
2097 let mut exit_fixer = NameFixer::new();
2099 let mut span_init = HashSet::<VarId>::new();
2100 let src_skel = vec![
2101 format!("// {:-<80}", ""),
2102 format!("// Template for the user implementation of {}Listener", self.name),
2103 String::new(),
2104 "struct Listener {".to_string(),
2105 " log: BufLog,".to_string(),
2106 "}".to_string(),
2107 String::new(),
2108 "#[allow(unused)]".to_string(),
2109 format!("impl {}Listener for Listener {{", self.name),
2110 " fn get_log_mut(&mut self) -> &mut impl Logger {".to_string(),
2111 " &mut self.log".to_string(),
2112 " }".to_string(),
2113 String::new(),
2114 ];
2115 let mut sources = WrapperSources {
2116 src,
2117 src_listener_decl: vec![],
2118 src_skel,
2119 src_types,
2120 src_init: vec![],
2121 src_exit: vec![],
2122 src_wrapper_impl: vec![],
2123 };
2124
2125 for group in self.nt_parent.iter().filter(|vf| !vf.is_empty()) {
2127 let parent_nt = group[0] as usize;
2128 let parent_flags = self.parsing_table.flags[parent_nt];
2129 let parent_has_value = self.nt_values[parent_nt];
2130 let mut exit_alt_done = HashSet::<VarId>::new();
2131 let mut init_nt_done = HashSet::<VarId>::new();
2132 if VERBOSE { println!("- GROUP {}, parent has {}value, parent flags: {}",
2133 group.iter().map(|v| Symbol::NT(*v).to_str(self.get_symbol_table())).join(", "),
2134 if parent_has_value { "" } else { "no " },
2135 ruleflag::to_string(parent_flags).join(" | ")); }
2136 let is_ambig = parent_flags & ruleflag::PARENT_AMBIGUITY != 0;
2137 let ambig_children = if is_ambig {
2138 group.iter().filter(|&v| self.nt_has_any_flags(*v, ruleflag::CHILD_L_RECURSION)).cloned().to_vec()
2139 } else {
2140 Vec::new()
2141 };
2142 let mut ambig_op_alts = BTreeMap::<AltId, Vec<AltId>>::new();
2143 for (id, f) in ambig_children.iter() .flat_map(|v| self.gather_alts(*v))
2145 .filter_map(|f| self.parsing_table.alts[f as usize].1.get_ambig_alt_id().map(|id| (id, f)))
2146 {
2147 ambig_op_alts.entry(id).or_default().push(f);
2148 }
2149 if VERBOSE && is_ambig {
2150 println!("- ambig children vars: {}", ambig_children.iter().map(|v| Symbol::NT(*v).to_str(self.get_symbol_table())).join(", "));
2151 println!(" ambig op alts: {ambig_op_alts:?}");
2152 }
2153
2154 let in_ctx = SourceInputContext {
2156 parent_has_value,
2157 parent_nt,
2158 pinfo,
2159 syns: &syns,
2160 ambig_op_alts: &ambig_op_alts,
2161 };
2162 let mut state = SourceState {
2163 init_nt_done: &mut init_nt_done,
2164 span_init: &mut span_init,
2165 nt_contexts: &mut nt_contexts,
2166 exit_alt_done: &mut exit_alt_done,
2167 exit_fixer: &mut exit_fixer,
2168 };
2169
2170 for var in group {
2171 let nt = *var as usize;
2172 let flags = self.parsing_table.flags[nt];
2173 let is_ambig_1st_child = is_ambig && flags & ruleflag::CHILD_L_RECURSION != 0 && ambig_children.first() == Some(var);
2175 let is_ambig_redundant = is_ambig && flags & ruleflag::L_RECURSION != 0 && !is_ambig_1st_child;
2178 let has_value = self.nt_values[nt];
2179
2180 self.source_wrapper_init::<VERBOSE>(
2182 &in_ctx,
2183 *var,
2184 flags,
2185 has_value,
2186 is_ambig_1st_child,
2187 &mut state,
2188 &mut sources
2189 );
2190
2191 self.source_wrapper_exit::<VERBOSE>(
2193 &in_ctx,
2194 *var,
2195 flags,
2196 has_value,
2197 is_ambig_1st_child,
2198 is_ambig_redundant,
2199 &mut state,
2200 &mut sources
2201 );
2202 }
2203
2204 for a in group.iter().flat_map(|v| &self.var_alts[*v as usize]).filter(|a| !exit_alt_done.contains(a)) {
2206 let is_called = self.opcodes[*a as usize].contains(&OpCode::Exit(*a));
2207 let (v, alt) = &self.parsing_table.alts[*a as usize];
2208 let alt_str = if MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS {
2209 self.full_alt_str(*a, None, false)
2210 } else {
2211 alt.to_rule_str(*v, self.get_symbol_table(), self.parsing_table.flags[*v as usize])
2212 };
2213 let comment = format!("// {alt_str} ({})", if is_called { "not used" } else { "never called" });
2214 if is_called {
2215 sources.src_exit.push(vec![format!(" {a} => {{}}"), comment]);
2216 } else {
2217 sources.src_exit.push(vec![format!(" /* {a} */"), comment]);
2218 }
2219 }
2220 let mut seg_init = Segments::from_iter(
2222 group.iter()
2223 .filter_map(|&v| if !init_nt_done.contains(&v) { Some(Seg(v as u32, v as u32)) } else { None })
2224 );
2225 seg_init.normalize();
2226 for seg in seg_init {
2227 let Seg(a, b) = seg;
2228 if a == b {
2229 sources.src_init.push(vec![format!(" {a} => {{}}"), format!("// {}", Symbol::NT(a as VarId).to_str(self.get_symbol_table()))]);
2230 } else {
2231 sources.src_init.push(vec![
2232 format!(" {a}{}{b} => {{}}", if b == a + 1 { " | " } else { " ..= " }),
2233 format!("// {}", (a..=b).map(|v| Symbol::NT(v as VarId).to_str(self.get_symbol_table())).join(", "))
2234 ]);
2235 }
2236 }
2237 }
2238
2239 self.source_wrapper_finalize(span_init, sources)
2240 }
2241
2242 fn source_wrapper_ctx<const VERBOSE: bool>(&self, src: &mut Vec<String>) -> Vec<Option<Vec<AltId>>> {
2244 let mut nt_contexts: Vec<Option<Vec<AltId>>> = vec![None; self.parsing_table.num_nt];
2245 for group in self.nt_parent.iter().filter(|vf| !vf.is_empty()) {
2246 let mut group_names = HashMap::<VarId, Vec<AltId>>::new();
2247 for nt in group {
2249 for &alt_id in &self.var_alts[*nt as usize] {
2250 if let Some((owner, _name)) = &self.alt_info[alt_id as usize] {
2251 group_names.entry(*owner)
2252 .and_modify(|v| v.push(alt_id))
2253 .or_insert_with(|| vec![alt_id]);
2254 }
2255 }
2256 }
2257 if VERBOSE {
2258 println!("group {}", group.iter().map(|nt| Symbol::NT(*nt).to_str(self.get_symbol_table())).join(" "));
2259 }
2260 for &nt in group {
2261 if let Some(alts) = group_names.get(&nt) {
2262 let flags = self.parsing_table.flags[nt as usize];
2263 if VERBOSE {
2264 print!("- {}: flags {}", Symbol::NT(nt).to_str(self.get_symbol_table()), ruleflag::to_string(flags).join(" "));
2265 if let Some(gn) = group_names.get(&nt) {
2266 println!(", alts = {}", gn.iter().map(|a| a.to_string()).join(", "));
2267 let sorted = self.sort_alt_ids(group[0], gn);
2268 println!(" sorted alts: {sorted:?}");
2269 } else {
2270 println!();
2271 }
2272 }
2273 if flags & (ruleflag::SEP_LIST | ruleflag::L_FORM) == ruleflag::SEP_LIST | ruleflag::L_FORM {
2274 src.push("#[derive(Debug)]".to_string());
2275 src.push(format!("pub enum InitCtx{} {{", self.nt_name[nt as usize].0));
2276 let a_id = self.var_alts[nt as usize][0];
2277 let comment = format!(
2278 "value of `{}` before {}",
2279 self.item_ops[a_id as usize][1..].iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
2280 self.full_alt_components(a_id, None).1
2281 );
2282 let ctx_content = self.source_infos(&self.item_info[a_id as usize], false, true);
2283 src.push(format!(" /// {comment}"));
2284 let a_name = &self.alt_info[a_id as usize].as_ref().unwrap().1;
2285 let ctx_item = if ctx_content.is_empty() {
2286 if VERBOSE { println!(" {a_name},"); }
2287 format!(" {a_name},", )
2288 } else {
2289 if VERBOSE { println!(" {a_name} {{ {ctx_content} }},"); }
2290 format!(" {a_name} {{ {ctx_content} }},", )
2291 };
2292 src.push(ctx_item);
2293 src.push("}".to_string());
2294 }
2295 src.push("#[derive(Debug)]".to_string());
2296 src.push(format!("pub enum Ctx{} {{", self.nt_name[nt as usize].0));
2297 if VERBOSE { println!(" context Ctx{}:", self.nt_name[nt as usize].0); }
2298 let alts = self.sort_alt_ids(group[0], alts);
2299 nt_contexts[nt as usize] = Some(alts.clone());
2300 for a_id in alts {
2301 let comment = self.full_alt_str(a_id, None, true);
2302 src.push(format!(" /// {comment}"));
2303 if VERBOSE { println!(" /// {comment}"); }
2304 let ctx_content = self.source_infos(&self.item_info[a_id as usize], false, true);
2305 let a_name = &self.alt_info[a_id as usize].as_ref().unwrap().1;
2306 let ctx_item = if ctx_content.is_empty() {
2307 if VERBOSE { println!(" {a_name},"); }
2308 format!(" {a_name},", )
2309 } else {
2310 if VERBOSE { println!(" {a_name} {{ {ctx_content} }},"); }
2311 format!(" {a_name} {{ {ctx_content} }},", )
2312 };
2313 src.push(ctx_item);
2314 }
2315 src.push("}".to_string());
2316 }
2317 }
2318 }
2319 nt_contexts
2320 }
2321
2322 fn source_wrapper_types<const VERBOSE: bool>(&self, src: &mut Vec<String>) -> (Vec<String>, Vec<VarId>) {
2324 static TYPE_DERIVE: &str = "#[derive(Debug, PartialEq)]";
2325
2326 let mut src_types = vec![
2327 format!("// {:-<80}", ""),
2328 "// Template for the user-defined types:".to_string(),
2329 ];
2330 src.add_space();
2331 let mut syns = Vec::<VarId>::new(); for (v, names) in self.nt_name.iter().enumerate().filter(|(v, _)| self.nt_values[*v]) {
2333 let v = v as VarId;
2334 let (nu, _nl, _npl) = names;
2335 let nt_type = self.get_nt_type(v);
2336 if self.nt_has_all_flags(v, ruleflag::CHILD_REPEAT) {
2337 let is_lform = self.nt_has_all_flags(v, ruleflag::L_FORM);
2338 let first_alt = self.var_alts[v as usize][0];
2339 let (t, var_oid) = self.origin.get(v).unwrap();
2340 if is_lform {
2341 let astr = format!("/// User-defined type for {}", self.full_alt_str(first_alt, None, true));
2342 src_types.push(String::new());
2343 src_types.push(astr.clone());
2344 src_types.push(TYPE_DERIVE.to_string());
2345 src_types.push(format!("pub struct {}();", self.get_nt_type(v)));
2346 } else {
2347 let top_parent = self.parsing_table.get_top_parent(v);
2348 src.push(format!("/// Computed `{}` array in `{} -> {}`",
2349 grtree_to_str(t, Some(var_oid), None, Some(top_parent), self.get_symbol_table(), true),
2350 Symbol::NT(top_parent).to_str(self.get_symbol_table()),
2351 grtree_to_str(t, None, Some(var_oid), Some(top_parent), self.get_symbol_table(), true),
2352 ));
2353 let endpoints = self.child_repeat_endpoints.get(&v).unwrap();
2354 if endpoints.len() > 1 {
2355 src.push("#[derive(Debug, PartialEq)]".to_string());
2357 src.push(format!("pub struct {nt_type}(pub Vec<Syn{nu}Item>);"));
2358 src.push("#[derive(Debug, PartialEq)]".to_string());
2359 src.push(format!("pub enum Syn{nu}Item {{"));
2360 for (i, &a_id) in endpoints.iter().index_start(1) {
2361 src.push(format!(" /// {}", self.full_alt_str(a_id, None, true)));
2362 src.push(format!(" V{i} {{ {} }},", self.source_infos(&self.item_info[a_id as usize], false, true)));
2363 }
2364 src.push("}".to_string());
2365 } else {
2366 let a_id = endpoints[0];
2368 let infos = &self.item_info[a_id as usize];
2369 if infos.len() == 1 {
2370 let type_name = self.get_info_type(infos, &infos[0]);
2372 src.push("#[derive(Debug, PartialEq)]".to_string());
2373 src.push(format!("pub struct {nt_type}(pub Vec<{type_name}>);", ));
2374 } else {
2375 src.push("#[derive(Debug, PartialEq)]".to_string());
2377 src.push(format!("pub struct {nt_type}(pub Vec<Syn{nu}Item>);"));
2378 src.push(format!("/// {}", self.full_alt_str(first_alt, None, false)));
2379 src.push("#[derive(Debug, PartialEq)]".to_string());
2380 src.push(format!("pub struct Syn{nu}Item {{ {} }}", self.source_infos(infos, true, true)));
2381 }
2382 }
2383 }
2384 } else {
2385 src_types.push(String::new());
2386 src_types.push(format!("/// User-defined type for `{}`", Symbol::NT(v).to_str(self.get_symbol_table())));
2387 src_types.push(TYPE_DERIVE.to_string());
2388 src_types.push(format!("pub struct {}();", self.get_nt_type(v)));
2389 }
2390 syns.push(v);
2391 }
2392 if !self.nt_values[self.start as usize] {
2393 let nu = &self.nt_name[self.start as usize].0;
2394 src.push(format!("/// Top non-terminal {nu} (has no value)"));
2395 src.push("#[derive(Debug, PartialEq)]".to_string());
2396 src.push(format!("pub struct Syn{nu}();"))
2397 }
2398
2399 if VERBOSE { println!("syns = {syns:?}"); }
2401 src.add_space();
2402 src.push("#[derive(Debug)]".to_string());
2404 src.push(format!("enum EnumSynValue {{ {} }}",
2405 syns.iter().map(|v| format!("{}({})", self.nt_name[*v as usize].0, self.get_nt_type(*v))).join(", ")));
2406 if !syns.is_empty() {
2407 src.add_space();
2409 src.push("impl EnumSynValue {".to_string());
2410 for v in &syns {
2411 let (nu, _, npl) = &self.nt_name[*v as usize];
2412 let nt_type = self.get_nt_type(*v);
2413 src.push(format!(" fn get_{npl}(self) -> {nt_type} {{"));
2414 if syns.len() == 1 {
2415 src.push(format!(" let EnumSynValue::{nu}(val) = self;"));
2416 src.push(" val".to_string());
2417 } else {
2418 src.push(format!(" if let EnumSynValue::{nu}(val) = self {{ val }} else {{ panic!() }}"));
2419 }
2420 src.push(" }".to_string());
2421 }
2422 src.push("}".to_string());
2423 }
2424 (src_types, syns)
2425 }
2426
2427 fn source_wrapper_init<const VERBOSE: bool>(
2429 &self,
2430 ctx : &SourceInputContext,
2431 var : VarId,
2432 flags : u32,
2433 has_value : bool,
2434 is_ambig_1st_child : bool,
2435 state : &mut SourceState,
2436 sources : &mut WrapperSources
2437 ) {
2438 let &SourceInputContext { ambig_op_alts, .. } = ctx;
2439 let SourceState { init_nt_done, span_init, .. } = state;
2440 let WrapperSources { src_listener_decl, src_skel, src_init, src_wrapper_impl, .. } = sources;
2441 let nt = var as usize;
2442 let sym_nt = Symbol::NT(var);
2443 let nt_comment = format!("// {}", sym_nt.to_str(self.get_symbol_table()));
2444 let is_sep_list = flags & ruleflag::SEP_LIST != 0;
2445 let is_lform = flags & ruleflag::L_FORM != 0;
2446 let is_rrec_lform = is_lform && flags & ruleflag::R_RECURSION != 0;
2447 let is_plus = flags & ruleflag::REPEAT_PLUS != 0;
2448 let (nu, nl, npl) = &self.nt_name[nt];
2449 if VERBOSE { println!(" - VAR {}, has {}value, flags: {}",
2450 sym_nt.to_str(self.get_symbol_table()),
2451 if has_value { "" } else { "no " },
2452 ruleflag::to_string(flags).join(" | ")); }
2453
2454 let mut has_skel_init = false;
2455 let init_fn_name = format!("init_{npl}");
2456 if self.parsing_table.parent[nt].is_none() {
2457 init_nt_done.insert(var);
2458 if is_rrec_lform {
2459 span_init.insert(var);
2460 }
2461 if is_rrec_lform && has_value {
2462 src_wrapper_impl.push(String::new());
2463 src_listener_decl.push(format!(" fn {init_fn_name}(&mut self) -> {};", self.get_nt_type(nt as VarId)));
2464 src_skel.push(format!(" fn {init_fn_name}(&mut self) -> {} {{", self.get_nt_type(nt as VarId)));
2465 has_skel_init = true;
2466 src_init.push(vec![format!(" {nt} => self.init_{nl}(),"), nt_comment]);
2467 src_wrapper_impl.push(format!(" fn {init_fn_name}(&mut self) {{"));
2468 src_wrapper_impl.push(format!(" let val = self.listener.init_{nl}();"));
2469 src_wrapper_impl.push(format!(" self.stack.push(EnumSynValue::{nu}(val));"));
2470 src_wrapper_impl.push(" }".to_string());
2471 } else {
2472 src_listener_decl.push(format!(" fn {init_fn_name}(&mut self) {{}}"));
2473 src_init.push(vec![format!(" {nt} => self.listener.{init_fn_name}(),"), nt_comment]);
2474 }
2475 } else if flags & ruleflag::CHILD_REPEAT != 0 {
2476 if !is_sep_list {
2477 span_init.insert(var);
2478 }
2479 if has_value || is_sep_list {
2480 init_nt_done.insert(var);
2481 src_wrapper_impl.push(String::new());
2482 src_init.push(vec![format!(" {nt} => self.{init_fn_name}(),"), nt_comment]);
2483 src_wrapper_impl.push(format!(" fn {init_fn_name}(&mut self) {{"));
2484 if is_lform {
2485 if is_sep_list {
2486 let all_exit_alts = if is_ambig_1st_child {
2487 ambig_op_alts.values().rev().map(|v| v[0]).to_vec()
2488 } else {
2489 self.gather_alts(nt as VarId)
2490 };
2491 let exit_alts = all_exit_alts.into_iter()
2492 .filter(|f|
2493 (flags & ruleflag::CHILD_L_RECURSION == 0
2494 && flags & (ruleflag::CHILD_REPEAT_LFORM | ruleflag::REPEAT_PLUS) != ruleflag::CHILD_REPEAT_LFORM)
2495 || !self.is_alt_sym_empty(*f)
2496 );
2497 let (mut last_alt_ids, exit_info_alts): (Vec<AltId>, Vec<AltId>) = exit_alts.into_iter()
2498 .partition(|i| self.alt_info[*i as usize].is_none());
2499 let last_alt_id_maybe = if last_alt_ids.is_empty() { None } else { Some(last_alt_ids.remove(0)) };
2500 let a = exit_info_alts[0];
2501 let indent = " ";
2502 let (src_let, ctx_params) = Self::source_lets(&self.item_info[a as usize], &self.nt_name, indent, last_alt_id_maybe);
2503 src_wrapper_impl.extend(src_let);
2504 let ctx = if ctx_params.is_empty() {
2505 format!("InitCtx{nu}::{}", self.alt_info[a as usize].as_ref().unwrap().1)
2506 } else {
2507 format!("InitCtx{nu}::{} {{ {ctx_params} }}", self.alt_info[a as usize].as_ref().unwrap().1)
2508 };
2509 src_wrapper_impl.push(format!(" let ctx = {ctx};"));
2510 if self.options.gen_span_params {
2511 src_wrapper_impl.extend(Self::source_update_span(&self.span_nbrs_sep_list[&a].to_string()));
2512 }
2513 src_wrapper_impl.push(format!(
2514 " {}self.listener.{init_fn_name}(ctx{});",
2515 if has_value { "let val = " } else { "" },
2516 if self.options.gen_span_params { ", spans" } else { "" }));
2517 let ret = if has_value {
2518 format!("-> {};", self.get_nt_type(nt as VarId))
2519 } else {
2520 src_listener_decl.push(" #[allow(unused_variables)]".to_string());
2521 "{}".to_string()
2522 };
2523 src_listener_decl.push(format!(
2524 " fn {init_fn_name}(&mut self, ctx: InitCtx{nu}{}) {ret}",
2525 if self.options.gen_span_params { ", spans: Vec<PosSpan>" } else { "" }));
2526
2527 let ret = if has_value { format!(" -> {}", self.get_nt_type(nt as VarId)) } else { String::new() };
2529 src_skel.push(format!(
2530 " fn {init_fn_name}(&mut self, ctx: InitCtx{nu}{}){ret} {{",
2531 if self.options.gen_span_params { ", spans: Vec<PosSpan>" } else { "" }));
2532 let a_id = self.var_alts[nt][0];
2533 let a_info = &self.item_info[a_id as usize];
2534 if !a_info.is_empty() {
2535 let comment = format!(
2536 "value of `{}` before {}",
2537 self.item_ops[a_id as usize][1..].iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
2538 self.full_alt_components(a_id, None).1
2539 );
2540 let ctx_content = a_info.iter().map(|i| i.name.clone()).join(", ");
2541 let a_name = &self.alt_info[a_id as usize].as_ref().unwrap().1;
2542 src_skel.push(format!(" // {comment}"));
2543 src_skel.push(format!(" let InitCtx{nu}::{a_name} {{ {ctx_content} }} = ctx;"));
2544 }
2545 has_skel_init = true;
2546 } else {
2547 src_wrapper_impl.push(format!(" let val = self.listener.{init_fn_name}();"));
2548 src_listener_decl.push(format!(" fn {init_fn_name}(&mut self) -> {};", self.get_nt_type(nt as VarId)));
2549 src_skel.push(format!(" fn {init_fn_name}(&mut self) -> {} {{", self.get_nt_type(nt as VarId)));
2550 has_skel_init = true;
2551 }
2552 if has_value {
2553 src_wrapper_impl.push(format!(" self.stack.push(EnumSynValue::{nu}(val));"));
2554 }
2555 } else if is_sep_list {
2556 let endpoints = self.child_repeat_endpoints.get(&var).unwrap();
2559 let (src_val, val_name) = self.source_child_repeat_lets(endpoints, &self.item_info, is_plus, &self.nt_name, &init_fn_name, nu, true);
2560 src_wrapper_impl.extend(src_val);
2561 src_wrapper_impl.push(format!(" self.stack.push(EnumSynValue::{nu}(Syn{nu}(vec![{val_name}])));"));
2562 } else {
2563 src_wrapper_impl.push(format!(" let val = Syn{nu}(Vec::new());"));
2564 src_wrapper_impl.push(format!(" self.stack.push(EnumSynValue::{nu}(val));"));
2565 }
2566 src_wrapper_impl.push(" }".to_string());
2567 } else if is_lform {
2568 init_nt_done.insert(var);
2569 src_init.push(vec![format!(" {nt} => self.listener.{init_fn_name}(),"), nt_comment]);
2570 src_listener_decl.push(format!(" fn {init_fn_name}(&mut self) {{}}"));
2571 } else {
2572 }
2574 } else {
2575 }
2577 if has_skel_init {
2578 if has_value {
2579 src_skel.push(format!(" {}()", self.get_nt_type(nt as VarId)));
2580 }
2581 src_skel.push(" }".to_string());
2582 src_skel.push(String::new());
2583 }
2584 }
2585
2586 fn source_wrapper_exit<const VERBOSE: bool>(
2588 &self,
2589 ctx : &SourceInputContext,
2590 var : VarId,
2591 flags : u32,
2592 has_value : bool,
2593 is_ambig_1st_child : bool,
2594 is_ambig_redundant : bool,
2595 state : &mut SourceState,
2596 sources : &mut WrapperSources
2597 ) {
2598 const MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS: bool = false;
2599
2600 let &SourceInputContext {
2601 parent_has_value, parent_nt, pinfo, syns, ambig_op_alts
2602 } = ctx;
2603 let SourceState { nt_contexts, exit_alt_done, exit_fixer, .. } = state;
2604 let WrapperSources { src_listener_decl, src_skel, src_exit, src_wrapper_impl, .. } = sources;
2605 let nt = var as usize;
2606 let is_plus = flags & ruleflag::REPEAT_PLUS != 0;
2607 let is_parent = nt == parent_nt;
2608 let is_child_repeat_lform = self.nt_has_all_flags(var, ruleflag::CHILD_REPEAT_LFORM);
2609 let (nu, _nl, npl) = &self.nt_name[nt];
2610
2611 if !is_ambig_redundant && flags & ruleflag::CHILD_L_FACT == 0 {
2613 let mut has_skel_exit = false;
2614 let mut has_skel_exit_return = false;
2615 let (pnu, _pnl, pnpl) = &self.nt_name[parent_nt];
2616 if VERBOSE { println!(" {nu} (parent {pnu})"); }
2617 let no_method = !has_value && flags & ruleflag::CHILD_REPEAT_LFORM == ruleflag::CHILD_REPEAT;
2618 let is_rrec_lform = self.nt_has_all_flags(var, ruleflag::R_RECURSION | ruleflag::L_FORM);
2619 let (fnpl, fnu, fnt, f_valued) = if is_ambig_1st_child {
2620 (pnpl, pnu, parent_nt, parent_has_value) } else {
2622 (npl, nu, nt, has_value)
2623 };
2624 if is_parent || (is_child_repeat_lform && !no_method) || is_ambig_1st_child {
2625 let extra_param = if self.options.gen_span_params { ", spans: Vec<PosSpan>" } else { "" };
2626 if f_valued {
2627 let nt_type = self.get_nt_type(fnt as VarId);
2628 if is_rrec_lform || (is_child_repeat_lform) {
2629 src_listener_decl.push(format!(" fn exit_{fnpl}(&mut self, acc: &mut {nt_type}, ctx: Ctx{fnu}{extra_param});"));
2630 src_skel.push(format!(" fn exit_{fnpl}(&mut self, acc: &mut {nt_type}, ctx: Ctx{fnu}{extra_param}) {{"));
2631 } else {
2632 src_listener_decl.push(format!(" fn exit_{fnpl}(&mut self, ctx: Ctx{fnu}{extra_param}) -> {nt_type};"));
2633 src_skel.push(format!(" fn exit_{fnpl}(&mut self, ctx: Ctx{fnu}{extra_param}) -> {nt_type} {{"));
2634 has_skel_exit_return = true;
2635 }
2636 } else {
2637 src_listener_decl.push(" #[allow(unused_variables)]".to_string());
2638 src_listener_decl.push(format!(" fn exit_{fnpl}(&mut self, ctx: Ctx{fnu}{extra_param}) {{}}"));
2639 src_skel.push(format!(" fn exit_{fnpl}(&mut self, ctx: Ctx{fnu}{extra_param}) {{"));
2640 }
2641 has_skel_exit = true;
2642 }
2643 let all_exit_alts = if is_ambig_1st_child {
2644 ambig_op_alts.values().rev().map(|v| v[0]).to_vec()
2645 } else {
2646 self.gather_alts(nt as VarId)
2647 };
2648 let (last_it_alts, exit_alts) = all_exit_alts.into_iter()
2649 .partition::<Vec<_>, _>(|f|
2650 (flags & ruleflag::CHILD_L_RECURSION != 0
2651 || flags & (ruleflag::CHILD_REPEAT_LFORM | ruleflag::REPEAT_PLUS) == ruleflag::CHILD_REPEAT_LFORM)
2652 && self.is_alt_sym_empty(*f));
2653 if VERBOSE {
2654 println!(" no_method: {no_method}, exit alts: {}", exit_alts.iter().join(", "));
2655 if !last_it_alts.is_empty() {
2656 println!(" last_it_alts: {}", last_it_alts.iter().join(", "));
2657 }
2658 }
2659
2660 if has_skel_exit {
2662 if let Some(alts) = &nt_contexts[fnt] {
2663 let mut skel_ctx = vec![];
2664 for &a_id in alts {
2665 if let Some((_, variant)) = self.alt_info[a_id as usize].as_ref() {
2666 let comment = self.full_alt_str(a_id, None, false);
2667 let fields = self.source_infos(&self.item_info[a_id as usize], false, false);
2668 let ctx_content = if fields.is_empty() {
2669 String::new()
2670 } else {
2671 format!(" {{ {fields} }}")
2672 };
2673 skel_ctx.push((comment, variant, ctx_content));
2674 }
2675 }
2676 match skel_ctx.len() {
2677 0 => {}
2678 1 => {
2679 let (comment, variant, ctx_content) = skel_ctx.pop().unwrap();
2680 src_skel.push(format!(" // {comment}"));
2681 src_skel.push(format!(" let Ctx{fnu}::{variant}{ctx_content} = ctx;"));
2682 }
2683 _ => {
2684 src_skel.push(" match ctx {".to_string());
2685 for (comment, variant, ctx_content) in skel_ctx {
2686 src_skel.push(format!(" // {comment}"));
2687 src_skel.push(format!(" Ctx{fnu}::{variant}{ctx_content} => {{}}"));
2688 }
2689 src_skel.push(" }".to_string());
2690 }
2691 }
2692 if has_skel_exit_return {
2693 src_skel.push(format!(" {}()", self.get_nt_type(fnt as VarId)));
2694 }
2695 src_skel.push(" }".to_string());
2696 src_skel.push(String::new());
2697 } else {
2698 panic!("no alts for NT {fnpl} [{fnt}]");
2699 }
2700 }
2701
2702 for f in &exit_alts {
2703 exit_alt_done.insert(*f);
2704 }
2705 let inter_or_exit_name = if flags & ruleflag::PARENT_L_RECURSION != 0 { format!("inter_{npl}") } else { format!("exit_{npl}") };
2706 let fn_name = exit_fixer.get_unique_name(inter_or_exit_name.clone());
2707 let (is_alt_id, choices) = self.make_match_choices(&exit_alts, &fn_name, flags, no_method, None);
2708 if VERBOSE { println!(" choices: {}", choices.iter().map(|s| s.trim()).join(" ")); }
2709 let comments = exit_alts.iter().map(|f| {
2710 let (v, pf) = &self.parsing_table.alts[*f as usize];
2711 if MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS {
2712 format!("// {}", self.full_alt_str(*f, None, false))
2713 } else {
2714 format!("// {}", pf.to_rule_str(*v, self.get_symbol_table(), self.parsing_table.flags[*v as usize]))
2715 }
2716 }).to_vec();
2717 src_exit.extend(choices.into_iter().zip(comments).map(|(a, b)| vec![a, b]));
2718 if is_ambig_1st_child {
2719 for (a_id, dup_alts) in ambig_op_alts.values().rev().filter_map(|v| if v.len() > 1 { v.split_first() } else { None }) {
2720 let (_, choices) = self.make_match_choices(dup_alts, &fn_name, 0, no_method, Some(*a_id));
2723 let comments = dup_alts.iter()
2724 .map(|a| {
2725 let (v, alt) = &pinfo.alts[*a as usize];
2726 format!("// {} (duplicate of {a_id})", alt.to_rule_str(*v, self.get_symbol_table(), 0))
2727 }).to_vec();
2728 src_exit.extend(choices.into_iter().zip(comments).map(|(a, b)| vec![a, b]));
2729 for a in dup_alts {
2730 exit_alt_done.insert(*a);
2731 }
2732 }
2733 }
2734 if !no_method {
2735 src_wrapper_impl.push(String::new());
2736 src_wrapper_impl.push(format!(" fn {fn_name}(&mut self{}) {{", if is_alt_id { ", alt_id: AltId" } else { "" }));
2737 }
2738 if flags & ruleflag::CHILD_REPEAT_LFORM == ruleflag::CHILD_REPEAT {
2739 if has_value {
2740 let endpoints = self.child_repeat_endpoints.get(&var).unwrap();
2741 let (src_val, val_name) = self.source_child_repeat_lets(endpoints, &self.item_info, is_plus, &self.nt_name, &fn_name, nu, false);
2742 src_wrapper_impl.extend(src_val);
2743 let vec_name = if is_plus { "plus_acc" } else { "star_acc" };
2744 src_wrapper_impl.push(format!(" let Some(EnumSynValue::{nu}(Syn{nu}({vec_name}))) = self.stack.last_mut() else {{"));
2745 src_wrapper_impl.push(format!(" panic!(\"expected Syn{nu} item on wrapper stack\");"));
2746 src_wrapper_impl.push(" };".to_string());
2747 src_wrapper_impl.push(format!(" {vec_name}.push({val_name});"));
2748 }
2749 } else {
2750 assert!(!no_method, "no_method is not expected here (only used in +* with no lform)");
2751 let (mut last_alt_ids, exit_info_alts): (Vec<AltId>, Vec<AltId>) = exit_alts.into_iter()
2752 .partition(|i| self.alt_info[*i as usize].is_none());
2753 let fnu = if is_child_repeat_lform { nu } else { pnu }; let fnpl = if is_child_repeat_lform { npl } else { pnpl }; let a_has_value = if is_child_repeat_lform { has_value } else { parent_has_value };
2756 let is_single = exit_info_alts.len() == 1;
2757 let indent = if is_single { " " } else { " " };
2758 if !is_single {
2759 if self.options.gen_span_params {
2760 src_wrapper_impl.push(" let (n, ctx) = match alt_id {".to_string());
2761 } else {
2762 src_wrapper_impl.push(" let ctx = match alt_id {".to_string());
2763 }
2764 }
2765 if VERBOSE { println!(" exit_alts -> {exit_info_alts:?}, last_alt_id -> {last_alt_ids:?}"); }
2766 let spans_param = if self.options.gen_span_params { ", spans" } else { "" };
2767 for a in exit_info_alts {
2768 if VERBOSE {
2769 println!(" - ALTERNATIVE {a}: {} -> {}",
2770 Symbol::NT(var).to_str(self.get_symbol_table()),
2771 self.parsing_table.alts[a as usize].1.to_str(self.get_symbol_table()));
2772 }
2773 let last_alt_id_maybe = if last_alt_ids.is_empty() { None } else { Some(last_alt_ids.remove(0)) };
2774 if !is_single {
2775 let last_alt_choice = if let Some(last_alt_id) = last_alt_id_maybe { format!(" | {last_alt_id}") } else { String::new() };
2776 src_wrapper_impl.push(format!(" {a}{last_alt_choice} => {{", ));
2777 }
2778 let (src_let, ctx_params) = Self::source_lets(&self.item_info[a as usize], &self.nt_name, indent, last_alt_id_maybe);
2779 src_wrapper_impl.extend(src_let);
2780 let ctx = if ctx_params.is_empty() {
2781 format!("Ctx{fnu}::{}", self.alt_info[a as usize].as_ref().unwrap().1)
2782 } else {
2783 format!("Ctx{fnu}::{} {{ {ctx_params} }}", self.alt_info[a as usize].as_ref().unwrap().1)
2784 };
2785 if is_single {
2786 src_wrapper_impl.push(format!(" let ctx = {ctx};"));
2787 if self.options.gen_span_params {
2788 src_wrapper_impl.extend(Self::source_update_span(&self.span_nbrs[a as usize].to_string()));
2789
2790 }
2791 } else {
2792 let ctx_value = self.gen_match_item(ctx, || self.span_nbrs[a as usize].to_string());
2793 src_wrapper_impl.push(format!("{indent}{ctx_value}"));
2794 src_wrapper_impl.push(" }".to_string());
2795 }
2796 }
2797 if !is_single {
2798 src_wrapper_impl.push(format!(" _ => panic!(\"unexpected alt id {{alt_id}} in fn {fn_name}\")"));
2799 src_wrapper_impl.push(" };".to_string());
2800 if self.options.gen_span_params {
2801 src_wrapper_impl.extend(Self::source_update_span("n"));
2802 }
2803 }
2804 if (is_rrec_lform | is_child_repeat_lform) && f_valued {
2805 src_wrapper_impl.push(
2806 format!(" let Some(EnumSynValue::{fnu}(acc)) = self.stack.last_mut() else {{ panic!() }};"));
2807 src_wrapper_impl.push(
2808 format!(" self.listener.exit_{fnpl}(acc, ctx{spans_param});"));
2809 } else {
2810 src_wrapper_impl.push(format!(
2811 " {}self.listener.exit_{fnpl}(ctx{spans_param});",
2812 if a_has_value { "let val = " } else { "" }));
2813 if a_has_value {
2814 src_wrapper_impl.push(format!(" self.stack.push(EnumSynValue::{fnu}(val));"));
2815 }
2816 }
2817 }
2818 if !no_method {
2819 src_wrapper_impl.push(" }".to_string());
2820 }
2821 for a in last_it_alts {
2822 assert_eq!(flags, pinfo.flags[nt]);
2823 let owner_maybe = if flags & ruleflag::CHILD_REPEAT_LFORM == ruleflag::CHILD_REPEAT_LFORM {
2826 Some(var)
2827 } else if flags & ruleflag::CHILD_L_RECURSION != 0 {
2828 pinfo.parent[nt]
2829 } else {
2830 None
2831 };
2832 if let Some(owner) = owner_maybe {
2833 if self.nt_values[owner as usize] {
2834 let (variant, _, fnname) = &self.nt_name[owner as usize];
2835 let typ = self.get_nt_type(owner);
2836 let varname = if is_child_repeat_lform { "acc" } else { fnname };
2837 if VERBOSE { println!(" exitloop{fnname}({varname}) owner = {}", Symbol::NT(owner).to_str(self.get_symbol_table())); }
2838 src_listener_decl.push(" #[allow(unused_variables)]".to_string());
2839 src_listener_decl.push(format!(" fn exitloop_{fnname}(&mut self, {varname}: &mut {typ}) {{}}"));
2840 let (v, pf) = &self.parsing_table.alts[a as usize];
2841 let alt_str = if MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS {
2842 self.full_alt_str(a, None, false)
2843 } else {
2844 pf.to_rule_str(*v, self.get_symbol_table(), self.parsing_table.flags[*v as usize])
2845 };
2846 src_exit.push(vec![format!(" {a} => self.exitloop_{fnpl}(),"), format!("// {alt_str}")]);
2847 exit_alt_done.insert(a);
2848 src_wrapper_impl.push(String::new());
2849 src_wrapper_impl.push(format!(" fn exitloop_{fnpl}(&mut self) {{"));
2850 src_wrapper_impl.push(format!(" let EnumSynValue::{variant}({varname}) = self.stack.last_mut().unwrap(){};",
2851 if syns.len() > 1 { " else { panic!() }" } else { "" }));
2852 src_wrapper_impl.push(format!(" self.listener.exitloop_{fnname}({varname});"));
2853 src_wrapper_impl.push(" }".to_string());
2854 }
2855 }
2856 }
2857 }
2858 }
2859
2860 fn source_wrapper_finalize(&mut self, span_init: HashSet<VarId>, sources: WrapperSources) -> (Vec<String>, Vec<String>, Vec<String>) {
2861 let WrapperSources { mut src, src_listener_decl, mut src_skel, mut src_types, src_init, src_exit, src_wrapper_impl } = sources;
2862
2863 src.add_space();
2865 src.push(format!("pub trait {}Listener {{", self.name));
2866 src.push(" /// Checks if the listener requests an abort. This happens if an error is too difficult to recover from".to_string());
2867 src.push(" /// and may corrupt the stack content. In that case, the parser immediately stops and returns `ParserError::AbortRequest`.".to_string());
2868 src.push(" fn check_abort_request(&self) -> Terminate { Terminate::None }".to_string());
2869 src.push(" fn get_log_mut(&mut self) -> &mut impl Logger;".to_string());
2870 src.push(" #[allow(unused_variables)]".to_string());
2871 src.push(" fn handle_msg(&mut self, span_opt: Option<&PosSpan>, msg: LogMsg) {".to_string());
2872 src.push(" self.get_log_mut().add(msg);".to_string());
2873 src.push(" }".to_string());
2874 let extra_span = if self.options.gen_span_params { ", span: PosSpan" } else { "" };
2875 let extra_ref_span = if self.options.gen_span_params { ", span: &PosSpan" } else { "" };
2876 if !self.terminal_hooks.is_empty() {
2877 src.push(" #[allow(unused_variables)]".to_string());
2878 src.push(format!(" fn hook(&mut self, token: TokenId, text: &str{extra_ref_span}) -> TokenId {{ token }}"));
2879 }
2880 src.push(" #[allow(unused_variables)]".to_string());
2881 src.push(format!(" fn intercept_token(&mut self, token: TokenId, text: &str{extra_ref_span}) -> TokenId {{ token }}"));
2882 if self.nt_values[self.start as usize] || self.options.gen_span_params {
2883 src.push(" #[allow(unused_variables)]".to_string());
2884 }
2885 if self.nt_values[self.start as usize] {
2886 src.push(format!(" fn exit(&mut self, {}: {}{extra_span}) {{}}", self.nt_name[self.start as usize].2, self.get_nt_type(self.start)));
2887 } else {
2888 src.push(format!(" fn exit(&mut self{extra_span}) {{}}"));
2889 }
2890 src.push(" #[allow(unused_variables)]".to_string());
2891 src.push(" fn abort(&mut self, terminate: Terminate) {}".to_string());
2892 src.extend(src_listener_decl);
2900 src.push("}".to_string());
2901
2902 src.add_space();
2904 src.push("pub struct Wrapper<T> {".to_string());
2905 src.push(" verbose: bool,".to_string());
2906 src.push(" listener: T,".to_string());
2907 src.push(" stack: Vec<EnumSynValue>,".to_string());
2908 src.push(" max_stack: usize,".to_string());
2909 src.push(" stack_t: Vec<String>,".to_string());
2910 if self.options.gen_span_params {
2911 src.push(" stack_span: Vec<PosSpan>,".to_string());
2912 }
2913 src.push("}".to_string());
2914 src.push(String::new());
2915 src.push(format!("impl<T: {}Listener> ListenerWrapper for Wrapper<T> {{", self.name));
2916 src.push(" fn switch(&mut self, call: Call, nt: VarId, alt_id: AltId, t_data: Option<Vec<String>>) {".to_string());
2917 src.push(" if self.verbose {".to_string());
2918 src.push(" println!(\"switch: call={call:?}, nt={nt}, alt={alt_id}, t_data={t_data:?}\");".to_string());
2919 src.push(" }".to_string());
2920 src.push(" if let Some(mut t_data) = t_data {".to_string());
2921 src.push(" self.stack_t.append(&mut t_data);".to_string());
2922 src.push(" }".to_string());
2923 src.push(" match call {".to_string());
2924 src.push(" Call::Enter => {".to_string());
2925 if self.options.gen_span_params {
2926 let mut seg_span = Segments::from_iter(span_init.into_iter().map(|v| Seg(v as u32, v as u32)));
2928 seg_span.normalize();
2929 let pattern = seg_span.into_iter().map(|Seg(a, b)| {
2930 if a == b {
2931 a.to_string()
2932 } else if b == a + 1 {
2933 format!("{a} | {b}")
2934 } else {
2935 format!("{a} ..= {b}")
2936 }
2937 }).join(" | ");
2938 if !pattern.is_empty() {
2939 src.push(format!(" if matches!(nt, {pattern}) {{"));
2940 src.push(" self.stack_span.push(PosSpan::empty());".to_string());
2941 src.push(" }".to_string());
2942 }
2943 }
2944 src.push(" match nt {".to_string());
2945 src.extend(columns_to_str(src_init, Some(vec![64, 0])));
2951 src.push(" _ => panic!(\"unexpected enter nonterminal id: {nt}\")".to_string());
2952 src.push(" }".to_string());
2953 src.push(" }".to_string());
2954 src.push(" Call::Loop => {}".to_string());
2955 src.push(" Call::Exit => {".to_string());
2956 src.push(" match alt_id {".to_string());
2957 src.extend(columns_to_str(src_exit, Some(vec![64, 0])));
2965 src.push(" _ => panic!(\"unexpected exit alternative id: {alt_id}\")".to_string());
2966 src.push(" }".to_string());
2967 src.push(" }".to_string());
2968 src.push(" Call::End(terminate) => {".to_string());
2969 src.push(" match terminate {".to_string());
2970 src.push(" Terminate::None => {".to_string());
2971 let mut args = vec![];
2972 let (_nu, _nl, npl) = &self.nt_name[self.start as usize];
2973 if self.nt_values[self.start as usize] {
2974 src.push(format!(" let val = self.stack.pop().unwrap().get_{npl}();"));
2975 args.push("val");
2976 }
2977 if self.options.gen_span_params {
2978 src.push(" let span = self.stack_span.pop().unwrap();".to_string());
2979 args.push("span");
2980 }
2981 src.push(format!(" self.listener.exit({});", args.join(", ")));
2982 src.push(" }".to_string());
2983 src.push(" Terminate::Abort | Terminate::Conclude => self.listener.abort(terminate),".to_string());
2984 src.push(" }".to_string());
2985 src.push(" }".to_string());
2986 src.push(" }".to_string());
2987 src.push(" self.max_stack = std::cmp::max(self.max_stack, self.stack.len());".to_string());
2988 src.push(" if self.verbose {".to_string());
2989 src.push(" println!(\"> stack_t: {}\", self.stack_t.join(\", \"));".to_string());
2990 src.push(" println!(\"> stack: {}\", self.stack.iter().map(|it| format!(\"{it:?}\")).collect::<Vec<_>>().join(\", \"));".to_string());
2991 src.push(" }".to_string());
2992 src.push(" }".to_string());
2993 src.push(String::new());
2994 src.push(" fn check_abort_request(&self) -> Terminate {".to_string());
2995 src.push(" self.listener.check_abort_request()".to_string());
2996 src.push(" }".to_string());
2997 src.push(String::new());
2998 src.push(" fn abort(&mut self) {".to_string());
2999 src.push(" self.stack.clear();".to_string());
3000 if self.options.gen_span_params {
3001 src.push(" self.stack_span.clear();".to_string());
3002 }
3003 src.push(" self.stack_t.clear();".to_string());
3004 src.push(" }".to_string());
3005 src.push(String::new());
3006 src.push(" fn get_log_mut(&mut self) -> &mut impl Logger {".to_string());
3007 src.push(" self.listener.get_log_mut()".to_string());
3008 src.push(" }".to_string());
3009 src.push(String::new());
3010 src.push(" fn report(&mut self, span_opt: Option<&PosSpan>, msg: LogMsg) {".to_string());
3011 src.push(" self.listener.handle_msg(span_opt, msg);".to_string());
3012 src.push(" }".to_string());
3013 if self.options.gen_span_params {
3014 src.push(String::new());
3015 src.push(" fn push_span(&mut self, span: PosSpan) {".to_string());
3016 src.push(" self.stack_span.push(span);".to_string());
3017 src.push(" }".to_string());
3018 }
3019 src.push(String::new());
3020 src.push(" fn is_stack_empty(&self) -> bool {".to_string());
3021 src.push(" self.stack.is_empty()".to_string());
3022 src.push(" }".to_string());
3023 src.push(String::new());
3024 src.push(" fn is_stack_t_empty(&self) -> bool {".to_string());
3025 src.push(" self.stack_t.is_empty()".to_string());
3026 src.push(" }".to_string());
3027 if self.options.gen_span_params {
3028 src.add_space();
3029 src.push(" fn is_stack_span_empty(&self) -> bool {".to_string());
3030 src.push(" self.stack_span.is_empty()".to_string());
3031 src.push(" }".to_string());
3032 }
3033 let unused_span = if self.options.gen_span_params { "" } else { "_" };
3034 let extra_span_arg = if self.options.gen_span_params { ", span" } else { "" };
3035 if !self.terminal_hooks.is_empty() {
3036 src.add_space();
3037 src.push(format!(" fn hook(&mut self, token: TokenId, text: &str, {unused_span}span: &PosSpan) -> TokenId {{"));
3038 src.push(format!(" self.listener.hook(token, text{extra_span_arg})"));
3039 src.push(" }".to_string());
3040 }
3041 src.add_space();
3042 src.push(format!(" fn intercept_token(&mut self, token: TokenId, text: &str, {unused_span}span: &PosSpan) -> TokenId {{"));
3043 src.push(format!(" self.listener.intercept_token(token, text{extra_span_arg})"));
3044 src.push(" }".to_string());
3045 src.push("}".to_string());
3046
3047 src.add_space();
3048 src.push(format!("impl<T: {}Listener> Wrapper<T> {{", self.name));
3049 src.push(" pub fn new(listener: T, verbose: bool) -> Self {".to_string());
3050 src.push(format!(
3051 " Wrapper {{ verbose, listener, stack: Vec::new(), max_stack: 0, stack_t: Vec::new(){} }}",
3052 if self.options.gen_span_params { ", stack_span: Vec::new()" } else { "" }
3053 ));
3054 src.push(" }".to_string());
3055 src.push(String::new());
3056 src.push(" pub fn get_listener(&self) -> &T {".to_string());
3057 src.push(" &self.listener".to_string());
3058 src.push(" }".to_string());
3059 src.push(String::new());
3060 src.push(" pub fn get_listener_mut(&mut self) -> &mut T {".to_string());
3061 src.push(" &mut self.listener".to_string());
3062 src.push(" }".to_string());
3063 src.push(String::new());
3064 src.push(" pub fn give_listener(self) -> T {".to_string());
3065 src.push(" self.listener".to_string());
3066 src.push(" }".to_string());
3067 src.push(String::new());
3068 src.push(" pub fn set_verbose(&mut self, verbose: bool) {".to_string());
3069 src.push(" self.verbose = verbose;".to_string());
3070 src.push(" }".to_string());
3071src.extend(src_wrapper_impl);
3091 src.push("}".to_string());
3092
3093 src_types.extend(vec![
3095 String::new(),
3096 format!("// {:-<80}", ""),
3097 ]);
3098 if let Some(line) = src_skel.last() {
3099 if line.is_empty() {
3100 src_skel.pop();
3101 }
3102 }
3103 src_skel.extend(vec![
3104 "}".to_string(),
3105 String::new(),
3106 format!("// {:-<80}", ""),
3107 ]);
3108 self.log.add_info(format!("Template for the user types:\n\n{}\n", src_types.join("\n")));
3109 self.log.add_info(format!("Template for the listener implementation:\n\n{}\n", src_skel.join("\n")));
3110
3111 (src, src_types, src_skel)
3112 }
3113}
3114
3115impl LogReader for ParserGen {
3116 type Item = BufLog;
3117
3118 fn get_log(&self) -> &Self::Item {
3119 &self.log
3120 }
3121
3122 fn give_log(self) -> Self::Item {
3123 self.log
3124 }
3125}
3126
// Associates `ParserGen` with the `BuildErrorSource::ParserGen` source identifier.
// NOTE(review): presumably read when build errors are reported, to tell which stage
// produced them; confirm against the `build` module.
3127impl HasBuildErrorSource for ParserGen {
3128 const SOURCE: BuildErrorSource = BuildErrorSource::ParserGen;
3129}
3130
3131impl<T> BuildFrom<ProdRuleSet<T>> for ParserGen where ProdRuleSet<LL1>: BuildFrom<ProdRuleSet<T>> {
3132 fn build_from(mut rules: ProdRuleSet<T>) -> Self {
3138 let name = rules.name.take().unwrap_or(DEFAULT_LISTENER_NAME.to_string());
3139 ParserGen::build_from_rules(rules, name)
3140 }
3141}
3142
3143impl Default for ParserGenOptions {
3144 fn default() -> Self {
3145 ParserGenOptions {
3146 nt_value: NTValue::Default,
3147 include_alts: false,
3148 headers: vec![],
3149 used_libs: StructLibs::new(),
3150 gen_wrapper: true,
3151 gen_span_params: false,
3152 gen_token_enums: false,
3153 lib_crate: LexigramCrate::Core,
3154 indent: 0,
3155 types_indent: 0,
3156 listener_indent: 0,
3157 }
3158 }
3159}
3160
3161impl ParserGen {
3165
3166 pub fn get_nt_tree(&self) -> VecTree<VarId> {
3167 let mut tree = VecTree::new();
3168 let root = tree.add_root(0);
3169 let mut idx = HashMap::new();
3170 for group in self.nt_parent.iter().filter(|vf| !vf.is_empty()) {
3171 idx.clear();
3172 let tree_ids = tree.add_iter(None, group.iter().cloned()).to_vec();
3182 idx.extend(group.iter().zip(tree_ids));
3183 for &child in group.iter() {
3184 tree.attach_child(
3185 self.parsing_table.parent[child as usize]
3186 .map(|p| idx[&p])
3187 .unwrap_or(root),
3188 idx[&child]);
3189 }
3190 }
3191 tree
3192 }
3193
3194 pub fn get_indented_nt(&self) -> Vec<(VarId, String)>{
3195 let tree = self.get_nt_tree();
3196 let mut indented = vec![];
3197 let mut indent = vec![];
3198 for node in tree.iter_pre_depth_simple().skip(1) {
3199 let depth = node.depth as usize;
3200 if indent.len() < depth {
3201 indent.push((1..depth).map(|i| if i & 1 == 0 { " " } else { ". " }).join(""));
3202 }
3203 indented.push((*node, format!("{}{}", &indent[depth - 1], Symbol::NT(*node).to_str(self.get_symbol_table()))));
3204 }
3205 indented
3206 }
3207
3208 pub fn nt_info_str(&self) -> Vec<String> {
3209 let indented = self.get_indented_nt();
3210 let mut cols = vec![
3211 vec![" NT".to_string(), " name".to_string(), " val".to_string(), " flags".to_string(), String::new()]];
3212 for (v, line) in indented {
3213 let nt = v as usize;
3214 cols.push(vec![
3216 format!("| {v:3}"),
3217 format!("| {line}"),
3218 if self.nt_values[nt] { "| y".to_string() } else { "|".to_string() },
3219 format!("| {}", ruleflag::to_string(self.parsing_table.flags[nt]).join(", ")),
3221 "|".to_string(),
3222 ]);
3223 }
3224 let mut txt = columns_to_str(cols, Some(vec![3, 5, 0, 0, 0]));
3225 if let Some(max) = txt.get(1).map(|s| s.charlen()) {
3226 let sep = format!("+{:-<1$}+", "", max - 2);
3227 txt.insert(1, sep.clone());
3228 txt.push(sep);
3229 }
3230 txt
3231 }
3232
3233 pub fn log_nt_info(&mut self) {
3234 let mut txt = self.nt_info_str();
3235 txt.push(String::new());
3236 self.log.add_info("nonterminal information:");
3237 self.log.extend_messages(txt.into_iter().map(LogMsg::Info));
3238 }
3239
3240 pub fn alt_info_str(&self) -> Vec<String> {
3241 let indented = self.get_indented_nt();
3242 let mut cols = vec![
3243 vec![" NT".to_string(), " alt".to_string(), " opcodes".to_string(), " spans".to_string(), " item_ops".to_string(), String::new()]];
3244 for (v, line) in indented {
3245 let nt = v as usize;
3246 for &alt_id in &self.var_alts[nt] {
3247 let a_id = alt_id as usize;
3248 let alt = &self.parsing_table.alts[a_id].1;
3249 let opcodes = self.opcodes[a_id].iter().map(|o| o.to_str_quote(self.get_symbol_table())).join(" ");
3250 let item_ops = self.item_ops.get(a_id)
3251 .map(|ops| ops.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "))
3252 .unwrap_or_else(|| "-".to_string());
3253 cols.push(vec![
3254 format!("| {v:3}"),
3255 format!("| {alt_id:4}: {line} -> {}", alt.to_str(self.get_symbol_table())),
3256 format!("| {opcodes}"),
3257 format!("| {}{}",
3258 &self.span_nbrs[a_id],
3259 if let Some(ispan) = self.span_nbrs_sep_list.get(&alt_id) { format!(", {ispan}") } else { String::new() }),
3260 format!("| {item_ops}"),
3261 "|".to_string(),
3262 ]);
3263 }
3264 }
3265 let mut txt = columns_to_str(cols, Some(vec![3, 5, 0, 0, 0, 0]));
3266 if let Some(max) = txt.get(1).map(|s| s.charlen()) {
3267 let sep = format!("+{:-<1$}+", "", max - 2);
3268 txt.insert(1, sep.clone());
3269 txt.push(sep);
3270 }
3271 txt
3272 }
3273
3274 pub fn log_alt_info(&mut self) {
3275 let mut txt = self.alt_info_str();
3276 txt.push("legend: ►nt = enter nonterminal nt, ◄0 = exit alt, ●nt = loop nonterminal, Xyz! = variable terminal, \"…\" = fixed terminal, ▲ = hook".to_string());
3277 txt.push(String::new());
3278 self.log.add_note("rule alternatives:");
3279 self.log.extend_messages(txt.into_iter().map(LogMsg::Info));
3280 }
3281
3282 pub fn print_items(&self, indent: usize, show_symbols: bool, show_span: bool) {
3283 let tbl = self.get_symbol_table();
3284 let fields = (0..self.parsing_table.alts.len())
3285 .map(|a| {
3286 let a_id = a as AltId;
3287 let (v, alt) = &self.parsing_table.alts[a];
3288 let ops = &self.opcodes[a];
3289 let it = &self.item_ops[a_id as usize];
3290 let mut cols = vec![];
3291 if show_symbols {
3292 let symbols = format!("symbols![{}]", it.iter().map(|s| s.to_macro_item()).join(", "));
3293 let value = if show_span {
3294 assert!(self.options.gen_span_params, "ParserGen is not configured for spans");
3295 format!("({}, {symbols})", self.span_nbrs[a_id as usize])
3296 } else {
3297 symbols
3298 };
3299 cols.push(format!("{a_id} => {value},"));
3300 }
3301 cols.extend([
3302 format!("// {a_id:2}: {} -> {}", Symbol::NT(*v).to_str(tbl), alt.iter().map(|s| s.to_str_quote(tbl)).join(" ")),
3303 format!("| {}", ops.iter().map(|s| s.to_str_quote(tbl)).join(" ")),
3304 format!(
3305 "| {}{}",
3306 &self.span_nbrs[a_id as usize],
3307 if let Some(ispan) = self.span_nbrs_sep_list.get(&a_id) { format!(", {ispan}") } else { String::new() }),
3308 format!("| {}", it.iter().map(|s| s.to_str(tbl)).join(" ")),
3309 ]);
3310 cols
3311 }).to_vec();
3312 let widths = if show_symbols { vec![40, 0, 0, 0, 0] } else { vec![16, 0, 0, 0, 0] };
3313 for l in columns_to_str(fields, Some(widths)) {
3314 println!("{:indent$}{l}", "", indent = indent)
3315 }
3316 }
3317
3318 pub fn print_flags(&self, indent: usize) {
3319 let tbl: Option<&SymbolTable> = self.get_symbol_table();
3320 let prefix = format!("{:width$}//", "", width = indent);
3321 let nt_flags = self.get_parsing_table().flags.iter().index().filter_map(|(nt, &f)|
3322 if f != 0 { Some(format!("{prefix} - {}: {} ({})", Symbol::NT(nt).to_str(tbl), ruleflag::to_string(f).join(" | "), f)) } else { None }
3323 ).join("\n");
3324 let parents = self.get_parsing_table().parent.iter().index().filter_map(|(c, &par)|
3325 par.map(|p| format!("{prefix} - {} -> {}", Symbol::NT(c).to_str(tbl), Symbol::NT(p).to_str(tbl)))
3326 ).join("\n");
3327 if !nt_flags.is_empty() {
3328 println!("{prefix} NT flags:\n{nt_flags}");
3329 }
3330 if !parents.is_empty() {
3331 println!("{prefix} parents:\n{parents}");
3332 }
3333 }
3334}