1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
4use iter_index::IndexerIterator;
5use lexigram_core::alt::Alternative;
6use crate::grammar::{grtree_to_str, GrTreeExt, LLParsingTable, NTConversion, ProdRuleSet, RuleTreeSet};
7use crate::{columns_to_str, indent_source, AltId, General, NameFixer, NameTransformer, Normalized, SourceSpacer, StructLibs, SymbolTable, VarId, LL1};
8use crate::fixed_sym_table::{FixedSymTable, SymInfoTable};
9use crate::alt::ruleflag;
10use crate::build::{BuildError, BuildErrorSource, BuildFrom, HasBuildErrorSource, TryBuildFrom};
11use crate::CollectJoin;
12use crate::grammar::origin::{FromPRS, Origin};
13use crate::lexergen::LexigramCrate;
14use crate::log::{BufLog, LogMsg, LogReader, LogStatus, Logger};
15use crate::parser::{OpCode, Parser, Symbol};
16use crate::segments::Segments;
17use crate::segmap::Seg;
18
19pub(crate) mod tests;
20
21pub(crate) fn symbol_to_code(s: &Symbol) -> String {
24 match s {
25 Symbol::Empty => "Symbol::Empty".to_string(),
26 Symbol::T(t) => format!("Symbol::T({t})"),
27 Symbol::NT(nt) => format!("Symbol::NT({nt})"),
28 Symbol::End => "Symbol::End".to_string(),
29 }
30}
31
32#[derive(Clone, Debug, PartialEq)]
35struct ItemInfo {
36 name: String,
37 sym: Symbol, owner: VarId, index: Option<usize> }
41
42#[allow(unused)]
43impl ItemInfo {
44 fn to_str(&self, symbol_table: Option<&SymbolTable>) -> String {
45 format!("{} ({}{}, ◄{})",
46 self.name,
47 self.sym.to_str(symbol_table),
48 if let Some(n) = self.index { format!(", [{n}]") } else { "".to_string() },
49 Symbol::NT(self.owner).to_str(symbol_table))
50 }
51}
52
53pub struct ParserTables {
63 num_nt: usize,
64 num_t: usize,
65 alt_var: Vec<VarId>,
67 alts: Vec<Alternative>,
68 opcodes: Vec<Vec<OpCode>>,
69 table: Vec<AltId>,
70 symbol_table: FixedSymTable,
71 start: VarId,
72 include_alts: bool,
73}
74
75impl ParserTables {
76 pub fn new(parsing_table: LLParsingTable, symbol_table: FixedSymTable, opcodes: Vec<Vec<OpCode>>, start: VarId, include_alts: bool) -> Self {
77 assert!(parsing_table.num_nt > start as usize);
78 let num_nt = parsing_table.num_nt;
79 let num_t = parsing_table.num_t;
80 let table = parsing_table.table;
81 let (factor_var, alts): (Vec<_>, Vec<_>) = parsing_table.alts.into_iter().unzip();
82 ParserTables { num_nt, num_t, alt_var: factor_var, alts, opcodes, table, symbol_table, start, include_alts }
83 }
84
85 pub fn make_parser(&self) -> Parser<'_> {
86 Parser::new(
87 self.num_nt,
88 self.num_t,
89 self.alt_var.as_slice(),
90 if self.include_alts { self.alts.clone() } else { vec![] },
91 self.opcodes.clone(),
92 self.table.as_slice(),
93 self.symbol_table.clone(),
94 self.start,
95 )
96 }
97}
98
99impl BuildFrom<ParserGen> for ParserTables {
100 fn build_from(parser_gen: ParserGen) -> Self {
103 ParserTables::new(
104 parser_gen.parsing_table,
105 parser_gen.symbol_table.to_fixed_sym_table(),
106 parser_gen.opcodes,
107 parser_gen.start,
108 parser_gen.include_alts
109 )
110 }
111}
112
113impl TryBuildFrom<ParserGen> for ParserTables {
115 type Error = BuildError;
116
117 fn try_build_from(source: ParserGen) -> Result<Self, Self::Error> {
118 if source.get_log().has_no_errors() {
119 Ok(ParserTables::build_from(source))
120 } else {
121 Err(BuildError::new(source.give_log(), BuildErrorSource::ParserGen))
122 }
123 }
124}
125
126pub static DEFAULT_LISTENER_NAME: &str = "Parser";
129
130static FOLD_SPAN_CODE: [&str; 4] = [
131 " let spans = self.stack_span.drain(self.stack_span.len() - n ..).collect::<Vec<_>>();",
132 " let mut new_span = PosSpan::empty();",
133 " spans.iter().for_each(|span| new_span += span);",
134 " self.stack_span.push(new_span);",
135];
136
137pub type SpanNbr = u16;
138
139fn count_span_nbr(opcode: &[OpCode]) -> SpanNbr {
140 let count = opcode.iter().filter(|op| op.has_span()).count();
141 count.try_into().expect(&format!("# span = {count} > {}", SpanNbr::MAX))
142}
143
144#[derive(Debug)]
145pub struct ParserGen {
146 parsing_table: LLParsingTable,
147 symbol_table: SymbolTable,
148 name: String,
149 nt_value: Vec<bool>,
150 nt_parent: Vec<Vec<VarId>>,
152 var_alts: Vec<Vec<AltId>>,
153 origin: Origin<VarId, FromPRS>,
154 item_ops: HashMap<AltId, Vec<Symbol>>,
155 opcodes: Vec<Vec<OpCode>>,
156 gen_span_params: bool,
158 span_nbrs: Vec<SpanNbr>,
159 start: VarId,
160 nt_conversion: HashMap<VarId, NTConversion>,
161 headers: Vec<String>,
162 used_libs: StructLibs,
163 nt_type: HashMap<VarId, String>,
164 nt_extra_info: HashMap<VarId, (String, Vec<String>)>,
165 log: BufLog,
166 include_alts: bool,
167 lib_crate: LexigramCrate,
168}
169
170impl ParserGen {
171 pub fn build_from_tree(tree: RuleTreeSet<General>, name: String) -> Self {
174 let normalized = RuleTreeSet::<Normalized>::build_from(tree);
175 let lr_rules = ProdRuleSet::build_from(normalized);
176 Self::build_from_rules(lr_rules, name)
177 }
178
179 pub fn build_from_rules<T>(rules: ProdRuleSet<T>, name: String) -> Self where ProdRuleSet<LL1>: BuildFrom<ProdRuleSet<T>> {
184 let mut ll1_rules = ProdRuleSet::<LL1>::build_from(rules);
185 assert_eq!(ll1_rules.get_log().num_errors(), 0);
186 let parsing_table = ll1_rules.make_parsing_table(true);
187 let num_nt = ll1_rules.get_num_nt();
188 let start = ll1_rules.get_start().unwrap();
189 let mut var_alts = vec![vec![]; num_nt];
190 for (alt_id, (var_id, _)) in parsing_table.alts.iter().index() {
191 var_alts[*var_id as usize].push(alt_id);
192 }
193 let mut nt_parent: Vec<Vec<VarId>> = vec![vec![]; num_nt];
194 for var_id in 0..num_nt {
195 let top_var_id = parsing_table.get_top_parent(var_id as VarId) as usize;
196 nt_parent[top_var_id].push(var_id as VarId);
197 }
198 let ProdRuleSet { symbol_table, nt_conversion, origin, .. } = ll1_rules;
199 let mut builder = ParserGen {
200 parsing_table,
201 symbol_table: symbol_table.expect(stringify!("symbol table is required to create a {}", std::any::type_name::<Self>())),
202 name,
203 nt_value: vec![false; num_nt],
204 nt_parent,
205 var_alts,
206 origin,
207 item_ops: HashMap::new(),
208 opcodes: Vec::new(),
209 gen_span_params: false,
210 span_nbrs: Vec::new(),
211 start,
212 nt_conversion,
213 headers: Vec::new(),
214 used_libs: StructLibs::new(),
215 nt_type: HashMap::new(),
216 nt_extra_info: HashMap::new(),
217 log: ll1_rules.log,
218 include_alts: false,
219 lib_crate: LexigramCrate::Core,
220 };
221 builder.make_opcodes();
222 builder.make_span_nbrs();
223 builder
224 }
225
226 pub fn set_name(&mut self, name: String) {
227 self.name = name;
228 }
229
230 pub fn get_name(&self) -> &str {
231 &self.name
232 }
233
234 #[inline]
235 pub fn get_symbol_table(&self) -> Option<&SymbolTable> {
236 Some(&self.symbol_table)
237 }
238
239 #[inline]
240 pub fn get_parsing_table(&self) -> &LLParsingTable {
241 &self.parsing_table
242 }
243
244 #[inline]
245 pub fn add_header<T: Into<String>>(&mut self, header: T) {
246 self.headers.push(header.into());
247 }
248
249 #[inline]
250 pub fn extend_headers<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, headers: I) {
251 self.headers.extend(headers.into_iter().map(|s| s.into()));
252 }
253
254 #[inline]
255 pub fn add_lib<T: Into<String>>(&mut self, lib:T) {
256 self.used_libs.add(lib);
257 }
258
259 #[inline]
260 pub fn extend_libs<I: IntoIterator<Item=T>, T: Into<String>>(&mut self, libs: I) {
261 self.used_libs.extend(libs);
262 }
263
264 #[inline]
265 pub fn add_nt_type<T: Into<String>>(&mut self, org_var: VarId, var_type: T) {
268 let var = self.conv_nt(org_var).expect(&format!("var {org_var} doesn't exist"));
269 self.nt_type.insert(var, var_type.into());
270 }
271
272 #[inline]
273 pub fn get_nt_type(&self, v: VarId) -> &str {
274 self.nt_type.get(&v).unwrap().as_str()
275 }
276
277 #[inline]
278 pub fn get_nt_extra_info(&self, nt: VarId) -> Option<&(String, Vec<String>)> {
292 self.nt_extra_info.get(&nt)
293 }
294
295 #[inline]
296 pub fn set_nt_has_value(&mut self, v: VarId, has_value: bool) {
297 self.nt_value[v as usize] = has_value;
298 }
299
300 pub fn set_parents_have_value(&mut self) {
301 for v in 0..self.get_symbol_table().unwrap().get_num_nt() as VarId {
302 if self.get_nt_parent(v).is_none() {
303 self.set_nt_has_value(v, true);
304 } else {
305 }
306 }
307 }
308
309 pub fn set_gen_span_params(&mut self, gen_span_params: bool) {
311 self.gen_span_params = gen_span_params;
312 }
313
314 #[inline]
315 pub fn get_nt_parent(&self, v: VarId) -> Option<VarId> {
316 self.parsing_table.parent[v as usize]
317 }
318
319 pub fn set_include_alts(&mut self, include_alts: bool) {
322 self.include_alts = include_alts;
323 }
324
325 #[inline]
326 pub fn use_full_lib(&mut self, use_full_lib: bool) {
327 self.lib_crate = if use_full_lib { LexigramCrate::Full } else { LexigramCrate::Core };
328 }
329
330 #[inline]
331 pub fn set_crate(&mut self, lcrate: LexigramCrate) {
332 self.lib_crate = lcrate;
333 }
334
335 #[cfg(test)] fn get_original_alt_str(&self, a_id: AltId, symbol_table: Option<&SymbolTable>) -> Option<String> {
337 let (_var, f) = &self.parsing_table.alts[a_id as usize];
338 f.get_origin().and_then(|(o_v, o_id)| {
339 Some(format!(
340 "{} -> {}",
341 Symbol::NT(o_v).to_str(symbol_table),
342 grtree_to_str(self.origin.get_tree(o_v).unwrap(), Some(o_id), None, Some(o_v), symbol_table, false)
343 ))
344 })
345 }
346
347 fn conv_nt(&self, org_var: VarId) -> Option<VarId> {
353 match self.nt_conversion.get(&org_var) {
354 None => if (org_var as usize) < self.parsing_table.num_nt { Some(org_var) } else { None },
355 Some(NTConversion::MovedTo(new)) => Some(*new),
356 Some(NTConversion::Removed) => None
357 }
358 }
359
360 #[allow(unused)]
361 fn nt_has_all_flags(&self, var: VarId, flags: u32) -> bool {
362 self.parsing_table.flags[var as usize] & flags == flags
363 }
364
365 #[allow(unused)]
366 fn nt_has_any_flags(&self, var: VarId, flags: u32) -> bool {
367 self.parsing_table.flags[var as usize] & flags != 0
368 }
369
370 #[allow(unused)]
371 fn sym_has_flags(&self, s: &Symbol, flags: u32) -> bool {
372 if let Symbol::NT(nt) = s { self.nt_has_all_flags(*nt, flags) } else { false }
373 }
374
375 #[allow(unused)]
376 fn sym_has_value(&self, symbol: &Symbol) -> bool {
377 match symbol {
378 Symbol::T(t) => self.symbol_table.is_token_data(*t),
379 Symbol::NT(nt) => self.nt_value[*nt as usize],
380 _ => false
381 }
382 }
383
384 fn full_alt_components(&self, a_id: AltId, emphasis: Option<VarId>) -> (String, String) {
385 const VERBOSE: bool = false;
386 if VERBOSE { println!("full_alt_components(a_id = {a_id}):"); }
387 let &(mut v_a, ref alt) = &self.parsing_table.alts[a_id as usize];
388 while self.parsing_table.flags[v_a as usize] & ruleflag::CHILD_L_FACT != 0 {
389 v_a = *self.parsing_table.parent[v_a as usize].as_ref().unwrap();
390 }
391 let symtab = self.get_symbol_table();
392 if let Some(v_emph) = emphasis {
393 let parent_nt = self.parsing_table.get_top_parent(v_emph);
394 if let Some((t_emph, id_emph)) = self.origin.get(v_emph) {
395 return ((Symbol::NT(parent_nt).to_str(symtab)), grtree_to_str(t_emph, None, Some(id_emph), Some(parent_nt), symtab, true));
396 } else {
397 return (Symbol::NT(parent_nt).to_str(symtab), format!("<VAR {v_emph} NOT FOUND>"));
398 }
399 }
400 if let Some((vo, id)) = alt.get_origin() {
401 let t = self.origin.get_tree(vo).unwrap();
402 let flags = self.parsing_table.flags[v_a as usize];
403 if v_a != vo && flags & ruleflag::CHILD_REPEAT != 0 {
404 (
406 String::new(),
407 format!("`{}` {} in `{} -> {}`",
408 grtree_to_str(t, Some(id), None, Some(vo), symtab, true),
409 if flags & ruleflag::L_FORM != 0 { "iteration" } else { "item" },
410 Symbol::NT(vo).to_str(symtab),
411 grtree_to_str(t, None, Some(id), Some(vo), symtab, true))
412 )
413 } else {
414 let root = Some(id);
415 (Symbol::NT(vo).to_str(symtab), grtree_to_str(t, root, None, Some(vo), symtab, true))
416 }
417 } else {
418 (Symbol::NT(v_a).to_str(symtab), format!("<alt {a_id} NOT FOUND>"))
419 }
420 }
421
422 fn full_alt_str(&self, a_id: AltId, emphasis: Option<VarId>, quote: bool) -> String {
424 let (left, right) = self.full_alt_components(a_id, emphasis);
425 if left.is_empty() {
426 right
427 } else {
428 format!("{q}{left} -> {right}{q}", q = if quote { "`" } else { "" })
429 }
430 }
431
432 fn make_opcodes(&mut self) {
433 const VERBOSE: bool = false;
434 for (alt_id, (var_id, alt)) in self.parsing_table.alts.iter().index() {
435 if VERBOSE {
436 println!("{alt_id}: {}", alt.to_rule_str(*var_id, self.get_symbol_table(), 0));
437 }
438 let flags = self.parsing_table.flags[*var_id as usize];
439 let stack_sym = Symbol::NT(*var_id);
440 let mut new = self.parsing_table.alts[alt_id as usize].1.iter().filter(|s| !s.is_empty()).rev().cloned().to_vec();
441 if VERBOSE { println!(" - {}", new.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
442 let mut opcode = Vec::<OpCode>::new();
443 let mut parent = self.parsing_table.parent[*var_id as usize];
444 if flags & ruleflag::CHILD_L_FACT != 0 {
445 while self.nt_has_all_flags(parent.unwrap(), ruleflag::CHILD_L_FACT) {
446 parent = self.parsing_table.parent[parent.unwrap() as usize];
447 }
448 let parent = parent.unwrap();
449 let parent_r_form_right_rec = self.parsing_table.flags[parent as usize] & ruleflag::R_RECURSION != 0 && flags & ruleflag::L_FORM == 0;
453 if VERBOSE {
454 println!(" - child lfact, parent: {}, !parent_r_form_right_rec = !{parent_r_form_right_rec}, match = {}",
455 Symbol::NT(parent).to_str(self.get_symbol_table()),
456 new.get(0) == Some(&Symbol::NT(parent)));
457 }
458 if new.get(0) == Some(&Symbol::NT(parent)) && !parent_r_form_right_rec {
459 opcode.push(OpCode::Loop(parent));
460 new.remove(0);
461 }
462 }
463 let parent_lrec_no_lfact = flags & (ruleflag::PARENT_L_RECURSION | ruleflag::PARENT_L_FACTOR) == ruleflag::PARENT_L_RECURSION;
464 if flags & ruleflag::PARENT_L_FACTOR == 0 ||
465 parent_lrec_no_lfact ||
466 new.iter().all(|s| if let Symbol::NT(ch) = s { !self.nt_has_all_flags(*ch, ruleflag::CHILD_L_FACT) } else { true })
467 {
468 opcode.push(OpCode::Exit(alt_id)); }
477 opcode.extend(new.into_iter().map(|s| OpCode::from(s)));
478 let r_form_right_rec = flags & ruleflag::R_RECURSION != 0 && flags & ruleflag::L_FORM == 0;
479 if VERBOSE { println!(" - r_form_right_rec = {r_form_right_rec} = {} || {}",
480 flags & ruleflag::R_RECURSION != 0 && flags & ruleflag::L_FORM == 0,
481 flags & ruleflag::CHILD_L_FACT != 0 && self.parsing_table.flags[parent.unwrap() as usize] & ruleflag::R_RECURSION != 0 && flags & ruleflag::L_FORM == 0); }
482 if opcode.get(1).map(|op| op.matches(stack_sym)).unwrap_or(false) && !r_form_right_rec {
483 opcode.swap(0, 1);
486 if VERBOSE { println!(" - swap 0, 1: {}", opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
487 } else if parent_lrec_no_lfact {
488 if let Some(OpCode::NT(x)) = opcode.get(1) {
489 if self.nt_has_all_flags(*x, ruleflag::CHILD_L_RECURSION) {
490 opcode.swap(0, 1);
493 if VERBOSE { println!(" - swap 0, 1: {}", opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
494 }
495 }
496 } else if flags & ruleflag::CHILD_INDEPENDENT_AMBIGUITY != 0 && opcode.len() > 1 {
497 if let Some(OpCode::NT(var_prime)) = opcode.get(1) {
499 let vp = *var_prime; if self.nt_has_all_flags(vp, ruleflag::CHILD_AMBIGUITY) {
501 opcode.swap(0, 1);
502 opcode[0] = OpCode::Loop(vp);
503 if VERBOSE { println!(" - child indep ambig: {}", opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
504 }
505 }
506 }
507 if flags & ruleflag::CHILD_L_FACT != 0 {
508 if opcode.len() >= 2 {
509 if self.nt_has_all_flags(parent.unwrap(), ruleflag::R_RECURSION | ruleflag::L_FORM) {
510 if opcode[1] == OpCode::NT(parent.unwrap()) {
511 opcode.swap(0, 1);
512 opcode[0] = OpCode::Loop(parent.unwrap());
513 }
514 }
515 let fact_top = self.parsing_table.get_top_parent(*var_id);
516 if VERBOSE {
517 println!(" - check for initial exit swap: opcode = [{}], daddy = {}",
518 opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
519 Symbol::NT(fact_top).to_str(self.get_symbol_table()));
520 }
521 if self.parsing_table.flags[fact_top as usize] & ruleflag::PARENT_L_RECURSION != 0 &&
522 matches!(opcode[0], OpCode::Exit(_)) &&
523 matches!(opcode[1], OpCode::NT(v) if self.parsing_table.flags[v as usize] & ruleflag::CHILD_L_RECURSION != 0)
524 {
525 if VERBOSE {
526 println!(" swapping for initial exit_{}: {} <-> {}",
527 Symbol::NT(fact_top).to_str(self.get_symbol_table()).to_lowercase(),
528 opcode[0].to_str(self.get_symbol_table()),
529 opcode[1].to_str(self.get_symbol_table())
530 );
531 }
532 opcode.swap(0, 1);
533 }
534 }
535 }
536 opcode.iter_mut().for_each(|o| {
537 if let OpCode::NT(v) = o {
538 if v == var_id && !r_form_right_rec {
541 *o = OpCode::Loop(*v)
542 }
543 }
544 });
545 if VERBOSE { println!(" -> {}", opcode.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")); }
546 self.opcodes.push(opcode);
547 }
548 }
549
550 fn make_span_nbrs(&mut self) {
551 let mut span_nbrs = vec![0 as SpanNbr; self.parsing_table.alts.len()];
552 for (alt_id, (var_id, _)) in self.parsing_table.alts.iter().enumerate() {
553 let opcode = &self.opcodes[alt_id];
554 let mut span_nbr = span_nbrs[alt_id] + count_span_nbr(&opcode);
555 if self.nt_has_any_flags(*var_id, ruleflag::CHILD_REPEAT | ruleflag::CHILD_L_RECURSION) ||
556 self.nt_has_all_flags(*var_id, ruleflag::R_RECURSION | ruleflag::L_FORM) {
557 span_nbr += 1;
559 }
560 if matches!(opcode.get(0), Some(OpCode::NT(nt)) if nt != var_id && self.parsing_table.flags[*nt as usize] & ruleflag::CHILD_L_RECURSION != 0) {
561 span_nbr -= 1;
563 }
564 if self.nt_has_all_flags(*var_id, ruleflag::PARENT_L_FACTOR) {
568 if let Some(OpCode::NT(nt)) = opcode.get(0) {
569 span_nbr -= 1;
570 for a_id in self.var_alts[*nt as usize].iter() {
571 span_nbrs[*a_id as usize] += span_nbr;
572 }
574 span_nbr = 0;
576 }
577 }
578 span_nbrs[alt_id] = span_nbr;
579 }
580 self.span_nbrs = span_nbrs;
581 }
582
583 fn get_group_alts(&self, g: &Vec<VarId>) -> Vec<(VarId, AltId)> {
584 g.iter().flat_map(|c|
585 self.var_alts[*c as usize].iter().map(|a| (*c, *a))
586 ).collect::<Vec<_>>()
587 }
588
589 fn gather_alts(&self, nt: VarId) -> Vec<AltId> {
593 const VERBOSE: bool = false;
594 let mut alt = vec![];
595 let mut explore = VecDeque::<VarId>::new();
596 explore.push_back(nt);
597 while !explore.is_empty() {
598 let var = explore.pop_front().unwrap();
599 if VERBOSE { println!("{var}: alt = {} | explore = {} | alts: {}",
600 alt.iter().join(", "), explore.iter().join(", "),
601 &self.var_alts[var as usize].iter().join(", ")); }
602 for a in &self.var_alts[var as usize] {
603 let (_, alter) = &self.parsing_table.alts[*a as usize];
604 if let Some(Symbol::NT(last)) = alter.symbols().last() {
605 if self.nt_has_all_flags(*last, ruleflag::CHILD_L_FACT) {
606 explore.push_back(*last);
608 continue;
609 }
610 }
611 alt.push(*a);
612 }
613 if VERBOSE { println!(" => alt = {} | explore = {}", alt.iter().join(", "), explore.iter().join(", ")); }
614 }
615 alt
616 }
617
618 pub(crate) fn make_item_ops(&mut self) {
619 const VERBOSE: bool = false;
620 let info = &self.parsing_table;
621 let mut items = HashMap::<AltId, Vec<Symbol>>::new();
622 if VERBOSE {
623 println!("Groups:");
624 for g in self.nt_parent.iter().filter(|va| !va.is_empty()) {
625 let group = self.get_group_alts(g);
626 let ids = group.iter().map(|(v, _)| *v).collect::<BTreeSet<VarId>>();
627 println!("{}: {}, alts {}",
628 Symbol::NT(g[0]).to_str(self.get_symbol_table()),
629 ids.iter().map(|v| Symbol::NT(*v).to_str(self.get_symbol_table())).join(", "),
630 group.iter().map(|(_, a)| a.to_string()).join(", ")
631 );
632 }
633 }
634 for g in self.nt_parent.iter().filter(|va| !va.is_empty()) {
636 let group = self.get_group_alts(g);
638 let mut change = true;
639 let g_top = g[0];
640 let is_ambig = self.nt_has_all_flags(g_top, ruleflag::PARENT_AMBIGUITY);
641 while change {
642 change = false;
643 let mut nt_used = HashSet::<VarId>::new();
644 if VERBOSE {
645 let ids = group.iter().map(|(v, _)| *v).collect::<BTreeSet<VarId>>();
646 println!("parent: {}, NT with value: {}",
647 Symbol::NT(g[0]).to_str(self.get_symbol_table()),
648 ids.into_iter().filter_map(|v|
649 if self.nt_value[v as usize] { Some(Symbol::NT(v as VarId).to_str(self.get_symbol_table())) } else { None }
650 ).join(", "));
651 }
652 for (nt, alt_id) in &group {
653 let ambig_loop = is_ambig && self.nt_has_all_flags(*nt, ruleflag::CHILD_L_RECURSION);
654 items.insert(*alt_id, if ambig_loop { vec![Symbol::NT(g_top)] } else { vec![] });
655 }
656 for (var_id, alt_id) in &group {
657 let opcode = &self.opcodes[*alt_id as usize];
658 let (_, alt) = &info.alts[*alt_id as usize];
659 if VERBOSE {
660 print!("- {alt_id}: {} -> {} [{}]",
661 Symbol::NT(*var_id).to_str(self.get_symbol_table()),
662 alt.to_str(self.get_symbol_table()),
663 opcode.iter().map(|op| op.to_str(self.get_symbol_table())).join(" "));
664 }
665 let flags = info.flags[*var_id as usize];
666
667 let mut values = self.opcodes[*alt_id as usize].iter().rev()
671 .filter_map(|s| {
672 let sym_maybe = match s {
673 OpCode::T(t) => Some(Symbol::T(*t)),
674 OpCode::NT(nt) => {
675 let is_ambig_top = is_ambig && self.get_nt_parent(*nt) == Some(g_top)
676 && !self.nt_has_any_flags(*nt, ruleflag::CHILD_L_RECURSION | ruleflag::CHILD_REPEAT);
677 let var = if is_ambig_top { g_top } else { *nt };
678 nt_used.insert(var);
679 Some(Symbol::NT(var))
680 },
681 _ => {
682 if VERBOSE { print!(" | {} dropped", s.to_str(self.get_symbol_table())); }
683 None
684 }
685 };
686 sym_maybe.and_then(|s| if self.sym_has_value(&s) { Some(s) } else { None })
687 }).to_vec();
688 if !values.is_empty() && self.parsing_table.parent[*var_id as usize].is_some() {
690 let mut top_nt = *var_id as usize;
691 while self.parsing_table.flags[top_nt] & ruleflag::CHILD_REPEAT == 0 {
692 if let Some(parent) = self.parsing_table.parent[top_nt] {
693 top_nt = parent as usize;
694 } else {
695 break;
696 }
697 }
698 if self.parsing_table.flags[top_nt] & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT {
701 if VERBOSE && !self.nt_value[top_nt] {
702 print!(" | {} is now valued {}",
703 Symbol::NT(top_nt as VarId).to_str(self.get_symbol_table()),
704 if nt_used.contains(&(top_nt as VarId)) { "and was used before" } else { "but wasn't used before" }
705 );
706 }
707 change |= !self.nt_value[top_nt] && nt_used.contains(&(top_nt as VarId));
708 self.nt_value[top_nt] = true;
709 }
710 }
711 if change {
712 if VERBOSE { println!("\nnt_value changed, redoing this group"); }
714 break;
715 }
716 let parent_is_rrec_lfact = !is_ambig && self.nt_has_all_flags(g[0], ruleflag::R_RECURSION | ruleflag::PARENT_L_FACTOR);
718 if parent_is_rrec_lfact {
719 if flags & ruleflag::CHILD_L_FACT != 0 && self.nt_has_all_flags(g[0], ruleflag::L_FORM) {
720 assert!(!self.nt_has_all_flags(*var_id, ruleflag::CHILD_L_FACT | ruleflag::L_FORM), "this was useful after all");
721 if VERBOSE { print!(" child_rrec_lform_lfact"); }
722 items.get_mut(&alt_id).unwrap().insert(0, Symbol::NT(g[0]));
723 }
724 } else {
725 let sym_maybe = if flags & ruleflag::CHILD_REPEAT != 0 && (self.nt_value[*var_id as usize] || flags & ruleflag::L_FORM != 0) {
726 Some(Symbol::NT(*var_id))
727 } else if !is_ambig && flags & ruleflag::CHILD_L_RECURSION != 0 {
728 let parent = info.parent[*var_id as usize].unwrap();
729 Some(Symbol::NT(parent))
730 } else if !is_ambig && flags & (ruleflag::R_RECURSION | ruleflag::L_FORM) == ruleflag::R_RECURSION | ruleflag::L_FORM {
731 Some(Symbol::NT(*var_id))
732 } else {
733 None
734 };
735 if let Some(s) = sym_maybe {
736 if self.sym_has_value(&s) {
737 if VERBOSE { print!(" | loop => {}", s.to_str(self.get_symbol_table())); }
738 values.insert(0, s);
739 }
740 }
741 }
742 if VERBOSE {
743 println!(" ==> [{}] + [{}]",
744 items.get(&alt_id).map(|v| v.iter().map(|s| s.to_str(self.get_symbol_table())).join(" ")).unwrap_or(String::new()),
745 values.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "));
746 }
747 if let Some(OpCode::NT(nt)) = opcode.get(0) {
748 let backup = if matches!(values.last(), Some(Symbol::NT(x)) if x == nt) {
750 Some(values.pop().unwrap())
751 } else {
752 None
753 };
754 if nt != var_id && self.nt_has_all_flags(*nt, ruleflag::CHILD_L_RECURSION) {
755 if VERBOSE { println!(" CHILD_L_RECURSION"); }
756 items.get_mut(&alt_id).unwrap().extend(values);
758 continue;
759 }
760 if flags & ruleflag::PARENT_L_FACTOR != 0 {
761 if VERBOSE {
762 println!(" PARENT_L_FACTOR: moving {} to child {}",
763 values.iter().map(|s| s.to_str(self.get_symbol_table())).join(" "),
764 Symbol::NT(*nt).to_str(self.get_symbol_table()));
765 }
766 if let Some(pre) = items.get_mut(&alt_id) {
768 values.splice(0..0, std::mem::take(pre));
770 }
771 for a_id in self.var_alts[*nt as usize].iter() {
772 items.get_mut(a_id).unwrap().extend(values.clone());
773 }
774 continue;
775 }
776 if let Some(sym) = backup {
777 values.push(sym);
778 }
779 }
780 items.get_mut(&alt_id).unwrap().extend(values);
781 }
782 }
783 }
784 self.item_ops = items;
785 }
786
787 fn sort_alt_ids(&self, top_nt: VarId, alts: &[AltId]) -> Vec<AltId> {
788 const VERBOSE: bool = false;
789 if VERBOSE {
790 println!(" sorting {} alts {alts:?}", Symbol::NT(top_nt).to_str(self.get_symbol_table()));
791 for &a_id in alts {
792 let &(_nt, ref alt) = &self.parsing_table.alts[a_id as usize];
793 if let Some((v, id)) = alt.origin {
794 let tree = &self.origin.trees[v as usize];
795 println!(" [{a_id}] id = {},{id} -> {} <-> {}",
796 Symbol::NT(v).to_str(self.get_symbol_table()),
797 crate::grammar::grtree_to_str_ansi(tree, None, Some(id), Some(v), self.get_symbol_table(), false),
798 tree.to_str_index(None, self.get_symbol_table())
799 );
800 assert_eq!(v, top_nt, "v = {}, top_nt = {}", Symbol::NT(v).to_str(self.get_symbol_table()), Symbol::NT(top_nt).to_str(self.get_symbol_table()));
801 }
802 }
803 }
804 let mut sorted = vec![];
805 let mut ids = alts.iter().filter_map(|&alt_id| self.parsing_table.alts[alt_id as usize].1.origin.map(|(_var, id)| (id, alt_id)))
806 .collect::<HashMap<_, _>>();
807 let tree = &self.origin.trees[top_nt as usize];
808 for node in tree.iter_depth() {
809 if let Some((_, alt_id)) = ids.remove_entry(&node.index) {
810 sorted.push(alt_id);
811 }
812 }
813 if VERBOSE { println!(" -> {sorted:?}"); }
814 sorted
815 }
816
817 fn get_type_info(&self) -> (
874 Vec<(String, String, String)>,
875 Vec<Option<(VarId, String)>>,
876 Vec<Vec<ItemInfo>>,
877 HashMap<VarId, Vec<AltId>>
878 ) {
879 const VERBOSE: bool = false;
880
881 let pinfo = &self.parsing_table;
882 let mut nt_upper_fixer = NameFixer::new();
883 let mut nt_lower_fixer = NameFixer::new();
884 let mut nt_plower_fixer = NameFixer::new_empty(); let nt_name: Vec<(String, String, String)> = (0..pinfo.num_nt).map(|v| {
886 let name = self.symbol_table.get_nt_name(v as VarId);
887 let nu = nt_upper_fixer.get_unique_name(name.to_camelcase());
888 let nl = nt_lower_fixer.get_unique_name(nu.to_underscore_lowercase());
889 let npl = nt_plower_fixer.get_unique_name(nu.to_underscore_lowercase());
890 (nu, nl, npl)
891 }).to_vec();
892
893 let mut alt_info: Vec<Option<(VarId, String)>> = vec![None; pinfo.alts.len()];
894 let mut nt_repeat = HashMap::<VarId, Vec<ItemInfo>>::new();
895 let mut item_info: Vec<Vec<ItemInfo>> = vec![vec![]; pinfo.alts.len()];
896 let mut child_repeat_endpoints = HashMap::<VarId, Vec<AltId>>::new();
897 for group in self.nt_parent.iter().filter(|vf| !vf.is_empty()) {
898 let is_ambig = self.nt_has_any_flags(group[0], ruleflag::PARENT_AMBIGUITY);
899 let mut is_ambig_1st_child = is_ambig;
900 let mut alt_info_to_sort = HashMap::<VarId, Vec<AltId>>::new();
901 for var in group {
902 let nt = *var as usize;
903 let nt_flags = pinfo.flags[nt];
904 if is_ambig && (nt_flags & ruleflag::PARENT_L_RECURSION != 0 || (nt_flags & ruleflag::CHILD_L_RECURSION != 0 && !is_ambig_1st_child)) {
905 continue;
906 }
907 if nt_flags & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT {
908 let is_plus = nt_flags & ruleflag::REPEAT_PLUS != 0;
911 let mut endpoints = self.gather_alts(*var);
912 if VERBOSE { println!("** {} endpoints: {endpoints:?} ", Symbol::NT(*var).to_str(self.get_symbol_table())); }
913 if is_plus {
914 endpoints = endpoints.chunks(2).map(|slice| slice[0]).to_vec();
917 } else {
918 endpoints.retain(|e| !pinfo.alts[*e as usize].1.is_sym_empty());
921 }
922 assert!(!endpoints.is_empty());
923 let endpoints = self.sort_alt_ids(group[0], &endpoints);
924 child_repeat_endpoints.insert(*var, endpoints);
925 }
926 for &alt_id in &self.var_alts[nt] {
927 let i = alt_id as usize;
928 if is_ambig_1st_child && pinfo.alts[i].1.is_sym_empty() {
929 continue;
930 }
931 item_info[i] = if let Some(item_ops) = self.item_ops.get(&alt_id) {
932 let mut indices = HashMap::<Symbol, (String, Option<usize>)>::new();
937 let mut fixer = NameFixer::new();
938 let mut owner = pinfo.alts[i].0;
939 while let Some(parent) = pinfo.parent[owner as usize] {
940 if pinfo.flags[owner as usize] & ruleflag::CHILD_REPEAT != 0 {
941 break;
946 }
947 owner = parent;
948 }
949 let is_nt_repeat = pinfo.flags[owner as usize] & ruleflag::CHILD_REPEAT != 0;
950 for s in item_ops {
951 if let Some((_, c)) = indices.get_mut(s) {
952 *c = Some(0);
953 } else {
954 let name = if let Symbol::NT(vs) = s {
955 let flag = pinfo.flags[*vs as usize];
956 if flag & ruleflag::CHILD_REPEAT != 0 {
957 let inside_alt_id = self.var_alts[*vs as usize][0];
958 let inside_alt = &pinfo.alts[inside_alt_id as usize].1;
959 if false {
960 let mut plus_name = inside_alt.symbols()[0].to_str(self.get_symbol_table()).to_underscore_lowercase();
962 plus_name.push_str(if flag & ruleflag::REPEAT_PLUS != 0 { "_plus" } else { "_star" });
963 plus_name
964 } else {
965 if is_nt_repeat && indices.is_empty() {
966 if flag & ruleflag::REPEAT_PLUS != 0 { "plus_acc".to_string() } else { "star_acc".to_string() }
968 } else {
969 if flag & ruleflag::REPEAT_PLUS != 0 { "plus".to_string() } else { "star".to_string() }
971 }
972 }
973 } else {
974 nt_name[*vs as usize].clone().1
975 }
976 } else {
977 s.to_str(self.get_symbol_table()).to_lowercase()
978 };
979 indices.insert(*s, (fixer.get_unique_name(name), None));
980 }
981 }
982
983 let has_lfact_child = nt_flags & ruleflag::PARENT_L_FACTOR != 0 &&
986 pinfo.alts[i].1.symbols().iter().any(|s| matches!(s, &Symbol::NT(c) if pinfo.flags[c as usize] & ruleflag::CHILD_L_FACT != 0));
987
988 let is_hidden_repeat_child = pinfo.flags[owner as usize] & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT;
991
992 let is_alt_sym_empty = self.is_alt_sym_empty(alt_id);
994
995 let is_duplicate = i > 0 && self.nt_has_all_flags(owner, ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM) &&
998 is_alt_sym_empty;
999 let is_last_empty_iteration = (nt_flags & ruleflag::CHILD_L_RECURSION != 0
1003 || self.nt_has_all_flags(*var, ruleflag::CHILD_REPEAT | ruleflag::L_FORM)) && is_alt_sym_empty;
1004
1005 let is_rep_child_no_lform = is_nt_repeat && pinfo.flags[owner as usize] & ruleflag::L_FORM == 0;
1006
1007 let has_context = !has_lfact_child && !is_hidden_repeat_child && !is_duplicate && !is_last_empty_iteration;
1008 if VERBOSE {
1009 println!("NT {nt}, alt {alt_id}: has_lfact_child = {has_lfact_child}, is_hidden_repeat_child = {is_hidden_repeat_child}, \
1010 is_duplicate = {is_duplicate}, is_last_empty_iteration = {is_last_empty_iteration} => has_context = {has_context}");
1011 }
1012 if has_context {
1013 alt_info_to_sort.entry(owner)
1014 .and_modify(|v| v.push(alt_id))
1015 .or_insert_with(|| vec![alt_id]);
1016 }
1017 if item_ops.is_empty() && nt_flags & ruleflag::CHILD_L_RECURSION != 0 {
1018 if self.nt_value[owner as usize] {
1020 vec![ItemInfo {
1021 name: nt_name[owner as usize].1.clone(),
1022 sym: Symbol::NT(owner),
1023 owner,
1024 index: None,
1025 }]
1026 } else {
1027 vec![]
1028 }
1029 } else {
1030 let skip = if is_rep_child_no_lform { 1 } else { 0 };
1031 let mut infos = item_ops.into_iter()
1032 .skip(skip)
1033 .map(|s| {
1034 let index = if let Some((_, Some(index))) = indices.get_mut(s) {
1035 let idx = *index;
1036 *index += 1;
1037 Some(idx)
1038 } else {
1039 None
1040 };
1041 ItemInfo {
1042 name: indices[&s].0.clone(),
1043 sym: s.clone(),
1044 owner,
1045 index,
1046 }
1047 }).to_vec();
1048 if self.nt_has_all_flags(owner, ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM) {
1049 let last_name = fixer.get_unique_name("last_iteration".to_string());
1051 infos.push(ItemInfo {
1052 name: last_name,
1053 sym: Symbol::Empty, owner,
1055 index: None,
1056 });
1057 };
1058 if is_nt_repeat && infos.len() > 0 && !nt_repeat.contains_key(&owner) {
1059 nt_repeat.insert(owner, infos.clone());
1060 }
1061 infos
1062 }
1063 } else {
1064 vec![]
1065 };
1066 } if is_ambig && nt_flags & ruleflag::CHILD_L_RECURSION != 0 {
1068 is_ambig_1st_child = false;
1069 }
1070 } if VERBOSE { println!("alt_info_to_sort = {alt_info_to_sort:?}"); }
1072 for (owner, alts) in alt_info_to_sort {
1073 for (num, alt) in self.sort_alt_ids(group[0], &alts).into_iter().index_start(1) {
1074 alt_info[alt as usize] = Some((owner, format!("V{num}")));
1075 }
1076 }
1077 } if VERBOSE {
1080 println!("NT names: {}", nt_name.iter()
1081 .map(|(u, l, pl)| format!("{u}/{l}/{pl}"))
1082 .join(", "));
1083 println!("alt info:");
1084 for (alt_id, alt_names) in alt_info.iter().enumerate() {
1085 if let Some((v, name)) = alt_names {
1086 println!("- alt {alt_id}, NT {v} {}, Ctx name: {name}", Symbol::NT(*v).to_str(self.get_symbol_table()));
1087 }
1088 }
1089 println!();
1090 println!("nt_name: {nt_name:?}");
1091 println!("alt_info: {alt_info:?}");
1092 println!("item_info:");
1093 for (i, item) in item_info.iter().enumerate().filter(|(_, item)| !item.is_empty()) {
1094 println!("- {i}: {{ {} }}", item.iter()
1095 .map(|ii| format!("{}{} ({})", ii.name, ii.index.map(|i| format!("[{i}]")).unwrap_or(String::new()), ii.sym.to_str(self.get_symbol_table())))
1096 .join(", "));
1097 }
1098 println!("item_info: {item_info:?}");
1099 println!("child_repeat_endpoints: {child_repeat_endpoints:?}");
1100 }
1101 (nt_name, alt_info, item_info, child_repeat_endpoints)
1102 }
1103
1104 pub fn gen_source_code(&mut self, indent: usize, wrapper: bool) -> String {
1110 let mut parts = vec![];
1111 if !self.headers.is_empty() {
1112 parts.push(self.headers.clone());
1113 }
1114 let mut tmp_parts = vec![self.source_build_parser()];
1115 if wrapper {
1116 self.make_item_ops();
1117 tmp_parts.push(self.source_wrapper());
1118 }
1119 parts.push(self.source_use());
1120 parts.extend(tmp_parts);
1121 indent_source(parts, indent)
1123 }
1124
1125 pub fn try_gen_source_code(mut self, indent: usize, wrapper: bool) -> Result<(BufLog, String), BuildError> {
1126 let src = self.gen_source_code(indent, wrapper);
1127 if self.log.has_no_errors() {
1128 Ok((self.give_log(), src))
1129 } else {
1130 Err(BuildError::new(self.give_log(), BuildErrorSource::ParserGen))
1131 }
1132 }
1133
1134 fn source_use(&self) -> Vec<String> {
1135 self.used_libs.gen_source_code()
1136 }
1137
1138 fn source_build_parser(&mut self) -> Vec<String> {
1139 static BASE_PARSER_LIBS: [&str; 5] = [
1140 "::VarId",
1141 "::AltId",
1142 "::parser::OpCode",
1143 "::parser::Parser",
1144 "::fixed_sym_table::FixedSymTable",
1145 ];
1146 static ALT_PARSER_LIBS: [&str; 2] = [
1147 "::alt::Alternative",
1148 "::parser::Symbol",
1149 ];
1150 self.log.add_note("generating build_parser() source...");
1151 let num_nt = self.symbol_table.get_num_nt();
1152 let num_t = self.symbol_table.get_num_t();
1153 self.used_libs.extend(BASE_PARSER_LIBS.into_iter().map(|s| format!("{}{s}", self.lib_crate)));
1154 self.log.add_note(format!("- creating symbol tables: {num_t} terminals, {num_nt} nonterminals"));
1155 let mut src = vec![
1156 format!("const PARSER_NUM_T: usize = {num_t};"),
1157 format!("const PARSER_NUM_NT: usize = {num_nt};"),
1158 format!("static SYMBOLS_T: [(&str, Option<&str>); PARSER_NUM_T] = [{}];",
1159 self.symbol_table.get_terminals().map(|(s, os)|
1160 format!("(\"{s}\", {})", os.as_ref().map(|s| format!("Some(\"{s}\")")).unwrap_or("None".to_string()))).join(", ")),
1161 format!("static SYMBOLS_NT: [&str; PARSER_NUM_NT] = [{}];",
1162 self.symbol_table.get_nonterminals().map(|s| format!("\"{s}\"")).join(", ")),
1163 format!("static ALT_VAR: [VarId; {}] = [{}];",
1164 self.parsing_table.alts.len(),
1165 self.parsing_table.alts.iter().map(|(v, _)| format!("{v}")).join(", ")),
1166 ];
1167 if self.include_alts {
1168 self.used_libs.extend(ALT_PARSER_LIBS.into_iter().map(|s| format!("{}{s}", self.lib_crate)));
1169 src.push(format!("static ALTERNATIVES: [&[Symbol]; {}] = [{}];",
1170 self.parsing_table.alts.len(),
1171 self.parsing_table.alts.iter().map(|(_, f)| format!("&[{}]", f.iter().map(|s| symbol_to_code(s)).join(", "))).join(", ")));
1172 }
1173 self.log.add_note(format!("- creating parsing tables: {} items, {} opcodes", self.parsing_table.table.len(), self.opcodes.len()));
1174 src.extend(vec![
1175 format!("static PARSING_TABLE: [AltId; {}] = [{}];",
1176 self.parsing_table.table.len(),
1177 self.parsing_table.table.iter().map(|v| format!("{v}")).join(", ")),
1178 format!("static OPCODES: [&[OpCode]; {}] = [{}];", self.opcodes.len(),
1179 self.opcodes.iter().map(|strip| format!("&[{}]", strip.into_iter().map(|op| format!("OpCode::{op:?}")).join(", "))).join(", ")),
1180 format!("static START_SYMBOL: VarId = {};\n", self.start),
1181
1182 format!("pub fn build_parser() -> Parser<'static> {{"),
1183 format!(" let symbol_table = FixedSymTable::new("),
1184 format!(" SYMBOLS_T.into_iter().map(|(s, os)| (s.to_string(), os.map(|s| s.to_string()))).collect(),"),
1185 format!(" SYMBOLS_NT.into_iter().map(|s| s.to_string()).collect()"),
1186 format!(" );"),
1187 format!(" Parser::new("),
1188 format!(" PARSER_NUM_NT, PARSER_NUM_T + 1,"),
1189 format!(" &ALT_VAR,"),
1190 if self.include_alts {
1191 format!(" ALTERNATIVES.into_iter().map(|s| Alternative::new(s.to_vec())).collect(),")
1192 } else {
1193 format!(" Vec::new(),")
1194 },
1195 format!(" OPCODES.into_iter().map(|strip| strip.to_vec()).collect(),"),
1196 format!(" &PARSING_TABLE,"),
1197 format!(" symbol_table,"),
1198 format!(" START_SYMBOL"),
1199 format!(" )"),
1200 format!("}}"),
1201 ]);
1202 src
1203 }
1204
1205 fn get_info_type(&self, infos: &Vec<ItemInfo>, info: &ItemInfo) -> String {
1206 let type_name_base = match info.sym {
1207 Symbol::T(_) => "String".to_string(),
1208 Symbol::NT(vs) => self.get_nt_type(vs).to_string(),
1209 Symbol::Empty => "bool".to_string(),
1210 _ => panic!("unexpected symbol {}", info.sym)
1211 };
1212 if info.index.is_some() {
1213 let nbr = infos.iter()
1214 .map(|nfo| if nfo.sym == info.sym { nfo.index.unwrap() } else { 0 })
1215 .max().unwrap() + 1;
1216 format!("[{type_name_base}; {nbr}]")
1217 } else {
1218 type_name_base
1219 }
1220 }
1221
1222 fn source_infos(&self, infos: &Vec<ItemInfo>, add_pub: bool) -> String {
1224 let pub_str = if add_pub { "pub " } else { "" };
1225 infos.iter().filter_map(|info| {
1226 if info.index.is_none() || info.index == Some(0) {
1227 let type_name = self.get_info_type(&infos, &info);
1228 Some(format!("{pub_str}{}: {}", info.name, type_name))
1229 } else {
1230 None
1231 }
1232 }).join(", ")
1233 }
1234
1235 fn is_alt_sym_empty(&self, a_id: AltId) -> bool {
1236 self.parsing_table.alts[a_id as usize].1.is_sym_empty()
1237 }
1238
1239 fn make_match_choices(&self, alts: &[AltId], name: &str, flags: u32, no_method: bool, force_id: Option<AltId>) -> (bool, Vec<String>) {
1241 assert!(!alts.is_empty(), "alts cannot be empty");
1242 let discarded = if !no_method && flags & (ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT { 1 } else { 0 };
1247
1248 let is_plus_no_lform = flags & (ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS | ruleflag::L_FORM) == (ruleflag::CHILD_REPEAT | ruleflag::REPEAT_PLUS);
1252 let is_alt_id_threshold = if is_plus_no_lform { 2 } else { 1 };
1253 let is_alt_id = force_id.is_none() && alts.len() - discarded > is_alt_id_threshold;
1254
1255 let mut choices = Vec::<String>::new();
1256 let force_id_str = force_id.map(|f| f.to_string()).unwrap_or_default();
1257 if alts.len() - discarded == 1 {
1258 if no_method {
1259 choices.push(format!(" {} => {{}}", alts[0]));
1260 } else {
1261 choices.push(format!(" {} => self.{name}({force_id_str}),", alts[0]));
1262 }
1263 } else {
1264 let last = alts.len() - 1 - discarded;
1265 choices.extend((0..last).map(|i| format!(" {} |", alts[i])));
1266 if no_method {
1267 choices.push(format!(" {} => {{}}", alts[last]));
1268 } else {
1269 choices.push(format!(" {} => self.{name}({}{force_id_str}),",
1270 alts[last],
1271 if is_alt_id { "alt_id" } else { "" }));
1272 }
1273 }
1274 if discarded == 1 {
1275 choices.push(format!(" {} => {{}}", alts.last().unwrap()));
1276 }
1277 (is_alt_id, choices)
1278 }
1279
1280 fn gen_match_item<F: FnOnce() -> String>(&self, common: String, span_only: F) -> String {
1284 if self.gen_span_params {
1285 let span_code = span_only();
1286 format!("({span_code}, {common})")
1287 } else {
1288 common
1289 }
1290 }
1291
1292 fn source_wrapper(&mut self) -> Vec<String> {
1294 const VERBOSE: bool = false;
1295 const MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS: bool = false;
1296
1297 static PARSER_LIBS: [&str; 5] = [
1298 "::VarId", "::parser::Call", "::parser::ListenerWrapper",
1299 "::AltId", "::log::Logger",
1300 ];
1301
1302 let mut log = std::mem::take(&mut self.log); log.add_note("generating wrapper source...");
1306 self.used_libs.extend(PARSER_LIBS.into_iter().map(|s| format!("{}{s}", self.lib_crate)));
1307 if self.gen_span_params {
1308 self.used_libs.add(format!("{}::lexer::PosSpan", self.lib_crate));
1309 }
1310
1311 let (nt_name, alt_info, item_info, child_repeat_endpoints) = self.get_type_info();
1312 let pinfo = &self.parsing_table;
1313
1314 let mut src = vec![];
1315
1316 for (v, name) in nt_name.iter().enumerate().filter(|(v, _)| self.nt_value[*v]) {
1318 let v = v as VarId;
1319 if !self.nt_type.contains_key(&v) {
1320 self.nt_type.insert(v, format!("Syn{}", name.0));
1321 }
1322 }
1323
1324 log.add_note(format!("- Contexts used in {}Listener trait:", self.name));
1326 for group in self.nt_parent.iter().filter(|vf| !vf.is_empty()) {
1327 let mut group_names = HashMap::<VarId, Vec<AltId>>::new();
1328 for nt in group {
1330 for &alt_id in &self.var_alts[*nt as usize] {
1331 if let Some((owner, _name)) = &alt_info[alt_id as usize] {
1332 group_names.entry(*owner)
1333 .and_modify(|v| v.push(alt_id))
1334 .or_insert_with(|| vec![alt_id]);
1335 }
1336 }
1337 }
1338 if VERBOSE {
1339 println!("group {}", group.iter().map(|nt| Symbol::NT(*nt).to_str(self.get_symbol_table())).join(" "));
1340 }
1341 for &nt in group {
1342 if let Some(alts) = group_names.get(&nt) {
1343 if VERBOSE {
1344 if let Some(gn) = group_names.get(&nt) {
1345 println!("- {}: alts = {}", Symbol::NT(nt).to_str(self.get_symbol_table()), gn.iter().map(|a| a.to_string()).join(", "));
1346 let sorted = self.sort_alt_ids(group[0], gn);
1347 println!(" sorted alts: {sorted:?}");
1348 }
1349 }
1350 log.add_note(format!(" - Ctx{}:", nt_name[nt as usize].0));
1351 src.push(format!("#[derive(Debug)]"));
1352 src.push(format!("pub enum Ctx{} {{", nt_name[nt as usize].0));
1353 if VERBOSE { println!(" context Ctx{}:", nt_name[nt as usize].0); }
1354 for a_id in self.sort_alt_ids(group[0], alts) {
1355 let comment = self.full_alt_str(a_id, None, true);
1356 log.add_note(format!(" /// {comment}"));
1357 src.push(format!(" /// {comment}"));
1358 if VERBOSE { println!(" /// {comment}"); }
1359 let ctx_content = self.source_infos(&item_info[a_id as usize], false);
1360 let a_name = &alt_info[a_id as usize].as_ref().unwrap().1;
1361 let ctx_item = if ctx_content.is_empty() {
1362 if VERBOSE { println!(" {a_name},"); }
1363 format!(" {a_name},", )
1364 } else {
1365 if VERBOSE { println!(" {a_name} {{ {ctx_content} }},"); }
1366 format!(" {a_name} {{ {ctx_content} }},", )
1367 };
1368 log.add_note(ctx_item.clone());
1369 src.push(ctx_item);
1370 }
1371 src.push(format!("}}"));
1372 }
1373 }
1374 }
1375
1376 src.add_space();
1378 log.add_note("- NT types and user-defined type templates:");
1379 src.push("// NT types and user-defined type templates (copy elsewhere and uncomment when necessary):".to_string());
1380 src.add_space();
1381 let mut syns = Vec::<VarId>::new(); for (v, names) in nt_name.iter().enumerate().filter(|(v, _)| self.nt_value[*v]) {
1383 let v = v as VarId;
1384 let (nu, _nl, _npl) = names;
1385 let nt_type = self.get_nt_type(v);
1386 if self.nt_has_all_flags(v, ruleflag::CHILD_REPEAT) {
1387 let is_lform = self.nt_has_all_flags(v, ruleflag::L_FORM);
1388 let first_alt = self.var_alts[v as usize][0];
1389 let (t, var_oid) = self.origin.get(v).unwrap();
1390 if is_lform {
1391 let astr = format!("/// User-defined type for {}", self.full_alt_str(first_alt, None, true));
1392 let user_def_type = vec![
1393 format!("// {astr}"),
1394 format!("// #[derive(Debug, PartialEq)] pub struct {}();", self.get_nt_type(v)),
1395 ];
1396 log.extend_messages(user_def_type.iter().map(|s| LogMsg::Note(s[3..].to_string())));
1397 src.extend(user_def_type);
1398 let extra_src = vec![
1399 astr,
1400 format!("#[derive(Debug, PartialEq)]"),
1401 format!("pub struct {nt_type}();"),
1402 ];
1403 self.nt_extra_info.insert(v, (self.get_nt_type(v).to_string(), extra_src));
1404 } else {
1405 let top_parent = self.parsing_table.get_top_parent(v);
1406 src.push(format!("/// Computed `{}` array in `{} -> {}`",
1407 grtree_to_str(t, Some(var_oid), None, Some(top_parent), self.get_symbol_table(), true),
1408 Symbol::NT(top_parent).to_str(self.get_symbol_table()),
1409 grtree_to_str(t, None, Some(var_oid), Some(top_parent), self.get_symbol_table(), true),
1410 ));
1411 let endpoints = child_repeat_endpoints.get(&v).unwrap();
1412 if endpoints.len() > 1 {
1413 src.push(format!("#[derive(Debug, PartialEq)]"));
1415 src.push(format!("pub struct {nt_type}(pub Vec<Syn{nu}Item>);"));
1416 src.push(format!("#[derive(Debug, PartialEq)]"));
1417 src.push(format!("pub enum Syn{nu}Item {{"));
1418 for (i, &a_id) in endpoints.into_iter().index_start(1) {
1419 src.push(format!(" /// {}", self.full_alt_str(a_id, None, true)));
1420 src.push(format!(" V{i} {{ {} }},", self.source_infos(&item_info[a_id as usize], false)));
1421 }
1422 src.push(format!("}}"));
1423 } else {
1424 let a_id = endpoints[0];
1426 let infos = &item_info[a_id as usize];
1427 if infos.len() == 1 {
1428 let type_name = self.get_info_type(&infos, &infos[0]);
1430 src.push(format!("#[derive(Debug, PartialEq)]"));
1431 src.push(format!("pub struct {nt_type}(pub Vec<{type_name}>);", ));
1432 } else {
1433 src.push(format!("#[derive(Debug, PartialEq)]"));
1435 src.push(format!("pub struct {nt_type}(pub Vec<Syn{nu}Item>);"));
1436 src.push(format!("/// {}", self.full_alt_str(first_alt, None, false)));
1437 src.push(format!("#[derive(Debug, PartialEq)]"));
1438 src.push(format!("pub struct Syn{nu}Item {{ {} }}", self.source_infos(&infos, true)));
1439 }
1440 }
1441 }
1442 } else {
1443 let user_def_type = vec![
1444 format!("// /// User-defined type for `{}`", Symbol::NT(v).to_str(self.get_symbol_table())),
1445 format!("// #[derive(Debug, PartialEq)] pub struct {}();", self.get_nt_type(v)),
1446 ];
1447 log.extend_messages(user_def_type.iter().map(|s| LogMsg::Note(s[3..].to_string())));
1448 src.extend(user_def_type);
1449 let extra_src = vec![
1450 format!("/// User-defined type for `{}`", Symbol::NT(v).to_str(self.get_symbol_table())),
1451 format!("#[derive(Debug, PartialEq)]"),
1452 format!("pub struct {}();", self.get_nt_type(v)),
1453 ];
1454 self.nt_extra_info.insert(v, (self.get_nt_type(v).to_string(), extra_src));
1455 }
1456 syns.push(v);
1457 }
1458 if !self.nt_value[self.start as usize] {
1459 let nu = &nt_name[self.start as usize].0;
1460 src.push(format!("/// Top non-terminal {nu} (has no value)"));
1461 src.push(format!("#[derive(Debug, PartialEq)]"));
1462 src.push(format!("pub struct Syn{nu}();"))
1463 }
1464
1465 if VERBOSE { println!("syns = {syns:?}"); }
1467 src.add_space();
1468 src.push(format!("#[derive(Debug)]"));
1470 src.push(format!("enum SynValue {{ {} }}",
1471 syns.iter().map(|v| format!("{}({})", nt_name[*v as usize].0, self.get_nt_type(*v))).join(", ")));
1472 if !syns.is_empty() {
1473 src.add_space();
1475 src.push("impl SynValue {".to_string());
1476 for v in &syns {
1477 let (nu, _, npl) = &nt_name[*v as usize];
1478 let nt_type = self.get_nt_type(*v);
1479 src.push(format!(" fn get_{npl}(self) -> {nt_type} {{"));
1480 if syns.len() == 1 {
1481 src.push(format!(" let SynValue::{nu}(val) = self;"));
1482 src.push(format!(" val"));
1483 } else {
1484 src.push(format!(" if let SynValue::{nu}(val) = self {{ val }} else {{ panic!() }}"));
1485 }
1486 src.push(format!(" }}"));
1487 }
1488 src.push("}".to_string());
1489 }
1490
1491 let mut src_init = Vec::<Vec<String>>::new();
1493 let mut src_exit = Vec::<Vec<String>>::new();
1494 let mut src_listener_decl = Vec::<String>::new();
1495 let mut src_wrapper_impl = Vec::<String>::new();
1496 let mut exit_fixer = NameFixer::new();
1497 let mut span_init = HashSet::<VarId>::new();
1498
1499 for group in self.nt_parent.iter().filter(|vf| !vf.is_empty()) {
1501 let parent_nt = group[0] as usize;
1502 let parent_flags = self.parsing_table.flags[parent_nt];
1503 let parent_has_value = self.nt_value[parent_nt];
1504 let mut exit_alt_done = HashSet::<VarId>::new();
1505 let mut init_nt_done = HashSet::<VarId>::new();
1506 if VERBOSE { println!("- GROUP {}, parent has {}value, parent flags: {}",
1507 group.iter().map(|v| Symbol::NT(*v).to_str(self.get_symbol_table())).join(", "),
1508 if parent_has_value { "" } else { "no " },
1509 ruleflag::to_string(parent_flags).join(" | ")); }
1510 let is_ambig = parent_flags & ruleflag::PARENT_AMBIGUITY != 0;
1511 let ambig_children = if is_ambig {
1512 group.iter().filter(|&v| self.nt_has_any_flags(*v, ruleflag::CHILD_L_RECURSION)).cloned().to_vec()
1513 } else {
1514 Vec::new()
1515 };
1516 let mut ambig_op_alts = BTreeMap::<AltId, Vec<AltId>>::new();
1517 for (id, f) in ambig_children.iter() .flat_map(|v| self.gather_alts(*v))
1519 .filter_map(|f| self.parsing_table.alts[f as usize].1.get_ambig_alt_id().map(|id| (id, f)))
1520 {
1521 ambig_op_alts.entry(id).or_default().push(f);
1522 }
1523 if VERBOSE && is_ambig {
1524 println!("- ambig children vars: {}", ambig_children.iter().map(|v| Symbol::NT(*v).to_str(self.get_symbol_table())).join(", "));
1525 println!(" ambig op alts: {ambig_op_alts:?}");
1526 }
1527 for var in group {
1528 let sym_nt = Symbol::NT(*var);
1529 let nt = *var as usize;
1530 let flags = self.parsing_table.flags[nt];
1531 let is_ambig_1st_child = is_ambig && flags & ruleflag::CHILD_L_RECURSION != 0 && ambig_children.get(0) == Some(var);
1533 let is_ambig_redundant = is_ambig && flags & ruleflag::L_RECURSION != 0 && !is_ambig_1st_child;
1536 let has_value = self.nt_value[nt];
1537 let nt_comment = format!("// {}", sym_nt.to_str(self.get_symbol_table()));
1538 let is_parent = nt == parent_nt;
1539 let is_child_repeat_lform = self.nt_has_all_flags(*var, ruleflag::CHILD_REPEAT | ruleflag::L_FORM);
1540 if VERBOSE { println!(" - VAR {}, has {}value, flags: {}",
1541 sym_nt.to_str(self.get_symbol_table()),
1542 if has_value { "" } else { "no " },
1543 ruleflag::to_string(flags).join(" | ")); }
1544
1545 if self.parsing_table.parent[nt].is_none() {
1548 let (nu, nl, npl) = &nt_name[nt];
1549 init_nt_done.insert(*var);
1550 if has_value && self.nt_has_all_flags(*var, ruleflag::R_RECURSION | ruleflag::L_FORM) {
1551 span_init.insert(*var);
1552 src_wrapper_impl.push(String::new());
1553 src_listener_decl.push(format!(" fn init_{npl}(&mut self) -> {};", self.get_nt_type(nt as VarId)));
1554 src_init.push(vec![format!(" {nt} => self.init_{nl}(),"), nt_comment]);
1555 src_wrapper_impl.push(format!(" fn init_{npl}(&mut self) {{"));
1556 src_wrapper_impl.push(format!(" let val = self.listener.init_{nl}();"));
1557 src_wrapper_impl.push(format!(" self.stack.push(SynValue::{nu}(val));"));
1558 src_wrapper_impl.push(format!(" }}"));
1559 } else {
1560 src_listener_decl.push(format!(" fn init_{npl}(&mut self) {{}}"));
1561 src_init.push(vec![format!(" {nt} => self.listener.init_{npl}(),"), nt_comment]);
1562 }
1563 } else {
1564 if flags & ruleflag::CHILD_REPEAT != 0 {
1565 span_init.insert(*var);
1566 if has_value {
1567 init_nt_done.insert(*var);
1568 src_wrapper_impl.push(String::new());
1569 let (nu, _nl, npl) = &nt_name[nt];
1570 src_init.push(vec![format!(" {nt} => self.init_{npl}(),"), nt_comment]);
1571 src_wrapper_impl.push(format!(" fn init_{npl}(&mut self) {{"));
1572 if flags & ruleflag::L_FORM != 0 {
1573 src_wrapper_impl.push(format!(" let val = self.listener.init_{npl}();"));
1574 src_listener_decl.push(format!(" fn init_{npl}(&mut self) -> {};", self.get_nt_type(nt as VarId)));
1575 } else {
1576 src_wrapper_impl.push(format!(" let val = Syn{nu}(Vec::new());"));
1577 }
1578 src_wrapper_impl.push(format!(" self.stack.push(SynValue::{nu}(val));"));
1579 src_wrapper_impl.push(format!(" }}"));
1580 } else {
1581 if flags & ruleflag::L_FORM != 0 {
1582 init_nt_done.insert(*var);
1583 let (_nu, _nl, npl) = &nt_name[nt];
1584 src_init.push(vec![format!(" {nt} => self.listener.init_{npl}(),"), nt_comment]);
1585 src_listener_decl.push(format!(" fn init_{npl}(&mut self) {{}}"));
1586 } else {
1587 }
1589 }
1590 } else if flags & (ruleflag::CHILD_L_RECURSION | ruleflag::CHILD_L_FACT) != 0 {
1591 } else {
1593 }
1595 }
1596
1597 fn get_var_param(item: &ItemInfo, indices: &HashMap<Symbol, Vec<String>>, non_indices: &mut Vec<String>) -> Option<String> {
1600 if let Some(index) = item.index {
1601 if index == 0 {
1602 Some(format!("{}: [{}]", item.name, indices[&item.sym].iter().rev().join(", ")))
1603 } else {
1604 None
1605 }
1606 } else {
1607 let name = non_indices.pop().unwrap();
1608 if name == item.name {
1609 Some(name)
1610 } else {
1611 Some(format!("{}: {name}", item.name))
1612 }
1613 }
1614 }
1615
1616 fn get_var_params(item_info: &[ItemInfo], skip: usize, indices: &HashMap<Symbol, Vec<String>>, non_indices: &mut Vec<String>) -> String {
1617 item_info.iter().skip(skip).filter_map(|item| {
1618 get_var_param(item, indices, non_indices)
1619 }).join(", ")
1620 }
1621
1622 if !is_ambig_redundant && flags & ruleflag::CHILD_L_FACT == 0 {
1624 let (nu, _nl, npl) = &nt_name[nt];
1625 let (pnu, _pnl, pnpl) = &nt_name[parent_nt];
1626 if VERBOSE { println!(" {nu} (parent {pnu})"); }
1627 let no_method = !has_value && flags & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT;
1628 let (fnpl, fnu, fnt, f_valued) = if is_ambig_1st_child {
1629 (pnpl, pnu, parent_nt, parent_has_value) } else {
1631 (npl, nu, nt, has_value)
1632 };
1633 if is_parent || (is_child_repeat_lform && !no_method) || is_ambig_1st_child {
1634 let extra_param = if self.gen_span_params { ", spans: Vec<PosSpan>" } else { "" };
1635 if f_valued {
1636 src_listener_decl.push(format!(" fn exit_{fnpl}(&mut self, ctx: Ctx{fnu}{extra_param}) -> {};", self.get_nt_type(fnt as VarId)));
1637 } else {
1638 src_listener_decl.push(format!(" #[allow(unused)]"));
1639 src_listener_decl.push(format!(" fn exit_{fnpl}(&mut self, ctx: Ctx{fnu}{extra_param}) {{}}"));
1640 }
1641 }
1642 let all_exit_alts = if is_ambig_1st_child {
1643 ambig_op_alts.values().rev().map(|v| v[0]).to_vec()
1644 } else {
1645 self.gather_alts(nt as VarId)
1646 };
1647 let (last_it_alts, exit_alts) = all_exit_alts.into_iter()
1648 .partition::<Vec<_>, _>(|f| (flags & ruleflag::CHILD_L_RECURSION != 0
1650 || flags & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM | ruleflag::REPEAT_PLUS) == ruleflag::CHILD_REPEAT | ruleflag::L_FORM)
1651 && self.is_alt_sym_empty(*f));
1652 if VERBOSE {
1653 println!(" no_method: {no_method}, exit alts: {}", exit_alts.iter().join(", "));
1654 if !last_it_alts.is_empty() {
1655 println!(" last_it_alts: {}", last_it_alts.iter().join(", "));
1656 }
1657 }
1658 for f in &exit_alts {
1659 exit_alt_done.insert(*f);
1660 }
1661 let inter_or_exit_name = if flags & ruleflag::PARENT_L_RECURSION != 0 { format!("inter_{npl}") } else { format!("exit_{npl}") };
1662 let fn_name = exit_fixer.get_unique_name(inter_or_exit_name.clone());
1663 let (is_alt_id, choices) = self.make_match_choices(&exit_alts, &fn_name, flags, no_method, None);
1664 if VERBOSE { println!(" choices: {}", choices.iter().map(|s| s.trim()).join(" ")); }
1665 let comments = exit_alts.iter().map(|f| {
1666 let (v, pf) = &self.parsing_table.alts[*f as usize];
1667 if MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS {
1668 format!("// {}", self.full_alt_str(*f, None, false))
1669 } else {
1670 format!("// {}", pf.to_rule_str(*v, self.get_symbol_table(), self.parsing_table.flags[*v as usize]))
1671 }
1672 }).to_vec();
1673 src_exit.extend(choices.into_iter().zip(comments).map(|(a, b)| vec![a, b]));
1674 if is_ambig_1st_child {
1675 for (a_id, dup_alts) in ambig_op_alts.values().rev().filter_map(|v| if v.len() > 1 { v.split_first() } else { None }) {
1676 let (_, choices) = self.make_match_choices(dup_alts, &fn_name, 0, no_method, Some(*a_id));
1679 let comments = dup_alts.iter()
1680 .map(|a| {
1681 let (v, alt) = &pinfo.alts[*a as usize];
1682 format!("// {} (duplicate of {a_id})", alt.to_rule_str(*v, self.get_symbol_table(), 0))
1683 }).to_vec();
1684 src_exit.extend(choices.into_iter().zip(comments).map(|(a, b)| vec![a, b]));
1685 for a in dup_alts {
1686 exit_alt_done.insert(*a);
1687 }
1688 }
1689 }
1690 if !no_method {
1691 src_wrapper_impl.push(String::new());
1692 src_wrapper_impl.push(format!(" fn {fn_name}(&mut self{}) {{", if is_alt_id { ", alt_id: AltId" } else { "" }));
1693 }
1694 if flags & (ruleflag::CHILD_REPEAT | ruleflag::L_FORM) == ruleflag::CHILD_REPEAT {
1695
1696 fn source_lets(infos: &[ItemInfo], nt_name: &[(String, String, String)], indent: &str) -> (Vec<String>, String) {
1697 let mut src_let = vec![];
1698 let mut var_fixer = NameFixer::new();
1699 let mut indices = HashMap::<Symbol, Vec<String>>::new();
1700 let mut non_indices = Vec::<String>::new();
1701 for item in infos.iter().rev() {
1702 let varname = if let Some(index) = item.index {
1703 let name = var_fixer.get_unique_name(format!("{}_{}", item.name, index + 1));
1704 indices.entry(item.sym).and_modify(|v| v.push(name.clone())).or_insert(vec![name.clone()]);
1705 name
1706 } else {
1707 let name = item.name.clone();
1708 non_indices.push(name.clone());
1709 name
1710 };
1711 if let Symbol::NT(v) = item.sym {
1712 src_let.push(format!("{indent} let {varname} = self.stack.pop().unwrap().get_{}();", nt_name[v as usize].2));
1713 } else {
1714 src_let.push(format!("{indent} let {varname} = self.stack_t.pop().unwrap();"));
1715 }
1716 }
1717
1718 let is_simple = infos.len() == 1 && infos[0].sym.is_t(); let src_struct = if is_simple {
1720 non_indices[0].clone()
1721 } else {
1722 if infos.len() == 1 {
1723 get_var_param(&infos[0], &indices, &mut non_indices).unwrap()
1724 } else {
1725 get_var_params(&infos, 0, &indices, &mut non_indices)
1726 }
1727 };
1728 (src_let, src_struct)
1729 }
1730
1731 if has_value {
1732 let endpoints = child_repeat_endpoints.get(var).unwrap();
1733 let is_plus = flags & ruleflag::REPEAT_PLUS != 0;
1734 let vec_name = if is_plus { "plus_acc" } else { "star_acc" };
1735 let val_name = if endpoints.len() > 1 {
1736 src_wrapper_impl.push(format!(" let {} = match alt_id {{", self.gen_match_item("val".to_string(), || "n".to_string())));
1738 for (i, &a_id) in endpoints.into_iter().index_start(1) {
1739 let infos = &item_info[a_id as usize];
1740 src_wrapper_impl.push(format!(" {a_id}{} => {{", if is_plus { format!(" | {}", a_id + 1) } else { String::new() }));
1741 let (src_let, src_struct) = source_lets(infos, &nt_name, " ");
1742 src_wrapper_impl.extend(src_let);
1743 let return_value = self.gen_match_item(
1744 format!("Syn{nu}Item::V{i} {{ {} }}", src_struct),
1745 || self.span_nbrs[a_id as usize].to_string());
1746 src_wrapper_impl.push(format!(" {return_value}"));
1747 src_wrapper_impl.push(format!(" }}"));
1748 }
1749 src_wrapper_impl.push(format!(" _ => panic!(\"unexpected alt id {{alt_id}} in fn {fn_name}\"),"));
1750 src_wrapper_impl.push(format!(" }};"));
1751 "val".to_string()
1752 } else {
1753 let a_id = endpoints[0];
1755 let infos = &item_info[a_id as usize];
1756 let (src_let, src_struct) = source_lets(infos, &nt_name, "");
1757 src_wrapper_impl.extend(src_let);
1758 if self.gen_span_params {
1759 src_wrapper_impl.push(format!(" let n = {};", self.span_nbrs[a_id as usize]));
1760 }
1761 if infos.len() == 1 {
1762 let val_name = infos[0].name.clone();
1764 val_name
1765 } else {
1766 src_wrapper_impl.push(format!(" let val = Syn{nu}Item {{ {} }};", src_struct));
1768 "val".to_string()
1769 }
1770 };
1771 src_wrapper_impl.push(format!(" let Some(SynValue::{nu}(Syn{nu}({vec_name}))) = self.stack.last_mut() else {{"));
1772 src_wrapper_impl.push(format!(" panic!(\"unexpected Syn{nu} item on wrapper stack\");"));
1773 src_wrapper_impl.push(format!(" }};"));
1774 src_wrapper_impl.push(format!(" {vec_name}.push({val_name});"));
1775 if self.gen_span_params {
1776 src_wrapper_impl.extend(FOLD_SPAN_CODE.into_iter().map(|s| s.to_string()).to_vec());
1781 }
1782 }
1783 } else {
1784 assert!(!no_method, "no_method is not expected here (only used in +* with no lform)");
1785 let has_last_flag = is_child_repeat_lform && flags & ruleflag::REPEAT_PLUS != 0; let (mut last_alt_ids, exit_alts): (Vec<AltId>, Vec<AltId>) = exit_alts.into_iter().partition(|i| alt_info[*i as usize].is_none());
1787 let fnu = if is_child_repeat_lform { nu } else { pnu }; let fnpl = if is_child_repeat_lform { npl } else { pnpl }; let a_has_value = if is_child_repeat_lform { has_value } else { parent_has_value };
1790 let is_single = exit_alts.len() == 1;
1791 let indent = if is_single { " " } else { " " };
1792 if !is_single {
1793 if self.gen_span_params {
1794 src_wrapper_impl.push(format!(" let (n, ctx) = match alt_id {{"));
1795 } else {
1796 src_wrapper_impl.push(format!(" let ctx = match alt_id {{"));
1797 }
1798 }
1799 if VERBOSE { println!(" exit_alts -> {exit_alts:?}, last_alt_id -> {last_alt_ids:?}"); }
1800 for a in exit_alts {
1801 if VERBOSE {
1802 println!(" - ALTERNATIVE {a}: {} -> {}",
1803 Symbol::NT(*var).to_str(self.get_symbol_table()),
1804 self.parsing_table.alts[a as usize].1.to_str(self.get_symbol_table()));
1805 }
1806 let mut var_fixer = NameFixer::new();
1807 let mut indices = HashMap::<Symbol, Vec<String>>::new();
1808 let mut non_indices = Vec::<String>::new();
1809 let last_alt_id_maybe = if last_alt_ids.is_empty() { None } else { Some(last_alt_ids.remove(0)) };
1810 if !is_single {
1811 let last_alt_choice = if let Some(last_alt_id) = last_alt_id_maybe { format!(" | {last_alt_id}") } else { String::new() };
1812 src_wrapper_impl.push(format!(" {a}{last_alt_choice} => {{", ));
1813 }
1814 for item in item_info[a as usize].iter().rev() {
1815 let varname = if let Some(index) = item.index {
1816 let name = var_fixer.get_unique_name(format!("{}_{}", item.name, index + 1));
1817 indices.entry(item.sym).and_modify(|v| v.push(name.clone())).or_insert(vec![name.clone()]);
1818 name
1819 } else {
1820 let name = item.name.clone();
1821 non_indices.push(name.clone());
1822 name
1823 };
1824 if item.sym.is_empty() {
1825 assert!(has_last_flag);
1826 src_wrapper_impl.push(format!("{indent}let {varname} = alt_id == {};", last_alt_id_maybe.unwrap()));
1827 } else if let Symbol::NT(v) = item.sym {
1828 src_wrapper_impl.push(format!("{indent}let {varname} = self.stack.pop().unwrap().get_{}();",
1829 nt_name[v as usize].2));
1830 } else {
1831 src_wrapper_impl.push(format!("{indent}let {varname} = self.stack_t.pop().unwrap();"));
1832 }
1833 }
1834 let ctx_params = get_var_params(&item_info[a as usize], 0, &indices, &mut non_indices);
1835 let ctx = if ctx_params.is_empty() {
1836 format!("Ctx{fnu}::{}", alt_info[a as usize].as_ref().unwrap().1)
1837 } else {
1838 format!("Ctx{fnu}::{} {{ {ctx_params} }}", alt_info[a as usize].as_ref().unwrap().1)
1839 };
1840 if is_single {
1841 src_wrapper_impl.push(format!(" let ctx = {ctx};"));
1842 if self.gen_span_params {
1843 src_wrapper_impl.push(format!(" let n = {};", self.span_nbrs[a as usize]));
1844 src_wrapper_impl.extend(FOLD_SPAN_CODE.into_iter().map(|s| s.to_string()).to_vec());
1845
1846 }
1847 src_wrapper_impl.push(format!(
1848 " {}self.listener.exit_{fnpl}(ctx{});",
1849 if a_has_value { "let val = " } else { "" },
1850 if self.gen_span_params { ", spans" } else { "" }));
1851 if a_has_value {
1852 src_wrapper_impl.push(format!(" self.stack.push(SynValue::{fnu}(val));"));
1853 }
1854 } else {
1855 let ctx_value = self.gen_match_item(ctx, || self.span_nbrs[a as usize].to_string());
1856 src_wrapper_impl.push(format!("{indent}{ctx_value}"));
1857 src_wrapper_impl.push(format!(" }}"));
1858 }
1859 }
1860 if !is_single {
1861 src_wrapper_impl.push(format!(" _ => panic!(\"unexpected alt id {{alt_id}} in fn {fn_name}\")"));
1862 src_wrapper_impl.push(format!(" }};"));
1863 if self.gen_span_params {
1864 src_wrapper_impl.extend(FOLD_SPAN_CODE.into_iter().map(|s| s.to_string()).to_vec());
1865 }
1866 src_wrapper_impl.push(format!(
1867 " {}self.listener.exit_{fnpl}(ctx{});",
1868 if a_has_value { "let val = " } else { "" },
1869 if self.gen_span_params { ", spans" } else { "" }));
1870 if a_has_value {
1871 src_wrapper_impl.push(format!(" self.stack.push(SynValue::{fnu}(val));"));
1872 }
1873 }
1874 }
1875 if !no_method {
1876 src_wrapper_impl.push(format!(" }}"));
1877 }
1878 for a in last_it_alts {
1879 if let Some(info) = item_info[a as usize].get(0) {
1880 if VERBOSE { println!("last_it_alts: {a}, info = {info:?}"); }
1881 let (variant, _, fnname) = &nt_name[info.owner as usize];
1882 let typ = self.get_nt_type(info.owner);
1883 let varname = &info.name;
1884 src_listener_decl.push(format!(" #[allow(unused)]"));
1885 src_listener_decl.push(format!(" fn exitloop_{fnname}(&mut self, {varname}: &mut {typ}) {{}}"));
1886 let (v, pf) = &self.parsing_table.alts[a as usize];
1887 let alt_str = if MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS {
1888 self.full_alt_str(a, None, false)
1889 } else {
1890 pf.to_rule_str(*v, self.get_symbol_table(), self.parsing_table.flags[*v as usize])
1891 };
1892 src_exit.push(vec![format!(" {a} => self.exitloop_{fnpl}(),"), format!("// {alt_str}")]);
1893 exit_alt_done.insert(a);
1894 src_wrapper_impl.push(String::new());
1895 src_wrapper_impl.push(format!(" fn exitloop_{fnpl}(&mut self) {{"));
1896 src_wrapper_impl.push(format!(" let SynValue::{variant}({varname}) = self.stack.last_mut().unwrap(){};",
1897 if syns.len() > 1 { " else { panic!() }" } else { "" }));
1898 src_wrapper_impl.push(format!(" self.listener.exitloop_{fnname}({varname});"));
1899 src_wrapper_impl.push(format!(" }}"));
1900 }
1901 }
1902 }
1903 }
1904 for a in group.iter().flat_map(|v| &self.var_alts[*v as usize]).filter(|a| !exit_alt_done.contains(a)) {
1905 let is_called = self.opcodes[*a as usize].iter().any(|o| *o == OpCode::Exit(*a));
1906 let (v, alt) = &self.parsing_table.alts[*a as usize];
1907 let alt_str = if MATCH_COMMENTS_SHOW_DESCRIPTIVE_ALTS {
1908 self.full_alt_str(*a, None, false)
1909 } else {
1910 alt.to_rule_str(*v, self.get_symbol_table(), self.parsing_table.flags[*v as usize])
1911 };
1912 let comment = format!("// {alt_str} ({})", if is_called { "not used" } else { "never called" });
1913 if is_called {
1914 src_exit.push(vec![format!(" {a} => {{}}"), comment]);
1915 } else {
1916 src_exit.push(vec![format!(" /* {a} */"), comment]);
1917 }
1918 }
1919 let mut seg_init = Segments::from_iter(
1921 group.into_iter()
1922 .filter_map(|&v| if !init_nt_done.contains(&v) { Some(Seg(v as u32, v as u32)) } else { None })
1923 );
1924 seg_init.normalize();
1925 for seg in seg_init {
1926 let Seg(a, b) = seg;
1927 if a == b {
1928 src_init.push(vec![format!(" {a} => {{}}"), format!("// {}", Symbol::NT(a as VarId).to_str(self.get_symbol_table()))]);
1929 } else {
1930 src_init.push(vec![
1931 format!(" {a}{}{b} => {{}}", if b == a + 1 { " | " } else { " ..= " }),
1932 format!("// {}", (a..=b).map(|v| Symbol::NT(v as VarId).to_str(self.get_symbol_table())).join(", "))
1933 ]);
1934 }
1935 }
1936 }
1937
1938 src.add_space();
1940 src.push(format!("pub trait {}Listener {{", self.name));
1941 src.push(format!(" /// Checks if the listener requests an abort. This happens if an error is too difficult to recover from"));
1942 src.push(format!(" /// and may corrupt the stack content. In that case, the parser immediately stops and returns `ParserError::AbortRequest`."));
1943 src.push(format!(" fn check_abort_request(&self) -> bool {{ false }}"));
1944 src.push(format!(" fn get_mut_log(&mut self) -> &mut impl Logger;"));
1945 let extra_param = if self.gen_span_params { ", span: PosSpan" } else { "" };
1946 if self.nt_value[self.start as usize] || self.gen_span_params {
1947 src.push(format!(" #[allow(unused)]"));
1948 }
1949 if self.nt_value[self.start as usize] {
1950 src.push(format!(" fn exit(&mut self, {}: {}{extra_param}) {{}}", nt_name[self.start as usize].2, self.get_nt_type(self.start)));
1951 } else {
1952 src.push(format!(" fn exit(&mut self{extra_param}) {{}}"));
1953 }
1954 src.extend(src_listener_decl);
1962 src.push(format!("}}"));
1963
1964 src.add_space();
1966 src.push(format!("pub struct Wrapper<T> {{"));
1967 src.push(format!(" verbose: bool,"));
1968 src.push(format!(" listener: T,"));
1969 src.push(format!(" stack: Vec<SynValue>,"));
1970 src.push(format!(" max_stack: usize,"));
1971 src.push(format!(" stack_t: Vec<String>,"));
1972 if self.gen_span_params {
1973 src.push(format!(" stack_span: Vec<PosSpan>,"));
1974 }
1975 src.push(format!("}}"));
1976 src.push(format!(""));
1977 src.push(format!("impl<T: {}Listener> ListenerWrapper for Wrapper<T> {{", self.name));
1978 src.push(format!(" fn switch(&mut self, call: Call, nt: VarId, alt_id: AltId, t_data: Option<Vec<String>>) {{"));
1979 src.push(format!(" if self.verbose {{"));
1980 src.push(format!(" println!(\"switch: call={{call:?}}, nt={{nt}}, alt={{alt_id}}, t_data={{t_data:?}}\");"));
1981 src.push(format!(" }}"));
1982 src.push(format!(" if let Some(mut t_data) = t_data {{"));
1983 src.push(format!(" self.stack_t.append(&mut t_data);"));
1984 src.push(format!(" }}"));
1985 src.push(format!(" match call {{"));
1986 src.push(format!(" Call::Enter => {{"));
1987 if self.gen_span_params {
1988 let mut seg_span = Segments::from_iter(span_init.into_iter().map(|v| Seg(v as u32, v as u32)));
1990 seg_span.normalize();
1991 let pattern = seg_span.into_iter().map(|Seg(a, b)| {
1992 if a == b {
1993 a.to_string()
1994 } else if b == a + 1 {
1995 format!("{a} | {b}")
1996 } else {
1997 format!("{a} ..= {b}")
1998 }
1999 }).join(" | ");
2000 if !pattern.is_empty() {
2001 src.push(format!(" if matches!(nt, {pattern}) {{"));
2002 src.push(format!(" self.stack_span.push(PosSpan::empty());"));
2003 src.push(format!(" }}"));
2004 }
2005 }
2006 src.push(format!(" match nt {{"));
2007 src.extend(columns_to_str(src_init, Some(vec![64, 0])));
2013 src.push(format!(" _ => panic!(\"unexpected enter nonterminal id: {{nt}}\")"));
2014 src.push(format!(" }}"));
2015 src.push(format!(" }}"));
2016 src.push(format!(" Call::Loop => {{}}"));
2017 src.push(format!(" Call::Exit => {{"));
2018 src.push(format!(" match alt_id {{"));
2019 src.extend(columns_to_str(src_exit, Some(vec![64, 0])));
2027 src.push(format!(" _ => panic!(\"unexpected exit alternative id: {{alt_id}}\")"));
2028 src.push(format!(" }}"));
2029 src.push(format!(" }}"));
2030 src.push(format!(" Call::End => {{"));
2031 if self.nt_value[self.start as usize] {
2032 src.push(format!(" self.exit();"));
2033 } else {
2034 if self.gen_span_params {
2035 src.push(format!(" let span = self.stack_span.pop().unwrap();"));
2036 src.push(format!(" self.listener.exit(span);"));
2037 } else {
2038 src.push(format!(" self.listener.exit();"));
2039 }
2040 }
2041 src.push(format!(" }}"));
2042 src.push(format!(" }}"));
2043 src.push(format!(" self.max_stack = std::cmp::max(self.max_stack, self.stack.len());"));
2044 src.push(format!(" if self.verbose {{"));
2045 src.push(format!(" println!(\"> stack_t: {{}}\", self.stack_t.join(\", \"));"));
2046 src.push(format!(" println!(\"> stack: {{}}\", self.stack.iter().map(|it| format!(\"{{it:?}}\")).collect::<Vec<_>>().join(\", \"));"));
2047 src.push(format!(" }}"));
2048 src.push(format!(" }}"));
2049 src.push(format!(""));
2050 src.push(format!(" fn check_abort_request(&self) -> bool {{"));
2051 src.push(format!(" self.listener.check_abort_request()"));
2052 src.push(format!(" }}"));
2053 src.push(format!(""));
2054 src.push(format!(" fn get_mut_log(&mut self) -> &mut impl Logger {{"));
2055 src.push(format!(" self.listener.get_mut_log()"));
2056 src.push(format!(" }}"));
2057 if self.gen_span_params {
2058 src.push(format!(""));
2059 src.push(format!(" fn push_span(&mut self, span: PosSpan) {{"));
2060 src.push(format!(" self.stack_span.push(span);"));
2061 src.push(format!(" }}"));
2062 }
2063 src.push(format!(""));
2064 src.push(format!(" fn is_stack_empty(&self) -> bool {{"));
2065 src.push(format!(" self.stack.is_empty()"));
2066 src.push(format!(" }}"));
2067 src.push(format!(""));
2068 src.push(format!(" fn is_stack_t_empty(&self) -> bool {{"));
2069 src.push(format!(" self.stack_t.is_empty()"));
2070 src.push(format!(" }}"));
2071 if self.gen_span_params {
2072 src.push(format!(""));
2073 src.push(format!(" fn is_stack_span_empty(&self) -> bool {{"));
2074 src.push(format!(" self.stack_span.is_empty()"));
2075 src.push(format!(" }}"));
2076 }
2077 src.push(format!("}}"));
2078
2079 src.add_space();
2080 src.push(format!("impl<T: {}Listener> Wrapper<T> {{", self.name));
2081 src.push(format!(" pub fn new(listener: T, verbose: bool) -> Self {{"));
2082 src.push(format!(
2083 " Wrapper {{ verbose, listener, stack: Vec::new(), max_stack: 0, stack_t: Vec::new(){} }}",
2084 if self.gen_span_params { ", stack_span: Vec::new()" } else { "" }
2085 ));
2086 src.push(format!(" }}"));
2087 src.push(format!(""));
2088 src.push(format!(" pub fn get_listener(&self) -> &T {{"));
2089 src.push(format!(" &self.listener"));
2090 src.push(format!(" }}"));
2091 src.push(format!(""));
2092 src.push(format!(" pub fn get_listener_mut(&mut self) -> &mut T {{"));
2093 src.push(format!(" &mut self.listener"));
2094 src.push(format!(" }}"));
2095 src.push(format!(""));
2096 src.push(format!(" pub fn give_listener(self) -> T {{"));
2097 src.push(format!(" self.listener"));
2098 src.push(format!(" }}"));
2099 src.push(format!(""));
2100 src.push(format!(" pub fn set_verbose(&mut self, verbose: bool) {{"));
2101 src.push(format!(" self.verbose = verbose;"));
2102 src.push(format!(" }}"));
2103 if self.nt_value[self.start as usize] {
2104 src.push(format!(""));
2105 src.push(format!(" fn exit(&mut self) {{"));
2106 let (_nu, nl, npl) = &nt_name[self.start as usize];
2107 src.push(format!(" let {nl} = self.stack.pop().unwrap().get_{npl}();"));
2108 if self.gen_span_params {
2109 src.push(format!(" let span = self.stack_span.pop().unwrap();"));
2110 }
2111 src.push(format!(" self.listener.exit({nl}{});", if self.gen_span_params { ", span" } else { "" }));
2112 src.push(format!(" }}"));
2113 }
2114src.extend(src_wrapper_impl);
2134 src.push(format!("}}"));
2135 self.log = log;
2136
2137 src
2138 }
2139}
2140
2141impl LogReader for ParserGen {
2142 type Item = BufLog;
2143
2144 fn get_log(&self) -> &Self::Item {
2145 &self.log
2146 }
2147
2148 fn give_log(self) -> Self::Item {
2149 self.log
2150 }
2151}
2152
2153impl HasBuildErrorSource for ParserGen {
2154 const SOURCE: BuildErrorSource = BuildErrorSource::ParserGen;
2155}
2156
2157impl<T> BuildFrom<ProdRuleSet<T>> for ParserGen where ProdRuleSet<LL1>: BuildFrom<ProdRuleSet<T>> {
2158 fn build_from(mut rules: ProdRuleSet<T>) -> Self {
2164 let name = rules.name.take().unwrap_or(DEFAULT_LISTENER_NAME.to_string());
2165 ParserGen::build_from_rules(rules, name)
2166 }
2167}
2168
2169pub fn print_items(builder: &ParserGen, indent: usize, show_symbols: bool, show_span: bool) {
2173 let tbl = builder.get_symbol_table();
2174 let fields = (0..builder.parsing_table.alts.len())
2175 .filter_map(|a| {
2176 let a_id = a as AltId;
2177 let (v, alt) = &builder.parsing_table.alts[a];
2178 let ops = &builder.opcodes[a];
2179 if let Some(it) = builder.item_ops.get(&a_id) {
2180 let mut cols = vec![];
2181 if show_symbols {
2182 let symbols = format!("symbols![{}]", it.iter().map(|s| s.to_macro_item()).join(", "));
2183 let value = if show_span {
2184 assert!(builder.gen_span_params, "ParserGen is not configured for spans");
2185 format!("({}, {symbols})", builder.span_nbrs[a_id as usize])
2186 } else {
2187 symbols
2188 };
2189 cols.push(format!("{a_id} => {value},"));
2190 }
2191 cols.extend([
2192 format!("// {a_id:2}: {} -> {}", Symbol::NT(*v).to_str(tbl), alt.iter().map(|s| s.to_str_quote(tbl)).join(" ")),
2193 format!("| {}", ops.into_iter().map(|s| s.to_str_quote(tbl)).join(" ")),
2194 format!("| {}", &builder.span_nbrs[a_id as usize]),
2195 format!("| {}", it.iter().map(|s| s.to_str(tbl)).join(" ")),
2196 ]);
2197 Some(cols)
2198 } else {
2199 None
2200 }
2201 }).to_vec();
2202 let widths = if show_symbols { vec![40, 0, 0, 0, 0] } else { vec![16, 0, 0, 0] };
2203 for l in columns_to_str(fields, Some(widths)) {
2204 println!("{:indent$}{l}", "", indent = indent)
2205 }
2206}
2207
2208pub fn print_flags(builder: &ParserGen, indent: usize) {
2209 let tbl = builder.get_symbol_table();
2210 let prefix = format!("{:width$}//", "", width = indent);
2211 let nt_flags = builder.get_parsing_table().flags.iter().index().filter_map(|(nt, &f)|
2212 if f != 0 { Some(format!("{prefix} - {}: {} ({})", Symbol::NT(nt).to_str(tbl), ruleflag::to_string(f).join(" | "), f)) } else { None }
2213 ).join("\n");
2214 let parents = builder.get_parsing_table().parent.iter().index().filter_map(|(c, &par)|
2215 if let Some(p) = par { Some(format!("{prefix} - {} -> {}", Symbol::NT(c).to_str(tbl), Symbol::NT(p).to_str(tbl))) } else { None }
2216 ).join("\n");
2217 if !nt_flags.is_empty() {
2218 println!("{prefix} NT flags:\n{nt_flags}");
2219 }
2220 if !parents.is_empty() {
2221 println!("{prefix} parents:\n{parents}");
2222 }
2223}