1use std::{
4 any::type_name,
5 borrow::Cow,
6 collections::{HashMap, HashSet},
7 env::{current_dir, var},
8 error::Error,
9 fmt::{self, Debug, Write as fmtWrite},
10 fs::{self, create_dir_all, read_to_string, File},
11 hash::Hash,
12 io::{self, Write},
13 marker::PhantomData,
14 path::{Path, PathBuf},
15 sync::Mutex,
16};
17
18use bincode::{deserialize, serialize_into};
19use cfgrammar::{
20 newlinecache::NewlineCache,
21 yacc::{ast::ASTWithValidityInfo, YaccGrammar, YaccKind, YaccOriginalActionKind},
22 RIdx, Spanned, Symbol,
23};
24use filetime::FileTime;
25use lazy_static::lazy_static;
26use lrtable::{from_yacc, statetable::Conflicts, Minimiser, StateGraph, StateTable};
27use num_traits::{AsPrimitive, PrimInt, Unsigned};
28use regex::Regex;
29use serde::{de::DeserializeOwned, Serialize};
30
31use crate::{LexerTypes, RecoveryKind};
32
// Prefixes used to namespace identifiers in the generated code so that they
// cannot collide with user-written action code.
const ACTION_PREFIX: &str = "__gt_";
const GLOBAL_PREFIX: &str = "__GT_";
// Name of the generated enum wrapping each rule's action type, the prefix of
// its per-rule variants, and the name of its hidden lifetime-carrying variant.
const ACTIONS_KIND: &str = "__GtActionsKind";
const ACTIONS_KIND_PREFIX: &str = "Ak";
const ACTIONS_KIND_HIDDEN: &str = "__GtActionsKindHidden";

// Extension given to the generated Rust output file.
const RUST_FILE_EXT: &str = "rs";

// Names of the generated `const` byte arrays holding the serialised grammar
// and state table embedded in the output file.
const GRM_CONST_NAME: &str = "__GRM_DATA";
const STABLE_CONST_NAME: &str = "__STABLE_DATA";
43
lazy_static! {
    // Matches `$<num>` references to production symbols inside user action code.
    static ref RE_DOL_NUM: Regex = Regex::new(r"\$([0-9]+)").unwrap();
    // Output paths a parser has already been generated to during this build;
    // used to reject two parsers being written to the same file.
    static ref GENERATED_PATHS: Mutex<HashSet<PathBuf>> = Mutex::new(HashSet::new());
}
48
/// Error returned when the grammar's state table contains shift/reduce or
/// reduce/reduce conflicts that were not declared via `%expect`/`%expect-rr`.
/// The offending state table is retained so that the conflicts can be reported.
struct CTConflictsError<StorageT: Eq + Hash> {
    stable: StateTable<StorageT>,
}
52
impl<StorageT> fmt::Display for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `conflicts()` is guaranteed `Some` here: this error is only
        // constructed when the state table actually has conflicts.
        let conflicts = self.stable.conflicts().unwrap();
        write!(
            f,
            "CTConflictsError{{{} Reduce/Reduce, {} Shift/Reduce}}",
            conflicts.rr_len(),
            conflicts.sr_len()
        )
    }
}
68
69impl<StorageT> fmt::Debug for CTConflictsError<StorageT>
70where
71 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
72 usize: AsPrimitive<StorageT>,
73{
74 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
75 let conflicts = self.stable.conflicts().unwrap();
76 write!(
77 f,
78 "CTConflictsError{{{} Reduce/Reduce, {} Shift/Reduce}}",
79 conflicts.rr_len(),
80 conflicts.sr_len()
81 )
82 }
83}
84
// Marker impl: no methods overridden; the `Display`/`Debug` impls above
// provide the error message and `source()` defaults to `None`.
impl<StorageT> Error for CTConflictsError<StorageT>
where
    StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
    usize: AsPrimitive<StorageT>,
{
}
91
/// A minimal `Error` wrapper around a plain message string.
struct ErrorString(String);

impl fmt::Display for ErrorString {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl fmt::Debug for ErrorString {
    // Debug output is intentionally the bare message, same as Display.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl Error for ErrorString {}
107
/// Visibility of the generated module (maps onto Rust's `pub` forms).
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Visibility {
    Private,
    Public,
    PublicSuper,
    PublicSelf,
    PublicCrate,
    PublicIn(String),
}

/// The Rust edition the generated code should target.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum RustEdition {
    Rust2015,
    Rust2018,
    Rust2021,
}

impl Visibility {
    /// Render this visibility as the source text placed before the generated
    /// `mod` declaration. Only `PublicIn` needs to allocate.
    fn cow_str(&self) -> Cow<'static, str> {
        match self {
            Visibility::Private => "".into(),
            Visibility::Public => "pub".into(),
            Visibility::PublicSuper => "pub(super)".into(),
            Visibility::PublicSelf => "pub(self)".into(),
            Visibility::PublicCrate => "pub(crate)".into(),
            Visibility::PublicIn(path) => Cow::from(format!("pub(in {})", path)),
        }
    }
}
147
/// A builder which allows one to specify the criteria for statically
/// generating a parser at compile time.
pub struct CTParserBuilder<'a, LexerTypesT: LexerTypes>
where
    LexerTypesT::StorageT: Eq + Hash,
    usize: AsPrimitive<LexerTypesT::StorageT>,
{
    // Path of the input `.y` grammar file.
    grammar_path: Option<PathBuf>,
    // Path the generated `.rs` file will be written to.
    output_path: Option<PathBuf>,
    // Name of the generated module; derived from the grammar file name if `None`.
    mod_name: Option<&'a str>,
    // Error-recovery algorithm compiled into the generated parser.
    recoverer: RecoveryKind,
    // The Yacc variant of the input grammar; must be set before building.
    yacckind: Option<YaccKind>,
    // If true, undeclared shift/reduce or reduce/reduce conflicts fail the build.
    error_on_conflicts: bool,
    // If true, grammar warnings are promoted to build errors.
    warnings_are_errors: bool,
    // If true, grammar warnings are printed (via cargo when under a build script).
    show_warnings: bool,
    // Visibility applied to the generated module.
    visibility: Visibility,
    // Rust edition the generated code targets.
    rust_edition: RustEdition,
    // Ties the otherwise-unused `LexerTypesT` type parameter to this struct.
    phantom: PhantomData<LexerTypesT>,
}
170
171impl<
172 'a,
173 StorageT: 'static + Debug + Hash + PrimInt + Serialize + Unsigned,
174 LexerTypesT: LexerTypes<StorageT = StorageT>,
175 > CTParserBuilder<'a, LexerTypesT>
176where
177 usize: AsPrimitive<StorageT>,
178{
    /// Create a new builder with default settings: CPCT+ error recovery,
    /// conflicts and warnings treated as errors, warnings shown, a private
    /// generated module, and Rust 2021 edition output.
    pub fn new() -> Self {
        CTParserBuilder {
            grammar_path: None,
            output_path: None,
            mod_name: None,
            recoverer: RecoveryKind::CPCTPlus,
            yacckind: None,
            error_on_conflicts: true,
            warnings_are_errors: true,
            show_warnings: true,
            visibility: Visibility::Private,
            rust_edition: RustEdition::Rust2021,
            phantom: PhantomData,
        }
    }
215
216 pub fn grammar_in_src_dir<P>(mut self, srcp: P) -> Result<Self, Box<dyn Error>>
233 where
234 P: AsRef<Path>,
235 {
236 if !srcp.as_ref().is_relative() {
237 return Err(format!(
238 "Grammar path '{}' must be a relative path.",
239 srcp.as_ref().to_str().unwrap_or("<invalid UTF-8>")
240 )
241 .into());
242 }
243
244 let mut grmp = current_dir()?;
245 grmp.push("src");
246 grmp.push(srcp.as_ref());
247 self.grammar_path = Some(grmp);
248
249 let mut outp = PathBuf::new();
250 outp.push(var("OUT_DIR").unwrap());
251 outp.push(srcp.as_ref().parent().unwrap().to_str().unwrap());
252 create_dir_all(&outp)?;
253 let mut leaf = srcp
254 .as_ref()
255 .file_name()
256 .unwrap()
257 .to_str()
258 .unwrap()
259 .to_owned();
260 write!(leaf, ".{}", RUST_FILE_EXT).ok();
261 outp.push(leaf);
262 Ok(self.output_path(outp))
263 }
264
    /// Set the input grammar path to `inp`. If this method is used, an
    /// output path must also be set via [`Self::output_path`].
    pub fn grammar_path<P>(mut self, inp: P) -> Self
    where
        P: AsRef<Path>,
    {
        self.grammar_path = Some(inp.as_ref().to_owned());
        self
    }

    /// Set the path the generated `.rs` file will be written to.
    pub fn output_path<P>(mut self, outp: P) -> Self
    where
        P: AsRef<Path>,
    {
        self.output_path = Some(outp.as_ref().to_owned());
        self
    }

    /// Set the name of the generated module; if unset, a name is derived
    /// from the grammar file's stem.
    pub fn mod_name(mut self, mod_name: &'a str) -> Self {
        self.mod_name = Some(mod_name);
        self
    }

    /// Set the visibility of the generated module.
    pub fn visibility(mut self, vis: Visibility) -> Self {
        self.visibility = vis;
        self
    }

    /// Set the error-recovery algorithm the generated parser will use.
    pub fn recoverer(mut self, rk: RecoveryKind) -> Self {
        self.recoverer = rk;
        self
    }

    /// Set the Yacc variant of the input grammar (must be set before building).
    pub fn yacckind(mut self, yk: YaccKind) -> Self {
        self.yacckind = Some(yk);
        self
    }

    /// If `true` (the default), undeclared shift/reduce or reduce/reduce
    /// conflicts cause the build to fail.
    pub fn error_on_conflicts(mut self, b: bool) -> Self {
        self.error_on_conflicts = b;
        self
    }

    /// If `true` (the default), grammar warnings are promoted to build errors.
    pub fn warnings_are_errors(mut self, b: bool) -> Self {
        self.warnings_are_errors = b;
        self
    }

    /// If `true` (the default), grammar warnings are printed (as
    /// `cargo:warning` lines when run from a build script).
    pub fn show_warnings(mut self, b: bool) -> Self {
        self.show_warnings = b;
        self
    }

    /// Set the Rust edition the generated code should target.
    pub fn rust_edition(mut self, edition: RustEdition) -> Self {
        self.rust_edition = edition;
        self
    }
341
    /// Statically compile the grammar at `grammar_path` into Rust, writing
    /// the output to `output_path`, and return a [`CTParser`] describing the
    /// result. If the existing output file is newer than the grammar and
    /// contains an up-to-date cache of the build options, generation is
    /// skipped and the cached output is reused.
    ///
    /// # Panics
    /// If `grammar_path`, `output_path`, or `yacckind` have not been set, or
    /// if `yacckind` is `YaccKind::Eco` (unsupported at compile time).
    pub fn build(self) -> Result<CTParser<StorageT>, Box<dyn Error>> {
        let grmp = self
            .grammar_path
            .as_ref()
            .expect("grammar_path must be specified before processing.");
        let outp = self
            .output_path
            .as_ref()
            .expect("output_path must be specified before processing.");
        let yk = match self.yacckind {
            None => panic!("yacckind must be specified before processing."),
            Some(YaccKind::Original(x)) => YaccKind::Original(x),
            Some(YaccKind::Grmtools) => YaccKind::Grmtools,
            Some(YaccKind::Eco) => panic!("Eco compile-time grammar generation not supported."),
        };

        {
            // Reject two parsers being generated to the same output path.
            // The inner scope ensures the mutex guard is dropped promptly.
            let mut lk = GENERATED_PATHS.lock().unwrap();
            if lk.contains(outp.as_path()) {
                return Err(format!("Generating two parsers to the same path ('{}') is not allowed: use CTParserBuilder::output_path (and, optionally, CTParserBuilder::mod_name) to differentiate them.", &outp.to_str().unwrap()).into());
            }
            lk.insert(outp.clone());
        }

        let inc =
            read_to_string(grmp).map_err(|e| format!("When reading '{}': {e}", grmp.display()))?;
        let ast_validation = ASTWithValidityInfo::new(yk, &inc);
        let warnings = ast_validation.ast().warnings();
        // Format a warning/error with line/column information when its span
        // can be resolved against the grammar source.
        let spanned_fmt = |x: &dyn Spanned, inc: &str, line_cache: &NewlineCache| {
            if let Some((line, column)) =
                line_cache.byte_to_line_num_and_col_num(inc, x.spans()[0].start())
            {
                format!("{} at line {line} column {column}", x)
            } else {
                format!("{}", x)
            }
        };

        let res = YaccGrammar::<StorageT>::new_from_ast_with_validity_info(yk, &ast_validation);
        let grm = match res {
            // Valid grammar, but warnings must be promoted to errors.
            Ok(_) if self.warnings_are_errors && !warnings.is_empty() => {
                let mut line_cache = NewlineCache::new();
                line_cache.feed(&inc);
                return Err(ErrorString(if warnings.len() > 1 {
                    // Multiple warnings: put each on its own indented line.
                    format!(
                        "\n\t{}",
                        warnings
                            .iter()
                            .map(|w| spanned_fmt(w, &inc, &line_cache))
                            .collect::<Vec<_>>()
                            .join("\n\t")
                    )
                } else {
                    spanned_fmt(warnings.first().unwrap(), &inc, &line_cache)
                }))?;
            }
            Ok(grm) => {
                if !warnings.is_empty() {
                    let mut line_cache = NewlineCache::new();
                    line_cache.feed(&inc);
                    for w in warnings {
                        // OUT_DIR being set is used as a proxy for "running
                        // under cargo", in which case warnings are surfaced
                        // through cargo's build-script protocol.
                        if std::env::var("OUT_DIR").is_ok() && self.show_warnings {
                            println!("cargo:warning={}", spanned_fmt(&w, &inc, &line_cache));
                        } else if self.show_warnings {
                            eprintln!("{}", spanned_fmt(&w, &inc, &line_cache));
                        }
                    }
                }
                grm
            }
            Err(errs) => {
                let mut line_cache = NewlineCache::new();
                line_cache.feed(&inc);
                // Report all errors, followed by all warnings, together.
                return Err(ErrorString(if errs.len() + warnings.len() > 1 {
                    format!(
                        "\n\t{}",
                        errs.iter()
                            .map(|e| spanned_fmt(e, &inc, &line_cache))
                            .chain(warnings.iter().map(|w| spanned_fmt(w, &inc, &line_cache)))
                            .collect::<Vec<_>>()
                            .join("\n\t")
                    )
                } else {
                    spanned_fmt(errs.first().unwrap(), &inc, &line_cache)
                }))?;
            }
        };

        // Map token names to their storage-type identifiers: this is what a
        // companion lexer needs in order to agree with the parser.
        let rule_ids = grm
            .tokens_map()
            .iter()
            .map(|(&n, &i)| (n.to_owned(), i.as_storaget()))
            .collect::<HashMap<_, _>>();
        let cache = self.rebuild_cache(&grm);

        // If the output file is newer than the grammar and embeds an
        // identical cache of the current options, reuse it unchanged.
        if let Ok(ref inmd) = fs::metadata(grmp) {
            if let Ok(ref out_rs_md) = fs::metadata(outp) {
                if FileTime::from_last_modification_time(out_rs_md)
                    > FileTime::from_last_modification_time(inmd)
                {
                    if let Ok(outc) = read_to_string(outp) {
                        if outc.contains(&cache) {
                            return Ok(CTParser {
                                regenerated: false,
                                rule_ids,
                                conflicts: None,
                            });
                        }
                    }
                }
            }
        }

        // We are definitely regenerating: remove the old output first so a
        // failed build cannot leave a stale parser behind. Failure to remove
        // (e.g. the file doesn't exist) is deliberately ignored.
        fs::remove_file(outp).ok();

        let (sgraph, stable) = from_yacc(&grm, Minimiser::Pager)?;
        if self.error_on_conflicts {
            if let Some(c) = stable.conflicts() {
                // Conflicts are only an error if they don't exactly match the
                // grammar's %expect / %expect-rr declarations.
                match (grm.expect(), grm.expectrr()) {
                    (Some(i), Some(j)) if i == c.sr_len() && j == c.rr_len() => (),
                    (Some(i), None) if i == c.sr_len() && 0 == c.rr_len() => (),
                    (None, Some(j)) if 0 == c.sr_len() && j == c.rr_len() => (),
                    (None, None) if 0 == c.rr_len() && 0 == c.sr_len() => (),
                    _ => return Err(Box::new(CTConflictsError { stable })),
                }
            }
        }

        let mod_name = match self.mod_name {
            Some(s) => s.to_owned(),
            None => {
                // No module name given: derive one from the grammar file
                // name by stripping every extension (e.g. "a/grm.y" ->
                // "grm") and appending "_y".
                let mut stem = grmp.to_str().unwrap();
                loop {
                    let new_stem = Path::new(stem).file_stem().unwrap().to_str().unwrap();
                    if stem == new_stem {
                        break;
                    }
                    stem = new_stem;
                }
                format!("{}_y", stem)
            }
        };

        self.output_file(&grm, &stable, &mod_name, outp, &cache)?;
        // Retain the grammar/graph/table only when conflicts exist, so that
        // callers can inspect them via CTParser::conflicts().
        let conflicts = if stable.conflicts().is_some() {
            Some((grm, sgraph, stable))
        } else {
            None
        };
        Ok(CTParser {
            regenerated: true,
            rule_ids,
            conflicts,
        })
    }
569
    /// Given the filename `a/b.y`, statically compile the grammar
    /// `src/a/b.y` into `$OUT_DIR/a/b.y.rs` and return a map from token
    /// names to identifiers.
    #[deprecated(
        since = "0.11.0",
        note = "Please use grammar_in_src_dir(), build(), and token_map() instead"
    )]
    #[allow(deprecated)]
    pub fn process_file_in_src(
        &mut self,
        srcp: &str,
    ) -> Result<HashMap<String, StorageT>, Box<dyn Error>> {
        // Input path: <current dir>/src/<srcp>.
        let mut inp = current_dir()?;
        inp.push("src");
        inp.push(srcp);
        // Output path: $OUT_DIR/<srcp's dir>/<srcp's file name>.rs.
        let mut outp = PathBuf::new();
        outp.push(var("OUT_DIR").unwrap());
        outp.push(Path::new(srcp).parent().unwrap().to_str().unwrap());
        create_dir_all(&outp)?;
        let mut leaf = Path::new(srcp)
            .file_name()
            .unwrap()
            .to_str()
            .unwrap()
            .to_owned();
        write!(leaf, ".{}", RUST_FILE_EXT).ok();
        outp.push(leaf);
        self.process_file(inp, outp)
    }
602
    /// Statically compile the `.y` file `inp` into Rust, placing the output
    /// into `outp`, and return a map from token names to identifiers.
    #[deprecated(
        since = "0.11.0",
        note = "Please use grammar_path(), output_path(), build(), and token_map() instead"
    )]
    #[allow(deprecated)]
    pub fn process_file<P, Q>(
        &mut self,
        inp: P,
        outp: Q,
    ) -> Result<HashMap<String, StorageT>, Box<dyn Error>>
    where
        P: AsRef<Path>,
        Q: AsRef<Path>,
    {
        self.grammar_path = Some(inp.as_ref().to_owned());
        self.output_path = Some(outp.as_ref().to_owned());
        // `build()` consumes its builder, so clone this builder's settings
        // into a fresh owned instance to call it from `&mut self`.
        let cl: CTParserBuilder<LexerTypesT> = CTParserBuilder {
            grammar_path: self.grammar_path.clone(),
            output_path: self.output_path.clone(),
            mod_name: self.mod_name,
            recoverer: self.recoverer,
            yacckind: self.yacckind,
            error_on_conflicts: self.error_on_conflicts,
            warnings_are_errors: self.warnings_are_errors,
            show_warnings: self.show_warnings,
            visibility: self.visibility.clone(),
            rust_edition: self.rust_edition,
            phantom: PhantomData,
        };
        Ok(cl.build()?.rule_ids)
    }
669
670 fn output_file<P: AsRef<Path>>(
671 &self,
672 grm: &YaccGrammar<StorageT>,
673 stable: &StateTable<StorageT>,
674 mod_name: &str,
675 outp_rs: P,
676 cache: &str,
677 ) -> Result<(), Box<dyn Error>> {
678 let mut outs = String::new();
679 writeln!(outs, "{} mod {} {{", self.visibility.cow_str(), mod_name).ok();
680 if let Some(YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools) =
682 self.yacckind
683 {
684 outs.push_str(&self.gen_user_actions(grm)?);
685 }
686 outs.push_str(" mod _parser_ {\n");
687 outs.push_str(
688 " #![allow(clippy::type_complexity)]
689 #![allow(clippy::unnecessary_wraps)]
690 #![deny(unsafe_code)]
691 #[allow(unused_imports)]
692 use super::*;
693",
694 );
695
696 outs.push_str(&self.gen_parse_function(grm, stable)?);
697 outs.push_str(&self.gen_rule_consts(grm));
698 outs.push_str(&self.gen_token_epp(grm));
699 match self.yacckind.unwrap() {
700 YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
701 outs.push_str(&self.gen_wrappers(grm));
702 }
703 YaccKind::Original(YaccOriginalActionKind::NoAction)
704 | YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => (),
705 _ => unreachable!(),
706 }
707 outs.push_str(" } // End of `mod _parser_`\n\n");
708 outs.push_str(" #[allow(unused_imports)]\n");
709 outs.push_str(" pub use _parser_::*;\n");
710 outs.push_str(" #[allow(unused_imports)]\n");
711 outs.push_str(" use ::lrpar::Lexeme;\n");
712 outs.push_str("} // End of `mod {mod_name}` \n\n");
713
714 outs.push_str(cache);
716
717 let mut f = File::create(outp_rs)?;
718 f.write_all(outs.as_bytes())?;
719
720 Ok(())
721 }
722
    /// Generate the cache comment embedded in the output file. Regeneration
    /// is skipped when the existing output contains an identical cache, so
    /// every option that influences the output must be recorded here.
    fn rebuild_cache(&self, grm: &YaccGrammar<StorageT>) -> String {
        let mut cache = String::new();
        cache.push_str("\n/* CACHE INFORMATION\n");

        // Record when this version of lrpar was built: a new lrpar release
        // must invalidate all previously generated caches.
        writeln!(cache, "   Build time: {:?}", env!("VERGEN_BUILD_TIMESTAMP")).ok();

        // Record every user-settable option that affects generation.
        writeln!(cache, "   Grammar path: {:?}", self.grammar_path).ok();
        writeln!(cache, "   Mod name: {:?}", self.mod_name).ok();
        writeln!(cache, "   Recoverer: {:?}", self.recoverer).ok();
        writeln!(cache, "   YaccKind: {:?}", self.yacckind).ok();
        writeln!(cache, "   Visibility: {:?}", self.visibility.cow_str()).ok();
        writeln!(
            cache,
            "   Error on conflicts: {:?}\n",
            self.error_on_conflicts
        )
        .ok();

        // Record the ID of every token: a companion lexer must agree with
        // these, so a change in IDs must force regeneration.
        for tidx in grm.iter_tidxs() {
            let n = match grm.token_name(tidx) {
                Some(n) => format!("'{}'", n),
                None => "<unknown>".to_string(),
            };
            writeln!(cache, "   {} {}", usize::from(tidx), n).ok();
        }

        cache.push_str("*/\n");
        cache
    }
760
    /// Generate the `parse` function of the output module, together with the
    /// serialised grammar/state-table constants it deserialises at runtime.
    fn gen_parse_function(
        &self,
        grm: &YaccGrammar<StorageT>,
        stable: &StateTable<StorageT>,
    ) -> Result<String, Box<dyn Error>> {
        let mut outs = String::new();

        // Embed the grammar and state table as `const` byte arrays.
        serialize_bin_output(grm, GRM_CONST_NAME, &mut outs)?;
        serialize_bin_output(stable, STABLE_CONST_NAME, &mut outs)?;

        // The signature of `parse` depends on the action kind: user actions
        // return the start rule's action type; a generic parse tree returns a
        // `Node`; "no action" returns only the list of errors.
        match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                let parse_param = match grm.parse_param() {
                    Some((name, tyname)) => format!(", {}: {}", name, tyname),
                    None => "".to_owned(),
                };
                write!(outs,
                    "
    #[allow(dead_code)]
    pub fn parse<'lexer, 'input: 'lexer>(
        lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>{parse_param})
      -> (::std::option::Option<{actiont}>, ::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>)
    {{",
                    storaget = type_name::<StorageT>(),
                    lexertypest = type_name::<LexerTypesT>(),
                    parse_param = parse_param,
                    actiont = grm.actiontype(self.user_start_ridx(grm)).as_ref().unwrap(),
                ).ok();
            }
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
                write!(
                    outs,
                    "
    #[allow(dead_code)]
    pub fn parse(lexer: &dyn ::lrpar::NonStreamingLexer<{lexertypest}>)
      -> (::std::option::Option<::lrpar::Node<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {storaget}>>,
          ::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>)
    {{",
                    storaget = type_name::<StorageT>(),
                    lexertypest = type_name::<LexerTypesT>(),
                )
                .ok();
            }
            YaccKind::Original(YaccOriginalActionKind::NoAction) => {
                write!(
                    outs,
                    "
    #[allow(dead_code)]
    pub fn parse(lexer: &dyn ::lrpar::NonStreamingLexer<{lexertypest}>)
      -> ::std::vec::Vec<::lrpar::LexParseError<{storaget}, {lexertypest}>>
    {{",
                    storaget = type_name::<StorageT>(),
                    lexertypest = type_name::<LexerTypesT>(),
                )
                .ok();
            }
            // Eco is rejected in build() before this point is reachable.
            YaccKind::Eco => unreachable!(),
        };

        // Reconstitute the grammar and state table from the embedded data.
        write!(
            outs,
            "
        let (grm, stable) = ::lrpar::ctbuilder::_reconstitute({}, {});",
            GRM_CONST_NAME, STABLE_CONST_NAME
        )
        .ok();

        let recoverer = match self.recoverer {
            RecoveryKind::CPCTPlus => "CPCTPlus",
            RecoveryKind::None => "None",
        };
        // Emit the body: build an `RTParserBuilder` and invoke the parsing
        // entry point appropriate to the action kind.
        match self.yacckind.unwrap() {
            YaccKind::Original(YaccOriginalActionKind::UserAction) | YaccKind::Grmtools => {
                // One wrapper function per production dispatches to the
                // user's action code (see gen_wrappers).
                let wrappers = grm
                    .iter_pidxs()
                    .map(|pidx| {
                        format!(
                            "&{prefix}wrapper_{}",
                            usize::from(pidx),
                            prefix = ACTION_PREFIX
                        )
                    })
                    .collect::<Vec<_>>()
                    .join(",\n                        ");
                let (parse_param, parse_paramty) = match grm.parse_param() {
                    Some((name, tyname)) => (name.clone(), tyname.clone()),
                    None => ("()".to_owned(), "()".to_owned()),
                };
                write!(outs,
                    "\n        #[allow(clippy::type_complexity)]
        let actions: ::std::vec::Vec<&dyn Fn(::cfgrammar::RIdx<{storaget}>,
                       &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
                       ::cfgrammar::Span,
                       ::std::vec::Drain<{edition_lifetime} ::lrpar::parser::AStackType<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {actionskind}<'input>>>,
                       {parse_paramty})
                    -> {actionskind}<'input>> = ::std::vec![{wrappers}];\n",
                    actionskind = ACTIONS_KIND,
                    storaget = type_name::<StorageT>(),
                    lexertypest = type_name::<LexerTypesT>(),
                    parse_paramty = parse_paramty,
                    wrappers = wrappers,
                    // The anonymous lifetime in `Drain<'_, …>` is only legal
                    // syntax from the 2018 edition onwards.
                    edition_lifetime = if self.rust_edition != RustEdition::Rust2015 { "'_, " } else { "" },
                ).ok();
                write!(
                    outs,
                    "
        match ::lrpar::RTParserBuilder::new(&grm, &stable)
            .recoverer(::lrpar::RecoveryKind::{recoverer})
            .parse_actions(lexer, &actions, {parse_param}) {{
                (Some({actionskind}::{actionskindprefix}{ridx}(x)), y) => (Some(x), y),
                (None, y) => (None, y),
                _ => unreachable!()
        }}",
                    parse_param = parse_param,
                    actionskind = ACTIONS_KIND,
                    actionskindprefix = ACTIONS_KIND_PREFIX,
                    ridx = usize::from(self.user_start_ridx(grm)),
                    recoverer = recoverer,
                )
                .ok();
            }
            YaccKind::Original(YaccOriginalActionKind::GenericParseTree) => {
                write!(
                    outs,
                    "
        ::lrpar::RTParserBuilder::new(&grm, &stable)
            .recoverer(::lrpar::RecoveryKind::{})
            .parse_generictree(lexer)\n",
                    recoverer
                )
                .ok();
            }
            YaccKind::Original(YaccOriginalActionKind::NoAction) => {
                write!(
                    outs,
                    "
        ::lrpar::RTParserBuilder::new(&grm, &stable)
            .recoverer(::lrpar::RecoveryKind::{})
            .parse_noaction(lexer)\n",
                    recoverer
                )
                .ok();
            }
            YaccKind::Eco => unreachable!(),
        };

        outs.push_str("\n    }\n\n");
        Ok(outs)
    }
914
915 fn gen_rule_consts(&self, grm: &YaccGrammar<StorageT>) -> String {
916 let mut outs = String::new();
917 for ridx in grm.iter_rules() {
918 if !grm.rule_to_prods(ridx).contains(&grm.start_prod()) {
919 write!(
920 outs,
921 " #[allow(dead_code)]\n pub const R_{}: {} = {:?};\n",
922 grm.rule_name_str(ridx).to_ascii_uppercase(),
923 type_name::<StorageT>(),
924 usize::from(ridx)
925 )
926 .ok();
927 }
928 }
929 outs
930 }
931
932 fn gen_token_epp(&self, grm: &YaccGrammar<StorageT>) -> String {
933 let mut tidxs = Vec::new();
934 for tidx in grm.iter_tidxs() {
935 match grm.token_epp(tidx) {
936 Some(n) => tidxs.push(format!("Some(\"{}\")", str_escape(n))),
937 None => tidxs.push("None".to_string()),
938 }
939 }
940 format!(
941 " const {prefix}EPP: &[::std::option::Option<&str>] = &[{}];
942
943 /// Return the %epp entry for token `tidx` (where `None` indicates \"the token has no
944 /// pretty-printed value\"). Panics if `tidx` doesn't exist.
945 #[allow(dead_code)]
946 pub fn token_epp<'a>(tidx: ::cfgrammar::TIdx<{storaget}>) -> ::std::option::Option<&'a str> {{
947 {prefix}EPP[usize::from(tidx)]
948 }}",
949 tidxs.join(", "),
950 storaget = type_name::<StorageT>(),
951 prefix = GLOBAL_PREFIX
952 )
953 }
954
    /// Generate one wrapper function per production plus the actions-kind
    /// enum. Each wrapper drains its production's symbols off the parse
    /// stack, unwraps them to the types the user's action expects, calls the
    /// action, and re-wraps the result in the actions-kind enum.
    fn gen_wrappers(&self, grm: &YaccGrammar<StorageT>) -> String {
        let mut outs = String::new();

        outs.push_str("\n\n    // Wrappers\n\n");

        let (parse_paramname, parse_paramdef) = match grm.parse_param() {
            Some((name, tyname)) => (name.to_owned(), format!("{}: {}", name, tyname)),
            None => ("()".to_owned(), "_: ()".to_owned()),
        };
        for pidx in grm.iter_pidxs() {
            let ridx = grm.prod_to_rule(pidx);

            // Emit the wrapper's signature.
            write!(outs,
                "    fn {prefix}wrapper_{}<'lexer, 'input: 'lexer>({prefix}ridx: ::cfgrammar::RIdx<{storaget}>,
                      {prefix}lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
                      {prefix}span: ::cfgrammar::Span,
                      mut {prefix}args: ::std::vec::Drain<{edition_lifetime} ::lrpar::parser::AStackType<<{lexertypest} as ::lrpar::LexerTypes>::LexemeT, {actionskind}<'input>>>,
                      {parse_paramdef})
                   -> {actionskind}<'input> {{",
                usize::from(pidx),
                storaget = type_name::<StorageT>(),
                lexertypest = type_name::<LexerTypesT>(),
                prefix = ACTION_PREFIX,
                parse_paramdef = parse_paramdef,
                actionskind = ACTIONS_KIND,
                // `Drain<'_, …>` syntax requires the 2018 edition or later.
                edition_lifetime = if self.rust_edition != RustEdition::Rust2015 { "'_, " } else { "" },
            ).ok();

            if grm.action(pidx).is_some() {
                // Unpack the production's symbols into `$1`-style arguments:
                // rule references unwrap the actions-kind enum; tokens become
                // `Ok(lexeme)` or, if inserted by error recovery, `Err(lexeme)`.
                for i in 0..grm.prod(pidx).len() {
                    match grm.prod(pidx)[i] {
                        Symbol::Rule(ref_ridx) => {
                            write!(outs,
                                "
        #[allow(clippy::let_unit_value)]
        let {prefix}arg_{i} = match {prefix}args.next().unwrap() {{
            ::lrpar::parser::AStackType::ActionType({actionskind}::{actionskindprefix}{ref_ridx}(x)) => x,
            _ => unreachable!()
        }};",
                                i = i + 1,
                                ref_ridx = usize::from(ref_ridx),
                                prefix = ACTION_PREFIX,
                                actionskind = ACTIONS_KIND,
                                actionskindprefix = ACTIONS_KIND_PREFIX
                            ).ok();
                        }
                        Symbol::Token(_) => {
                            write!(
                                outs,
                                "
        let {prefix}arg_{} = match {prefix}args.next().unwrap() {{
            ::lrpar::parser::AStackType::Lexeme(l) => {{
                if l.faulty() {{
                    Err(l)
                }} else {{
                    Ok(l)
                }}
            }},
            ::lrpar::parser::AStackType::ActionType(_) => unreachable!()
        }};",
                                i + 1,
                                prefix = ACTION_PREFIX
                            )
                            .ok();
                        }
                    }
                }

                // Call the user's action function, wrapping its result in the
                // actions-kind enum. `()`-typed rules call the action for its
                // side effects and wrap a unit value explicitly.
                let args = (0..grm.prod(pidx).len())
                    .map(|i| format!("{prefix}arg_{i}", prefix = ACTION_PREFIX, i = i + 1))
                    .collect::<Vec<_>>();
                match grm.actiontype(ridx) {
                    Some(s) if s == "()" => {
                        write!(outs, "\n        {prefix}action_{pidx}({prefix}ridx, {prefix}lexer, {prefix}span, {parse_paramname}, {args});
        {actionskind}::{actionskindprefix}{ridx}(())",
                            actionskind = ACTIONS_KIND,
                            actionskindprefix = ACTIONS_KIND_PREFIX,
                            prefix = ACTION_PREFIX,
                            ridx = usize::from(ridx),
                            pidx = usize::from(pidx),
                            parse_paramname = parse_paramname,
                            args = args.join(", ")).ok();
                    }
                    _ => {
                        write!(outs, "\n        {actionskind}::{actionskindprefix}{ridx}({prefix}action_{pidx}({prefix}ridx, {prefix}lexer, {prefix}span, {parse_paramname}, {args}))",
                            actionskind = ACTIONS_KIND,
                            actionskindprefix = ACTIONS_KIND_PREFIX,
                            prefix = ACTION_PREFIX,
                            ridx = usize::from(ridx),
                            pidx = usize::from(pidx),
                            parse_paramname = parse_paramname,
                            args = args.join(", ")).ok();
                    }
                }
            } else if pidx == grm.start_prod() {
                // The synthetic start production has no action and its
                // wrapper is never actually executed.
                if parse_paramname != "()" {
                    // Suppress "unused parameter" warnings in generated code.
                    write!(outs, "\n        let _ = {parse_paramname:};").ok();
                }
                outs.push_str("\n        unreachable!()");
            } else {
                panic!(
                    "Production in rule '{}' must have an action body.",
                    grm.rule_name_str(grm.prod_to_rule(pidx))
                );
            }
            outs.push_str("\n    }\n\n");
        }

        // Generate the actions-kind enum: one variant per rule with an
        // action type, plus a hidden variant carrying the `'input` lifetime
        // so the lifetime parameter is always used.
        write!(
            outs,
            "    #[allow(dead_code)]
    enum {}<'input> {{\n",
            ACTIONS_KIND
        )
        .ok();
        for ridx in grm.iter_rules() {
            if grm.actiontype(ridx).is_none() {
                continue;
            }

            writeln!(
                outs,
                "        {actionskindprefix}{ridx}({actiont}),",
                actionskindprefix = ACTIONS_KIND_PREFIX,
                ridx = usize::from(ridx),
                actiont = grm.actiontype(ridx).as_ref().unwrap()
            )
            .ok();
        }
        write!(
            outs,
            "        _{actionskindhidden}(::std::marker::PhantomData<&'input ()>)
    }}\n\n",
            actionskindhidden = ACTIONS_KIND_HIDDEN
        )
        .ok();

        outs
    }
1111
    /// Generate the user action functions (the user's action code with `$`
    /// pseudo-variables substituted), plus any `%%` program-section code.
    fn gen_user_actions(&self, grm: &YaccGrammar<StorageT>) -> Result<String, Box<dyn Error>> {
        let mut outs = String::new();

        if let Some(s) = grm.programs() {
            outs.push_str("\n// User code from the program section\n\n");
            outs.push_str(s);
            outs.push_str("\n// End of user code from the program section\n\n");
        }

        // Convert actions to functions.
        outs.push_str("\n    // User actions\n\n");
        let (parse_paramname, parse_paramdef) = match grm.parse_param() {
            Some((name, tyname)) => (name.to_owned(), format!("{}: {}", name, tyname)),
            None => ("()".to_owned(), "_: ()".to_owned()),
        };
        for pidx in grm.iter_pidxs() {
            // The synthetic start production has no user action.
            if pidx == grm.start_prod() {
                continue;
            }

            // Each symbol in the production becomes one argument: rule
            // references take the rule's action type; tokens take a
            // `Result<Lexeme, Lexeme>` (Err == inserted by error recovery).
            let mut args = Vec::with_capacity(grm.prod(pidx).len());
            for i in 0..grm.prod(pidx).len() {
                let argt = match grm.prod(pidx)[i] {
                    Symbol::Rule(ref_ridx) => grm.actiontype(ref_ridx).as_ref().unwrap().clone(),
                    Symbol::Token(_) => format!(
                        "::std::result::Result<{lexemet}, {lexemet}>",
                        lexemet = type_name::<LexerTypesT::LexemeT>(),
                    ),
                };
                args.push(format!("mut {}arg_{}: {}", ACTION_PREFIX, i + 1, argt));
            }

            // Rules whose action type is "()" get no explicit return type so
            // that the user can omit a trailing expression entirely.
            let returnt = {
                let actiont = grm.actiontype(grm.prod_to_rule(pidx)).as_ref().unwrap();
                if actiont == "()" {
                    "".to_owned()
                } else {
                    format!("\n                 -> {}", actiont)
                }
            };
            write!(
                outs,
                "    // {rulename}
    #[allow(clippy::too_many_arguments)]
    fn {prefix}action_{}<'lexer, 'input: 'lexer>({prefix}ridx: ::cfgrammar::RIdx<{storaget}>,
                     {prefix}lexer: &'lexer dyn ::lrpar::NonStreamingLexer<'input, {lexertypest}>,
                     {prefix}span: ::cfgrammar::Span,
                     {parse_paramdef},
                     {args}){returnt} {{\n",
                usize::from(pidx),
                rulename = grm.rule_name_str(grm.prod_to_rule(pidx)),
                storaget = type_name::<StorageT>(),
                lexertypest = type_name::<LexerTypesT>(),
                prefix = ACTION_PREFIX,
                returnt = returnt,
                parse_paramdef = parse_paramdef,
                args = args.join(",\n                     ")
            )
            .ok();

            if parse_paramname != "()" {
                // Suppress "unused variable" warnings for the parse parameter.
                writeln!(outs, "        let _ = {parse_paramname:};").ok();
            }

            // Scan the user's action text, expanding the pseudo-variables:
            // `$$` (escaped `$`), `$lexer`, `$span`, and `$<n>` argument refs.
            let pre_action = grm.action(pidx).as_ref().ok_or_else(|| {
                format!(
                    "Rule {} has a production which is missing action code",
                    grm.rule_name_str(grm.prod_to_rule(pidx))
                )
            })?;
            let mut last = 0;
            loop {
                match pre_action[last..].find('$') {
                    Some(off) => {
                        if pre_action[last + off..].starts_with("$$") {
                            // "$$" escapes a literal "$": emit one "$" and
                            // skip both characters of the input.
                            outs.push_str(&pre_action[last..last + off + "$".len()]);
                            last = last + off + "$$".len();
                        } else if pre_action[last + off..].starts_with("$lexer") {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}lexer", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$lexer".len();
                        } else if pre_action[last + off..].starts_with("$span") {
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}span", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$span".len();
                        } else if last + off + 1 < pre_action.len()
                            && pre_action[last + off + 1..].starts_with(|c: char| c.is_numeric())
                        {
                            // "$<n>": emit the argument prefix; the digits
                            // already in the user's text complete the name.
                            outs.push_str(&pre_action[last..last + off]);
                            write!(outs, "{prefix}arg_", prefix = ACTION_PREFIX).ok();
                            last = last + off + "$".len();
                        } else {
                            panic!(
                                "Unknown text following '$' operator: {}",
                                &pre_action[last + off..]
                            );
                        }
                    }
                    None => {
                        // No further "$": emit the rest of the action verbatim.
                        outs.push_str(&pre_action[last..]);
                        break;
                    }
                }
            }

            outs.push_str("\n    }\n\n");
        }
        Ok(outs)
    }
1230
    /// Return the `RIdx` of the user's start rule, i.e. the single symbol on
    /// the RHS of the grammar's synthetic start production.
    fn user_start_ridx(&self, grm: &YaccGrammar<StorageT>) -> RIdx<StorageT> {
        debug_assert_eq!(grm.prod(grm.start_prod()).len(), 1);
        match grm.prod(grm.start_prod())[0] {
            Symbol::Rule(ridx) => ridx,
            // The synthetic start production always references a rule.
            _ => unreachable!(),
        }
    }
1241}
1242
/// Escape backslashes and double quotes in `s` so it can be embedded inside a
/// double-quoted Rust string literal in the generated code.
fn str_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            _ => escaped.push(c),
        }
    }
    escaped
}
1247
1248#[doc(hidden)]
1251pub fn _reconstitute<StorageT: DeserializeOwned + Hash + PrimInt + Unsigned>(
1252 grm_buf: &[u8],
1253 stable_buf: &[u8],
1254) -> (YaccGrammar<StorageT>, StateTable<StorageT>) {
1255 let grm = deserialize(grm_buf).unwrap();
1256 let stable = deserialize(stable_buf).unwrap();
1257 (grm, stable)
1258}
1259
1260fn serialize_bin_output<T: Serialize + ?Sized>(
1261 ser: &T,
1262 name: &str,
1263 buffer: &mut String,
1264) -> Result<(), Box<dyn Error>> {
1265 let mut w = ArrayWriter::new(name);
1266 serialize_into(&mut w, ser)?;
1267 let data = w.finish();
1268 buffer.push_str(&data);
1269 Ok(())
1270}
1271
/// An `io::Write` sink that renders every byte fed to it as part of a Rust
/// `const` byte-array literal, e.g. `const X: &[u8] = &[1,2,3,];`.
struct ArrayWriter {
    buffer: String,
}

impl ArrayWriter {
    /// Begin an array named `name`, emitting the literal's opening text.
    fn new(name: &str) -> Self {
        ArrayWriter {
            buffer: format!(r#"#[allow(dead_code)] const {}: &[u8] = &["#, name),
        }
    }

    /// Terminate the array literal and return the complete source text.
    fn finish(mut self) -> String {
        self.buffer.push_str("];\n");
        self.buffer
    }
}

impl Write for ArrayWriter {
    #[allow(dead_code)]
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        // Each byte becomes a decimal literal followed by a comma; a
        // trailing comma is valid Rust, so no special-casing is needed.
        for &b in buf {
            write!(self.buffer, "{},", b).unwrap();
        }
        Ok(buf.len())
    }

    #[allow(dead_code)]
    fn flush(&mut self) -> io::Result<()> {
        // Everything is held in memory: nothing to flush.
        Ok(())
    }
}
1306
/// An interface to the result of [`CTParserBuilder::build`].
pub struct CTParser<StorageT = u32>
where
    StorageT: Eq + Hash,
{
    // Whether the output file was regenerated (as opposed to the cached
    // output being reused).
    regenerated: bool,
    // Maps token names to their storage-type identifiers.
    rule_ids: HashMap<String, StorageT>,
    // Grammar, state graph and state table, retained only when the state
    // table contained conflicts (`None` otherwise or when cached output was
    // reused).
    conflicts: Option<(
        YaccGrammar<StorageT>,
        StateGraph<StorageT>,
        StateTable<StorageT>,
    )>,
}
1320
1321impl<StorageT> CTParser<StorageT>
1322where
1323 StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
1324 usize: AsPrimitive<StorageT>,
1325{
1326 pub fn regenerated(&self) -> bool {
1328 self.regenerated
1329 }
1330
1331 pub fn token_map(&self) -> &HashMap<String, StorageT> {
1334 &self.rule_ids
1335 }
1336
1337 #[allow(private_interfaces)]
1343 pub fn conflicts(
1344 &self,
1345 _: crate::unstable::UnstableApi,
1346 ) -> Option<(
1347 &YaccGrammar<StorageT>,
1348 &StateGraph<StorageT>,
1349 &StateTable<StorageT>,
1350 &Conflicts<StorageT>,
1351 )> {
1352 if let Some((grm, sgraph, stable)) = &self.conflicts {
1353 return Some((grm, sgraph, stable, stable.conflicts().unwrap()));
1354 }
1355 None
1356 }
1357}
1358
#[cfg(test)]
mod test {
    use std::{fs::File, io::Write, path::PathBuf};

    use super::{CTConflictsError, CTParserBuilder};
    use crate::test_utils::TestLexerTypes;
    use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind};
    use tempfile::TempDir;

    /// Write the grammar `src` to a file `grm.y` in a fresh temporary
    /// directory, returning the directory handle (which must be kept alive
    /// so the file is not deleted) and the path to the grammar file.
    fn write_grammar(src: &str) -> (TempDir, PathBuf) {
        let temp = TempDir::new().unwrap();
        let mut file_path = PathBuf::from(temp.as_ref());
        file_path.push("grm.y");
        let mut f = File::create(&file_path).unwrap();
        // A silently unwritten grammar would invalidate the test, so fail
        // loudly instead of discarding the result.
        f.write_all(src.as_bytes()).unwrap();
        (temp, file_path)
    }

    #[test]
    fn test_conflicts() {
        // Grammar with one shift/reduce and one reduce/reduce conflict.
        let (_temp, file_path) = write_grammar(
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .error_on_conflicts(false)
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
            .unwrap()
            .conflicts(crate::unstable::UnstableApi)
        {
            Some((_, _, _, conflicts)) => {
                assert_eq!(conflicts.sr_len(), 1);
                assert_eq!(conflicts.rr_len(), 1);
            }
            None => panic!("Expected error data"),
        }
    }

    #[test]
    fn test_conflicts_error() {
        // Same conflicting grammar as above, but without
        // `error_on_conflicts(false)`: the build must fail.
        let (_temp, file_path) = write_grammar(
            "%start A
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e
                    .downcast_ref::<CTConflictsError<u16>>()
                    .expect("expected a CTConflictsError");
                let conflicts = cs.stable.conflicts().unwrap();
                assert_eq!(conflicts.rr_len(), 1);
                assert_eq!(conflicts.sr_len(), 1);
            }
        }
    }

    #[test]
    fn test_expect_error() {
        // `%expect 2` does not match the single shift/reduce conflict, so
        // the build must fail with a conflicts error.
        let (_temp, file_path) = write_grammar(
            "%start A
%expect 2
%%
A: 'a' 'b' | B 'b';
B: 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e
                    .downcast_ref::<CTConflictsError<u16>>()
                    .expect("expected a CTConflictsError");
                let conflicts = cs.stable.conflicts().unwrap();
                assert_eq!(conflicts.rr_len(), 0);
                assert_eq!(conflicts.sr_len(), 1);
            }
        }
    }

    #[test]
    fn test_expectrr_error() {
        // `%expect-rr 2` does not match the single reduce/reduce conflict,
        // so the build must fail with a conflicts error.
        let (_temp, file_path) = write_grammar(
            "%start A
%expect 1
%expect-rr 2
%%
A : 'a' 'b' | B 'b';
B : 'a' | C;
C : 'a';",
        );

        match CTParserBuilder::<TestLexerTypes>::new()
            .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree))
            .grammar_path(file_path.to_str().unwrap())
            .output_path(file_path.with_extension("ignored"))
            .build()
        {
            Ok(_) => panic!("Expected error"),
            Err(e) => {
                let cs = e
                    .downcast_ref::<CTConflictsError<u16>>()
                    .expect("expected a CTConflictsError");
                let conflicts = cs.stable.conflicts().unwrap();
                assert_eq!(conflicts.rr_len(), 1);
                assert_eq!(conflicts.sr_len(), 1);
            }
        }
    }
}