rusty_lr_buildscript/lib.rs

1//! Build script for rusty_lr
2//!
3//! This crate is private and not intended to be used directly.
4//! Please use the [`rusty_lr`](https://crates.io/crates/rusty_lr) crate instead.
5//!
6//! ```ignore
7//! fn main() {
8//!     println!("cargo::rerun-if-changed=src/parser/parser.rs");
9//!
10//!     let output_dir = std::env::var("OUT_DIR").unwrap();
11//!     let output = format!("{}/parser.rs", output_dir);
12//!     Builder::new()
13//!        .file("src/parser/parser.rs")
14//!        .build(&output);
15//! }
//! ```
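//!
//! The generated code can then be pulled into the consuming crate with the usual
//! `OUT_DIR` include pattern (where the include lives is up to your project layout):
//!
//! ```ignore
//! include!(concat!(env!("OUT_DIR"), "/parser.rs"));
//! ```
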
18pub mod output;
19mod split;
20mod utils;
21
22use codespan_reporting::diagnostic::Diagnostic;
23use codespan_reporting::diagnostic::Label;
24use codespan_reporting::files::SimpleFiles;
25use codespan_reporting::term;
26use codespan_reporting::term::termcolor::ColorChoice;
27use codespan_reporting::term::termcolor::StandardStream;
28
29use proc_macro2::TokenStream;
30
31use quote::quote;
32use rusty_lr_core::TerminalSymbol;
33use rusty_lr_parser::error::ArgError;
34use rusty_lr_parser::error::ParseArgError;
35use rusty_lr_parser::error::ParseError;
36
37use std::fs::read;
38use std::fs::write;
39
/// Re-export of `target_rusty_lr_version` from `rusty_lr_parser`.
41pub use rusty_lr_parser::target_rusty_lr_version;
42
43/// Main entry for the build script
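///
/// A typical build-script invocation looks like the sketch below (the grammar path
/// and option values are illustrative; every method shown exists on this builder):
///
/// ```ignore
/// let output_dir = std::env::var("OUT_DIR").unwrap();
/// Builder::new()
///     .file("src/parser/parser.rs")   // grammar file to compile
///     .note_backtrace(false)          // tune which diagnostics are printed
///     .build(&format!("{}/parser.rs", output_dir));
/// ```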
44pub struct Builder {
    /// Input file to read.
46    input_file: Option<String>,
47
    /// Print notes about any shift/reduce or reduce/reduce conflicts.
    /// If the target is a deterministic parser, conflicts are treated as errors
    /// and this option is ignored.
    /// This option only applies to the non-deterministic GLR parser.
52    note_conflicts: bool,
53
    /// Print debug information about the conflict-resolution process driven by any `%left`, `%right`, or `%precedence` directive.
55    note_conflicts_resolving: bool,
56
    /// Print debug information about the optimization process.
58    note_optimization: bool,
59
    /// Print diagnostics to stderr instead of stdout.
61    stderr: bool,
62
    /// Print a backtrace of production rules when conflicts occur. The rule set shown in the backtrace may be messy.
64    note_backtrace: bool,
65
    /// If true, an executable (rather than a build script) called this function.
67    pub is_executable: bool,
68
    /// If `Some`, override the corresponding grammar settings with these values.
70    glr: Option<bool>,
71    dense: Option<bool>,
72}
73
74impl Builder {
75    pub fn new() -> Self {
76        Self {
77            input_file: None,
78            note_conflicts: true,
79            note_conflicts_resolving: true,
80            stderr: false,
81            note_optimization: true,
82            note_backtrace: true,
83            is_executable: false,
84
85            glr: None,
86            dense: None,
87        }
88    }
89
    /// Override the grammar's `glr` setting.
91    pub fn glr(&mut self, glr: bool) -> &mut Self {
92        self.glr = Some(glr);
93        self
94    }
    /// Override the grammar's `dense` setting.
96    pub fn dense(&mut self, dense: bool) -> &mut Self {
97        self.dense = Some(dense);
98        self
99    }
100
    /// Set the input file to read.
102    pub fn file(&mut self, filename: &str) -> &mut Self {
103        self.input_file = Some(filename.to_string());
104        self
105    }
106
    /// Print notes about any shift/reduce or reduce/reduce conflicts.
    /// If the target is a deterministic parser, conflicts are treated as errors
    /// and this option is ignored.
    /// This option only applies to the non-deterministic GLR parser.
111    pub fn note_conflicts(&mut self, val: bool) -> &mut Self {
112        self.note_conflicts = val;
113        self
114    }
115
    /// Print debug information about the conflict-resolution process driven by any `%left`, `%right`, or `%precedence` directive.
117    pub fn note_conflicts_resolving(&mut self, val: bool) -> &mut Self {
118        self.note_conflicts_resolving = val;
119        self
120    }
121
    /// Print debug information about the optimization process.
123    pub fn note_optimization(&mut self, val: bool) -> &mut Self {
124        self.note_optimization = val;
125        self
126    }
127
    /// Print diagnostics to stderr instead of stdout.
129    pub fn stderr(&mut self, val: bool) -> &mut Self {
130        self.stderr = val;
131        self
132    }
133
    /// Print a backtrace of production rules when conflicts occur. The rule set shown in the backtrace may be messy.
135    pub fn note_backtrace(&mut self, val: bool) -> &mut Self {
136        self.note_backtrace = val;
137        self
138    }
139
140    fn stream(&self) -> StandardStream {
141        if self.stderr {
142            StandardStream::stderr(ColorChoice::Auto)
143        } else {
144            StandardStream::stdout(ColorChoice::Auto)
145        }
146    }
147
    /// Build the grammar and emit the generated code to the output file.
149    pub fn build(&self, output_file: &str) {
150        let output = match self.build_impl() {
151            Ok(output) => {
152                let stream1 = output.user_stream;
153                let stream2 = output.generated_stream;
154                quote! {
155                    #stream1
156                    #stream2
157                }
158            }
159            Err(msg) => {
160                panic!("{}", msg)
161            }
162        };
163
164        write(output_file, output.to_string()).expect("Failed to write to file");
165    }
166
    /// Extend `labels` with messages about the source of the rule.
    /// If `ruleid` refers to an auto-generated rule, a "{} was generated here" label is added;
    /// if it refers to a user-written rule, a "{} was defined here" label is added.
170    fn extend_rule_source_label(
171        labels: &mut Vec<codespan_reporting::diagnostic::Label<usize>>,
172        fileid: usize,
173        ruleid: usize,
174        grammar: &rusty_lr_parser::grammar::Grammar,
175        prefix_str: &str,
176        message_secondary: &str,
177    ) {
178        let (nonterm, local_rule) = grammar.get_rule_by_id(ruleid).expect("Rule not found");
179        if let Some(origin_span) = nonterm.origin_span() {
180            let origin_range = origin_span.0.byte_range().start..origin_span.1.byte_range().end;
181            let message = format!("{}{} was generated here", prefix_str, nonterm.pretty_name,);
182            let mut duplicated_primary = false;
183            for label in labels.iter() {
184                if label.range == origin_range && label.message == message {
185                    duplicated_primary = true;
186                    break;
187                }
188            }
189            if !duplicated_primary {
190                labels.push(Label::primary(fileid, origin_range).with_message(message));
191            }
192        } else {
193            let (rule_begin, rule_end) = nonterm.rules[local_rule].span_pair();
194            let rule_range = rule_begin.byte_range().start..rule_end.byte_range().end;
195            let origin_range = nonterm.name.span().byte_range();
196
197            let primary_message = format!("{}{} was defined here", prefix_str, nonterm.pretty_name);
198            let mut duplicated_primary = false;
199            let mut duplicated_secondary = false;
200            for label in labels.iter() {
201                if label.range == origin_range && label.message == primary_message {
202                    duplicated_primary = true;
203                    break;
204                }
205            }
206            for label in labels.iter() {
207                if label.range == rule_range && label.message == message_secondary {
208                    duplicated_secondary = true;
209                    break;
210                }
211            }
212            if !duplicated_primary {
213                labels.push(Label::primary(fileid, origin_range).with_message(primary_message));
214            }
215            if !duplicated_secondary {
216                labels.push(
217                    Label::secondary(fileid, rule_range)
218                        .with_message(message_secondary.to_string()),
219                );
220            }
221        }
222    }
223
224    /// for internal use
225    pub fn build_impl(&self) -> Result<output::Output, String> {
226        if self.input_file.is_none() {
227            eprintln!("Input file not set");
228            return Err("Input file not set".to_string());
229        }
230
231        let input_file = self.input_file.as_ref().unwrap();
232        // read file
233        let input_bytes = match read(input_file) {
234            Ok(bytes) => bytes,
235            Err(e) => {
236                let message = format!("Error reading file: {}", e);
237                eprintln!("{}", message);
238                return Err(message);
239            }
240        };
241
242        let str = match String::from_utf8(input_bytes) {
243            Ok(str) => str,
244            Err(e) => {
245                let message = format!("Error reading utf-8: {}", e);
246                eprintln!("{}", message);
247                return Err(message);
248            }
249        };
250
251        let mut files = SimpleFiles::new();
252        let file_id = files.add(input_file, str.clone());
253
254        // lex with proc-macro2
255        let token_stream: TokenStream = match str.parse() {
256            Ok(token_stream) => token_stream,
257            Err(e) => {
258                let range = e.span().byte_range();
259                let diag = Diagnostic::error()
260                    .with_message("Lexing error")
261                    .with_labels(vec![
262                        Label::primary(file_id, range).with_message(e.to_string())
263                    ]);
264                let writer = self.stream();
265                let config = codespan_reporting::term::Config::default();
266                term::emit(&mut writer.lock(), &config, &files, &diag)
267                    .expect("Failed to write to stderr");
268                return Err("Lexing error".to_string());
269            }
270        };
271
272        // split stream by '%%'
273        let (output_stream, macro_stream) = match split::split_stream(token_stream) {
274            Ok((output_stream, macro_stream)) => (output_stream, macro_stream),
275            Err(_) => {
276                let diag = Diagnostic::error()
277                    .with_message("Cannot find `%%`")
278                    .with_notes(vec![
279                    "Please put `%%` to separate the code part and the context-free grammar part"
280                        .to_string(),
281                ]);
282                let writer = self.stream();
283                let config = codespan_reporting::term::Config::default();
284                term::emit(&mut writer.lock(), &config, &files, &diag)
285                    .expect("Failed to write to stderr");
286                return Err(diag.message);
287            }
288        };
289
290        let mut grammar_args = match rusty_lr_parser::grammar::Grammar::parse_args(macro_stream) {
291            Ok(grammar_args) => grammar_args,
292            Err(e) => {
293                let diag =
294                    match e {
295                        ParseArgError::MacroLineParse { span, message } => {
296                            let range = span.byte_range();
297
298                            Diagnostic::error()
299                                .with_message("Parse Failed")
300                                .with_labels(vec![
301                                    Label::primary(file_id, range).with_message("Error here")
302                                ])
303                                .with_notes(vec![message])
304                        }
305
306                        _ => {
307                            let message = e.short_message();
308                            let span = e.span().byte_range();
309                            Diagnostic::error().with_message(message).with_labels(vec![
                                Label::primary(file_id, span).with_message("occurred here"),
311                            ])
312                        }
313                    };
314
315                let writer = self.stream();
316                let config = codespan_reporting::term::Config::default();
317                term::emit(&mut writer.lock(), &config, &files, &diag)
318                    .expect("Failed to write to stderr");
319                return Err(diag.message);
320            }
321        };
322
323        for error in &grammar_args.error_recovered {
324            let range = if let Some((first, last)) = error.span.pair {
325                let first_range = first.byte_range();
326                let last_range = last.byte_range();
327                first_range.start..last_range.end
328            } else {
329                0..1 // default range if span is not defined
330            };
331            let diag = Diagnostic::error()
332                .with_message("Syntax error in grammar")
333                .with_labels(vec![
334                    Label::primary(file_id, range).with_message(error.message.clone())
335                ])
336                .with_notes(vec![format!("refer to: {}", error.link)]);
337            let writer = self.stream();
338            let config = codespan_reporting::term::Config::default();
339            term::emit(&mut writer.lock(), &config, &files, &diag)
340                .expect("Failed to write to stderr");
341        }
342
343        if !grammar_args.error_recovered.is_empty() {
344            return Err("Syntax error in grammar".to_string());
345        }
346
347        match rusty_lr_parser::grammar::Grammar::arg_check_error(&mut grammar_args) {
348            Ok(_) => {}
349            Err(e) => {
350                let diag = match e {
351                    ArgError::MultipleModulePrefixDefinition(
352                        (span1, tokenstream1),
353                        (span2, tokenstream2),
354                    ) => {
355                        let range1 = utils::span_stream_range(span1, tokenstream1);
356                        let range2 = utils::span_stream_range(span2, tokenstream2);
357
358                        Diagnostic::error()
359                            .with_message("Multiple %moduleprefix definition")
360                            .with_labels(vec![
361                                Label::primary(file_id, range1).with_message("First definition"),
362                                Label::primary(file_id, range2).with_message("Other definition"),
363                            ])
364                            .with_notes(vec![
365                                "Only one %moduleprefix definition is allowed".to_string()
366                            ])
367                    }
368                    ArgError::MultipleUserDataDefinition(
369                        (span1, tokenstream1),
370                        (span2, tokenstream2),
371                    ) => {
372                        let range1 = utils::span_stream_range(span1, tokenstream1);
373                        let range2 = utils::span_stream_range(span2, tokenstream2);
374
375                        Diagnostic::error()
376                            .with_message("Multiple %userdata definition")
377                            .with_labels(vec![
378                                Label::primary(file_id, range1).with_message("First definition"),
379                                Label::primary(file_id, range2).with_message("Other definition"),
380                            ])
381                            .with_notes(
382                                vec!["Only one %userdata definition is allowed".to_string()],
383                            )
384                    }
385                    ArgError::MultipleErrorDefinition(
386                        (span1, tokenstream1),
387                        (span2, tokenstream2),
388                    ) => {
389                        let range1 = utils::span_stream_range(span1, tokenstream1);
390                        let range2 = utils::span_stream_range(span2, tokenstream2);
391
392                        Diagnostic::error()
393                            .with_message("Multiple %error definition")
394                            .with_labels(vec![
395                                Label::primary(file_id, range1).with_message("First definition"),
396                                Label::primary(file_id, range2).with_message("Other definition"),
397                            ])
398                            .with_notes(vec!["Only one %error definition is allowed".to_string()])
399                    }
400                    ArgError::MultipleTokenTypeDefinition(
401                        (span1, tokenstream1),
402                        (span2, tokenstream2),
403                    ) => {
404                        let range1 = utils::span_stream_range(span1, tokenstream1);
405                        let range2 = utils::span_stream_range(span2, tokenstream2);
406
407                        Diagnostic::error()
408                            .with_message("Multiple %tokentype definition")
409                            .with_labels(vec![
410                                Label::primary(file_id, range1).with_message("First definition"),
411                                Label::primary(file_id, range2).with_message("Other definition"),
412                            ])
413                            .with_notes(vec![
414                                "Only one %tokentype definition is allowed".to_string()
415                            ])
416                    }
417                    ArgError::MultipleEofDefinition(
418                        (span1, tokenstream1),
419                        (span2, tokenstream2),
420                    ) => {
421                        let range1 = utils::span_stream_range(span1, tokenstream1);
422                        let range2 = utils::span_stream_range(span2, tokenstream2);
423
424                        Diagnostic::error()
425                            .with_message("Multiple %eof definition")
426                            .with_labels(vec![
427                                Label::primary(file_id, range1).with_message("First definition"),
428                                Label::primary(file_id, range2).with_message("Other definition"),
429                            ])
430                            .with_notes(vec!["Only one %eof definition is allowed".to_string()])
431                    }
432                    ArgError::MultipleStartDefinition(ident1, ident2) => {
433                        let range1 = ident1.span().byte_range();
434                        let range2 = ident2.span().byte_range();
435
436                        Diagnostic::error()
437                            .with_message("Multiple %start definition")
438                            .with_labels(vec![
439                                Label::primary(file_id, range1).with_message("First definition"),
440                                Label::primary(file_id, range2).with_message("Other definition"),
441                            ])
442                            .with_notes(vec!["Only one %start definition is allowed".to_string()])
443                    }
444
445                    ArgError::StartNotDefined => Diagnostic::error()
446                        .with_message("%start not defined")
447                        .with_labels(vec![])
448                        .with_notes(vec![
449                            "%start must be defined".to_string(),
450                            ">>> %start <non-terminal>".to_string(),
451                        ]),
452                    ArgError::EofNotDefined => Diagnostic::error()
453                        .with_message("%eof not defined")
454                        .with_labels(vec![])
455                        .with_notes(vec![
456                            "%eof must be defined".to_string(),
457                            ">>> %eof <terminal>".to_string(),
458                        ]),
459                    ArgError::TokenTypeNotDefined => Diagnostic::error()
460                        .with_message("%tokentype not defined")
461                        .with_labels(vec![])
462                        .with_notes(vec![
463                            "%tokentype must be defined".to_string(),
464                            ">>> %tokentype <TokenType>".to_string(),
465                        ]),
466
467                    ArgError::MultiplePrecDefinition(span) => Diagnostic::error()
468                        .with_message("multiple %prec definition")
469                        .with_labels(vec![Label::primary(file_id, span.byte_range())
470                            .with_message("This %prec is defined here")])
471                        .with_notes(vec!["%prec must be unique".to_string()]),
472
473                    ArgError::MultipleDPrecDefinition(span) => Diagnostic::error()
474                        .with_message("multiple %dprec definition")
475                        .with_labels(vec![Label::primary(file_id, span.byte_range())
476                            .with_message("This %dprec is defined here")])
477                        .with_notes(vec!["%dprec must be unique".to_string()]),
478                    _ => {
479                        let message = e.short_message();
480                        let span = e.span().byte_range();
481                        Diagnostic::error()
482                            .with_message(message)
483                            .with_labels(vec![
                                Label::primary(file_id, span).with_message("occurred here")
485                            ])
486                    }
487                };
488
489                let writer = self.stream();
490                let config = codespan_reporting::term::Config::default();
491                term::emit(&mut writer.lock(), &config, &files, &diag)
492                    .expect("Failed to write to stderr");
493                return Err(diag.message);
494            }
495        }
496
497        if let Some(glr) = self.glr {
498            grammar_args.glr = glr;
499        }
500        if let Some(dense) = self.dense {
501            grammar_args.dense = dense;
502        }
503
504        // parse lines
505        let mut grammar = match rusty_lr_parser::grammar::Grammar::from_grammar_args(grammar_args) {
506            Ok(grammar) => grammar,
507            Err(e) => {
508                let diag = match e {
509                    ParseError::MultipleRuleDefinition(ident1, ident2) => {
510                        let range1 = ident1.span().byte_range();
511                        let range2 = ident2.span().byte_range();
512
513                        Diagnostic::error()
514                            .with_message("Multiple rule definition")
515                            .with_labels(vec![
516                                Label::primary(file_id, range1).with_message("First definition"),
517                                Label::primary(file_id, range2).with_message("Other definition"),
518                            ])
519                            .with_notes(vec!["Rule name must be unique".to_string()])
520                    }
521
522                    ParseError::MultipleReduceDefinition { terminal, old, new } => {
523                        let old_range = old.0.byte_range();
524                        let old_string = match old.1 {
525                            rusty_lr_core::rule::ReduceType::Left => "%left",
526                            rusty_lr_core::rule::ReduceType::Right => "%right",
527                        };
528                        let new_range = new.0.byte_range();
529                        let new_string = match new.1 {
530                            rusty_lr_core::rule::ReduceType::Left => "%left",
531                            rusty_lr_core::rule::ReduceType::Right => "%right",
532                        };
533
534                        Diagnostic::error()
535                            .with_message(format!("Multiple reduce definition: {}", terminal))
536                            .with_labels(vec![
537                                Label::primary(file_id, old_range)
538                                    .with_message(format!("was set as {} here", old_string)),
539                                Label::primary(file_id, new_range)
540                                    .with_message(format!("was set as {} here", new_string)),
541                            ])
542                            .with_notes(vec![
543                                "Reduce type must be unique, either %left or %right".to_string()
544                            ])
545                    }
546
547                    ParseError::TermNonTermConflict {
548                        name,
549                        terminal,
550                        non_terminal,
551                    } => {
552                        let range = name.span().byte_range();
553
554                        Diagnostic::error()
555                            .with_message("Ambiguous token name")
556                            .with_labels(vec![
557                                Label::primary(file_id, range).with_message(
558                                    "This name is used for both terminal and non-terminal",
559                                ),
560                                Label::secondary(file_id, terminal.span().byte_range())
561                                    .with_message("Terminal definition here"),
562                                Label::secondary(file_id, non_terminal.span().byte_range())
563                                    .with_message("Non-terminal definition here"),
564                            ])
565                            .with_notes(vec![
566                                "Terminal and non-terminal name must be unique".to_string()
567                            ])
568                    }
569
570                    ParseError::InvalidTerminalRange(
571                        (first, first_index, first_stream),
572                        (last, last_index, last_stream),
573                    ) => {
574                        let range1 = first.span().byte_range();
575                        let range2 = last.span().byte_range();
576                        let range = range1.start..range2.end;
577                        let range1 = utils::tokenstream_range(first_stream);
578                        let range2 = utils::tokenstream_range(last_stream);
579
580                        Diagnostic::error()
581                        .with_message("Invalid terminal range")
582                        .with_labels(vec![
583                            Label::primary(file_id, range).with_message("Invalid range here"),
584                            Label::secondary(file_id, range1).with_message(format!("First terminal symbol (index {})", first_index)),
585                            Label::secondary(file_id, range2).with_message(format!("Last terminal symbol (index {})", last_index)),
586                        ]).with_notes(vec![
587                            "First terminal symbol has to be less than or equal to the last terminal symbol".to_string()
588                        ])
589                    }
590
591                    ParseError::StartNonTerminalNotDefined(ident) => {
592                        let range = ident.span().byte_range();
593
594                        Diagnostic::error()
595                            .with_message("Start non-terminal not defined")
596                            .with_labels(vec![Label::primary(file_id, range)
597                                .with_message("This name is given to %start")])
598                            .with_notes(vec!["Non-terminal name must be defined".to_string()])
599                    }
600
601                    ParseError::TerminalNotDefined(ident) => {
602                        let range = ident.span().byte_range();
603
604                        Diagnostic::error()
605                            .with_message("Terminal symbol not defined")
606                            .with_labels(vec![Label::primary(file_id, range)
607                                .with_message("This terminal symbol is not defined")])
608                            .with_notes(vec!["Terminal symbol must be defined".to_string()])
609                    }
610
611                    ParseError::MultipleTokenDefinition(ident1, ident2) => {
612                        let range1 = ident1.span().byte_range();
613                        let range2 = ident2.span().byte_range();
614
615                        Diagnostic::error()
616                            .with_message("Multiple %token definition")
617                            .with_labels(vec![
618                                Label::primary(file_id, range1).with_message("First definition"),
619                                Label::primary(file_id, range2).with_message("Other definition"),
620                            ])
621                            .with_notes(vec!["Token name must be unique".to_string()])
622                    }
623
624                    ParseError::ReservedName(ident) => {
625                        let range = ident.span().byte_range();
626
627                        Diagnostic::error()
                            .with_message(format!("'{ident}' is a reserved name"))
629                            .with_labels(vec![Label::primary(file_id, range)
630                                .with_message("This name is reserved")])
631                    }
632                    ParseError::UnsupportedLiteralType(literal) => {
633                        let range = literal.into_iter().next().unwrap().span().byte_range();
634
635                        Diagnostic::error()
636                            .with_message("Unsupported literal type")
637                            .with_labels(vec![Label::primary(file_id, range)
638                                .with_message("This literal type is not supported")])
639                            .with_notes(vec![
640                                "If %tokentype is `char`, only `char` or `&str` are supported"
641                                    .to_string(),
642                                "If %tokentype is `u8`, only `u8` or `&[u8]` are supported"
643                                    .to_string(),
644                            ])
645                    }
646                    ParseError::InvalidLiteralRange(first, last) => {
647                        let first_range = first.span().byte_range();
648                        let last_range = last.span().byte_range();
649                        let range = first_range.start..last_range.end;
650
651                        Diagnostic::error()
652                            .with_message("Invalid literal range")
653                            .with_labels(vec![
654                                Label::primary(file_id, range).with_message("Invalid range here"),
655                            ])
656                            .with_notes(vec![
657                                "First terminal symbol has to be less than or equal to the last terminal symbol".to_string()
658                            ])
659                    }
660                    ParseError::TokenInLiteralMode(span) => {
661                        let range = span.byte_range();
662                        Diagnostic::error()
663                            .with_message("%token with %tokentype `char` or `u8` is not supported")
664                            .with_labels(vec![Label::primary(file_id, range)
665                                .with_message("use the literal value directly")])
666                    }
667                    ParseError::MultiplePrecedenceOrderDefinition { cur, old } => {
668                        Diagnostic::error()
669                            .with_message("Multiple operator precedence defined")
670                            .with_labels(vec![
671                                Label::primary(file_id, cur.span().byte_range())
672                                    .with_message("defined here"),
673                                Label::secondary(file_id, old.byte_range())
674                                    .with_message("first defined here"),
675                            ])
676                            .with_notes(vec!["%prec name must be unique".to_string()])
677                    }
678                    ParseError::PrecedenceNotDefined(ident) => {
679                        let range = ident.span().byte_range();
680                        Diagnostic::error()
681                            .with_message("Precedence is not defined for this token")
682                            .with_labels(vec![
683                                Label::primary(file_id, range).with_message("token used here")
684                            ])
685                            .with_notes(vec![
686                                "use %left, %right, or %precedence to define precedence"
687                                    .to_string(),
688                                "refer to https://github.com/ehwan/RustyLR/blob/main/SYNTAX.md#operator-precedence".to_string()
689                            ])
690                    }
691                    ParseError::NonTerminalPrecedenceNotDefined(span, _) => {
692                        let range = span.byte_range();
693                        Diagnostic::error()
694                            .with_message("Precedence is not defined for this non-terminal")
695                            .with_labels(vec![Label::primary(file_id, range)
                                .with_message("non-terminal used here")])
697                            .with_notes(vec![
698                                "Every production rule of this non-terminal must have a precedence defined"
699                                    .to_string(),
700                                "use %left, %right, or %precedence to define precedence"
701                                    .to_string(),
702                            ])
703                    }
704                    ParseError::RuleTypeDefinedButActionNotDefined { name, span } => {
                        // `name` cannot be an auto-generated rule:
                        // generated rules are created programmatically and always carry a proper reduce action
707                        let span = span.0.byte_range().start..span.1.byte_range().end;
708                        Diagnostic::error()
709                            .with_message("Reduce action not defined")
710                            .with_labels(vec![
711                                Label::secondary(file_id, name.span().byte_range())
712                                    .with_message("This rule has a type definition"),
713                                Label::primary(file_id, span)
714                                    .with_message("This rule line has no reduce action"),
715                            ])
716                            .with_notes(vec!["".to_string()])
717                    }
718
719                    ParseError::OnlyTerminalSet(span_begin, span_end) => {
720                        let range = span_begin.byte_range().start..span_end.byte_range().end;
721                        Diagnostic::error()
722                            .with_message("Only terminal or terminal set is allowed")
723                            .with_labels(vec![Label::primary(file_id, range)
724                                .with_message("This pattern is not terminal")])
725                            .with_notes(vec!["".to_string()])
726                    }
727                    ParseError::NonTerminalNotDefined(ident) => {
728                        let range = ident.span().byte_range();
729                        Diagnostic::error()
730                            .with_message("Non-terminal not defined")
731                            .with_labels(vec![Label::primary(file_id, range)
732                                .with_message("This non-terminal is not defined")])
733                    }
734                    ParseError::OnlyUsizeLiteral(span) => {
735                        let range = span.byte_range();
736                        Diagnostic::error()
737                            .with_message("Only usize literal is allowed for %dprec")
738                            .with_labels(vec![Label::primary(file_id, range)])
739                    }
740
741                    _ => {
742                        let message = e.short_message();
743                        let span = e.span().byte_range();
744                        Diagnostic::error()
745                            .with_message(message)
746                            .with_labels(vec![
                                Label::primary(file_id, span).with_message("occurred here")
748                            ])
749                    }
750                };
751
752                let writer = self.stream();
753                let config = codespan_reporting::term::Config::default();
754                term::emit(&mut writer.lock(), &config, &files, &diag)
755                    .expect("Failed to write to stderr");
756
757                return Err(diag.message);
758            }
759        };
760
761        let mut optimize_diags = Vec::new();
762
763        // diagnostics for optimization
764        if grammar.optimize {
765            use rusty_lr_parser::grammar::OptimizeRemove;
766            let mut optimized = grammar.optimize(25);
767
768            optimized.removed.sort_by_key(|a| match a {
769                OptimizeRemove::TerminalClassRuleMerge(_) => 0,
770                OptimizeRemove::SingleNonTerminalRule(_, _) => 1,
771                OptimizeRemove::NonTermNotUsed(_) => 2,
772                OptimizeRemove::Cycle(_) => 3,
773                OptimizeRemove::NonTermDataNotUsed(_) => 4,
774            });
775
776            if self.note_optimization {
777                // terminals merged into terminal class
778                let mut class_message = Vec::new();
779                for (class_idx, class_def) in grammar.terminal_classes.iter().enumerate() {
780                    let len: usize = class_def
781                        .terminals
782                        .iter()
783                        .map(|term| grammar.terminals[*term].name.count())
784                        .sum();
785                    if len == 1 {
786                        continue;
787                    }
788                    let msg = format!(
789                        "TerminalClass{}: {}",
790                        class_def.multiterm_counter,
791                        grammar.class_pretty_name_list(TerminalSymbol::Term(class_idx), 10)
792                    );
793                    class_message.push(msg);
794                }
795                if !class_message.is_empty() {
796                    let diag = Diagnostic::note()
                        .with_message("These terminals are merged into terminal classes")
798                        .with_notes(class_message);
799
800                    optimize_diags.push(diag);
801                }
802
803                for o in optimized.removed {
804                    match o {
805                        OptimizeRemove::TerminalClassRuleMerge(rule) => {
806                            let message = "Production Rule deleted";
807                            let (b, e) = rule.span_pair();
808                            let range = b.byte_range().start..e.byte_range().end;
809                            let labels =
810                                vec![Label::primary(file_id, range).with_message("defined here")];
811                            let notes =
812                                vec!["Will be merged into rule using terminal class".to_string()];
813                            let diag = Diagnostic::note()
814                                .with_message(message)
815                                .with_labels(labels)
816                                .with_notes(notes);
817
818                            optimize_diags.push(diag);
819                        }
820                        OptimizeRemove::SingleNonTerminalRule(rule, nonterm_span) => {
821                            let message = "NonTerminal deleted";
822                            let mut labels = Vec::new();
823                            let notes = vec![
                                "This non-terminal will be replaced by its unique child rule"
825                                    .to_string(),
826                            ];
827
828                            labels.push(
829                                Label::primary(file_id, nonterm_span.byte_range())
830                                    .with_message("non-terminal defined here"),
831                            );
832
833                            let (b, e) = rule.span_pair();
834                            let rule_range = b.byte_range().start..e.byte_range().end;
835                            labels.push(
836                                Label::secondary(file_id, rule_range)
837                                    .with_message("this rule has only one child rule"),
838                            );
839                            let diag = Diagnostic::note()
840                                .with_message(message)
841                                .with_labels(labels)
842                                .with_notes(notes);
843
844                            optimize_diags.push(diag);
845                        }
846                        OptimizeRemove::NonTermNotUsed(span) => {
847                            let message = "NonTerminal deleted";
848                            let mut labels = Vec::new();
849                            let notes =
                                vec!["This non-terminal cannot be reached from the initial state"
851                                    .to_string()];
852
853                            labels.push(
854                                Label::primary(file_id, span.byte_range())
855                                    .with_message("non-terminal defined here"),
856                            );
857
858                            let diag = Diagnostic::warning()
859                                .with_message(message)
860                                .with_labels(labels)
861                                .with_notes(notes);
862
863                            optimize_diags.push(diag);
864                        }
865                        OptimizeRemove::Cycle(span) => {
866                            let message = "Cycle detected";
867                            let mut labels = Vec::new();
868                            let notes =
                                vec!["This non-terminal is involved in a bad cycle".to_string()];
870
871                            labels.push(
872                                Label::primary(file_id, span.byte_range())
873                                    .with_message("non-terminal defined here"),
874                            );
875
876                            let diag = Diagnostic::warning()
877                                .with_message(message)
878                                .with_labels(labels)
879                                .with_notes(notes);
880
881                            optimize_diags.push(diag);
882                        }
883                        OptimizeRemove::NonTermDataNotUsed(nonterm_idx) => {
884                            let nonterm = &grammar.nonterminals[nonterm_idx];
885                            let message = "NonTerminal data type not used";
886                            let mut labels = Vec::new();
887                            let notes = vec![
888                                "This non-terminal's data type is not used in any reduce action"
889                                    .to_string(),
890                                "Consider removing data type to optimize memory usage".to_string(),
891                            ];
892
893                            labels.push(
894                                Label::primary(file_id, nonterm.name.span().byte_range())
895                                    .with_message("non-terminal defined here"),
896                            );
897
898                            let diag = Diagnostic::warning()
899                                .with_message(message)
900                                .with_labels(labels)
901                                .with_notes(notes);
902
903                            optimize_diags.push(diag);
904                        }
905                    }
906                }
907
908                // if other terminals were not used, print warning about removing them
909                let other_terminal_class =
910                    &grammar.terminal_classes[grammar.other_terminal_class_id];
911                if !grammar.other_used && other_terminal_class.terminals.len() > 1 {
912                    let class_name =
913                        grammar.class_pretty_name_abbr(grammar.other_terminal_class_id);
914                    let terms = grammar.class_pretty_name_list(
915                        TerminalSymbol::Term(grammar.other_terminal_class_id),
916                        10,
917                    );
918                    let mut notes = Vec::new();
919                    notes.push(format!("{class_name}: {terms}"));
920
921                    let diag = Diagnostic::warning()
922                        .with_message("These terminals are not used in the grammar")
923                        .with_notes(notes);
924
925                    optimize_diags.push(diag);
926                }
927            }
928        }
929
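        // build the parser tables; the returned collector gathers the conflict and
        // resolution information used for the diagnostics emitted below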
930        grammar.builder = grammar.create_builder();
931        let diags_collector = grammar.build_grammar();
932
933        let mut conflict_diags = Vec::new();
934        let mut conflict_diags_resolved = Vec::new();
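        // pretty-printers used to render non-terminals and terminal classes in the
        // conflict diagnostics and in the rule comments emitted later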
935        let nonterm_mapper = |nonterm| grammar.nonterm_pretty_name(nonterm);
936        let class_mapper = |class| grammar.class_pretty_name_list(class, 5);
937
938        // calculate conflicts
939        for (max_priority, reduce_rules, deleted_rules) in diags_collector.reduce_reduce_resolved {
940            let mut labels = Vec::new();
941            for rule in reduce_rules {
942                let priority = grammar.builder.rules[rule].priority;
943                Self::extend_rule_source_label(
944                    &mut labels,
945                    file_id,
946                    rule,
947                    &grammar,
948                    "(Reduce) ",
949                    format!("(Reduce) rule with the highest priority: {priority}").as_str(),
950                );
951            }
952            for del in deleted_rules {
953                let priority = grammar.builder.rules[del].priority;
954                Self::extend_rule_source_label(
955                    &mut labels,
956                    file_id,
957                    del,
958                    &grammar,
959                    "[Removed] (Reduce) ",
960                    format!("[Removed] (Reduce) rule with lower priority: {priority}").as_str(),
961                );
962            }
963
964            let message = "Reduce/Reduce conflict resolved";
965            let notes = vec![
966                format!("Max priority: {max_priority}"),
967                "Set priority for the rule with %dprec".to_string(),
968            ];
969            conflict_diags_resolved.push(
970                Diagnostic::note()
971                    .with_message(message)
972                    .with_labels(labels)
973                    .with_notes(notes),
974            );
975        }
976        for ((term, shift_rules), (shift_prec, reduce_rules)) in
977            diags_collector.shift_reduce_resolved_shift
978        {
979            let mut labels = Vec::new();
980            // shift_prec >= reduce_prec
981
982            for (reduce_rule, reduce_prec) in reduce_rules {
983                let (nonterm_info, local_id) = grammar.get_rule_by_id(reduce_rule).unwrap();
984                let rule_info = &nonterm_info.rules[local_id];
985                if shift_prec > reduce_prec {
986                    Self::extend_rule_source_label(
987                        &mut labels,
988                        file_id,
989                        reduce_rule,
990                        &grammar,
991                        "[Removed] (Reduce) ",
992                        format!("[Removed] (Reduce) lower precedence than shift: {reduce_prec}")
993                            .as_str(),
994                    );
995                } else {
996                    let reduce_type = "%right";
997                    Self::extend_rule_source_label(
998                        &mut labels,
999                        file_id,
1000                        reduce_rule,
1001                        &grammar,
1002                        "[Removed] (Reduce) ",
1003                        format!("[Removed] (Reduce) has {reduce_type} associativity").as_str(),
1004                    );
1005                }
1006                if !nonterm_info.is_auto_generated() {
1007                    let op_range = rule_info.prec.unwrap().1.byte_range();
1008                    labels.push(
1009                        Label::secondary(file_id, op_range)
1010                            .with_message("[Removed] (Reduce) operator for reduce rule"),
1011                    );
1012                }
1013            }
1014            for shift_rule in shift_rules {
1015                Self::extend_rule_source_label(
1016                    &mut labels,
1017                    file_id,
1018                    shift_rule.rule,
1019                    &grammar,
1020                    "(Shift) ",
1021                    format!("(Shift) precedence: {shift_prec}").as_str(),
1022                );
1023            }
1024
1025            let message = format!(
1026                "Shift/Reduce conflict resolved with terminal(class): {}",
1027                grammar.class_pretty_name_list(term, 5)
1028            );
1029            let notes = vec![
                        "The operator of a production rule is its rightmost terminal symbol with a precedence defined".to_string(),
1031                        "Set operator for rule explicitly with %prec".to_string(),
1032                        "Set precedence for operator with %left, %right, or %precedence"
1033                            .to_string(),
1034                    ];
1035            conflict_diags_resolved.push(
1036                Diagnostic::note()
1037                    .with_message(message)
1038                    .with_labels(labels)
1039                    .with_notes(notes),
1040            );
1041        }
1042        for ((term, shift_rules), (shift_prec, reduce_rules)) in
1043            diags_collector.shift_reduce_resolved_reduce
1044        {
1045            let mut labels = Vec::new();
1046
1047            for (reduce_rule, reduce_prec) in reduce_rules {
1048                let (nonterm_info, local_id) = grammar.get_rule_by_id(reduce_rule).unwrap();
1049                let rule_info = &nonterm_info.rules[local_id];
1050                if reduce_prec > shift_prec {
1051                    Self::extend_rule_source_label(
1052                        &mut labels,
1053                        file_id,
1054                        reduce_rule,
1055                        &grammar,
1056                        "(Reduce) ",
1057                        format!("(Reduce) higher precedence than shift: {reduce_prec}").as_str(),
1058                    );
1059                } else {
1060                    let reduce_type = "%left";
1061                    Self::extend_rule_source_label(
1062                        &mut labels,
1063                        file_id,
1064                        reduce_rule,
1065                        &grammar,
1066                        "(Reduce) ",
1067                        format!("(Reduce) has {reduce_type} associativity").as_str(),
1068                    );
1069                }
1070
1071                if !nonterm_info.is_auto_generated() {
1072                    let op_range = rule_info.prec.unwrap().1.byte_range();
1073                    labels.push(
1074                        Label::secondary(file_id, op_range)
1075                            .with_message("(Reduce) operator for reduce rule"),
1076                    );
1077                }
1078            }
1079
1080            for shift_rule in shift_rules {
1081                Self::extend_rule_source_label(
1082                    &mut labels,
1083                    file_id,
1084                    shift_rule.rule,
1085                    &grammar,
1086                    "[Removed] (Shift) ",
1087                    format!("[Removed] (Shift) lower precedence than reduce: {shift_prec}")
1088                        .as_str(),
1089                );
1090            }
1091
1092            let message = format!(
1093                "Shift/Reduce conflict resolved with terminal(class): {}",
1094                grammar.class_pretty_name_list(term, 5)
1095            );
1096            let notes = vec![
                        "The operator of a production rule is its rightmost terminal symbol with a precedence defined".to_string(),
1098                        "Set operator for rule explicitly with %prec".to_string(),
1099                        "Set precedence for operator with %left, %right, or %precedence"
1100                            .to_string(),
1101                    ];
1102            conflict_diags_resolved.push(
1103                Diagnostic::note()
1104                    .with_message(message)
1105                    .with_labels(labels)
1106                    .with_notes(notes),
1107            );
1108        }
1109
1110        for ((term, shift_rules, shift_rules_backtrace), reduce_rules) in
1111            diags_collector.shift_reduce_conflicts
1112        {
1113            let mut labels = Vec::new();
1114            let mut notes = vec![
                        "The operator of a production rule is its rightmost terminal symbol with a precedence defined".to_string(),
1116                        "Set operator for rule explicitly with %prec".to_string(),
1117                        "Set precedence for operator with %left, %right, or %precedence"
1118                            .to_string(),
1119                    ];
1120
1121            if self.note_backtrace {
1122                if self.is_executable {
1123                    notes.push("--no-backtrace to disable backtracing".to_string());
1124                }
1125                notes.push("Backtrace for the shift rule:".to_string());
1126                for shift_rule in shift_rules_backtrace {
1127                    let rule_str = grammar.builder.rules[shift_rule.rule]
1128                        .rule
1129                        .clone()
1130                        .map(class_mapper, nonterm_mapper)
1131                        .into_shifted(shift_rule.shifted);
1132                    notes.push(format!("\t>>> {rule_str}"));
1133                }
1134            }
1135            for shift_rule in shift_rules {
1136                Self::extend_rule_source_label(
1137                    &mut labels,
1138                    file_id,
1139                    shift_rule.rule,
1140                    &grammar,
1141                    "(Shift) ",
1142                    "(Shift) ",
1143                );
1144            }
1145            for (reduce_rule, reduce_rule_backtrace) in reduce_rules {
1146                Self::extend_rule_source_label(
1147                    &mut labels,
1148                    file_id,
1149                    reduce_rule,
1150                    &grammar,
1151                    "(Reduce) ",
1152                    "(Reduce) ",
1153                );
1154
1155                if self.note_backtrace {
1156                    let name = nonterm_mapper(grammar.builder.rules[reduce_rule].rule.name);
1157
1158                    notes.push(format!("Backtrace for the reduce rule ({name}):"));
1159                    notes.extend(reduce_rule_backtrace.into_iter().map(|shifted_rule| {
1160                        let rule_str = grammar.builder.rules[shifted_rule.rule]
1161                            .rule
1162                            .clone()
1163                            .map(class_mapper, nonterm_mapper)
1164                            .into_shifted(shifted_rule.shifted);
1165
1166                        format!("\t>>> {rule_str}")
1167                    }));
1168                }
1169            }
1170
1171            let message = format!(
1172                "Shift/Reduce conflict detected with terminal(class): {}",
1173                grammar.class_pretty_name_list(term, 5)
1174            );
1175
1176            conflict_diags.push(
1177                Diagnostic::error()
1178                    .with_message(message)
1179                    .with_labels(labels)
1180                    .with_notes(notes),
1181            );
1182        }
1183        for (reduce_rules, reduce_terms) in diags_collector.reduce_reduce_conflicts {
1184            let mut labels = Vec::new();
1185
1186            let mut notes = vec!["Set priority for the rule with %dprec".to_string()];
1187            for (reduce_rule, reduce_rule_from) in reduce_rules {
1188                Self::extend_rule_source_label(
1189                    &mut labels,
1190                    file_id,
1191                    reduce_rule,
1192                    &grammar,
1193                    "(Reduce) ",
1194                    "(Reduce) ",
1195                );
1196
1197                if self.note_backtrace {
1198                    if self.is_executable {
1199                        notes.push("--no-backtrace to disable backtracing".to_string());
1200                    }
1201                    let name = nonterm_mapper(grammar.builder.rules[reduce_rule].rule.name);
1202
1203                    notes.push(format!("Backtrace for the reduce rule ({name}):"));
1204                    notes.extend(reduce_rule_from.into_iter().map(|shifted_rule| {
1205                        let rule_str = grammar.builder.rules[shifted_rule.rule]
1206                            .rule
1207                            .clone()
1208                            .map(class_mapper, nonterm_mapper)
1209                            .into_shifted(shifted_rule.shifted);
1210
1211                        format!("\t>>> {rule_str}")
1212                    }));
1213                }
1214            }
1215
1216            let message = format!(
1217                "Reduce/Reduce conflict detected with terminals: {}",
1218                reduce_terms
1219                    .into_iter()
1220                    .map(class_mapper)
1221                    .collect::<Vec<_>>()
1222                    .join(", ")
1223            );
1224
1225            conflict_diags.push(
1226                Diagnostic::error()
1227                    .with_message(message)
1228                    .with_labels(labels)
1229                    .with_notes(notes),
1230            );
1231        }
1232
1233        // print note about shift/reduce conflict resolved with `%left` or `%right`
1234        if self.note_conflicts_resolving {
1235            for diag in conflict_diags_resolved.into_iter() {
1236                let writer = self.stream();
1237                let config = codespan_reporting::term::Config::default();
1238                term::emit(&mut writer.lock(), &config, &files, &diag)
1239                    .expect("Failed to write to verbose stream");
1240            }
1241        }
1242
1243        if !grammar.glr {
1244            let has_diags = !conflict_diags.is_empty();
1245            for diag in conflict_diags.into_iter() {
1246                let writer = self.stream();
1247                let config = codespan_reporting::term::Config::default();
1248                term::emit(&mut writer.lock(), &config, &files, &diag)
1249                    .expect("Failed to write to stderr");
1250            }
1251            if has_diags {
1252                return Err("Grammar building failed".to_string());
1253            }
1254        }
1255        // print note about reduce/reduce conflict and shift/reduce conflict not resolved
1256        else if self.note_conflicts {
1257            for diag in conflict_diags.into_iter() {
1258                let diag = Diagnostic::help()
1259                    .with_message(diag.message)
1260                    .with_labels(diag.labels)
1261                    .with_notes(diag.notes);
1262                let writer = self.stream();
1263                let config = codespan_reporting::term::Config::default();
1264                term::emit(&mut writer.lock(), &config, &files, &diag)
1265                    .expect("Failed to write to stderr");
1266            }
1267        }
1268
1269        for diag in optimize_diags.into_iter() {
1270            let writer = self.stream();
1271            let config = codespan_reporting::term::Config::default();
1272            term::emit(&mut writer.lock(), &config, &files, &diag)
1273                .expect("Failed to write to verbose stream");
1274        }
1275
1276        // expand macro
1277        let expanded_stream = grammar.emit_compiletime();
1278
1279        let num_classes = grammar.terminal_classes.len();
1280        let num_states = grammar.states.len();
1281
        // these comments will be printed to the output file;
        // they are rebuilt here regardless of whether the grammar was built before,
        // since much of this information is removed from the rusty_lr_parser output
1285        let rules_comments = grammar
1286            .builder
1287            .rules
1288            .iter()
1289            .enumerate()
1290            .map(|(rule_id, rule)| {
1291                format!(
1292                    "{rule_id}: {}",
1293                    rule.rule.clone().map(class_mapper, nonterm_mapper)
1294                )
1295            })
1296            .collect::<Vec<_>>()
1297            .join("\n");
1298        let debug_comments = format!(
1299            "{:=^80}\n
1300# of terminal classes: {num_classes}\n# of states: {num_states}\n
1301{rules_comments}\n",
1302            "Grammar"
1303        );
1304
1305        println!("# of terminal classes: {num_classes}");
1306        println!("# of states: {num_states}");
1307
1308        Ok(output::Output {
1309            user_stream: output_stream,
1310            generated_stream: expanded_stream,
1311            debug_comments,
1312            grammar,
1313        })
1314    }
1315}
1316
1317impl Default for Builder {
1318    fn default() -> Self {
1319        Self::new()
1320    }
1321}