Skip to main content

lalrpop/build/
mod.rs

1//! Utilities for running in a build script.
2
3use crate::file_text::FileText;
4use crate::grammar::parse_tree as pt;
5use crate::grammar::repr as r;
6use crate::lexer::intern_token;
7use crate::lr1;
8use crate::message::builder::InlineBuilder;
9use crate::message::{Content, Message};
10use crate::normalize;
11use crate::parser;
12use crate::rust::RustWrite;
13use crate::session::{ColorConfig, Session};
14use crate::tls::Tls;
15use crate::tok;
16use crate::util::Sep;
17use itertools::Itertools;
18use sha3::{Digest, Sha3_256};
19use walkdir::WalkDir;
20
21use std::ffi::OsStr;
22use std::fs;
23use std::io::{self, BufRead, IsTerminal, Read, Write};
24use std::path::{Path, PathBuf};
25use std::rc::Rc;
26
27mod action;
28mod fake_term;
29
30use self::fake_term::FakeTerminal;
31
/// Comment line identifying the generator that produced an output file.
///
/// It is assembled at compile time from the Cargo package name and version.
/// `process_file_into` writes it as the first line of each generated file and
/// `needs_rebuild` compares it with the first line of an existing output file.
const LALRPOP_VERSION_HEADER: &str = concat!(
    "// auto-generated: \"",
    env!("CARGO_PKG_NAME"),
    " ",
    env!("CARGO_PKG_VERSION"),
    "\""
);
39
40fn hash_file(file: &Path) -> io::Result<String> {
41    let mut file = fs::File::open(file)?;
42    let mut file_bytes = Vec::new();
43    file.read_to_end(&mut file_bytes).unwrap();
44
45    let mut sha3 = Sha3_256::new();
46    sha3.update(&file_bytes);
47
48    let output = sha3.finalize();
49
50    Ok(format!("// sha3: {:02x}", output.iter().format("")))
51}
52
53pub fn process_dir<P: AsRef<Path>>(session: Rc<Session>, root_dir: P) -> io::Result<()> {
54    let lalrpop_files = lalrpop_files(root_dir)?;
55    for lalrpop_file in lalrpop_files {
56        process_file(session.clone(), lalrpop_file)?;
57    }
58    Ok(())
59}
60
61pub fn process_file<P: AsRef<Path>>(session: Rc<Session>, lalrpop_file: P) -> io::Result<()> {
62    let lalrpop_file = lalrpop_file.as_ref();
63    let rs_file = resolve_rs_file(&session, lalrpop_file)?;
64    let report_file = resolve_report_file(&session, lalrpop_file)?;
65    process_file_into(session, lalrpop_file, &rs_file, &report_file)
66}
67
68fn resolve_rs_file(session: &Session, lalrpop_file: &Path) -> io::Result<PathBuf> {
69    gen_resolve_file(session, lalrpop_file, "rs")
70}
71
72fn resolve_report_file(session: &Session, lalrpop_file: &Path) -> io::Result<PathBuf> {
73    gen_resolve_file(session, lalrpop_file, "report")
74}
75
76fn gen_resolve_file(session: &Session, lalrpop_file: &Path, ext: &str) -> io::Result<PathBuf> {
77    let out_dir = if let Some(ref d) = session.out_dir {
78        // If there is an out_directory, we still expect it to mirror the
79        // directory structure of where we found the lalrpop file relative to
80        // the starting point.
81        if let Some(p) = lalrpop_file
82            .parent()
83            .and_then(|p| {
84                // We need to strip the in_dir from the path, if it exists.
85                session.in_dir.as_ref().map(|in_dir| {
86                    // If this file was from the in_directory, then it was necessarily a prefix of the path?
87                    let path_from_in = p.strip_prefix(in_dir).ok().unwrap();
88
89                    // Strip the src directory if we can?
90                    // Is this only for maintaining the old behavior of starting
91                    // from the root instead of starting in source?
92                    path_from_in.strip_prefix("src").unwrap_or(path_from_in)
93                })
94            })
95            .filter(|p| p != &Path::new(""))
96        {
97            // There is some additional path structure, so add it
98            d.join(p)
99        } else {
100            d.to_path_buf()
101        }
102    } else {
103        lalrpop_file
104            .parent()
105            .unwrap_or_else(|| Path::new("."))
106            .to_path_buf()
107    };
108
109    // Ideally we do something like syn::parse_str::<syn::Ident>(lalrpop_file.file_name())?;
110    // But I don't think we want a full blown syn dependency unless fully converting to proc macros.
111    if lalrpop_file
112        .file_name()
113        .ok_or(io::Error::new(
114            io::ErrorKind::InvalidInput,
115            format!(
116                "LALRPOP could not extract a valid file name: {}",
117                lalrpop_file.display()
118            ),
119        ))?
120        .to_str()
121        .ok_or(io::Error::new(
122            io::ErrorKind::InvalidInput,
123            format!(
124                "LALRPOP file names must be valid UTF-8: {}",
125                lalrpop_file.display()
126            ),
127        ))?
128        .contains(char::is_whitespace)
129    {
130        return Err(io::Error::new(
131            io::ErrorKind::InvalidInput,
132            format!(
133                "LALRPOP file names cannot contain whitespace: {}",
134                lalrpop_file.display()
135            ),
136        ));
137    }
138
139    Ok(out_dir
140        .join(
141            lalrpop_file
142                .file_name()
143                .ok_or_else(|| io::Error::from(io::ErrorKind::InvalidInput))?,
144        )
145        .with_extension(ext))
146}
147
148fn process_file_into(
149    session: Rc<Session>,
150    lalrpop_file: &Path,
151    rs_file: &Path,
152    report_file: &Path,
153) -> io::Result<()> {
154    session.emit_rerun_directive(lalrpop_file);
155    if session.force_build || needs_rebuild(lalrpop_file, rs_file)? {
156        log!(
157            session,
158            Informative,
159            "processing file `{}`",
160            lalrpop_file.to_string_lossy()
161        );
162
163        // Load the LALRPOP source text for this file:
164        let file_text = Rc::new(FileText::from_path(lalrpop_file.to_path_buf())?);
165
166        if let Some(parent) = rs_file.parent() {
167            fs::create_dir_all(parent)?;
168        }
169        remove_old_file(rs_file)?;
170
171        // Store the session and file-text in TLS -- this is not
172        // intended to be used in this high-level code, but it gives
173        // easy access to this information pervasively in the
174        // low-level LR(1) and grammar normalization code. This is
175        // particularly useful for error-reporting.
176        let _tls = Tls::install(session.clone(), file_text.clone());
177
178        // Do the LALRPOP processing itself and write the resulting
179        // buffer into a file. We use a buffer so that if LR(1)
180        // generation fails at some point, we don't leave a partial
181        // file behind.
182        {
183            let grammar = parse_and_normalize_grammar(&session, &file_text)?;
184            let buffer = emit_recursive_ascent(&session, &grammar, report_file)?;
185            let mut output_file = fs::File::create(rs_file)?;
186            writeln!(output_file, "{LALRPOP_VERSION_HEADER}")?;
187            writeln!(output_file, "{}", hash_file(lalrpop_file)?)?;
188            output_file.write_all(&buffer)?;
189        }
190    }
191    Ok(())
192}
193
/// Deletes a previously generated file, treating "nothing to delete" as
/// success.
///
/// A missing file is reported as `NotFound` on Unix and as
/// `PermissionDenied` on Windows, so both kinds are ignored. Any other error
/// is returned unchanged.
fn remove_old_file(rs_file: &Path) -> io::Result<()> {
    fs::remove_file(rs_file).or_else(|err| match err.kind() {
        io::ErrorKind::NotFound | io::ErrorKind::PermissionDenied => Ok(()),
        _ => Err(err),
    })
}
206
207fn needs_rebuild(lalrpop_file: &Path, rs_file: &Path) -> io::Result<bool> {
208    match fs::File::open(rs_file) {
209        Ok(rs_file) => {
210            let mut version_str = String::new();
211            let mut hash_str = String::new();
212
213            let mut f = io::BufReader::new(rs_file);
214
215            f.read_line(&mut version_str)?;
216            f.read_line(&mut hash_str)?;
217
218            Ok(hash_str.trim() != hash_file(lalrpop_file)?
219                || version_str.trim() != LALRPOP_VERSION_HEADER)
220        }
221        Err(e) => match e.kind() {
222            io::ErrorKind::NotFound => Ok(true),
223            _ => Err(e),
224        },
225    }
226}
227
228/// Handles a [walkdir::Error] if the root cause is a dangling symlink.
229///
230/// Returns `Ok` if the error could be handled, otherwise returns `Err(err)`.
231fn handle_dangling_symlink_error(err: walkdir::Error) -> Result<(), walkdir::Error> {
232    let is_not_found = err.io_error().map(|io_err| io_err.kind()) == Some(io::ErrorKind::NotFound);
233    if !is_not_found {
234        return Err(err);
235    }
236
237    // As of now on Linux, this is the path of the symlink (not where it points to) in case of a
238    // dangling symlink:
239    let path = match err.path() {
240        Some(path) => path,
241        None => {
242            return Err(err);
243        }
244    };
245
246    if !path.is_symlink() {
247        return Err(err);
248    }
249
250    eprintln!(
251        "Warning: ignoring dangling/erroneous symlink {}",
252        path.display()
253    );
254    Ok(())
255}
256
257fn lalrpop_files<P: AsRef<Path>>(root_dir: P) -> io::Result<Vec<PathBuf>> {
258    let mut result = vec![];
259
260    let walkdir = WalkDir::new(root_dir)
261        .follow_links(true)
262        // Use deterministic ordering:
263        .sort_by_file_name();
264    for entry in walkdir {
265        let entry = match entry {
266            Ok(entry) => entry,
267            Err(err) => {
268                handle_dangling_symlink_error(err)?;
269                continue;
270            }
271        };
272
273        // `file_type` follows symlinks, so if `entry` points to a symlink to a file, then
274        // `is_file` returns true.
275        if !entry.file_type().is_file() {
276            continue;
277        }
278
279        let path = entry.path();
280        if path.extension() != Some(OsStr::new("lalrpop")) {
281            continue;
282        }
283
284        result.push(PathBuf::from(path));
285    }
286
287    Ok(result)
288}
289
290fn parse_and_normalize_grammar(session: &Session, file_text: &FileText) -> io::Result<r::Grammar> {
291    let grammar = parser::parse_grammar(file_text.text())
292        .map_err(|error| report_parse_error(file_text, error, report_error))?;
293
294    match normalize::normalize(session, grammar) {
295        Ok(grammar) => Ok(grammar),
296        Err(error) => Err(report_error(file_text, error.span, &error.message))?,
297    }
298}
299
300/// Reports a parse error via a custom reporter.
301///
302/// Maps [`parser::ParseError`] to a custom error type `E`. The user of this function
303/// can then handle the error as they see fit by passing a callback that constructs `E`
304/// from the error message, source file text, and span.
305pub fn report_parse_error<E>(
306    file_text: &FileText,
307    error: parser::ParseError<'_>,
308    mut reporter: impl FnMut(&FileText, pt::Span, &str) -> E,
309) -> E {
310    match error {
311        parser::ParseError::InvalidToken { location } => {
312            let ch = file_text.text()[location..].chars().next().unwrap();
313            reporter(
314                file_text,
315                pt::Span(location, location),
316                &format!("invalid character `{ch}`"),
317            )
318        }
319
320        parser::ParseError::UnrecognizedEof { location, .. } => reporter(
321            file_text,
322            pt::Span(location, location),
323            "unexpected end of file",
324        ),
325
326        parser::ParseError::UnrecognizedToken {
327            token: (lo, _, hi),
328            expected,
329        } => {
330            let _ = expected; // didn't implement this yet :)
331            let text = &file_text.text()[lo..hi];
332            reporter(
333                file_text,
334                pt::Span(lo, hi),
335                &format!("unexpected token: `{text}`"),
336            )
337        }
338
339        parser::ParseError::ExtraToken { token: (lo, _, hi) } => {
340            let text = &file_text.text()[lo..hi];
341            reporter(
342                file_text,
343                pt::Span(lo, hi),
344                &format!("extra token at end of input: `{text}`"),
345            )
346        }
347
348        parser::ParseError::User { error } => {
349            let string = match error.code {
350                tok::ErrorCode::UnrecognizedToken => "unrecognized token",
351                tok::ErrorCode::UnterminatedEscape => "unterminated escape; missing '`'?",
352                tok::ErrorCode::UnterminatedAsciiEscape => {
353                    "unterminated ascii escape; missing second digit?"
354                }
355                tok::ErrorCode::UnrecognizedEscape => {
356                    "unrecognized escape; only \\n, \\r, \\t, \\0, \\\", \\\\, and \\x## are recognized"
357                }
358                tok::ErrorCode::UnterminatedStringLiteral => {
359                    "unterminated string literal; missing `\"`?"
360                }
361                tok::ErrorCode::UnterminatedCharacterLiteral => {
362                    "unterminated character literal; missing `'`?"
363                }
364                tok::ErrorCode::UnterminatedAttribute => "unterminated #! attribute; missing `]`?",
365                tok::ErrorCode::ExpectedStringLiteral => "expected string literal; missing `\"`?",
366                tok::ErrorCode::UnterminatedCode => {
367                    "unterminated code block; perhaps a missing `;`, `)`, `]` or `}`?"
368                }
369                tok::ErrorCode::UnterminatedBlockComment => {
370                    "unterminated block comment; missing `*/`?"
371                }
372            };
373
374            reporter(
375                file_text,
376                pt::Span(error.location, error.location + 1),
377                string,
378            )
379        }
380    }
381}
382
383fn report_error(file_text: &FileText, span: pt::Span, message: &str) -> io::Error {
384    println!("{} error: {}", file_text.span_str(span), message);
385
386    let out = io::stderr();
387    let mut out = out.lock();
388    file_text.highlight(span, &mut out).unwrap();
389
390    io::Error::new(io::ErrorKind::InvalidData, message)
391}
392
393fn report_message(message: Message) -> term::Result<()> {
394    let content = InlineBuilder::new().push(Box::new(message)).end();
395    report_content(&*content)?;
396    println!();
397    Ok(())
398}
399
400fn report_content(content: &dyn Content) -> term::Result<()> {
401    // FIXME -- can we query the size of the terminal somehow?
402    let canvas = content.emit_to_canvas(80);
403
404    let try_colors = match Tls::session().color_config {
405        ColorConfig::Yes => true,
406        ColorConfig::No => false,
407        ColorConfig::IfTty => io::stdout().is_terminal(),
408    };
409
410    if try_colors {
411        if let Some(mut stdout) = term::stdout() {
412            return canvas.write_to(&mut *stdout);
413        }
414    }
415
416    let stdout = io::stdout();
417    let mut stdout = FakeTerminal::new(stdout.lock());
418    canvas.write_to(&mut stdout)
419}
420
421fn emit_module_attributes<W: Write>(
422    grammar: &r::Grammar,
423    rust: &mut RustWrite<W>,
424) -> io::Result<()> {
425    rust.write_module_attributes(grammar)
426}
427
428fn emit_uses<W: Write>(grammar: &r::Grammar, rust: &mut RustWrite<W>) -> io::Result<()> {
429    rust.write_uses("", grammar)
430}
431
432fn emit_recursive_ascent(
433    session: &Session,
434    grammar: &r::Grammar,
435    report_file: &Path,
436) -> io::Result<Vec<u8>> {
437    let mut rust = RustWrite::new(vec![]);
438
439    // We generate a module structure like this:
440    //
441    // ```
442    // mod <output-file> {
443    //     // For each public symbol:
444    //     pub fn parse_XYZ();
445    //     mod __XYZ { ... }
446    //
447    //     // For each bit of action code:
448    //     <action-code>
449    // }
450    // ```
451    //
452    // Note that the action code goes in the outer module.  This is
453    // intentional because it means that the foo.lalrpop file serves
454    // as a module in the rust hierarchy, so if the action code
455    // includes things like `super::` it will resolve in the natural
456    // way.
457
458    emit_module_attributes(grammar, &mut rust)?;
459    emit_uses(grammar, &mut rust)?;
460
461    if grammar.start_nonterminals.is_empty() {
462        println!("Error: no public symbols declared in grammar");
463        return Err(io::Error::from(io::ErrorKind::InvalidData));
464    }
465
466    // Find a better visibility for some generated items.
467    // This will be the maximum of the visibility of all starting nonterminals.
468    let mut max_start_nt_visibility = pt::Visibility::Priv;
469    for (user_nt, start_nt) in &grammar.start_nonterminals {
470        match (
471            &max_start_nt_visibility,
472            &grammar.nonterminals[start_nt].visibility,
473        ) {
474            (r::Visibility::Pub(None), _) | (_, r::Visibility::Priv) => {}
475            (v1, v2) if v1 == v2 => {}
476            (r::Visibility::Priv, v) => max_start_nt_visibility = v.clone(),
477            _ => max_start_nt_visibility = r::Visibility::Pub(None),
478        };
479        // We generate these, so there should always be exactly 1
480        // production. Otherwise the LR(1) algorithm doesn't know
481        // where to stop!
482        assert_eq!(grammar.productions_for(start_nt).len(), 1);
483
484        log!(
485            session,
486            Verbose,
487            "Building states for public nonterminal `{}`",
488            user_nt
489        );
490
491        let _lr1_tls = lr1::Lr1Tls::install(grammar.terminals.clone());
492
493        let lr1result = lr1::build_states(grammar, start_nt.clone());
494        if session.emit_report {
495            let mut output_report_file = fs::File::create(report_file)?;
496            lr1::generate_report(&mut output_report_file, &lr1result)?;
497        }
498
499        let states = match lr1result {
500            Ok(states) => states,
501            Err(error) => {
502                let _ = lr1::report_error(grammar, &error, report_message);
503                return Err(io::Error::from(io::ErrorKind::InvalidData));
504            }
505        };
506
507        match grammar.algorithm.codegen {
508            r::LrCodeGeneration::RecursiveAscent => lr1::codegen::ascent::compile(
509                grammar,
510                user_nt.clone(),
511                start_nt.clone(),
512                &states,
513                "super",
514                &mut rust,
515            )?,
516            r::LrCodeGeneration::TableDriven => lr1::codegen::parse_table::compile(
517                grammar,
518                user_nt.clone(),
519                start_nt.clone(),
520                &states,
521                "super",
522                &mut rust,
523            )?,
524
525            r::LrCodeGeneration::TestAll => lr1::codegen::test_all::compile(
526                grammar,
527                user_nt.clone(),
528                start_nt.clone(),
529                &states,
530                &mut rust,
531            )?,
532        }
533
534        rust!(rust, "#[allow(unused_imports)]");
535        rust!(
536            rust,
537            "{}use self::{}parse{}::{}Parser;",
538            grammar.nonterminals[user_nt].visibility,
539            grammar.prefix,
540            start_nt,
541            user_nt
542        );
543    }
544
545    if let Some(ref intern_token) = grammar.intern_token {
546        intern_token::compile(grammar, intern_token, &mut rust)?;
547        rust!(
548            rust,
549            "pub(crate) use self::{}lalrpop_util::lexer::Token;",
550            grammar.prefix
551        );
552    }
553
554    action::emit_action_code(grammar, &mut rust)?;
555
556    rust!(rust, "");
557    rust!(rust, "#[allow(clippy::type_complexity, dead_code)]");
558    emit_to_triple_trait(grammar, max_start_nt_visibility, &mut rust)?;
559
560    Ok(rust.into_inner())
561}
562
563fn write_where_clause<W: Write>(
564    where_clauses: &[r::WhereClause],
565    to_triple_where_clauses: &Sep<&Vec<r::WhereClause>>,
566    rust: &mut RustWrite<W>,
567) -> io::Result<()> {
568    if !where_clauses.is_empty() {
569        rust!(rust, "where {}", to_triple_where_clauses);
570    }
571
572    Ok(())
573}
574
575fn emit_to_triple_trait<W: Write>(
576    grammar: &r::Grammar,
577    max_start_nt_visibility: r::Visibility,
578    rust: &mut RustWrite<W>,
579) -> io::Result<()> {
580    #[allow(non_snake_case)]
581    let (L, T, E) = (
582        grammar.types.terminal_loc_type(),
583        grammar.types.terminal_token_type(),
584        grammar.types.error_type(),
585    );
586
587    let parse_error = format!(
588        "{p}lalrpop_util::ParseError<{L}, {T}, {E}>",
589        p = grammar.prefix,
590        L = L,
591        T = T,
592        E = E,
593    );
594
595    let mut user_type_parameters = String::new();
596    for type_parameter in &grammar.type_parameters {
597        user_type_parameters.push_str(&format!("{type_parameter}, "));
598    }
599
600    let where_clauses = &grammar.where_clauses;
601    let to_triple_where_clauses = Sep(",", where_clauses);
602
603    rust!(
604        rust,
605        "{}trait {}ToTriple<{}>",
606        max_start_nt_visibility,
607        grammar.prefix,
608        user_type_parameters,
609    );
610    write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
611    rust!(rust, "{{");
612    rust!(
613        rust,
614        "fn to_triple(self) -> Result<({L},{T},{L}), {parse_error}>;",
615        L = L,
616        T = T,
617        parse_error = parse_error,
618    );
619    rust!(rust, "}}");
620
621    rust!(rust, "");
622    if grammar.types.opt_terminal_loc_type().is_some() {
623        rust!(
624            rust,
625            "impl<{utp}> {p}ToTriple<{utp}> for ({L}, {T}, {L})",
626            p = grammar.prefix,
627            utp = user_type_parameters,
628            L = L,
629            T = T,
630        );
631        write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
632        rust!(rust, "{{");
633        rust!(
634            rust,
635            "fn to_triple(self) -> Result<({L},{T},{L}), {parse_error}> {{",
636            L = L,
637            T = T,
638            parse_error = parse_error,
639        );
640        rust!(rust, "Ok(self)");
641        rust!(rust, "}}");
642        rust!(rust, "}}");
643
644        rust!(
645            rust,
646            "impl<{utp}> {p}ToTriple<{utp}> for Result<({L}, {T}, {L}), {E}>",
647            utp = user_type_parameters,
648            p = grammar.prefix,
649            L = L,
650            T = T,
651            E = E,
652        );
653        write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
654        rust!(rust, "{{");
655        rust!(
656            rust,
657            "fn to_triple(self) -> Result<({L},{T},{L}), {parse_error}> {{",
658            L = L,
659            T = T,
660            parse_error = parse_error,
661        );
662        rust!(
663            rust,
664            "self.map_err(|error| {p}lalrpop_util::ParseError::User {{ error }})",
665            p = grammar.prefix
666        );
667        rust!(rust, "}}");
668        rust!(rust, "}}");
669    } else {
670        rust!(
671            rust,
672            "impl<{utp}> {p}ToTriple<{utp}> for {T}",
673            utp = user_type_parameters,
674            p = grammar.prefix,
675            T = T,
676        );
677        write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
678        rust!(rust, "{{");
679        rust!(
680            rust,
681            "fn to_triple(self) -> Result<((),{T},()), {parse_error}> {{",
682            T = T,
683            parse_error = parse_error,
684        );
685        rust!(rust, "Ok(((), self, ()))");
686        rust!(rust, "}}");
687        rust!(rust, "}}");
688
689        rust!(
690            rust,
691            "impl<{utp}> {p}ToTriple<{utp}> for Result<{T},{E}>",
692            utp = user_type_parameters,
693            p = grammar.prefix,
694            T = T,
695            E = E,
696        );
697        write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
698        rust!(rust, "{{");
699        rust!(
700            rust,
701            "fn to_triple(self) -> Result<((),{T},()), {parse_error}> {{",
702            T = T,
703            parse_error = parse_error,
704        );
705        rust!(rust, "match self {{");
706        rust!(rust, "Ok(v) => Ok(((), v, ())),");
707        rust!(
708            rust,
709            "Err(error) => Err({p}lalrpop_util::ParseError::User {{ error }}),",
710            p = grammar.prefix
711        );
712        rust!(rust, "}}"); // match
713        rust!(rust, "}}"); // fn
714        rust!(rust, "}}"); // impl
715    }
716
717    Ok(())
718}