1use crate::file_text::FileText;
4use crate::grammar::parse_tree as pt;
5use crate::grammar::repr as r;
6use crate::lexer::intern_token;
7use crate::lr1;
8use crate::message::builder::InlineBuilder;
9use crate::message::{Content, Message};
10use crate::normalize;
11use crate::parser;
12use crate::rust::RustWrite;
13use crate::session::{ColorConfig, Session};
14use crate::tls::Tls;
15use crate::tok;
16use crate::util::Sep;
17use itertools::Itertools;
18use sha3::{Digest, Sha3_256};
19use walkdir::WalkDir;
20
21use std::ffi::OsStr;
22use std::fs;
23use std::io::{self, BufRead, IsTerminal, Read, Write};
24use std::path::{Path, PathBuf};
25use std::rc::Rc;
26
27mod action;
28mod fake_term;
29
30use self::fake_term::FakeTerminal;
31
/// Comment line written at the top of every generated file.
///
/// It is assembled at compile time from `CARGO_PKG_NAME` and
/// `CARGO_PKG_VERSION`; `needs_rebuild` compares it against the first line of
/// an existing output file.
const LALRPOP_VERSION_HEADER: &str = concat!(
    "// auto-generated: \"",
    env!("CARGO_PKG_NAME"),
    " ",
    env!("CARGO_PKG_VERSION"),
    "\""
);
39
40fn hash_file(file: &Path) -> io::Result<String> {
41 let mut file = fs::File::open(file)?;
42 let mut file_bytes = Vec::new();
43 file.read_to_end(&mut file_bytes).unwrap();
44
45 let mut sha3 = Sha3_256::new();
46 sha3.update(&file_bytes);
47
48 let output = sha3.finalize();
49
50 Ok(format!("// sha3: {:02x}", output.iter().format("")))
51}
52
53pub fn process_dir<P: AsRef<Path>>(session: Rc<Session>, root_dir: P) -> io::Result<()> {
54 let lalrpop_files = lalrpop_files(root_dir)?;
55 for lalrpop_file in lalrpop_files {
56 process_file(session.clone(), lalrpop_file)?;
57 }
58 Ok(())
59}
60
61pub fn process_file<P: AsRef<Path>>(session: Rc<Session>, lalrpop_file: P) -> io::Result<()> {
62 let lalrpop_file = lalrpop_file.as_ref();
63 let rs_file = resolve_rs_file(&session, lalrpop_file)?;
64 let report_file = resolve_report_file(&session, lalrpop_file)?;
65 process_file_into(session, lalrpop_file, &rs_file, &report_file)
66}
67
/// Resolves the output path for `lalrpop_file` with the `rs` extension.
///
/// Delegates to `gen_resolve_file`; any error it returns is passed through.
fn resolve_rs_file(session: &Session, lalrpop_file: &Path) -> io::Result<PathBuf> {
    gen_resolve_file(session, lalrpop_file, "rs")
}
71
/// Resolves the output path for `lalrpop_file` with the `report` extension.
///
/// Delegates to `gen_resolve_file`; any error it returns is passed through.
fn resolve_report_file(session: &Session, lalrpop_file: &Path) -> io::Result<PathBuf> {
    gen_resolve_file(session, lalrpop_file, "report")
}
75
76fn gen_resolve_file(session: &Session, lalrpop_file: &Path, ext: &str) -> io::Result<PathBuf> {
77 let out_dir = if let Some(ref d) = session.out_dir {
78 if let Some(p) = lalrpop_file
82 .parent()
83 .and_then(|p| {
84 session.in_dir.as_ref().map(|in_dir| {
86 let path_from_in = p.strip_prefix(in_dir).ok().unwrap();
88
89 path_from_in.strip_prefix("src").unwrap_or(path_from_in)
93 })
94 })
95 .filter(|p| p != &Path::new(""))
96 {
97 d.join(p)
99 } else {
100 d.to_path_buf()
101 }
102 } else {
103 lalrpop_file
104 .parent()
105 .unwrap_or_else(|| Path::new("."))
106 .to_path_buf()
107 };
108
109 if lalrpop_file
112 .file_name()
113 .ok_or(io::Error::new(
114 io::ErrorKind::InvalidInput,
115 format!(
116 "LALRPOP could not extract a valid file name: {}",
117 lalrpop_file.display()
118 ),
119 ))?
120 .to_str()
121 .ok_or(io::Error::new(
122 io::ErrorKind::InvalidInput,
123 format!(
124 "LALRPOP file names must be valid UTF-8: {}",
125 lalrpop_file.display()
126 ),
127 ))?
128 .contains(char::is_whitespace)
129 {
130 return Err(io::Error::new(
131 io::ErrorKind::InvalidInput,
132 format!(
133 "LALRPOP file names cannot contain whitespace: {}",
134 lalrpop_file.display()
135 ),
136 ));
137 }
138
139 Ok(out_dir
140 .join(
141 lalrpop_file
142 .file_name()
143 .ok_or_else(|| io::Error::from(io::ErrorKind::InvalidInput))?,
144 )
145 .with_extension(ext))
146}
147
148fn process_file_into(
149 session: Rc<Session>,
150 lalrpop_file: &Path,
151 rs_file: &Path,
152 report_file: &Path,
153) -> io::Result<()> {
154 session.emit_rerun_directive(lalrpop_file);
155 if session.force_build || needs_rebuild(lalrpop_file, rs_file)? {
156 log!(
157 session,
158 Informative,
159 "processing file `{}`",
160 lalrpop_file.to_string_lossy()
161 );
162
163 let file_text = Rc::new(FileText::from_path(lalrpop_file.to_path_buf())?);
165
166 if let Some(parent) = rs_file.parent() {
167 fs::create_dir_all(parent)?;
168 }
169 remove_old_file(rs_file)?;
170
171 let _tls = Tls::install(session.clone(), file_text.clone());
177
178 {
183 let grammar = parse_and_normalize_grammar(&session, &file_text)?;
184 let buffer = emit_recursive_ascent(&session, &grammar, report_file)?;
185 let mut output_file = fs::File::create(rs_file)?;
186 writeln!(output_file, "{LALRPOP_VERSION_HEADER}")?;
187 writeln!(output_file, "{}", hash_file(lalrpop_file)?)?;
188 output_file.write_all(&buffer)?;
189 }
190 }
191 Ok(())
192}
193
/// Deletes `rs_file`, treating "nothing to delete" as success.
///
/// A `NotFound` or `PermissionDenied` failure is deliberately reported as
/// `Ok(())`; every other error is returned to the caller.
fn remove_old_file(rs_file: &Path) -> io::Result<()> {
    match fs::remove_file(rs_file) {
        Err(error)
            if !matches!(
                error.kind(),
                io::ErrorKind::NotFound | io::ErrorKind::PermissionDenied
            ) =>
        {
            Err(error)
        }
        _ => Ok(()),
    }
}
206
207fn needs_rebuild(lalrpop_file: &Path, rs_file: &Path) -> io::Result<bool> {
208 match fs::File::open(rs_file) {
209 Ok(rs_file) => {
210 let mut version_str = String::new();
211 let mut hash_str = String::new();
212
213 let mut f = io::BufReader::new(rs_file);
214
215 f.read_line(&mut version_str)?;
216 f.read_line(&mut hash_str)?;
217
218 Ok(hash_str.trim() != hash_file(lalrpop_file)?
219 || version_str.trim() != LALRPOP_VERSION_HEADER)
220 }
221 Err(e) => match e.kind() {
222 io::ErrorKind::NotFound => Ok(true),
223 _ => Err(e),
224 },
225 }
226}
227
228fn handle_dangling_symlink_error(err: walkdir::Error) -> Result<(), walkdir::Error> {
232 let is_not_found = err.io_error().map(|io_err| io_err.kind()) == Some(io::ErrorKind::NotFound);
233 if !is_not_found {
234 return Err(err);
235 }
236
237 let path = match err.path() {
240 Some(path) => path,
241 None => {
242 return Err(err);
243 }
244 };
245
246 if !path.is_symlink() {
247 return Err(err);
248 }
249
250 eprintln!(
251 "Warning: ignoring dangling/erroneous symlink {}",
252 path.display()
253 );
254 Ok(())
255}
256
257fn lalrpop_files<P: AsRef<Path>>(root_dir: P) -> io::Result<Vec<PathBuf>> {
258 let mut result = vec![];
259
260 let walkdir = WalkDir::new(root_dir)
261 .follow_links(true)
262 .sort_by_file_name();
264 for entry in walkdir {
265 let entry = match entry {
266 Ok(entry) => entry,
267 Err(err) => {
268 handle_dangling_symlink_error(err)?;
269 continue;
270 }
271 };
272
273 if !entry.file_type().is_file() {
276 continue;
277 }
278
279 let path = entry.path();
280 if path.extension() != Some(OsStr::new("lalrpop")) {
281 continue;
282 }
283
284 result.push(PathBuf::from(path));
285 }
286
287 Ok(result)
288}
289
290fn parse_and_normalize_grammar(session: &Session, file_text: &FileText) -> io::Result<r::Grammar> {
291 let grammar = parser::parse_grammar(file_text.text())
292 .map_err(|error| report_parse_error(file_text, error, report_error))?;
293
294 match normalize::normalize(session, grammar) {
295 Ok(grammar) => Ok(grammar),
296 Err(error) => Err(report_error(file_text, error.span, &error.message))?,
297 }
298}
299
300pub fn report_parse_error<E>(
306 file_text: &FileText,
307 error: parser::ParseError<'_>,
308 mut reporter: impl FnMut(&FileText, pt::Span, &str) -> E,
309) -> E {
310 match error {
311 parser::ParseError::InvalidToken { location } => {
312 let ch = file_text.text()[location..].chars().next().unwrap();
313 reporter(
314 file_text,
315 pt::Span(location, location),
316 &format!("invalid character `{ch}`"),
317 )
318 }
319
320 parser::ParseError::UnrecognizedEof { location, .. } => reporter(
321 file_text,
322 pt::Span(location, location),
323 "unexpected end of file",
324 ),
325
326 parser::ParseError::UnrecognizedToken {
327 token: (lo, _, hi),
328 expected,
329 } => {
330 let _ = expected; let text = &file_text.text()[lo..hi];
332 reporter(
333 file_text,
334 pt::Span(lo, hi),
335 &format!("unexpected token: `{text}`"),
336 )
337 }
338
339 parser::ParseError::ExtraToken { token: (lo, _, hi) } => {
340 let text = &file_text.text()[lo..hi];
341 reporter(
342 file_text,
343 pt::Span(lo, hi),
344 &format!("extra token at end of input: `{text}`"),
345 )
346 }
347
348 parser::ParseError::User { error } => {
349 let string = match error.code {
350 tok::ErrorCode::UnrecognizedToken => "unrecognized token",
351 tok::ErrorCode::UnterminatedEscape => "unterminated escape; missing '`'?",
352 tok::ErrorCode::UnterminatedAsciiEscape => {
353 "unterminated ascii escape; missing second digit?"
354 }
355 tok::ErrorCode::UnrecognizedEscape => {
356 "unrecognized escape; only \\n, \\r, \\t, \\0, \\\", \\\\, and \\x## are recognized"
357 }
358 tok::ErrorCode::UnterminatedStringLiteral => {
359 "unterminated string literal; missing `\"`?"
360 }
361 tok::ErrorCode::UnterminatedCharacterLiteral => {
362 "unterminated character literal; missing `'`?"
363 }
364 tok::ErrorCode::UnterminatedAttribute => "unterminated #! attribute; missing `]`?",
365 tok::ErrorCode::ExpectedStringLiteral => "expected string literal; missing `\"`?",
366 tok::ErrorCode::UnterminatedCode => {
367 "unterminated code block; perhaps a missing `;`, `)`, `]` or `}`?"
368 }
369 tok::ErrorCode::UnterminatedBlockComment => {
370 "unterminated block comment; missing `*/`?"
371 }
372 };
373
374 reporter(
375 file_text,
376 pt::Span(error.location, error.location + 1),
377 string,
378 )
379 }
380 }
381}
382
383fn report_error(file_text: &FileText, span: pt::Span, message: &str) -> io::Error {
384 println!("{} error: {}", file_text.span_str(span), message);
385
386 let out = io::stderr();
387 let mut out = out.lock();
388 file_text.highlight(span, &mut out).unwrap();
389
390 io::Error::new(io::ErrorKind::InvalidData, message)
391}
392
393fn report_message(message: Message) -> term::Result<()> {
394 let content = InlineBuilder::new().push(Box::new(message)).end();
395 report_content(&*content)?;
396 println!();
397 Ok(())
398}
399
400fn report_content(content: &dyn Content) -> term::Result<()> {
401 let canvas = content.emit_to_canvas(80);
403
404 let try_colors = match Tls::session().color_config {
405 ColorConfig::Yes => true,
406 ColorConfig::No => false,
407 ColorConfig::IfTty => io::stdout().is_terminal(),
408 };
409
410 if try_colors {
411 if let Some(mut stdout) = term::stdout() {
412 return canvas.write_to(&mut *stdout);
413 }
414 }
415
416 let stdout = io::stdout();
417 let mut stdout = FakeTerminal::new(stdout.lock());
418 canvas.write_to(&mut stdout)
419}
420
/// Writes the grammar's module attributes to `rust` by forwarding to
/// `RustWrite::write_module_attributes`.
fn emit_module_attributes<W: Write>(
    grammar: &r::Grammar,
    rust: &mut RustWrite<W>,
) -> io::Result<()> {
    rust.write_module_attributes(grammar)
}
427
/// Writes the grammar's `use` items to `rust` by forwarding to
/// `RustWrite::write_uses` with an empty first argument.
fn emit_uses<W: Write>(grammar: &r::Grammar, rust: &mut RustWrite<W>) -> io::Result<()> {
    rust.write_uses("", grammar)
}
431
/// Generates the Rust source for `grammar` into an in-memory buffer and
/// returns the bytes.
///
/// For every start nonterminal the LR(1) states are built and compiled with
/// the code generator selected by `grammar.algorithm.codegen`, followed by a
/// re-export of the generated parser type. After the loop, the internal
/// tokenizer (if any), the action code, and the `ToTriple` trait are emitted.
///
/// When `session.emit_report` is set, a report is written to `report_file`
/// for each start nonterminal; the file is re-created on every iteration.
///
/// # Errors
///
/// Returns `InvalidData` when the grammar declares no start nonterminals or
/// when state construction fails, and passes through any I/O error from
/// writing the report or the generated code.
///
/// # Panics
///
/// Panics if a start nonterminal does not have exactly one production.
fn emit_recursive_ascent(
    session: &Session,
    grammar: &r::Grammar,
    report_file: &Path,
) -> io::Result<Vec<u8>> {
    let mut rust = RustWrite::new(vec![]);

    emit_module_attributes(grammar, &mut rust)?;
    emit_uses(grammar, &mut rust)?;

    if grammar.start_nonterminals.is_empty() {
        println!("Error: no public symbols declared in grammar");
        return Err(io::Error::from(io::ErrorKind::InvalidData));
    }

    // Tracks the widest visibility seen among the start nonterminals; it is
    // later used as the visibility of the emitted `ToTriple` trait.
    let mut max_start_nt_visibility = pt::Visibility::Priv;
    for (user_nt, start_nt) in &grammar.start_nonterminals {
        match (
            &max_start_nt_visibility,
            &grammar.nonterminals[start_nt].visibility,
        ) {
            // Already `Pub(None)`, or the new one is private: nothing to widen.
            (r::Visibility::Pub(None), _) | (_, r::Visibility::Priv) => {}
            // Same visibility as before: keep it.
            (v1, v2) if v1 == v2 => {}
            // First non-private visibility encountered: adopt it.
            (r::Visibility::Priv, v) => max_start_nt_visibility = v.clone(),
            // Two differing non-private visibilities: fall back to `Pub(None)`.
            _ => max_start_nt_visibility = r::Visibility::Pub(None),
        };
        assert_eq!(grammar.productions_for(start_nt).len(), 1);

        log!(
            session,
            Verbose,
            "Building states for public nonterminal `{}`",
            user_nt
        );

        // Held in a binding so it stays alive until the end of this iteration.
        let _lr1_tls = lr1::Lr1Tls::install(grammar.terminals.clone());

        let lr1result = lr1::build_states(grammar, start_nt.clone());
        // The report is generated from the result before it is checked for
        // errors, so it is written for failed builds as well.
        if session.emit_report {
            let mut output_report_file = fs::File::create(report_file)?;
            lr1::generate_report(&mut output_report_file, &lr1result)?;
        }

        let states = match lr1result {
            Ok(states) => states,
            Err(error) => {
                // The outcome of reporting is ignored; the build fails anyway.
                let _ = lr1::report_error(grammar, &error, report_message);
                return Err(io::Error::from(io::ErrorKind::InvalidData));
            }
        };

        match grammar.algorithm.codegen {
            r::LrCodeGeneration::RecursiveAscent => lr1::codegen::ascent::compile(
                grammar,
                user_nt.clone(),
                start_nt.clone(),
                &states,
                "super",
                &mut rust,
            )?,
            r::LrCodeGeneration::TableDriven => lr1::codegen::parse_table::compile(
                grammar,
                user_nt.clone(),
                start_nt.clone(),
                &states,
                "super",
                &mut rust,
            )?,

            r::LrCodeGeneration::TestAll => lr1::codegen::test_all::compile(
                grammar,
                user_nt.clone(),
                start_nt.clone(),
                &states,
                &mut rust,
            )?,
        }

        // Re-export the generated parser type with the user nonterminal's
        // own visibility.
        rust!(rust, "#[allow(unused_imports)]");
        rust!(
            rust,
            "{}use self::{}parse{}::{}Parser;",
            grammar.nonterminals[user_nt].visibility,
            grammar.prefix,
            start_nt,
            user_nt
        );
    }

    if let Some(ref intern_token) = grammar.intern_token {
        intern_token::compile(grammar, intern_token, &mut rust)?;
        rust!(
            rust,
            "pub(crate) use self::{}lalrpop_util::lexer::Token;",
            grammar.prefix
        );
    }

    action::emit_action_code(grammar, &mut rust)?;

    rust!(rust, "");
    rust!(rust, "#[allow(clippy::type_complexity, dead_code)]");
    emit_to_triple_trait(grammar, max_start_nt_visibility, &mut rust)?;

    Ok(rust.into_inner())
}
562
563fn write_where_clause<W: Write>(
564 where_clauses: &[r::WhereClause],
565 to_triple_where_clauses: &Sep<&Vec<r::WhereClause>>,
566 rust: &mut RustWrite<W>,
567) -> io::Result<()> {
568 if !where_clauses.is_empty() {
569 rust!(rust, "where {}", to_triple_where_clauses);
570 }
571
572 Ok(())
573}
574
575fn emit_to_triple_trait<W: Write>(
576 grammar: &r::Grammar,
577 max_start_nt_visibility: r::Visibility,
578 rust: &mut RustWrite<W>,
579) -> io::Result<()> {
580 #[allow(non_snake_case)]
581 let (L, T, E) = (
582 grammar.types.terminal_loc_type(),
583 grammar.types.terminal_token_type(),
584 grammar.types.error_type(),
585 );
586
587 let parse_error = format!(
588 "{p}lalrpop_util::ParseError<{L}, {T}, {E}>",
589 p = grammar.prefix,
590 L = L,
591 T = T,
592 E = E,
593 );
594
595 let mut user_type_parameters = String::new();
596 for type_parameter in &grammar.type_parameters {
597 user_type_parameters.push_str(&format!("{type_parameter}, "));
598 }
599
600 let where_clauses = &grammar.where_clauses;
601 let to_triple_where_clauses = Sep(",", where_clauses);
602
603 rust!(
604 rust,
605 "{}trait {}ToTriple<{}>",
606 max_start_nt_visibility,
607 grammar.prefix,
608 user_type_parameters,
609 );
610 write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
611 rust!(rust, "{{");
612 rust!(
613 rust,
614 "fn to_triple(self) -> Result<({L},{T},{L}), {parse_error}>;",
615 L = L,
616 T = T,
617 parse_error = parse_error,
618 );
619 rust!(rust, "}}");
620
621 rust!(rust, "");
622 if grammar.types.opt_terminal_loc_type().is_some() {
623 rust!(
624 rust,
625 "impl<{utp}> {p}ToTriple<{utp}> for ({L}, {T}, {L})",
626 p = grammar.prefix,
627 utp = user_type_parameters,
628 L = L,
629 T = T,
630 );
631 write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
632 rust!(rust, "{{");
633 rust!(
634 rust,
635 "fn to_triple(self) -> Result<({L},{T},{L}), {parse_error}> {{",
636 L = L,
637 T = T,
638 parse_error = parse_error,
639 );
640 rust!(rust, "Ok(self)");
641 rust!(rust, "}}");
642 rust!(rust, "}}");
643
644 rust!(
645 rust,
646 "impl<{utp}> {p}ToTriple<{utp}> for Result<({L}, {T}, {L}), {E}>",
647 utp = user_type_parameters,
648 p = grammar.prefix,
649 L = L,
650 T = T,
651 E = E,
652 );
653 write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
654 rust!(rust, "{{");
655 rust!(
656 rust,
657 "fn to_triple(self) -> Result<({L},{T},{L}), {parse_error}> {{",
658 L = L,
659 T = T,
660 parse_error = parse_error,
661 );
662 rust!(
663 rust,
664 "self.map_err(|error| {p}lalrpop_util::ParseError::User {{ error }})",
665 p = grammar.prefix
666 );
667 rust!(rust, "}}");
668 rust!(rust, "}}");
669 } else {
670 rust!(
671 rust,
672 "impl<{utp}> {p}ToTriple<{utp}> for {T}",
673 utp = user_type_parameters,
674 p = grammar.prefix,
675 T = T,
676 );
677 write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
678 rust!(rust, "{{");
679 rust!(
680 rust,
681 "fn to_triple(self) -> Result<((),{T},()), {parse_error}> {{",
682 T = T,
683 parse_error = parse_error,
684 );
685 rust!(rust, "Ok(((), self, ()))");
686 rust!(rust, "}}");
687 rust!(rust, "}}");
688
689 rust!(
690 rust,
691 "impl<{utp}> {p}ToTriple<{utp}> for Result<{T},{E}>",
692 utp = user_type_parameters,
693 p = grammar.prefix,
694 T = T,
695 E = E,
696 );
697 write_where_clause(where_clauses, &to_triple_where_clauses, rust)?;
698 rust!(rust, "{{");
699 rust!(
700 rust,
701 "fn to_triple(self) -> Result<((),{T},()), {parse_error}> {{",
702 T = T,
703 parse_error = parse_error,
704 );
705 rust!(rust, "match self {{");
706 rust!(rust, "Ok(v) => Ok(((), v, ())),");
707 rust!(
708 rust,
709 "Err(error) => Err({p}lalrpop_util::ParseError::User {{ error }}),",
710 p = grammar.prefix
711 );
712 rust!(rust, "}}"); rust!(rust, "}}"); rust!(rust, "}}"); }
716
717 Ok(())
718}