1use pest::Parser;
12
13pub mod ast;
14
15mod generated {
16 use pest_derive::Parser;
17
18 #[derive(Parser)]
20 #[grammar = "parser/cmake.pest"]
21 pub(super) struct CmakeParser;
22}
23
24use generated::{CmakeParser, Rule};
25
26use crate::error::{Error, Result};
27use ast::{Argument, BracketArgument, CommandInvocation, Comment, File, Statement};
28
29pub fn parse(source: &str) -> Result<File> {
34 let mut pairs = CmakeParser::parse(Rule::file, source).map_err(|e| {
35 Error::Parse(crate::error::ParseError {
36 display_name: "<source>".to_owned(),
37 source_text: source.to_owned().into_boxed_str(),
38 start_line: 1,
39 diagnostic: crate::error::ParseDiagnostic::from_pest(&e),
40 })
41 })?;
42 let file_pair = pairs
43 .next()
44 .ok_or_else(|| Error::Formatter("parser did not return a file pair".to_owned()))?;
45
46 build_file(file_pair)
47}
48
49fn build_file(pair: pest::iterators::Pair<'_, Rule>) -> Result<File> {
50 debug_assert_eq!(pair.as_rule(), Rule::file);
51
52 let items = pair.into_inner();
53 let mut statements = Vec::with_capacity(items.size_hint().0);
54 let mut pending_blank_lines = 0usize;
55 let mut line_has_content = false;
56
57 for item in items {
58 collect_file_item(
59 item,
60 &mut statements,
61 &mut pending_blank_lines,
62 &mut line_has_content,
63 )?;
64 }
65
66 flush_blank_lines(&mut statements, &mut pending_blank_lines);
67 Ok(File { statements })
68}
69
70fn collect_file_item(
71 item: pest::iterators::Pair<'_, Rule>,
72 statements: &mut Vec<Statement>,
73 pending_blank_lines: &mut usize,
74 line_has_content: &mut bool,
75) -> Result<()> {
76 match item.as_rule() {
77 Rule::file_item => {
78 for inner in item.into_inner() {
79 collect_file_item(inner, statements, pending_blank_lines, line_has_content)?;
80 }
81 Ok(())
82 }
83 Rule::command_invocation => {
84 flush_blank_lines(statements, pending_blank_lines);
85 statements.push(Statement::Command(build_command(item)?));
86 *line_has_content = true;
87 Ok(())
88 }
89 Rule::template_placeholder => {
90 flush_blank_lines(statements, pending_blank_lines);
91 statements.push(Statement::TemplatePlaceholder(item.as_str().to_owned()));
92 *line_has_content = true;
93 Ok(())
94 }
95 Rule::bracket_comment => {
96 let comment = Comment::Bracket(item.as_str().to_owned());
97 if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
98 flush_blank_lines(statements, pending_blank_lines);
99 statements.push(Statement::Comment(comment));
100 }
101 *line_has_content = true;
102 Ok(())
103 }
104 Rule::line_comment => {
105 let comment = Comment::Line(item.as_str().to_owned());
106 if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
107 flush_blank_lines(statements, pending_blank_lines);
108 statements.push(Statement::Comment(comment));
109 }
110 *line_has_content = true;
111 Ok(())
112 }
113 Rule::newline => {
114 if *line_has_content {
115 *line_has_content = false;
116 } else {
117 *pending_blank_lines += 1;
118 }
119 Ok(())
120 }
121 Rule::space | Rule::EOI => Ok(()),
122 other => Err(Error::Formatter(format!(
123 "unexpected top-level parser rule: {other:?}"
124 ))),
125 }
126}
127
128fn attach_trailing_comment(
129 statements: &mut [Statement],
130 comment: Comment,
131 line_has_content: bool,
132) -> Option<Comment> {
133 if !line_has_content {
134 return Some(comment);
135 }
136
137 match statements.last_mut() {
138 Some(Statement::Command(command)) if command.trailing_comment.is_none() => {
139 command.trailing_comment = Some(comment);
140 None
141 }
142 _ => Some(comment),
143 }
144}
145
146fn flush_blank_lines(statements: &mut Vec<Statement>, pending_blank_lines: &mut usize) {
147 if *pending_blank_lines == 0 {
148 return;
149 }
150
151 match statements.last_mut() {
152 Some(Statement::BlankLines(count)) => *count += *pending_blank_lines,
153 _ => statements.push(Statement::BlankLines(*pending_blank_lines)),
154 }
155
156 *pending_blank_lines = 0;
157}
158
159fn build_command(pair: pest::iterators::Pair<'_, Rule>) -> Result<CommandInvocation> {
160 debug_assert_eq!(pair.as_rule(), Rule::command_invocation);
161
162 let span = pair.as_span();
163 let mut name = None;
164 let mut arguments = Vec::new();
165
166 for inner in pair.into_inner() {
167 match inner.as_rule() {
168 Rule::identifier => {
169 name = Some(inner.as_str().to_owned());
170 }
171 Rule::arguments => {
172 arguments = build_arguments(inner)?;
173 }
174 Rule::space => {}
175 other => {
176 return Err(Error::Formatter(format!(
177 "unexpected command parser rule: {other:?}"
178 )));
179 }
180 }
181 }
182
183 Ok(CommandInvocation {
184 name: name.ok_or_else(|| Error::Formatter("command missing identifier".to_owned()))?,
185 arguments,
186 trailing_comment: None,
187 span: (span.start(), span.end()),
188 })
189}
190
191fn build_arguments(pair: pest::iterators::Pair<'_, Rule>) -> Result<Vec<Argument>> {
192 debug_assert_eq!(pair.as_rule(), Rule::arguments);
193
194 let inner = pair.into_inner();
195 let mut args = Vec::with_capacity(inner.size_hint().0);
196
197 for p in inner {
198 collect_argument_part(p, &mut args)?;
199 }
200
201 Ok(args)
202}
203
204fn collect_argument_part(
205 pair: pest::iterators::Pair<'_, Rule>,
206 out: &mut Vec<Argument>,
207) -> Result<()> {
208 match pair.as_rule() {
209 Rule::argument_part => {
210 for inner in pair.into_inner() {
211 collect_argument_part(inner, out)?;
212 }
213 Ok(())
214 }
215 Rule::arguments => {
216 for inner in pair.into_inner() {
217 collect_argument_part(inner, out)?;
218 }
219 Ok(())
220 }
221 Rule::argument => {
222 let mut inner = pair.into_inner();
223 let argument = inner
224 .next()
225 .ok_or_else(|| Error::Formatter("argument missing child node".to_owned()))?;
226 out.push(build_argument(argument)?);
227 Ok(())
228 }
229 Rule::bracket_comment => {
230 out.push(Argument::InlineComment(Comment::Bracket(
231 pair.as_str().to_owned(),
232 )));
233 Ok(())
234 }
235 Rule::line_ending => {
236 collect_line_ending_comments(pair, out);
237 Ok(())
238 }
239 Rule::space => Ok(()),
240 other => Err(Error::Formatter(format!(
241 "unexpected argument parser rule: {other:?}"
242 ))),
243 }
244}
245
246fn collect_line_ending_comments(pair: pest::iterators::Pair<'_, Rule>, out: &mut Vec<Argument>) {
247 for inner in pair.into_inner() {
248 if inner.as_rule() == Rule::line_comment {
249 out.push(Argument::InlineComment(Comment::Line(
250 inner.as_str().to_owned(),
251 )));
252 }
253 }
254}
255
256fn build_argument(pair: pest::iterators::Pair<'_, Rule>) -> Result<Argument> {
257 match pair.as_rule() {
258 Rule::bracket_argument => {
259 let raw = pair.as_str().to_owned();
260 Ok(Argument::Bracket(validate_bracket_argument(raw)?))
261 }
262 Rule::quoted_argument => Ok(Argument::Quoted(pair.as_str().to_owned())),
263 Rule::mixed_unquoted_argument | Rule::unquoted_argument => {
264 Ok(Argument::Unquoted(pair.as_str().to_owned()))
265 }
266 other => Err(Error::Formatter(format!(
267 "unexpected argument rule: {other:?}"
268 ))),
269 }
270}
271
272fn validate_bracket_argument(raw: String) -> Result<BracketArgument> {
274 let open_equals = raw
275 .strip_prefix('[')
276 .ok_or_else(|| Error::Formatter("bracket argument missing '[' prefix".to_owned()))?
277 .bytes()
278 .take_while(|&b| b == b'=')
279 .count();
280
281 let close_equals = raw
282 .strip_suffix(']')
283 .ok_or_else(|| Error::Formatter("bracket argument missing ']' suffix".to_owned()))?
284 .bytes()
285 .rev()
286 .take_while(|&b| b == b'=')
287 .count();
288
289 if open_equals != close_equals {
290 return Err(Error::Formatter(format!(
291 "invalid bracket argument delimiter: {raw}"
292 )));
293 }
294
295 Ok(BracketArgument {
296 level: open_equals,
297 raw,
298 })
299}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src`, panicking with the offending source and error on failure.
    fn parse_ok(src: &str) -> File {
        parse(src).unwrap_or_else(|e| panic!("parse failed for {src:?}: {e}"))
    }

    /// Returns the first statement of `file` as a command, panicking with a
    /// descriptive message when it is missing or is not a command.
    fn first_command(file: &File) -> &CommandInvocation {
        match file.statements.first() {
            Some(Statement::Command(cmd)) => cmd,
            other => panic!("expected the first statement to be a command, found {other:?}"),
        }
    }

    /// Collects the source text of every argument of `cmd`.
    fn argument_texts(cmd: &CommandInvocation) -> Vec<&str> {
        cmd.arguments.iter().map(Argument::as_str).collect()
    }

    /// Parses `src` and returns the level of the bracket argument found in
    /// the second argument position of its first command.
    fn second_argument_bracket_level(src: &str) -> usize {
        let f = parse_ok(src);
        match &first_command(&f).arguments[1] {
            Argument::Bracket(b) => b.level,
            _ => panic!("expected the second argument of {src:?} to be a bracket argument"),
        }
    }

    #[test]
    fn empty_file() {
        let f = parse_ok("");
        assert!(f.statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let f = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let f = parse_ok("some_command()\n");
        assert!(first_command(&f).arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let f = parse_ok("message(\"hello world\")\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    #[test]
    fn bracket_argument_zero_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [[hello]])\n"), 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [=[hello]=])\n"), 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        assert_eq!(
            second_argument_bracket_level("set(VAR [==[contains ]= inside]==])\n"),
            2
        );
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Formatter(_)));
    }

    #[test]
    fn invalid_syntax_returns_parse_error_with_crate_owned_diagnostic() {
        let err = parse("message(\n").unwrap_err();
        let Error::Parse(parse_err) = err else {
            panic!("expected parse error");
        };

        assert_eq!(parse_err.display_name, "<source>");
        assert_eq!(parse_err.source_text.as_ref(), "message(\n");
        assert_eq!(parse_err.start_line, 1);
        assert!(
            parse_err.diagnostic.message.contains("expected"),
            "unexpected parse diagnostic: {:?}",
            parse_err.diagnostic
        );
        assert_eq!(parse_err.diagnostic.line, 2);
        assert_eq!(parse_err.diagnostic.column, 1);
    }

    #[test]
    fn line_comment_standalone() {
        let f = parse_ok("# this is a comment\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Line(_))
        ));
    }

    #[test]
    fn bracket_comment() {
        let f = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn variable_reference_in_unquoted() {
        let f = parse_ok("message(${MY_VAR})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let f = parse_ok("message($ENV{PATH})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let f = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        assert_eq!(first_command(&f).arguments.len(), 2);
    }

    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n PUBLIC dep1\n PRIVATE dep2\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "target_link_libraries");
        assert_eq!(cmd.arguments.len(), 5);
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n PRIVATE a.cc # keep grouping\n b.cc\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(matches!(cmd.trailing_comment, Some(Comment::Line(_))));
    }

    #[test]
    fn file_without_final_newline() {
        let f = parse_ok("project(MyProject)");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let f = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(f.statements.len(), 3);
        assert!(matches!(f.statements[1], Statement::BlankLines(1)));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let f = parse_ok("\nmessage(foo)\n");
        assert!(matches!(f.statements[0], Statement::BlankLines(1)));
    }

    #[test]
    fn escape_sequences_in_quoted() {
        let f = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!f.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let f = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let args = argument_texts(first_command(&f));
        assert_eq!(args, vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]);
    }

    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let f = parse_ok("message(${${OUTER}})\n");
        assert_eq!(first_command(&f).arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let f = parse_ok("_my_command(ARG)\n");
        assert_eq!(first_command(&f).name, "_my_command");
    }

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let args = argument_texts(first_command(&f));
        assert_eq!(args, vec!["FALSE", "AND", "(FALSE OR TRUE)"]);
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            " AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        let args = argument_texts(first_command(&f));
        assert_eq!(
            args,
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let f = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let f = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            f.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let f = parse_ok("set(x -Da=\"b c\")\n");
        assert_eq!(first_command(&f).arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let f = parse_ok("set(x -Da=$(v))\n");
        assert_eq!(first_command(&f).arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The character class holds a space followed by a tab. The tab is
        // written as an escape so the input and the expected text below
        // cannot drift apart through invisible whitespace.
        let f = parse_ok("set(VERSION_REGEX \"#define CLI11_VERSION[ \t]+\"(.+)\"\")");
        assert_eq!(
            first_command(&f).arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let f = parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        assert_eq!(
            first_command(&f).arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }
}