1use pest::Parser;
12use pest_derive::Parser;
13
14pub mod ast;
15
/// Pest parser type for CMake source text.
///
/// `#[derive(Parser)]` expands the grammar file named in the `#[grammar]`
/// attribute (`parser/cmake.pest`) into the `Rule` enum and the parser
/// implementation that the functions in this file match on. The type is
/// hidden from generated documentation via `#[doc(hidden)]`.
#[doc(hidden)]
#[derive(Parser)]
#[grammar = "parser/cmake.pest"]
pub struct CmakeParser;
21
22use crate::error::{Error, Result};
23use ast::{Argument, BracketArgument, CommandInvocation, Comment, File, Statement};
24
25pub fn parse(source: &str) -> Result<File> {
30 let mut pairs =
31 CmakeParser::parse(Rule::file, source).map_err(|e| Error::Parse(Box::new(e)))?;
32 let file_pair = pairs
33 .next()
34 .ok_or_else(|| Error::Formatter("parser did not return a file pair".to_owned()))?;
35
36 build_file(file_pair)
37}
38
39fn build_file(pair: pest::iterators::Pair<'_, Rule>) -> Result<File> {
40 debug_assert_eq!(pair.as_rule(), Rule::file);
41
42 let items = pair.into_inner();
43 let mut statements = Vec::with_capacity(items.size_hint().0);
44 let mut pending_blank_lines = 0usize;
45 let mut line_has_content = false;
46
47 for item in items {
48 collect_file_item(
49 item,
50 &mut statements,
51 &mut pending_blank_lines,
52 &mut line_has_content,
53 )?;
54 }
55
56 flush_blank_lines(&mut statements, &mut pending_blank_lines);
57 Ok(File { statements })
58}
59
60fn collect_file_item(
61 item: pest::iterators::Pair<'_, Rule>,
62 statements: &mut Vec<Statement>,
63 pending_blank_lines: &mut usize,
64 line_has_content: &mut bool,
65) -> Result<()> {
66 match item.as_rule() {
67 Rule::file_item => {
68 for inner in item.into_inner() {
69 collect_file_item(inner, statements, pending_blank_lines, line_has_content)?;
70 }
71 Ok(())
72 }
73 Rule::command_invocation => {
74 flush_blank_lines(statements, pending_blank_lines);
75 statements.push(Statement::Command(build_command(item)?));
76 *line_has_content = true;
77 Ok(())
78 }
79 Rule::template_placeholder => {
80 flush_blank_lines(statements, pending_blank_lines);
81 statements.push(Statement::TemplatePlaceholder(item.as_str().to_owned()));
82 *line_has_content = true;
83 Ok(())
84 }
85 Rule::bracket_comment => {
86 let comment = Comment::Bracket(item.as_str().to_owned());
87 if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
88 flush_blank_lines(statements, pending_blank_lines);
89 statements.push(Statement::Comment(comment));
90 }
91 *line_has_content = true;
92 Ok(())
93 }
94 Rule::line_comment => {
95 let comment = Comment::Line(item.as_str().to_owned());
96 if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
97 flush_blank_lines(statements, pending_blank_lines);
98 statements.push(Statement::Comment(comment));
99 }
100 *line_has_content = true;
101 Ok(())
102 }
103 Rule::newline => {
104 if *line_has_content {
105 *line_has_content = false;
106 } else {
107 *pending_blank_lines += 1;
108 }
109 Ok(())
110 }
111 Rule::space | Rule::EOI => Ok(()),
112 other => Err(Error::Formatter(format!(
113 "unexpected top-level parser rule: {other:?}"
114 ))),
115 }
116}
117
118fn attach_trailing_comment(
119 statements: &mut [Statement],
120 comment: Comment,
121 line_has_content: bool,
122) -> Option<Comment> {
123 if !line_has_content {
124 return Some(comment);
125 }
126
127 match statements.last_mut() {
128 Some(Statement::Command(command)) if command.trailing_comment.is_none() => {
129 command.trailing_comment = Some(comment);
130 None
131 }
132 _ => Some(comment),
133 }
134}
135
136fn flush_blank_lines(statements: &mut Vec<Statement>, pending_blank_lines: &mut usize) {
137 if *pending_blank_lines == 0 {
138 return;
139 }
140
141 match statements.last_mut() {
142 Some(Statement::BlankLines(count)) => *count += *pending_blank_lines,
143 _ => statements.push(Statement::BlankLines(*pending_blank_lines)),
144 }
145
146 *pending_blank_lines = 0;
147}
148
149fn build_command(pair: pest::iterators::Pair<'_, Rule>) -> Result<CommandInvocation> {
150 debug_assert_eq!(pair.as_rule(), Rule::command_invocation);
151
152 let span = pair.as_span();
153 let mut name = None;
154 let mut arguments = Vec::new();
155
156 for inner in pair.into_inner() {
157 match inner.as_rule() {
158 Rule::identifier => {
159 name = Some(inner.as_str().to_owned());
160 }
161 Rule::arguments => {
162 arguments = build_arguments(inner)?;
163 }
164 Rule::space => {}
165 other => {
166 return Err(Error::Formatter(format!(
167 "unexpected command parser rule: {other:?}"
168 )));
169 }
170 }
171 }
172
173 Ok(CommandInvocation {
174 name: name.ok_or_else(|| Error::Formatter("command missing identifier".to_owned()))?,
175 arguments,
176 trailing_comment: None,
177 span: (span.start(), span.end()),
178 })
179}
180
181fn build_arguments(pair: pest::iterators::Pair<'_, Rule>) -> Result<Vec<Argument>> {
182 debug_assert_eq!(pair.as_rule(), Rule::arguments);
183
184 let inner = pair.into_inner();
185 let mut args = Vec::with_capacity(inner.size_hint().0);
186
187 for p in inner {
188 collect_argument_part(p, &mut args)?;
189 }
190
191 Ok(args)
192}
193
194fn collect_argument_part(
195 pair: pest::iterators::Pair<'_, Rule>,
196 out: &mut Vec<Argument>,
197) -> Result<()> {
198 match pair.as_rule() {
199 Rule::argument_part => {
200 for inner in pair.into_inner() {
201 collect_argument_part(inner, out)?;
202 }
203 Ok(())
204 }
205 Rule::arguments => {
206 for inner in pair.into_inner() {
207 collect_argument_part(inner, out)?;
208 }
209 Ok(())
210 }
211 Rule::argument => {
212 let mut inner = pair.into_inner();
213 let argument = inner
214 .next()
215 .ok_or_else(|| Error::Formatter("argument missing child node".to_owned()))?;
216 out.push(build_argument(argument)?);
217 Ok(())
218 }
219 Rule::bracket_comment => {
220 out.push(Argument::InlineComment(Comment::Bracket(
221 pair.as_str().to_owned(),
222 )));
223 Ok(())
224 }
225 Rule::line_ending => {
226 collect_line_ending_comments(pair, out);
227 Ok(())
228 }
229 Rule::space => Ok(()),
230 other => Err(Error::Formatter(format!(
231 "unexpected argument parser rule: {other:?}"
232 ))),
233 }
234}
235
236fn collect_line_ending_comments(pair: pest::iterators::Pair<'_, Rule>, out: &mut Vec<Argument>) {
237 for inner in pair.into_inner() {
238 if inner.as_rule() == Rule::line_comment {
239 out.push(Argument::InlineComment(Comment::Line(
240 inner.as_str().to_owned(),
241 )));
242 }
243 }
244}
245
246fn build_argument(pair: pest::iterators::Pair<'_, Rule>) -> Result<Argument> {
247 match pair.as_rule() {
248 Rule::bracket_argument => {
249 let raw = pair.as_str().to_owned();
250 Ok(Argument::Bracket(validate_bracket_argument(raw)?))
251 }
252 Rule::quoted_argument => Ok(Argument::Quoted(pair.as_str().to_owned())),
253 Rule::mixed_unquoted_argument | Rule::unquoted_argument => {
254 Ok(Argument::Unquoted(pair.as_str().to_owned()))
255 }
256 other => Err(Error::Formatter(format!(
257 "unexpected argument rule: {other:?}"
258 ))),
259 }
260}
261
262fn validate_bracket_argument(raw: String) -> Result<BracketArgument> {
264 let open_equals = raw
265 .strip_prefix('[')
266 .ok_or_else(|| Error::Formatter("bracket argument missing '[' prefix".to_owned()))?
267 .bytes()
268 .take_while(|&b| b == b'=')
269 .count();
270
271 let close_equals = raw
272 .strip_suffix(']')
273 .ok_or_else(|| Error::Formatter("bracket argument missing ']' suffix".to_owned()))?
274 .bytes()
275 .rev()
276 .take_while(|&b| b == b'=')
277 .count();
278
279 if open_equals != close_equals {
280 return Err(Error::Formatter(format!(
281 "invalid bracket argument delimiter: {raw}"
282 )));
283 }
284
285 Ok(BracketArgument {
286 level: open_equals,
287 raw,
288 })
289}
290
/// Unit tests for the parser entry point and the AST it produces.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src`, panicking with the offending input and error on failure.
    fn parse_ok(src: &str) -> File {
        parse(src).unwrap_or_else(|e| panic!("parse failed for {src:?}: {e}"))
    }

    /// Returns the first statement of `file` as a command, panicking with a
    /// description of what was found instead when it is not one.
    fn first_command(file: &File) -> &CommandInvocation {
        match file.statements.first() {
            Some(Statement::Command(cmd)) => cmd,
            other => panic!("expected a command as the first statement, got {other:?}"),
        }
    }

    /// Returns the source text of every argument of `cmd`, in order.
    fn argument_texts(cmd: &CommandInvocation) -> Vec<&str> {
        cmd.arguments.iter().map(Argument::as_str).collect()
    }

    /// Parses `src` and returns the bracket level of the first command's
    /// second argument, panicking when that argument is not a bracket argument.
    fn second_argument_bracket_level(src: &str) -> usize {
        let f = parse_ok(src);
        match &first_command(&f).arguments[1] {
            Argument::Bracket(bracket) => bracket.level,
            _ => panic!("expected argument 1 of {src:?} to be a bracket argument"),
        }
    }

    #[test]
    fn empty_file() {
        let f = parse_ok("");
        assert!(f.statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let f = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let f = parse_ok("some_command()\n");
        assert!(first_command(&f).arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let f = parse_ok("message(\"hello world\")\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    #[test]
    fn bracket_argument_zero_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [[hello]])\n"), 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [=[hello]=])\n"), 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        assert_eq!(
            second_argument_bracket_level("set(VAR [==[contains ]= inside]==])\n"),
            2
        );
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Formatter(_)));
    }

    #[test]
    fn line_comment_standalone() {
        let f = parse_ok("# this is a comment\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Line(_))
        ));
    }

    #[test]
    fn bracket_comment() {
        let f = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn variable_reference_in_unquoted() {
        let f = parse_ok("message(${MY_VAR})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let f = parse_ok("message($ENV{PATH})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let f = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        assert_eq!(first_command(&f).arguments.len(), 2);
    }

    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n PUBLIC dep1\n PRIVATE dep2\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "target_link_libraries");
        assert_eq!(cmd.arguments.len(), 5);
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n PRIVATE a.cc # keep grouping\n b.cc\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(matches!(cmd.trailing_comment, Some(Comment::Line(_))));
    }

    #[test]
    fn file_without_final_newline() {
        let f = parse_ok("project(MyProject)");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let f = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(f.statements.len(), 3);
        assert!(matches!(f.statements[1], Statement::BlankLines(1)));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let f = parse_ok("\nmessage(foo)\n");
        assert!(matches!(f.statements[0], Statement::BlankLines(1)));
    }

    #[test]
    fn escape_sequences_in_quoted() {
        let f = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!f.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let f = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let args = argument_texts(first_command(&f));
        assert_eq!(args, vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]);
    }

    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let f = parse_ok("message(${${OUTER}})\n");
        assert_eq!(first_command(&f).arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let f = parse_ok("_my_command(ARG)\n");
        assert_eq!(first_command(&f).name, "_my_command");
    }

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let args = argument_texts(first_command(&f));
        assert_eq!(args, vec!["FALSE", "AND", "(FALSE OR TRUE)"]);
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            " AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        let args = argument_texts(first_command(&f));
        assert_eq!(
            args,
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let f = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let f = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            f.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let f = parse_ok("set(x -Da=\"b c\")\n");
        assert_eq!(first_command(&f).arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let f = parse_ok("set(x -Da=$(v))\n");
        assert_eq!(first_command(&f).arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The argument contains a tab inside `[ \t]`. The input is derived
        // from the expected text with an explicit `\t` escape so the two
        // cannot drift apart if an editor rewrites invisible whitespace.
        let expected = "\"#define CLI11_VERSION[ \t]+\"(.+)\"\"";
        let f = parse_ok(&format!("set(VERSION_REGEX {expected})"));
        assert_eq!(first_command(&f).arguments[1].as_str(), expected);
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let f = parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        assert_eq!(
            first_command(&f).arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }
}