1use pest::Parser;
12use pest_derive::Parser;
13
14pub mod ast;
15
16#[derive(Parser)]
18#[grammar = "parser/cmake.pest"]
19pub struct CmakeParser;
20
21use crate::error::{Error, Result};
22use ast::{Argument, BracketArgument, CommandInvocation, Comment, File, Statement};
23
24pub fn parse(source: &str) -> Result<File> {
29 let mut pairs =
30 CmakeParser::parse(Rule::file, source).map_err(|e| Error::Parse(Box::new(e)))?;
31 let file_pair = pairs
32 .next()
33 .ok_or_else(|| Error::Formatter("parser did not return a file pair".to_owned()))?;
34
35 build_file(file_pair)
36}
37
38fn build_file(pair: pest::iterators::Pair<'_, Rule>) -> Result<File> {
39 debug_assert_eq!(pair.as_rule(), Rule::file);
40
41 let items = pair.into_inner();
42 let mut statements = Vec::with_capacity(items.size_hint().0);
43 let mut pending_blank_lines = 0usize;
44 let mut line_has_content = false;
45
46 for item in items {
47 collect_file_item(
48 item,
49 &mut statements,
50 &mut pending_blank_lines,
51 &mut line_has_content,
52 )?;
53 }
54
55 flush_blank_lines(&mut statements, &mut pending_blank_lines);
56 Ok(File { statements })
57}
58
59fn collect_file_item(
60 item: pest::iterators::Pair<'_, Rule>,
61 statements: &mut Vec<Statement>,
62 pending_blank_lines: &mut usize,
63 line_has_content: &mut bool,
64) -> Result<()> {
65 match item.as_rule() {
66 Rule::file_item => {
67 for inner in item.into_inner() {
68 collect_file_item(inner, statements, pending_blank_lines, line_has_content)?;
69 }
70 Ok(())
71 }
72 Rule::command_invocation => {
73 flush_blank_lines(statements, pending_blank_lines);
74 statements.push(Statement::Command(build_command(item)?));
75 *line_has_content = true;
76 Ok(())
77 }
78 Rule::template_placeholder => {
79 flush_blank_lines(statements, pending_blank_lines);
80 statements.push(Statement::TemplatePlaceholder(item.as_str().to_owned()));
81 *line_has_content = true;
82 Ok(())
83 }
84 Rule::bracket_comment => {
85 let comment = Comment::Bracket(item.as_str().to_owned());
86 if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
87 flush_blank_lines(statements, pending_blank_lines);
88 statements.push(Statement::Comment(comment));
89 }
90 *line_has_content = true;
91 Ok(())
92 }
93 Rule::line_comment => {
94 let comment = Comment::Line(item.as_str().to_owned());
95 if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
96 flush_blank_lines(statements, pending_blank_lines);
97 statements.push(Statement::Comment(comment));
98 }
99 *line_has_content = true;
100 Ok(())
101 }
102 Rule::newline => {
103 if *line_has_content {
104 *line_has_content = false;
105 } else {
106 *pending_blank_lines += 1;
107 }
108 Ok(())
109 }
110 Rule::space | Rule::EOI => Ok(()),
111 other => Err(Error::Formatter(format!(
112 "unexpected top-level parser rule: {other:?}"
113 ))),
114 }
115}
116
117fn attach_trailing_comment(
118 statements: &mut [Statement],
119 comment: Comment,
120 line_has_content: bool,
121) -> Option<Comment> {
122 if !line_has_content {
123 return Some(comment);
124 }
125
126 match statements.last_mut() {
127 Some(Statement::Command(command)) if command.trailing_comment.is_none() => {
128 command.trailing_comment = Some(comment);
129 None
130 }
131 _ => Some(comment),
132 }
133}
134
135fn flush_blank_lines(statements: &mut Vec<Statement>, pending_blank_lines: &mut usize) {
136 if *pending_blank_lines == 0 {
137 return;
138 }
139
140 match statements.last_mut() {
141 Some(Statement::BlankLines(count)) => *count += *pending_blank_lines,
142 _ => statements.push(Statement::BlankLines(*pending_blank_lines)),
143 }
144
145 *pending_blank_lines = 0;
146}
147
148fn build_command(pair: pest::iterators::Pair<'_, Rule>) -> Result<CommandInvocation> {
149 debug_assert_eq!(pair.as_rule(), Rule::command_invocation);
150
151 let span = pair.as_span();
152 let mut name = None;
153 let mut arguments = Vec::new();
154
155 for inner in pair.into_inner() {
156 match inner.as_rule() {
157 Rule::identifier => {
158 name = Some(inner.as_str().to_owned());
159 }
160 Rule::arguments => {
161 arguments = build_arguments(inner)?;
162 }
163 Rule::space => {}
164 other => {
165 return Err(Error::Formatter(format!(
166 "unexpected command parser rule: {other:?}"
167 )));
168 }
169 }
170 }
171
172 Ok(CommandInvocation {
173 name: name.ok_or_else(|| Error::Formatter("command missing identifier".to_owned()))?,
174 arguments,
175 trailing_comment: None,
176 span: (span.start(), span.end()),
177 })
178}
179
180fn build_arguments(pair: pest::iterators::Pair<'_, Rule>) -> Result<Vec<Argument>> {
181 debug_assert_eq!(pair.as_rule(), Rule::arguments);
182
183 let inner = pair.into_inner();
184 let mut args = Vec::with_capacity(inner.size_hint().0);
185
186 for p in inner {
187 collect_argument_part(p, &mut args)?;
188 }
189
190 Ok(args)
191}
192
193fn collect_argument_part(
194 pair: pest::iterators::Pair<'_, Rule>,
195 out: &mut Vec<Argument>,
196) -> Result<()> {
197 match pair.as_rule() {
198 Rule::argument_part => {
199 for inner in pair.into_inner() {
200 collect_argument_part(inner, out)?;
201 }
202 Ok(())
203 }
204 Rule::arguments => {
205 for inner in pair.into_inner() {
206 collect_argument_part(inner, out)?;
207 }
208 Ok(())
209 }
210 Rule::argument => {
211 let mut inner = pair.into_inner();
212 let argument = inner
213 .next()
214 .ok_or_else(|| Error::Formatter("argument missing child node".to_owned()))?;
215 out.push(build_argument(argument)?);
216 Ok(())
217 }
218 Rule::bracket_comment => {
219 out.push(Argument::InlineComment(Comment::Bracket(
220 pair.as_str().to_owned(),
221 )));
222 Ok(())
223 }
224 Rule::line_ending => {
225 collect_line_ending_comments(pair, out);
226 Ok(())
227 }
228 Rule::space => Ok(()),
229 other => Err(Error::Formatter(format!(
230 "unexpected argument parser rule: {other:?}"
231 ))),
232 }
233}
234
235fn collect_line_ending_comments(pair: pest::iterators::Pair<'_, Rule>, out: &mut Vec<Argument>) {
236 for inner in pair.into_inner() {
237 if inner.as_rule() == Rule::line_comment {
238 out.push(Argument::InlineComment(Comment::Line(
239 inner.as_str().to_owned(),
240 )));
241 }
242 }
243}
244
245fn build_argument(pair: pest::iterators::Pair<'_, Rule>) -> Result<Argument> {
246 match pair.as_rule() {
247 Rule::bracket_argument => {
248 let raw = pair.as_str().to_owned();
249 Ok(Argument::Bracket(validate_bracket_argument(raw)?))
250 }
251 Rule::quoted_argument => Ok(Argument::Quoted(pair.as_str().to_owned())),
252 Rule::mixed_unquoted_argument | Rule::unquoted_argument => {
253 Ok(Argument::Unquoted(pair.as_str().to_owned()))
254 }
255 other => Err(Error::Formatter(format!(
256 "unexpected argument rule: {other:?}"
257 ))),
258 }
259}
260
261fn validate_bracket_argument(raw: String) -> Result<BracketArgument> {
263 let open_equals = raw
264 .strip_prefix('[')
265 .ok_or_else(|| Error::Formatter("bracket argument missing '[' prefix".to_owned()))?
266 .bytes()
267 .take_while(|&b| b == b'=')
268 .count();
269
270 let close_equals = raw
271 .strip_suffix(']')
272 .ok_or_else(|| Error::Formatter("bracket argument missing ']' suffix".to_owned()))?
273 .bytes()
274 .rev()
275 .take_while(|&b| b == b'=')
276 .count();
277
278 if open_equals != close_equals {
279 return Err(Error::Formatter(format!(
280 "invalid bracket argument delimiter: {raw}"
281 )));
282 }
283
284 Ok(BracketArgument {
285 level: open_equals,
286 raw,
287 })
288}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src`, panicking with the offending input and the error on failure.
    fn parse_ok(src: &str) -> File {
        parse(src).unwrap_or_else(|e| panic!("parse failed for {src:?}: {e}"))
    }

    /// Returns the first statement of `file`, which must be a command invocation.
    fn first_command(file: &File) -> &CommandInvocation {
        match file.statements.first() {
            Some(Statement::Command(cmd)) => cmd,
            other => panic!("expected a leading command statement, got {other:?}"),
        }
    }

    /// Source text of every argument of `cmd`, in order.
    fn argument_texts(cmd: &CommandInvocation) -> Vec<&str> {
        cmd.arguments.iter().map(Argument::as_str).collect()
    }

    /// Level of the bracket argument at `index`; panics if it is another kind.
    fn bracket_level(cmd: &CommandInvocation, index: usize) -> usize {
        match &cmd.arguments[index] {
            Argument::Bracket(bracket) => bracket.level,
            _ => panic!("argument {index} is not a bracket argument"),
        }
    }

    #[test]
    fn empty_file() {
        let f = parse_ok("");
        assert!(f.statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let f = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let f = parse_ok("some_command()\n");
        assert!(first_command(&f).arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let f = parse_ok("message(\"hello world\")\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    #[test]
    fn bracket_argument_zero_equals() {
        let f = parse_ok("set(VAR [[hello]])\n");
        assert_eq!(bracket_level(first_command(&f), 1), 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        let f = parse_ok("set(VAR [=[hello]=])\n");
        assert_eq!(bracket_level(first_command(&f), 1), 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        let f = parse_ok("set(VAR [==[contains ]= inside]==])\n");
        assert_eq!(bracket_level(first_command(&f), 1), 2);
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Formatter(_)));
    }

    #[test]
    fn line_comment_standalone() {
        let f = parse_ok("# this is a comment\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Line(_))
        ));
    }

    #[test]
    fn bracket_comment() {
        let f = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn variable_reference_in_unquoted() {
        let f = parse_ok("message(${MY_VAR})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let f = parse_ok("message($ENV{PATH})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let f = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        assert_eq!(first_command(&f).arguments.len(), 2);
    }

    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n PUBLIC dep1\n PRIVATE dep2\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "target_link_libraries");
        // mylib, PUBLIC, dep1, PRIVATE, dep2
        assert_eq!(cmd.arguments.len(), 5);
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n PRIVATE a.cc # keep grouping\n b.cc\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(matches!(cmd.trailing_comment, Some(Comment::Line(_))));
    }

    #[test]
    fn file_without_final_newline() {
        let f = parse_ok("project(MyProject)");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let f = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(f.statements.len(), 3);
        assert!(matches!(f.statements[1], Statement::BlankLines(1)));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let f = parse_ok("\nmessage(foo)\n");
        assert!(matches!(f.statements[0], Statement::BlankLines(1)));
    }

    #[test]
    fn escape_sequences_in_quoted() {
        let f = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!f.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let f = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let args = argument_texts(first_command(&f));
        assert_eq!(args, vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]);
    }

    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let f = parse_ok("message(${${OUTER}})\n");
        assert_eq!(first_command(&f).arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let f = parse_ok("_my_command(ARG)\n");
        assert_eq!(first_command(&f).name, "_my_command");
    }

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let args = argument_texts(first_command(&f));
        assert_eq!(args, vec!["FALSE", "AND", "(FALSE OR TRUE)"]);
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            " AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        let args = argument_texts(first_command(&f));
        assert_eq!(
            args,
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let f = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let f = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            f.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let f = parse_ok("set(x -Da=\"b c\")\n");
        assert_eq!(first_command(&f).arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let f = parse_ok("set(x -Da=$(v))\n");
        assert_eq!(first_command(&f).arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The character class holds a space followed by a tab. The tab is
        // written as an escape so the input and the expected text cannot
        // drift apart through whitespace normalisation.
        let f = parse_ok("set(VERSION_REGEX \"#define CLI11_VERSION[ \t]+\"(.+)\"\")");
        assert_eq!(
            first_command(&f).arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let f = parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        assert_eq!(
            first_command(&f).arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }
}