1use pest::Parser;
12use pest_derive::Parser;
13
14pub mod ast;
15
/// Parser type whose `pest::Parser` implementation is generated by
/// `pest_derive` from the grammar file named in the `#[grammar]` attribute.
///
/// Marked `#[doc(hidden)]`, so it is public but omitted from rendered docs.
#[doc(hidden)]
#[derive(Parser)]
#[grammar = "parser/cmake.pest"]
pub struct CmakeParser;
21
22use crate::error::{Error, Result};
23use ast::{Argument, BracketArgument, CommandInvocation, Comment, File, Statement};
24
25pub fn parse(source: &str) -> Result<File> {
30 let mut pairs = CmakeParser::parse(Rule::file, source).map_err(|e| Error::ParseContext {
31 display_name: "<source>".to_owned(),
32 source_text: source.to_owned().into_boxed_str(),
33 start_line: 1,
34 barrier_context: false,
35 diagnostic: crate::error::ParseDiagnostic::from_pest(&e),
36 })?;
37 let file_pair = pairs
38 .next()
39 .ok_or_else(|| Error::Formatter("parser did not return a file pair".to_owned()))?;
40
41 build_file(file_pair)
42}
43
44fn build_file(pair: pest::iterators::Pair<'_, Rule>) -> Result<File> {
45 debug_assert_eq!(pair.as_rule(), Rule::file);
46
47 let items = pair.into_inner();
48 let mut statements = Vec::with_capacity(items.size_hint().0);
49 let mut pending_blank_lines = 0usize;
50 let mut line_has_content = false;
51
52 for item in items {
53 collect_file_item(
54 item,
55 &mut statements,
56 &mut pending_blank_lines,
57 &mut line_has_content,
58 )?;
59 }
60
61 flush_blank_lines(&mut statements, &mut pending_blank_lines);
62 Ok(File { statements })
63}
64
65fn collect_file_item(
66 item: pest::iterators::Pair<'_, Rule>,
67 statements: &mut Vec<Statement>,
68 pending_blank_lines: &mut usize,
69 line_has_content: &mut bool,
70) -> Result<()> {
71 match item.as_rule() {
72 Rule::file_item => {
73 for inner in item.into_inner() {
74 collect_file_item(inner, statements, pending_blank_lines, line_has_content)?;
75 }
76 Ok(())
77 }
78 Rule::command_invocation => {
79 flush_blank_lines(statements, pending_blank_lines);
80 statements.push(Statement::Command(build_command(item)?));
81 *line_has_content = true;
82 Ok(())
83 }
84 Rule::template_placeholder => {
85 flush_blank_lines(statements, pending_blank_lines);
86 statements.push(Statement::TemplatePlaceholder(item.as_str().to_owned()));
87 *line_has_content = true;
88 Ok(())
89 }
90 Rule::bracket_comment => {
91 let comment = Comment::Bracket(item.as_str().to_owned());
92 if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
93 flush_blank_lines(statements, pending_blank_lines);
94 statements.push(Statement::Comment(comment));
95 }
96 *line_has_content = true;
97 Ok(())
98 }
99 Rule::line_comment => {
100 let comment = Comment::Line(item.as_str().to_owned());
101 if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
102 flush_blank_lines(statements, pending_blank_lines);
103 statements.push(Statement::Comment(comment));
104 }
105 *line_has_content = true;
106 Ok(())
107 }
108 Rule::newline => {
109 if *line_has_content {
110 *line_has_content = false;
111 } else {
112 *pending_blank_lines += 1;
113 }
114 Ok(())
115 }
116 Rule::space | Rule::EOI => Ok(()),
117 other => Err(Error::Formatter(format!(
118 "unexpected top-level parser rule: {other:?}"
119 ))),
120 }
121}
122
123fn attach_trailing_comment(
124 statements: &mut [Statement],
125 comment: Comment,
126 line_has_content: bool,
127) -> Option<Comment> {
128 if !line_has_content {
129 return Some(comment);
130 }
131
132 match statements.last_mut() {
133 Some(Statement::Command(command)) if command.trailing_comment.is_none() => {
134 command.trailing_comment = Some(comment);
135 None
136 }
137 _ => Some(comment),
138 }
139}
140
141fn flush_blank_lines(statements: &mut Vec<Statement>, pending_blank_lines: &mut usize) {
142 if *pending_blank_lines == 0 {
143 return;
144 }
145
146 match statements.last_mut() {
147 Some(Statement::BlankLines(count)) => *count += *pending_blank_lines,
148 _ => statements.push(Statement::BlankLines(*pending_blank_lines)),
149 }
150
151 *pending_blank_lines = 0;
152}
153
154fn build_command(pair: pest::iterators::Pair<'_, Rule>) -> Result<CommandInvocation> {
155 debug_assert_eq!(pair.as_rule(), Rule::command_invocation);
156
157 let span = pair.as_span();
158 let mut name = None;
159 let mut arguments = Vec::new();
160
161 for inner in pair.into_inner() {
162 match inner.as_rule() {
163 Rule::identifier => {
164 name = Some(inner.as_str().to_owned());
165 }
166 Rule::arguments => {
167 arguments = build_arguments(inner)?;
168 }
169 Rule::space => {}
170 other => {
171 return Err(Error::Formatter(format!(
172 "unexpected command parser rule: {other:?}"
173 )));
174 }
175 }
176 }
177
178 Ok(CommandInvocation {
179 name: name.ok_or_else(|| Error::Formatter("command missing identifier".to_owned()))?,
180 arguments,
181 trailing_comment: None,
182 span: (span.start(), span.end()),
183 })
184}
185
186fn build_arguments(pair: pest::iterators::Pair<'_, Rule>) -> Result<Vec<Argument>> {
187 debug_assert_eq!(pair.as_rule(), Rule::arguments);
188
189 let inner = pair.into_inner();
190 let mut args = Vec::with_capacity(inner.size_hint().0);
191
192 for p in inner {
193 collect_argument_part(p, &mut args)?;
194 }
195
196 Ok(args)
197}
198
199fn collect_argument_part(
200 pair: pest::iterators::Pair<'_, Rule>,
201 out: &mut Vec<Argument>,
202) -> Result<()> {
203 match pair.as_rule() {
204 Rule::argument_part => {
205 for inner in pair.into_inner() {
206 collect_argument_part(inner, out)?;
207 }
208 Ok(())
209 }
210 Rule::arguments => {
211 for inner in pair.into_inner() {
212 collect_argument_part(inner, out)?;
213 }
214 Ok(())
215 }
216 Rule::argument => {
217 let mut inner = pair.into_inner();
218 let argument = inner
219 .next()
220 .ok_or_else(|| Error::Formatter("argument missing child node".to_owned()))?;
221 out.push(build_argument(argument)?);
222 Ok(())
223 }
224 Rule::bracket_comment => {
225 out.push(Argument::InlineComment(Comment::Bracket(
226 pair.as_str().to_owned(),
227 )));
228 Ok(())
229 }
230 Rule::line_ending => {
231 collect_line_ending_comments(pair, out);
232 Ok(())
233 }
234 Rule::space => Ok(()),
235 other => Err(Error::Formatter(format!(
236 "unexpected argument parser rule: {other:?}"
237 ))),
238 }
239}
240
241fn collect_line_ending_comments(pair: pest::iterators::Pair<'_, Rule>, out: &mut Vec<Argument>) {
242 for inner in pair.into_inner() {
243 if inner.as_rule() == Rule::line_comment {
244 out.push(Argument::InlineComment(Comment::Line(
245 inner.as_str().to_owned(),
246 )));
247 }
248 }
249}
250
251fn build_argument(pair: pest::iterators::Pair<'_, Rule>) -> Result<Argument> {
252 match pair.as_rule() {
253 Rule::bracket_argument => {
254 let raw = pair.as_str().to_owned();
255 Ok(Argument::Bracket(validate_bracket_argument(raw)?))
256 }
257 Rule::quoted_argument => Ok(Argument::Quoted(pair.as_str().to_owned())),
258 Rule::mixed_unquoted_argument | Rule::unquoted_argument => {
259 Ok(Argument::Unquoted(pair.as_str().to_owned()))
260 }
261 other => Err(Error::Formatter(format!(
262 "unexpected argument rule: {other:?}"
263 ))),
264 }
265}
266
267fn validate_bracket_argument(raw: String) -> Result<BracketArgument> {
269 let open_equals = raw
270 .strip_prefix('[')
271 .ok_or_else(|| Error::Formatter("bracket argument missing '[' prefix".to_owned()))?
272 .bytes()
273 .take_while(|&b| b == b'=')
274 .count();
275
276 let close_equals = raw
277 .strip_suffix(']')
278 .ok_or_else(|| Error::Formatter("bracket argument missing ']' suffix".to_owned()))?
279 .bytes()
280 .rev()
281 .take_while(|&b| b == b'=')
282 .count();
283
284 if open_equals != close_equals {
285 return Err(Error::Formatter(format!(
286 "invalid bracket argument delimiter: {raw}"
287 )));
288 }
289
290 Ok(BracketArgument {
291 level: open_equals,
292 raw,
293 })
294}
295
/// Unit tests for the parser entry point and the tree it builds.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src`, panicking with the offending input and error on failure.
    fn parse_ok(src: &str) -> File {
        parse(src).unwrap_or_else(|e| panic!("parse failed for {src:?}: {e}"))
    }

    /// Returns the first statement of `file`, which must be a command.
    fn first_command(file: &File) -> &CommandInvocation {
        match file.statements.first() {
            Some(Statement::Command(cmd)) => cmd,
            other => panic!("expected a command as the first statement, got {other:?}"),
        }
    }

    /// Returns the text of every argument of `cmd`, in order.
    fn argument_texts(cmd: &CommandInvocation) -> Vec<&str> {
        cmd.arguments.iter().map(Argument::as_str).collect()
    }

    /// Parses `src` and returns the level of the bracket argument found at
    /// index 1 of the first command.
    fn second_argument_bracket_level(src: &str) -> usize {
        let f = parse_ok(src);
        let cmd = first_command(&f);
        let Argument::Bracket(bracket) = &cmd.arguments[1] else {
            panic!("expected a bracket argument at index 1 for {src:?}");
        };
        bracket.level
    }

    #[test]
    fn empty_file() {
        let f = parse_ok("");
        assert!(f.statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let f = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let f = parse_ok("some_command()\n");
        let cmd = first_command(&f);
        assert!(cmd.arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let f = parse_ok("message(\"hello world\")\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    #[test]
    fn bracket_argument_zero_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [[hello]])\n"), 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [=[hello]=])\n"), 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        assert_eq!(
            second_argument_bracket_level("set(VAR [==[contains ]= inside]==])\n"),
            2
        );
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Formatter(_)));
    }

    #[test]
    fn invalid_syntax_returns_parse_context_with_crate_owned_diagnostic() {
        let err = parse("message(\n").unwrap_err();
        let Error::ParseContext {
            display_name,
            source_text,
            start_line,
            barrier_context,
            diagnostic,
        } = err
        else {
            panic!("expected parse context error");
        };

        assert_eq!(display_name, "<source>");
        assert_eq!(source_text.as_ref(), "message(\n");
        assert_eq!(start_line, 1);
        assert!(!barrier_context);
        assert!(
            diagnostic.message.contains("expected"),
            "unexpected parse diagnostic: {diagnostic:?}"
        );
        assert_eq!(diagnostic.line, 2);
        assert_eq!(diagnostic.column, 1);
    }

    #[test]
    fn line_comment_standalone() {
        let f = parse_ok("# this is a comment\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Line(_))
        ));
    }

    #[test]
    fn bracket_comment() {
        let f = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn variable_reference_in_unquoted() {
        let f = parse_ok("message(${MY_VAR})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let f = parse_ok("message($ENV{PATH})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let f = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments.len(), 2);
    }

    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n PUBLIC dep1\n PRIVATE dep2\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "target_link_libraries");
        // mylib, PUBLIC, dep1, PRIVATE, dep2
        assert_eq!(cmd.arguments.len(), 5);
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n PRIVATE a.cc # keep grouping\n b.cc\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(matches!(cmd.trailing_comment, Some(Comment::Line(_))));
    }

    #[test]
    fn file_without_final_newline() {
        let f = parse_ok("project(MyProject)");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let f = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(f.statements.len(), 3);
        assert!(matches!(f.statements[1], Statement::BlankLines(1)));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let f = parse_ok("\nmessage(foo)\n");
        assert!(matches!(f.statements[0], Statement::BlankLines(1)));
    }

    #[test]
    fn escape_sequences_in_quoted() {
        let f = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!f.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let f = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let cmd = first_command(&f);
        assert_eq!(
            argument_texts(cmd),
            vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]
        );
    }

    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let f = parse_ok("message(${${OUTER}})\n");
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let f = parse_ok("_my_command(ARG)\n");
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "_my_command");
    }

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let cmd = first_command(&f);
        assert_eq!(
            argument_texts(cmd),
            vec!["FALSE", "AND", "(FALSE OR TRUE)"]
        );
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            " AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        let cmd = first_command(&f);
        assert_eq!(
            argument_texts(cmd),
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let f = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let f = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            f.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let f = parse_ok("set(x -Da=\"b c\")\n");
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let f = parse_ok("set(x -Da=$(v))\n");
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The tab inside the character class is written as an explicit `\t`
        // escape so the fixture cannot drift from the expected value when
        // whitespace is normalised by an editor or formatter.
        let f = parse_ok("set(VERSION_REGEX \"#define CLI11_VERSION[ \t]+\"(.+)\"\")");
        let cmd = first_command(&f);
        assert_eq!(
            cmd.arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let f = parse_ok(
            r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##,
        );
        let cmd = first_command(&f);
        assert_eq!(
            cmd.arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }
}