Skip to main content

cmakefmt/parser/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Parser entry points for CMake source text.
6//!
7//! The grammar is defined in `parser/cmake.pest`, while
8//! [`crate::parser::ast`] contains the AST types returned by
9//! [`crate::parser::parse()`].
10
11use pest::Parser;
12use pest_derive::Parser;
13
14pub mod ast;
15
16/// Internal pest parser derived from the `parser/cmake.pest` grammar; `pub` but marked `#[doc(hidden)]`.
17#[doc(hidden)]
18#[derive(Parser)]
19#[grammar = "parser/cmake.pest"]
20pub struct CmakeParser;
21
22use crate::error::{Error, Result};
23use ast::{Argument, BracketArgument, CommandInvocation, Comment, File, Statement};
24
25/// Parse CMake source text into an AST [`File`].
26///
27/// The returned AST preserves command structure, blank lines, and comments so
28/// the formatter can round-trip files with stable semantics.
29pub fn parse(source: &str) -> Result<File> {
30    let mut pairs =
31        CmakeParser::parse(Rule::file, source).map_err(|e| Error::Parse(Box::new(e)))?;
32    let file_pair = pairs
33        .next()
34        .ok_or_else(|| Error::Formatter("parser did not return a file pair".to_owned()))?;
35
36    build_file(file_pair)
37}
38
39fn build_file(pair: pest::iterators::Pair<'_, Rule>) -> Result<File> {
40    debug_assert_eq!(pair.as_rule(), Rule::file);
41
42    let items = pair.into_inner();
43    let mut statements = Vec::with_capacity(items.size_hint().0);
44    let mut pending_blank_lines = 0usize;
45    let mut line_has_content = false;
46
47    for item in items {
48        collect_file_item(
49            item,
50            &mut statements,
51            &mut pending_blank_lines,
52            &mut line_has_content,
53        )?;
54    }
55
56    flush_blank_lines(&mut statements, &mut pending_blank_lines);
57    Ok(File { statements })
58}
59
60fn collect_file_item(
61    item: pest::iterators::Pair<'_, Rule>,
62    statements: &mut Vec<Statement>,
63    pending_blank_lines: &mut usize,
64    line_has_content: &mut bool,
65) -> Result<()> {
66    match item.as_rule() {
67        Rule::file_item => {
68            for inner in item.into_inner() {
69                collect_file_item(inner, statements, pending_blank_lines, line_has_content)?;
70            }
71            Ok(())
72        }
73        Rule::command_invocation => {
74            flush_blank_lines(statements, pending_blank_lines);
75            statements.push(Statement::Command(build_command(item)?));
76            *line_has_content = true;
77            Ok(())
78        }
79        Rule::template_placeholder => {
80            flush_blank_lines(statements, pending_blank_lines);
81            statements.push(Statement::TemplatePlaceholder(item.as_str().to_owned()));
82            *line_has_content = true;
83            Ok(())
84        }
85        Rule::bracket_comment => {
86            let comment = Comment::Bracket(item.as_str().to_owned());
87            if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
88                flush_blank_lines(statements, pending_blank_lines);
89                statements.push(Statement::Comment(comment));
90            }
91            *line_has_content = true;
92            Ok(())
93        }
94        Rule::line_comment => {
95            let comment = Comment::Line(item.as_str().to_owned());
96            if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
97                flush_blank_lines(statements, pending_blank_lines);
98                statements.push(Statement::Comment(comment));
99            }
100            *line_has_content = true;
101            Ok(())
102        }
103        Rule::newline => {
104            if *line_has_content {
105                *line_has_content = false;
106            } else {
107                *pending_blank_lines += 1;
108            }
109            Ok(())
110        }
111        Rule::space | Rule::EOI => Ok(()),
112        other => Err(Error::Formatter(format!(
113            "unexpected top-level parser rule: {other:?}"
114        ))),
115    }
116}
117
118fn attach_trailing_comment(
119    statements: &mut [Statement],
120    comment: Comment,
121    line_has_content: bool,
122) -> Option<Comment> {
123    if !line_has_content {
124        return Some(comment);
125    }
126
127    match statements.last_mut() {
128        Some(Statement::Command(command)) if command.trailing_comment.is_none() => {
129            command.trailing_comment = Some(comment);
130            None
131        }
132        _ => Some(comment),
133    }
134}
135
136fn flush_blank_lines(statements: &mut Vec<Statement>, pending_blank_lines: &mut usize) {
137    if *pending_blank_lines == 0 {
138        return;
139    }
140
141    match statements.last_mut() {
142        Some(Statement::BlankLines(count)) => *count += *pending_blank_lines,
143        _ => statements.push(Statement::BlankLines(*pending_blank_lines)),
144    }
145
146    *pending_blank_lines = 0;
147}
148
149fn build_command(pair: pest::iterators::Pair<'_, Rule>) -> Result<CommandInvocation> {
150    debug_assert_eq!(pair.as_rule(), Rule::command_invocation);
151
152    let span = pair.as_span();
153    let mut name = None;
154    let mut arguments = Vec::new();
155
156    for inner in pair.into_inner() {
157        match inner.as_rule() {
158            Rule::identifier => {
159                name = Some(inner.as_str().to_owned());
160            }
161            Rule::arguments => {
162                arguments = build_arguments(inner)?;
163            }
164            Rule::space => {}
165            other => {
166                return Err(Error::Formatter(format!(
167                    "unexpected command parser rule: {other:?}"
168                )));
169            }
170        }
171    }
172
173    Ok(CommandInvocation {
174        name: name.ok_or_else(|| Error::Formatter("command missing identifier".to_owned()))?,
175        arguments,
176        trailing_comment: None,
177        span: (span.start(), span.end()),
178    })
179}
180
181fn build_arguments(pair: pest::iterators::Pair<'_, Rule>) -> Result<Vec<Argument>> {
182    debug_assert_eq!(pair.as_rule(), Rule::arguments);
183
184    let inner = pair.into_inner();
185    let mut args = Vec::with_capacity(inner.size_hint().0);
186
187    for p in inner {
188        collect_argument_part(p, &mut args)?;
189    }
190
191    Ok(args)
192}
193
194fn collect_argument_part(
195    pair: pest::iterators::Pair<'_, Rule>,
196    out: &mut Vec<Argument>,
197) -> Result<()> {
198    match pair.as_rule() {
199        Rule::argument_part => {
200            for inner in pair.into_inner() {
201                collect_argument_part(inner, out)?;
202            }
203            Ok(())
204        }
205        Rule::arguments => {
206            for inner in pair.into_inner() {
207                collect_argument_part(inner, out)?;
208            }
209            Ok(())
210        }
211        Rule::argument => {
212            let mut inner = pair.into_inner();
213            let argument = inner
214                .next()
215                .ok_or_else(|| Error::Formatter("argument missing child node".to_owned()))?;
216            out.push(build_argument(argument)?);
217            Ok(())
218        }
219        Rule::bracket_comment => {
220            out.push(Argument::InlineComment(Comment::Bracket(
221                pair.as_str().to_owned(),
222            )));
223            Ok(())
224        }
225        Rule::line_ending => {
226            collect_line_ending_comments(pair, out);
227            Ok(())
228        }
229        Rule::space => Ok(()),
230        other => Err(Error::Formatter(format!(
231            "unexpected argument parser rule: {other:?}"
232        ))),
233    }
234}
235
236fn collect_line_ending_comments(pair: pest::iterators::Pair<'_, Rule>, out: &mut Vec<Argument>) {
237    for inner in pair.into_inner() {
238        if inner.as_rule() == Rule::line_comment {
239            out.push(Argument::InlineComment(Comment::Line(
240                inner.as_str().to_owned(),
241            )));
242        }
243    }
244}
245
246fn build_argument(pair: pest::iterators::Pair<'_, Rule>) -> Result<Argument> {
247    match pair.as_rule() {
248        Rule::bracket_argument => {
249            let raw = pair.as_str().to_owned();
250            Ok(Argument::Bracket(validate_bracket_argument(raw)?))
251        }
252        Rule::quoted_argument => Ok(Argument::Quoted(pair.as_str().to_owned())),
253        Rule::mixed_unquoted_argument | Rule::unquoted_argument => {
254            Ok(Argument::Unquoted(pair.as_str().to_owned()))
255        }
256        other => Err(Error::Formatter(format!(
257            "unexpected argument rule: {other:?}"
258        ))),
259    }
260}
261
262/// Validate that a bracket argument's opening and closing "=" counts match.
263fn validate_bracket_argument(raw: String) -> Result<BracketArgument> {
264    let open_equals = raw
265        .strip_prefix('[')
266        .ok_or_else(|| Error::Formatter("bracket argument missing '[' prefix".to_owned()))?
267        .bytes()
268        .take_while(|&b| b == b'=')
269        .count();
270
271    let close_equals = raw
272        .strip_suffix(']')
273        .ok_or_else(|| Error::Formatter("bracket argument missing ']' suffix".to_owned()))?
274        .bytes()
275        .rev()
276        .take_while(|&b| b == b'=')
277        .count();
278
279    if open_equals != close_equals {
280        return Err(Error::Formatter(format!(
281            "invalid bracket argument delimiter: {raw}"
282        )));
283    }
284
285    Ok(BracketArgument {
286        level: open_equals,
287        raw,
288    })
289}
290
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src`, panicking with the offending input and error on failure.
    fn parse_ok(src: &str) -> File {
        parse(src).unwrap_or_else(|e| panic!("parse failed for {src:?}: {e}"))
    }

    /// Returns the first statement of `file`, which must be a command.
    ///
    /// Panics with the statement actually found so a failing test explains
    /// itself instead of dying on a bare `panic!()`.
    fn first_command(file: &File) -> &CommandInvocation {
        match file.statements.first() {
            Some(Statement::Command(cmd)) => cmd,
            other => panic!("expected a command as the first statement, got {other:?}"),
        }
    }

    /// Returns the delimiter level of `arg`, which must be a bracket argument.
    fn bracket_level(arg: &Argument) -> usize {
        match arg {
            Argument::Bracket(bracket) => bracket.level,
            other => panic!("expected a bracket argument, got {}", other.as_str()),
        }
    }

    /// Collects the source text of every argument of `cmd`.
    fn argument_texts(cmd: &CommandInvocation) -> Vec<&str> {
        cmd.arguments.iter().map(Argument::as_str).collect()
    }

    #[test]
    fn empty_file() {
        let f = parse_ok("");
        assert!(f.statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let f = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let f = parse_ok("some_command()\n");
        assert!(first_command(&f).arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let f = parse_ok("message(\"hello world\")\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    #[test]
    fn bracket_argument_zero_equals() {
        let f = parse_ok("set(VAR [[hello]])\n");
        let cmd = first_command(&f);
        assert_eq!(bracket_level(&cmd.arguments[1]), 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        let f = parse_ok("set(VAR [=[hello]=])\n");
        let cmd = first_command(&f);
        assert_eq!(bracket_level(&cmd.arguments[1]), 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        let f = parse_ok("set(VAR [==[contains ]= inside]==])\n");
        let cmd = first_command(&f);
        assert_eq!(bracket_level(&cmd.arguments[1]), 2);
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Formatter(_)));
    }

    #[test]
    fn line_comment_standalone() {
        let f = parse_ok("# this is a comment\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Line(_))
        ));
    }

    #[test]
    fn bracket_comment() {
        let f = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn variable_reference_in_unquoted() {
        let f = parse_ok("message(${MY_VAR})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let f = parse_ok("message($ENV{PATH})\n");
        let cmd = first_command(&f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let f = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        assert_eq!(first_command(&f).arguments.len(), 2);
    }

    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n    PUBLIC dep1\n    PRIVATE dep2\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.name, "target_link_libraries");
        assert_eq!(cmd.arguments.len(), 5); // mylib PUBLIC dep1 PRIVATE dep2
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n  PRIVATE a.cc # keep grouping\n  b.cc\n)\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let f = parse_ok(src);
        let cmd = first_command(&f);
        assert!(matches!(cmd.trailing_comment, Some(Comment::Line(_))));
    }

    #[test]
    fn file_without_final_newline() {
        let f = parse_ok("project(MyProject)");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let f = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(f.statements.len(), 3);
        assert!(matches!(f.statements[1], Statement::BlankLines(1)));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let f = parse_ok("\nmessage(foo)\n");
        assert!(matches!(f.statements[0], Statement::BlankLines(1)));
    }

    #[test]
    fn escape_sequences_in_quoted() {
        let f = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!f.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let f = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let cmd = first_command(&f);
        assert_eq!(
            argument_texts(cmd),
            vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]
        );
    }

    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let f = parse_ok("message(${${OUTER}})\n");
        assert_eq!(first_command(&f).arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let f = parse_ok("_my_command(ARG)\n");
        assert_eq!(first_command(&f).name, "_my_command");
    }

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let cmd = first_command(&f);
        assert_eq!(
            argument_texts(cmd),
            vec!["FALSE", "AND", "(FALSE OR TRUE)"]
        );
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            "          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        let cmd = first_command(&f);
        assert_eq!(
            argument_texts(cmd),
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let f = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let f = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            f.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let f = parse_ok("set(x -Da=\"b c\")\n");
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let f = parse_ok("set(x -Da=$(v))\n");
        let cmd = first_command(&f);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The tab inside the character class is written as `\t` so it stays
        // visible and cannot be lost to whitespace normalisation.
        let f = parse_ok("set(VERSION_REGEX \"#define CLI11_VERSION[ \t]+\"(.+)\"\")");
        let cmd = first_command(&f);
        assert_eq!(
            cmd.arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let f = parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        let cmd = first_command(&f);
        assert_eq!(
            cmd.arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }
}