Skip to main content

cmakefmt/parser/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Parser entry points for CMake source text.
6//!
7//! The grammar is defined in `parser/cmake.pest`, while
8//! [`crate::parser::ast`] contains the AST types returned by
9//! [`crate::parser::parse()`].
10
11use pest::Parser;
12
13pub mod ast;
14
15mod generated {
16    use pest_derive::Parser;
17
18    /// Internal pest parser generated from `cmake.pest`.
19    #[derive(Parser)]
20    #[grammar = "parser/cmake.pest"]
21    pub(super) struct CmakeParser;
22}
23
24use generated::{CmakeParser, Rule};
25
26use crate::error::{Error, Result};
27use ast::{Argument, BracketArgument, CommandInvocation, Comment, File, Statement};
28
29/// Parse CMake source text into an AST [`File`].
30///
31/// The returned AST preserves command structure, blank lines, and comments so
32/// the formatter can round-trip files with stable semantics.
33pub fn parse(source: &str) -> Result<File> {
34    let mut pairs = CmakeParser::parse(Rule::file, source).map_err(|e| {
35        Error::Parse(crate::error::ParseError {
36            display_name: "<source>".to_owned(),
37            source_text: source.to_owned().into_boxed_str(),
38            start_line: 1,
39            diagnostic: crate::error::ParseDiagnostic::from_pest(&e),
40        })
41    })?;
42    let file_pair = pairs
43        .next()
44        .ok_or_else(|| Error::Formatter("parser did not return a file pair".to_owned()))?;
45
46    build_file(file_pair)
47}
48
49fn build_file(pair: pest::iterators::Pair<'_, Rule>) -> Result<File> {
50    debug_assert_eq!(pair.as_rule(), Rule::file);
51
52    let items = pair.into_inner();
53    let mut statements = Vec::with_capacity(items.size_hint().0);
54    let mut pending_blank_lines = 0usize;
55    let mut line_has_content = false;
56
57    for item in items {
58        collect_file_item(
59            item,
60            &mut statements,
61            &mut pending_blank_lines,
62            &mut line_has_content,
63        )?;
64    }
65
66    flush_blank_lines(&mut statements, &mut pending_blank_lines);
67    Ok(File { statements })
68}
69
70fn collect_file_item(
71    item: pest::iterators::Pair<'_, Rule>,
72    statements: &mut Vec<Statement>,
73    pending_blank_lines: &mut usize,
74    line_has_content: &mut bool,
75) -> Result<()> {
76    match item.as_rule() {
77        Rule::file_item => {
78            for inner in item.into_inner() {
79                collect_file_item(inner, statements, pending_blank_lines, line_has_content)?;
80            }
81            Ok(())
82        }
83        Rule::command_invocation => {
84            flush_blank_lines(statements, pending_blank_lines);
85            statements.push(Statement::Command(build_command(item)?));
86            *line_has_content = true;
87            Ok(())
88        }
89        Rule::template_placeholder => {
90            flush_blank_lines(statements, pending_blank_lines);
91            statements.push(Statement::TemplatePlaceholder(item.as_str().to_owned()));
92            *line_has_content = true;
93            Ok(())
94        }
95        Rule::bracket_comment => {
96            let comment = Comment::Bracket(item.as_str().to_owned());
97            if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
98                flush_blank_lines(statements, pending_blank_lines);
99                statements.push(Statement::Comment(comment));
100            }
101            *line_has_content = true;
102            Ok(())
103        }
104        Rule::line_comment => {
105            let comment = Comment::Line(item.as_str().to_owned());
106            if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
107                flush_blank_lines(statements, pending_blank_lines);
108                statements.push(Statement::Comment(comment));
109            }
110            *line_has_content = true;
111            Ok(())
112        }
113        Rule::newline => {
114            if *line_has_content {
115                *line_has_content = false;
116            } else {
117                *pending_blank_lines += 1;
118            }
119            Ok(())
120        }
121        Rule::space | Rule::EOI => Ok(()),
122        other => Err(Error::Formatter(format!(
123            "unexpected top-level parser rule: {other:?}"
124        ))),
125    }
126}
127
128fn attach_trailing_comment(
129    statements: &mut [Statement],
130    comment: Comment,
131    line_has_content: bool,
132) -> Option<Comment> {
133    if !line_has_content {
134        return Some(comment);
135    }
136
137    match statements.last_mut() {
138        Some(Statement::Command(command)) if command.trailing_comment.is_none() => {
139            command.trailing_comment = Some(comment);
140            None
141        }
142        _ => Some(comment),
143    }
144}
145
146fn flush_blank_lines(statements: &mut Vec<Statement>, pending_blank_lines: &mut usize) {
147    if *pending_blank_lines == 0 {
148        return;
149    }
150
151    match statements.last_mut() {
152        Some(Statement::BlankLines(count)) => *count += *pending_blank_lines,
153        _ => statements.push(Statement::BlankLines(*pending_blank_lines)),
154    }
155
156    *pending_blank_lines = 0;
157}
158
159fn build_command(pair: pest::iterators::Pair<'_, Rule>) -> Result<CommandInvocation> {
160    debug_assert_eq!(pair.as_rule(), Rule::command_invocation);
161
162    let span = pair.as_span();
163    let mut name = None;
164    let mut arguments = Vec::new();
165
166    for inner in pair.into_inner() {
167        match inner.as_rule() {
168            Rule::identifier => {
169                name = Some(inner.as_str().to_owned());
170            }
171            Rule::arguments => {
172                arguments = build_arguments(inner)?;
173            }
174            Rule::space => {}
175            other => {
176                return Err(Error::Formatter(format!(
177                    "unexpected command parser rule: {other:?}"
178                )));
179            }
180        }
181    }
182
183    Ok(CommandInvocation {
184        name: name.ok_or_else(|| Error::Formatter("command missing identifier".to_owned()))?,
185        arguments,
186        trailing_comment: None,
187        span: (span.start(), span.end()),
188    })
189}
190
191fn build_arguments(pair: pest::iterators::Pair<'_, Rule>) -> Result<Vec<Argument>> {
192    debug_assert_eq!(pair.as_rule(), Rule::arguments);
193
194    let inner = pair.into_inner();
195    let mut args = Vec::with_capacity(inner.size_hint().0);
196
197    for p in inner {
198        collect_argument_part(p, &mut args)?;
199    }
200
201    Ok(args)
202}
203
204fn collect_argument_part(
205    pair: pest::iterators::Pair<'_, Rule>,
206    out: &mut Vec<Argument>,
207) -> Result<()> {
208    match pair.as_rule() {
209        Rule::argument_part => {
210            for inner in pair.into_inner() {
211                collect_argument_part(inner, out)?;
212            }
213            Ok(())
214        }
215        Rule::arguments => {
216            for inner in pair.into_inner() {
217                collect_argument_part(inner, out)?;
218            }
219            Ok(())
220        }
221        Rule::argument => {
222            let mut inner = pair.into_inner();
223            let argument = inner
224                .next()
225                .ok_or_else(|| Error::Formatter("argument missing child node".to_owned()))?;
226            out.push(build_argument(argument)?);
227            Ok(())
228        }
229        Rule::bracket_comment => {
230            out.push(Argument::InlineComment(Comment::Bracket(
231                pair.as_str().to_owned(),
232            )));
233            Ok(())
234        }
235        Rule::line_ending => {
236            collect_line_ending_comments(pair, out);
237            Ok(())
238        }
239        Rule::space => Ok(()),
240        other => Err(Error::Formatter(format!(
241            "unexpected argument parser rule: {other:?}"
242        ))),
243    }
244}
245
246fn collect_line_ending_comments(pair: pest::iterators::Pair<'_, Rule>, out: &mut Vec<Argument>) {
247    for inner in pair.into_inner() {
248        if inner.as_rule() == Rule::line_comment {
249            out.push(Argument::InlineComment(Comment::Line(
250                inner.as_str().to_owned(),
251            )));
252        }
253    }
254}
255
256fn build_argument(pair: pest::iterators::Pair<'_, Rule>) -> Result<Argument> {
257    match pair.as_rule() {
258        Rule::bracket_argument => {
259            let raw = pair.as_str().to_owned();
260            Ok(Argument::Bracket(validate_bracket_argument(raw)?))
261        }
262        Rule::quoted_argument => Ok(Argument::Quoted(pair.as_str().to_owned())),
263        Rule::mixed_unquoted_argument | Rule::unquoted_argument => {
264            Ok(Argument::Unquoted(pair.as_str().to_owned()))
265        }
266        other => Err(Error::Formatter(format!(
267            "unexpected argument rule: {other:?}"
268        ))),
269    }
270}
271
272/// Validate that a bracket argument's opening and closing "=" counts match.
273fn validate_bracket_argument(raw: String) -> Result<BracketArgument> {
274    let open_equals = raw
275        .strip_prefix('[')
276        .ok_or_else(|| Error::Formatter("bracket argument missing '[' prefix".to_owned()))?
277        .bytes()
278        .take_while(|&b| b == b'=')
279        .count();
280
281    let close_equals = raw
282        .strip_suffix(']')
283        .ok_or_else(|| Error::Formatter("bracket argument missing ']' suffix".to_owned()))?
284        .bytes()
285        .rev()
286        .take_while(|&b| b == b'=')
287        .count();
288
289    if open_equals != close_equals {
290        return Err(Error::Formatter(format!(
291            "invalid bracket argument delimiter: {raw}"
292        )));
293    }
294
295    Ok(BracketArgument {
296        level: open_equals,
297        raw,
298    })
299}
300
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src`, panicking with the offending input if parsing fails.
    fn parse_ok(src: &str) -> File {
        match parse(src) {
            Ok(file) => file,
            Err(e) => panic!("parse failed for {src:?}: {e}"),
        }
    }

    /// Borrow the first statement of `file`, which must be a command.
    fn first_command(file: &File) -> &CommandInvocation {
        match &file.statements[0] {
            Statement::Command(cmd) => cmd,
            _ => panic!(),
        }
    }

    /// Source text of every argument of `cmd`, in order.
    fn argument_texts(cmd: &CommandInvocation) -> Vec<&str> {
        cmd.arguments.iter().map(Argument::as_str).collect()
    }

    /// Parse `src` and return the bracket level of the first command's second
    /// argument, which must be a bracket argument.
    fn second_argument_bracket_level(src: &str) -> usize {
        let file = parse_ok(src);
        match &first_command(&file).arguments[1] {
            Argument::Bracket(bracket) => bracket.level,
            _ => panic!(),
        }
    }

    #[test]
    fn empty_file() {
        let file = parse_ok("");
        assert!(file.statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let file = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(file.statements.len(), 1);
        let cmd = first_command(&file);
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let file = parse_ok("some_command()\n");
        assert!(first_command(&file).arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let file = parse_ok("message(\"hello world\")\n");
        let cmd = first_command(&file);
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    #[test]
    fn bracket_argument_zero_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [[hello]])\n"), 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [=[hello]=])\n"), 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        assert_eq!(
            second_argument_bracket_level("set(VAR [==[contains ]= inside]==])\n"),
            2
        );
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Formatter(_)));
    }

    #[test]
    fn invalid_syntax_returns_parse_error_with_crate_owned_diagnostic() {
        let err = parse("message(\n").unwrap_err();
        let parse_err = match err {
            Error::Parse(inner) => inner,
            _ => panic!("expected parse error"),
        };

        assert_eq!(parse_err.display_name, "<source>");
        assert_eq!(parse_err.source_text.as_ref(), "message(\n");
        assert_eq!(parse_err.start_line, 1);
        assert!(
            parse_err.diagnostic.message.contains("expected"),
            "unexpected parse diagnostic: {:?}",
            parse_err.diagnostic
        );
        assert_eq!(parse_err.diagnostic.line, 2);
        assert_eq!(parse_err.diagnostic.column, 1);
    }

    #[test]
    fn line_comment_standalone() {
        let file = parse_ok("# this is a comment\n");
        assert!(matches!(
            &file.statements[0],
            Statement::Comment(Comment::Line(_))
        ));
    }

    #[test]
    fn bracket_comment() {
        let file = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &file.statements[0],
            Statement::Comment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn variable_reference_in_unquoted() {
        let file = parse_ok("message(${MY_VAR})\n");
        let cmd = first_command(&file);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let file = parse_ok("message($ENV{PATH})\n");
        let cmd = first_command(&file);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let file = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        assert_eq!(first_command(&file).arguments.len(), 2);
    }

    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n    PUBLIC dep1\n    PRIVATE dep2\n)\n";
        let file = parse_ok(src);
        let cmd = first_command(&file);
        assert_eq!(cmd.name, "target_link_libraries");
        // mylib PUBLIC dep1 PRIVATE dep2
        assert_eq!(cmd.arguments.len(), 5);
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let file = parse_ok(src);
        let cmd = first_command(&file);
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n  PRIVATE a.cc # keep grouping\n  b.cc\n)\n";
        let file = parse_ok(src);
        let cmd = first_command(&file);
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let file = parse_ok(src);
        let cmd = first_command(&file);
        assert!(matches!(cmd.trailing_comment, Some(Comment::Line(_))));
    }

    #[test]
    fn file_without_final_newline() {
        let file = parse_ok("project(MyProject)");
        assert_eq!(file.statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let file = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(file.statements.len(), 3);
        assert!(matches!(file.statements[1], Statement::BlankLines(1)));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let file = parse_ok("\nmessage(foo)\n");
        assert!(matches!(file.statements[0], Statement::BlankLines(1)));
    }

    #[test]
    fn escape_sequences_in_quoted() {
        let file = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!file.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let file = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let args = argument_texts(first_command(&file));
        assert_eq!(args, vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]);
    }

    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let file = parse_ok(src);
        assert_eq!(file.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let file = parse_ok("message(${${OUTER}})\n");
        assert_eq!(first_command(&file).arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let file = parse_ok("_my_command(ARG)\n");
        assert_eq!(first_command(&file).name, "_my_command");
    }

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let file = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let args = argument_texts(first_command(&file));
        assert_eq!(args, vec!["FALSE", "AND", "(FALSE OR TRUE)"]);
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let file = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            "          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        let args = argument_texts(first_command(&file));
        assert_eq!(
            args,
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let file = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(file.statements.len(), 1);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let file = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            file.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let file = parse_ok("set(x -Da=\"b c\")\n");
        let cmd = first_command(&file);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let file = parse_ok("set(x -Da=$(v))\n");
        let cmd = first_command(&file);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The character class holds a space followed by a tab; the tab is
        // written as an escape so it stays visible in the source.
        let file = parse_ok("set(VERSION_REGEX \"#define CLI11_VERSION[ \t]+\"(.+)\"\")");
        let cmd = first_command(&file);
        assert_eq!(
            cmd.arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let file =
            parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        let cmd = first_command(&file);
        assert_eq!(
            cmd.arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }
}