Skip to main content

cmakefmt/parser/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Parser entry points for CMake source text.
6//!
7//! The grammar is defined in `parser/cmake.pest`, while
8//! [`crate::parser::ast`] contains the AST types returned by
9//! [`crate::parser::parse()`].
10
11use pest::Parser;
12use pest_derive::Parser;
13
14pub mod ast;
15
/// Internal pest parser generated from `cmake.pest`.
///
/// The `Parser` derive supplies the `Rule` enum and the `parse` associated
/// function that the rest of this module relies on. The type is public but
/// hidden from the rendered docs; this module's own entry point is
/// [`parse()`].
#[doc(hidden)]
#[derive(Parser)]
#[grammar = "parser/cmake.pest"]
pub struct CmakeParser;
21
22use crate::error::{Error, Result};
23use ast::{Argument, BracketArgument, CommandInvocation, Comment, File, Statement};
24
25/// Parse CMake source text into an AST [`File`].
26///
27/// The returned AST preserves command structure, blank lines, and comments so
28/// the formatter can round-trip files with stable semantics.
29pub fn parse(source: &str) -> Result<File> {
30    let mut pairs = CmakeParser::parse(Rule::file, source).map_err(|e| Error::ParseContext {
31        display_name: "<source>".to_owned(),
32        source_text: source.to_owned().into_boxed_str(),
33        start_line: 1,
34        barrier_context: false,
35        diagnostic: crate::error::ParseDiagnostic::from_pest(&e),
36    })?;
37    let file_pair = pairs
38        .next()
39        .ok_or_else(|| Error::Formatter("parser did not return a file pair".to_owned()))?;
40
41    build_file(file_pair)
42}
43
44fn build_file(pair: pest::iterators::Pair<'_, Rule>) -> Result<File> {
45    debug_assert_eq!(pair.as_rule(), Rule::file);
46
47    let items = pair.into_inner();
48    let mut statements = Vec::with_capacity(items.size_hint().0);
49    let mut pending_blank_lines = 0usize;
50    let mut line_has_content = false;
51
52    for item in items {
53        collect_file_item(
54            item,
55            &mut statements,
56            &mut pending_blank_lines,
57            &mut line_has_content,
58        )?;
59    }
60
61    flush_blank_lines(&mut statements, &mut pending_blank_lines);
62    Ok(File { statements })
63}
64
65fn collect_file_item(
66    item: pest::iterators::Pair<'_, Rule>,
67    statements: &mut Vec<Statement>,
68    pending_blank_lines: &mut usize,
69    line_has_content: &mut bool,
70) -> Result<()> {
71    match item.as_rule() {
72        Rule::file_item => {
73            for inner in item.into_inner() {
74                collect_file_item(inner, statements, pending_blank_lines, line_has_content)?;
75            }
76            Ok(())
77        }
78        Rule::command_invocation => {
79            flush_blank_lines(statements, pending_blank_lines);
80            statements.push(Statement::Command(build_command(item)?));
81            *line_has_content = true;
82            Ok(())
83        }
84        Rule::template_placeholder => {
85            flush_blank_lines(statements, pending_blank_lines);
86            statements.push(Statement::TemplatePlaceholder(item.as_str().to_owned()));
87            *line_has_content = true;
88            Ok(())
89        }
90        Rule::bracket_comment => {
91            let comment = Comment::Bracket(item.as_str().to_owned());
92            if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
93                flush_blank_lines(statements, pending_blank_lines);
94                statements.push(Statement::Comment(comment));
95            }
96            *line_has_content = true;
97            Ok(())
98        }
99        Rule::line_comment => {
100            let comment = Comment::Line(item.as_str().to_owned());
101            if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
102                flush_blank_lines(statements, pending_blank_lines);
103                statements.push(Statement::Comment(comment));
104            }
105            *line_has_content = true;
106            Ok(())
107        }
108        Rule::newline => {
109            if *line_has_content {
110                *line_has_content = false;
111            } else {
112                *pending_blank_lines += 1;
113            }
114            Ok(())
115        }
116        Rule::space | Rule::EOI => Ok(()),
117        other => Err(Error::Formatter(format!(
118            "unexpected top-level parser rule: {other:?}"
119        ))),
120    }
121}
122
123fn attach_trailing_comment(
124    statements: &mut [Statement],
125    comment: Comment,
126    line_has_content: bool,
127) -> Option<Comment> {
128    if !line_has_content {
129        return Some(comment);
130    }
131
132    match statements.last_mut() {
133        Some(Statement::Command(command)) if command.trailing_comment.is_none() => {
134            command.trailing_comment = Some(comment);
135            None
136        }
137        _ => Some(comment),
138    }
139}
140
141fn flush_blank_lines(statements: &mut Vec<Statement>, pending_blank_lines: &mut usize) {
142    if *pending_blank_lines == 0 {
143        return;
144    }
145
146    match statements.last_mut() {
147        Some(Statement::BlankLines(count)) => *count += *pending_blank_lines,
148        _ => statements.push(Statement::BlankLines(*pending_blank_lines)),
149    }
150
151    *pending_blank_lines = 0;
152}
153
154fn build_command(pair: pest::iterators::Pair<'_, Rule>) -> Result<CommandInvocation> {
155    debug_assert_eq!(pair.as_rule(), Rule::command_invocation);
156
157    let span = pair.as_span();
158    let mut name = None;
159    let mut arguments = Vec::new();
160
161    for inner in pair.into_inner() {
162        match inner.as_rule() {
163            Rule::identifier => {
164                name = Some(inner.as_str().to_owned());
165            }
166            Rule::arguments => {
167                arguments = build_arguments(inner)?;
168            }
169            Rule::space => {}
170            other => {
171                return Err(Error::Formatter(format!(
172                    "unexpected command parser rule: {other:?}"
173                )));
174            }
175        }
176    }
177
178    Ok(CommandInvocation {
179        name: name.ok_or_else(|| Error::Formatter("command missing identifier".to_owned()))?,
180        arguments,
181        trailing_comment: None,
182        span: (span.start(), span.end()),
183    })
184}
185
186fn build_arguments(pair: pest::iterators::Pair<'_, Rule>) -> Result<Vec<Argument>> {
187    debug_assert_eq!(pair.as_rule(), Rule::arguments);
188
189    let inner = pair.into_inner();
190    let mut args = Vec::with_capacity(inner.size_hint().0);
191
192    for p in inner {
193        collect_argument_part(p, &mut args)?;
194    }
195
196    Ok(args)
197}
198
199fn collect_argument_part(
200    pair: pest::iterators::Pair<'_, Rule>,
201    out: &mut Vec<Argument>,
202) -> Result<()> {
203    match pair.as_rule() {
204        Rule::argument_part => {
205            for inner in pair.into_inner() {
206                collect_argument_part(inner, out)?;
207            }
208            Ok(())
209        }
210        Rule::arguments => {
211            for inner in pair.into_inner() {
212                collect_argument_part(inner, out)?;
213            }
214            Ok(())
215        }
216        Rule::argument => {
217            let mut inner = pair.into_inner();
218            let argument = inner
219                .next()
220                .ok_or_else(|| Error::Formatter("argument missing child node".to_owned()))?;
221            out.push(build_argument(argument)?);
222            Ok(())
223        }
224        Rule::bracket_comment => {
225            out.push(Argument::InlineComment(Comment::Bracket(
226                pair.as_str().to_owned(),
227            )));
228            Ok(())
229        }
230        Rule::line_ending => {
231            collect_line_ending_comments(pair, out);
232            Ok(())
233        }
234        Rule::space => Ok(()),
235        other => Err(Error::Formatter(format!(
236            "unexpected argument parser rule: {other:?}"
237        ))),
238    }
239}
240
241fn collect_line_ending_comments(pair: pest::iterators::Pair<'_, Rule>, out: &mut Vec<Argument>) {
242    for inner in pair.into_inner() {
243        if inner.as_rule() == Rule::line_comment {
244            out.push(Argument::InlineComment(Comment::Line(
245                inner.as_str().to_owned(),
246            )));
247        }
248    }
249}
250
251fn build_argument(pair: pest::iterators::Pair<'_, Rule>) -> Result<Argument> {
252    match pair.as_rule() {
253        Rule::bracket_argument => {
254            let raw = pair.as_str().to_owned();
255            Ok(Argument::Bracket(validate_bracket_argument(raw)?))
256        }
257        Rule::quoted_argument => Ok(Argument::Quoted(pair.as_str().to_owned())),
258        Rule::mixed_unquoted_argument | Rule::unquoted_argument => {
259            Ok(Argument::Unquoted(pair.as_str().to_owned()))
260        }
261        other => Err(Error::Formatter(format!(
262            "unexpected argument rule: {other:?}"
263        ))),
264    }
265}
266
267/// Validate that a bracket argument's opening and closing "=" counts match.
268fn validate_bracket_argument(raw: String) -> Result<BracketArgument> {
269    let open_equals = raw
270        .strip_prefix('[')
271        .ok_or_else(|| Error::Formatter("bracket argument missing '[' prefix".to_owned()))?
272        .bytes()
273        .take_while(|&b| b == b'=')
274        .count();
275
276    let close_equals = raw
277        .strip_suffix(']')
278        .ok_or_else(|| Error::Formatter("bracket argument missing ']' suffix".to_owned()))?
279        .bytes()
280        .rev()
281        .take_while(|&b| b == b'=')
282        .count();
283
284    if open_equals != close_equals {
285        return Err(Error::Formatter(format!(
286            "invalid bracket argument delimiter: {raw}"
287        )));
288    }
289
290    Ok(BracketArgument {
291        level: open_equals,
292        raw,
293    })
294}
295
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src`, panicking with the source text and error on failure.
    fn parse_ok(src: &str) -> File {
        match parse(src) {
            Ok(file) => file,
            Err(e) => panic!("parse failed for {src:?}: {e}"),
        }
    }

    /// Borrow the first statement of `file` as a command; panics if the file
    /// is empty or starts with anything else.
    fn first_command(file: &File) -> &CommandInvocation {
        match &file.statements[0] {
            Statement::Command(command) => command,
            _ => panic!(),
        }
    }

    /// Source text of every argument of `command`, in order.
    fn argument_texts(command: &CommandInvocation) -> Vec<&str> {
        command.arguments.iter().map(Argument::as_str).collect()
    }

    /// Parse `src` and return the bracket level of the first command's second
    /// argument; panics if that argument is not a bracket argument.
    fn second_argument_bracket_level(src: &str) -> usize {
        let file = parse_ok(src);
        match &first_command(&file).arguments[1] {
            Argument::Bracket(bracket) => bracket.level,
            _ => panic!(),
        }
    }

    #[test]
    fn empty_file() {
        assert!(parse_ok("").statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let file = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(file.statements.len(), 1);

        let command = first_command(&file);
        assert_eq!(command.name, "cmake_minimum_required");
        assert_eq!(command.arguments.len(), 2);
        assert!(command.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let file = parse_ok("some_command()\n");
        assert!(first_command(&file).arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let file = parse_ok("message(\"hello world\")\n");
        let command = first_command(&file);
        assert!(matches!(&command.arguments[0], Argument::Quoted(_)));
    }

    #[test]
    fn bracket_argument_zero_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [[hello]])\n"), 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        assert_eq!(second_argument_bracket_level("set(VAR [=[hello]=])\n"), 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        assert_eq!(
            second_argument_bracket_level("set(VAR [==[contains ]= inside]==])\n"),
            2
        );
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let outcome = parse("set(VAR [=[hello]==])\n");
        assert!(matches!(outcome.unwrap_err(), Error::Formatter(_)));
    }

    #[test]
    fn invalid_syntax_returns_parse_context_with_crate_owned_diagnostic() {
        match parse("message(\n").unwrap_err() {
            Error::ParseContext {
                display_name,
                source_text,
                start_line,
                barrier_context,
                diagnostic,
            } => {
                assert_eq!(display_name, "<source>");
                assert_eq!(source_text.as_ref(), "message(\n");
                assert_eq!(start_line, 1);
                assert!(!barrier_context);
                assert!(
                    diagnostic.message.contains("expected"),
                    "unexpected parse diagnostic: {diagnostic:?}"
                );
                assert_eq!(diagnostic.line, 2);
                assert_eq!(diagnostic.column, 1);
            }
            _ => panic!("expected parse context error"),
        }
    }

    #[test]
    fn line_comment_standalone() {
        let file = parse_ok("# this is a comment\n");
        assert!(matches!(
            file.statements.first(),
            Some(Statement::Comment(Comment::Line(_)))
        ));
    }

    #[test]
    fn bracket_comment() {
        let file = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            file.statements.first(),
            Some(Statement::Comment(Comment::Bracket(_)))
        ));
    }

    #[test]
    fn variable_reference_in_unquoted() {
        let file = parse_ok("message(${MY_VAR})\n");
        let command = first_command(&file);
        assert!(matches!(&command.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let file = parse_ok("message($ENV{PATH})\n");
        let command = first_command(&file);
        assert!(matches!(&command.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let file = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        assert_eq!(first_command(&file).arguments.len(), 2);
    }

    #[test]
    fn multiline_argument_list() {
        let file = parse_ok("target_link_libraries(mylib\n    PUBLIC dep1\n    PRIVATE dep2\n)\n");
        let command = first_command(&file);
        assert_eq!(command.name, "target_link_libraries");
        // mylib PUBLIC dep1 PRIVATE dep2
        assert_eq!(command.arguments.len(), 5);
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let file = parse_ok("message(\"First\" #[[inline comment]] \"Second\")\n");
        let command = first_command(&file);
        assert_eq!(command.arguments.len(), 3);
        assert!(matches!(
            &command.arguments[1],
            Argument::InlineComment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let file = parse_ok("target_sources(foo\n  PRIVATE a.cc # keep grouping\n  b.cc\n)\n");
        let command = first_command(&file);
        assert!(command.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let file = parse_ok("message(STATUS \"hello\") # trailing\n");
        let command = first_command(&file);
        assert!(matches!(command.trailing_comment, Some(Comment::Line(_))));
    }

    #[test]
    fn file_without_final_newline() {
        assert_eq!(parse_ok("project(MyProject)").statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let file = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(file.statements.len(), 3);
        assert!(matches!(
            file.statements.get(1),
            Some(Statement::BlankLines(1))
        ));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let file = parse_ok("\nmessage(foo)\n");
        assert!(matches!(
            file.statements.first(),
            Some(Statement::BlankLines(1))
        ));
    }

    #[test]
    fn escape_sequences_in_quoted() {
        let file = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!file.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let file = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        assert_eq!(
            argument_texts(first_command(&file)),
            vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]
        );
    }

    #[test]
    fn multiple_commands() {
        let file = parse_ok("cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n");
        assert_eq!(file.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let file = parse_ok("message(${${OUTER}})\n");
        assert_eq!(first_command(&file).arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let file = parse_ok("_my_command(ARG)\n");
        assert_eq!(first_command(&file).name, "_my_command");
    }

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let file = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        assert_eq!(
            argument_texts(first_command(&file)),
            vec!["FALSE", "AND", "(FALSE OR TRUE)"]
        );
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let file = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            "          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        assert_eq!(
            argument_texts(first_command(&file)),
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let file = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(file.statements.len(), 1);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let file = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            file.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let file = parse_ok("set(x -Da=\"b c\")\n");
        assert_eq!(first_command(&file).arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let file = parse_ok("set(x -Da=$(v))\n");
        assert_eq!(first_command(&file).arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The character class holds a space followed by a tab; the tab is
        // written as an escape so it stays visible in the source.
        let file = parse_ok("set(VERSION_REGEX \"#define CLI11_VERSION[ \t]+\"(.+)\"\")");
        assert_eq!(
            first_command(&file).arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let file =
            parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        assert_eq!(
            first_command(&file).arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }
}
643}