Skip to main content

cmakefmt/parser/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Parser entry points for CMake source text.
6//!
7//! The grammar is defined in `parser/cmake.pest`, while
8//! [`crate::parser::ast`] contains the AST types returned by
9//! [`crate::parser::parse()`].
10
11use pest::Parser;
12use pest_derive::Parser;
13
14pub mod ast;
15
16/// Internal pest parser generated from `cmake.pest`.
17#[derive(Parser)]
18#[grammar = "parser/cmake.pest"]
19pub struct CmakeParser;
20
21use crate::error::{Error, Result};
22use ast::{Argument, BracketArgument, CommandInvocation, Comment, File, Statement};
23
24/// Parse CMake source text into an AST [`File`].
25///
26/// The returned AST preserves command structure, blank lines, and comments so
27/// the formatter can round-trip files with stable semantics.
28pub fn parse(source: &str) -> Result<File> {
29    let mut pairs =
30        CmakeParser::parse(Rule::file, source).map_err(|e| Error::Parse(Box::new(e)))?;
31    let file_pair = pairs
32        .next()
33        .ok_or_else(|| Error::Formatter("parser did not return a file pair".to_owned()))?;
34
35    build_file(file_pair)
36}
37
38fn build_file(pair: pest::iterators::Pair<'_, Rule>) -> Result<File> {
39    debug_assert_eq!(pair.as_rule(), Rule::file);
40
41    let items = pair.into_inner();
42    let mut statements = Vec::with_capacity(items.size_hint().0);
43    let mut pending_blank_lines = 0usize;
44    let mut line_has_content = false;
45
46    for item in items {
47        collect_file_item(
48            item,
49            &mut statements,
50            &mut pending_blank_lines,
51            &mut line_has_content,
52        )?;
53    }
54
55    flush_blank_lines(&mut statements, &mut pending_blank_lines);
56    Ok(File { statements })
57}
58
59fn collect_file_item(
60    item: pest::iterators::Pair<'_, Rule>,
61    statements: &mut Vec<Statement>,
62    pending_blank_lines: &mut usize,
63    line_has_content: &mut bool,
64) -> Result<()> {
65    match item.as_rule() {
66        Rule::file_item => {
67            for inner in item.into_inner() {
68                collect_file_item(inner, statements, pending_blank_lines, line_has_content)?;
69            }
70            Ok(())
71        }
72        Rule::command_invocation => {
73            flush_blank_lines(statements, pending_blank_lines);
74            statements.push(Statement::Command(build_command(item)?));
75            *line_has_content = true;
76            Ok(())
77        }
78        Rule::template_placeholder => {
79            flush_blank_lines(statements, pending_blank_lines);
80            statements.push(Statement::TemplatePlaceholder(item.as_str().to_owned()));
81            *line_has_content = true;
82            Ok(())
83        }
84        Rule::bracket_comment => {
85            let comment = Comment::Bracket(item.as_str().to_owned());
86            if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
87                flush_blank_lines(statements, pending_blank_lines);
88                statements.push(Statement::Comment(comment));
89            }
90            *line_has_content = true;
91            Ok(())
92        }
93        Rule::line_comment => {
94            let comment = Comment::Line(item.as_str().to_owned());
95            if let Some(comment) = attach_trailing_comment(statements, comment, *line_has_content) {
96                flush_blank_lines(statements, pending_blank_lines);
97                statements.push(Statement::Comment(comment));
98            }
99            *line_has_content = true;
100            Ok(())
101        }
102        Rule::newline => {
103            if *line_has_content {
104                *line_has_content = false;
105            } else {
106                *pending_blank_lines += 1;
107            }
108            Ok(())
109        }
110        Rule::space | Rule::EOI => Ok(()),
111        other => Err(Error::Formatter(format!(
112            "unexpected top-level parser rule: {other:?}"
113        ))),
114    }
115}
116
117fn attach_trailing_comment(
118    statements: &mut [Statement],
119    comment: Comment,
120    line_has_content: bool,
121) -> Option<Comment> {
122    if !line_has_content {
123        return Some(comment);
124    }
125
126    match statements.last_mut() {
127        Some(Statement::Command(command)) if command.trailing_comment.is_none() => {
128            command.trailing_comment = Some(comment);
129            None
130        }
131        _ => Some(comment),
132    }
133}
134
135fn flush_blank_lines(statements: &mut Vec<Statement>, pending_blank_lines: &mut usize) {
136    if *pending_blank_lines == 0 {
137        return;
138    }
139
140    match statements.last_mut() {
141        Some(Statement::BlankLines(count)) => *count += *pending_blank_lines,
142        _ => statements.push(Statement::BlankLines(*pending_blank_lines)),
143    }
144
145    *pending_blank_lines = 0;
146}
147
148fn build_command(pair: pest::iterators::Pair<'_, Rule>) -> Result<CommandInvocation> {
149    debug_assert_eq!(pair.as_rule(), Rule::command_invocation);
150
151    let span = pair.as_span();
152    let mut name = None;
153    let mut arguments = Vec::new();
154
155    for inner in pair.into_inner() {
156        match inner.as_rule() {
157            Rule::identifier => {
158                name = Some(inner.as_str().to_owned());
159            }
160            Rule::arguments => {
161                arguments = build_arguments(inner)?;
162            }
163            Rule::space => {}
164            other => {
165                return Err(Error::Formatter(format!(
166                    "unexpected command parser rule: {other:?}"
167                )));
168            }
169        }
170    }
171
172    Ok(CommandInvocation {
173        name: name.ok_or_else(|| Error::Formatter("command missing identifier".to_owned()))?,
174        arguments,
175        trailing_comment: None,
176        span: (span.start(), span.end()),
177    })
178}
179
180fn build_arguments(pair: pest::iterators::Pair<'_, Rule>) -> Result<Vec<Argument>> {
181    debug_assert_eq!(pair.as_rule(), Rule::arguments);
182
183    let inner = pair.into_inner();
184    let mut args = Vec::with_capacity(inner.size_hint().0);
185
186    for p in inner {
187        collect_argument_part(p, &mut args)?;
188    }
189
190    Ok(args)
191}
192
193fn collect_argument_part(
194    pair: pest::iterators::Pair<'_, Rule>,
195    out: &mut Vec<Argument>,
196) -> Result<()> {
197    match pair.as_rule() {
198        Rule::argument_part => {
199            for inner in pair.into_inner() {
200                collect_argument_part(inner, out)?;
201            }
202            Ok(())
203        }
204        Rule::arguments => {
205            for inner in pair.into_inner() {
206                collect_argument_part(inner, out)?;
207            }
208            Ok(())
209        }
210        Rule::argument => {
211            let mut inner = pair.into_inner();
212            let argument = inner
213                .next()
214                .ok_or_else(|| Error::Formatter("argument missing child node".to_owned()))?;
215            out.push(build_argument(argument)?);
216            Ok(())
217        }
218        Rule::bracket_comment => {
219            out.push(Argument::InlineComment(Comment::Bracket(
220                pair.as_str().to_owned(),
221            )));
222            Ok(())
223        }
224        Rule::line_ending => {
225            collect_line_ending_comments(pair, out);
226            Ok(())
227        }
228        Rule::space => Ok(()),
229        other => Err(Error::Formatter(format!(
230            "unexpected argument parser rule: {other:?}"
231        ))),
232    }
233}
234
235fn collect_line_ending_comments(pair: pest::iterators::Pair<'_, Rule>, out: &mut Vec<Argument>) {
236    for inner in pair.into_inner() {
237        if inner.as_rule() == Rule::line_comment {
238            out.push(Argument::InlineComment(Comment::Line(
239                inner.as_str().to_owned(),
240            )));
241        }
242    }
243}
244
245fn build_argument(pair: pest::iterators::Pair<'_, Rule>) -> Result<Argument> {
246    match pair.as_rule() {
247        Rule::bracket_argument => {
248            let raw = pair.as_str().to_owned();
249            Ok(Argument::Bracket(validate_bracket_argument(raw)?))
250        }
251        Rule::quoted_argument => Ok(Argument::Quoted(pair.as_str().to_owned())),
252        Rule::mixed_unquoted_argument | Rule::unquoted_argument => {
253            Ok(Argument::Unquoted(pair.as_str().to_owned()))
254        }
255        other => Err(Error::Formatter(format!(
256            "unexpected argument rule: {other:?}"
257        ))),
258    }
259}
260
261/// Validate that a bracket argument's opening and closing "=" counts match.
262fn validate_bracket_argument(raw: String) -> Result<BracketArgument> {
263    let open_equals = raw
264        .strip_prefix('[')
265        .ok_or_else(|| Error::Formatter("bracket argument missing '[' prefix".to_owned()))?
266        .bytes()
267        .take_while(|&b| b == b'=')
268        .count();
269
270    let close_equals = raw
271        .strip_suffix(']')
272        .ok_or_else(|| Error::Formatter("bracket argument missing ']' suffix".to_owned()))?
273        .bytes()
274        .rev()
275        .take_while(|&b| b == b'=')
276        .count();
277
278    if open_equals != close_equals {
279        return Err(Error::Formatter(format!(
280            "invalid bracket argument delimiter: {raw}"
281        )));
282    }
283
284    Ok(BracketArgument {
285        level: open_equals,
286        raw,
287    })
288}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src`, panicking with the offending source text on failure.
    fn parse_ok(src: &str) -> File {
        match parse(src) {
            Ok(file) => file,
            Err(e) => panic!("parse failed for {src:?}: {e}"),
        }
    }

    /// Borrow the first statement of `file`, which must be a command.
    fn first_command(file: &File) -> &CommandInvocation {
        match file.statements.first() {
            Some(Statement::Command(cmd)) => cmd,
            _ => panic!(),
        }
    }

    /// Source text of every argument of `cmd`, in order.
    fn arg_texts(cmd: &CommandInvocation) -> Vec<&str> {
        cmd.arguments.iter().map(Argument::as_str).collect()
    }

    // --- Basic statements -------------------------------------------------

    #[test]
    fn empty_file() {
        assert!(parse_ok("").statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let file = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(file.statements.len(), 1);

        let cmd = first_command(&file);
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let file = parse_ok("some_command()\n");
        assert!(first_command(&file).arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let file = parse_ok("message(\"hello world\")\n");
        let cmd = first_command(&file);
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    // --- Bracket arguments ------------------------------------------------

    #[test]
    fn bracket_argument_zero_equals() {
        let file = parse_ok("set(VAR [[hello]])\n");
        let cmd = first_command(&file);
        let Argument::Bracket(bracket) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(bracket.level, 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        let file = parse_ok("set(VAR [=[hello]=])\n");
        let cmd = first_command(&file);
        let Argument::Bracket(bracket) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(bracket.level, 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        let file = parse_ok("set(VAR [==[contains ]= inside]==])\n");
        let cmd = first_command(&file);
        let Argument::Bracket(bracket) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(bracket.level, 2);
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let outcome = parse("set(VAR [=[hello]==])\n");
        assert!(matches!(outcome.unwrap_err(), Error::Formatter(_)));
    }

    // --- Comments ---------------------------------------------------------

    #[test]
    fn line_comment_standalone() {
        let file = parse_ok("# this is a comment\n");
        assert!(matches!(
            &file.statements[0],
            Statement::Comment(Comment::Line(_))
        ));
    }

    #[test]
    fn bracket_comment() {
        let file = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &file.statements[0],
            Statement::Comment(Comment::Bracket(_))
        ));
    }

    // --- References and generator expressions -----------------------------

    #[test]
    fn variable_reference_in_unquoted() {
        let file = parse_ok("message(${MY_VAR})\n");
        let cmd = first_command(&file);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let file = parse_ok("message($ENV{PATH})\n");
        let cmd = first_command(&file);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let file = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        assert_eq!(first_command(&file).arguments.len(), 2);
    }

    // --- Argument lists ---------------------------------------------------

    #[test]
    fn multiline_argument_list() {
        let file =
            parse_ok("target_link_libraries(mylib\n    PUBLIC dep1\n    PRIVATE dep2\n)\n");
        let cmd = first_command(&file);
        assert_eq!(cmd.name, "target_link_libraries");
        // mylib PUBLIC dep1 PRIVATE dep2
        assert_eq!(cmd.arguments.len(), 5);
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let file = parse_ok("message(\"First\" #[[inline comment]] \"Second\")\n");
        let cmd = first_command(&file);
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let file = parse_ok("target_sources(foo\n  PRIVATE a.cc # keep grouping\n  b.cc\n)\n");
        let cmd = first_command(&file);
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let file = parse_ok("message(STATUS \"hello\") # trailing\n");
        let cmd = first_command(&file);
        assert!(matches!(cmd.trailing_comment, Some(Comment::Line(_))));
    }

    // --- File layout ------------------------------------------------------

    #[test]
    fn file_without_final_newline() {
        assert_eq!(parse_ok("project(MyProject)").statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let file = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(file.statements.len(), 3);
        assert!(matches!(file.statements[1], Statement::BlankLines(1)));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let file = parse_ok("\nmessage(foo)\n");
        assert!(matches!(file.statements[0], Statement::BlankLines(1)));
    }

    // --- Quoting and escapes ----------------------------------------------

    #[test]
    fn escape_sequences_in_quoted() {
        let file = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!file.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let file = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let cmd = first_command(&file);
        assert_eq!(
            arg_texts(cmd),
            vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]
        );
    }

    #[test]
    fn multiple_commands() {
        let file = parse_ok("cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n");
        assert_eq!(file.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let file = parse_ok("message(${${OUTER}})\n");
        assert_eq!(first_command(&file).arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let file = parse_ok("_my_command(ARG)\n");
        assert_eq!(first_command(&file).name, "_my_command");
    }

    // --- Nested parentheses -----------------------------------------------

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let file = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let cmd = first_command(&file);
        assert_eq!(arg_texts(cmd), vec!["FALSE", "AND", "(FALSE OR TRUE)"]);
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let src = concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            "          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        );
        let file = parse_ok(src);
        let cmd = first_command(&file);
        let expected = vec![
            "NOT",
            "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)",
        ];
        assert_eq!(arg_texts(cmd), expected);
    }

    // --- Special inputs ---------------------------------------------------

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let file = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(file.statements.len(), 1);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let file = parse_ok("@PACKAGE_INIT@\n");
        let expected = vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())];
        assert_eq!(file.statements, expected);
    }

    // --- Legacy unquoted arguments ----------------------------------------

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let file = parse_ok("set(x -Da=\"b c\")\n");
        let cmd = first_command(&file);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let file = parse_ok("set(x -Da=$(v))\n");
        let cmd = first_command(&file);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The character class holds a space followed by a tab.
        let file = parse_ok("set(VERSION_REGEX \"#define CLI11_VERSION[ \t]+\"(.+)\"\")");
        let cmd = first_command(&file);
        assert_eq!(
            cmd.arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let file = parse_ok(
            r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##,
        );
        let cmd = first_command(&file);
        assert_eq!(
            cmd.arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }
}
607            cmd.arguments[2].as_str(),
608            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
609        );
610    }
611}