Skip to main content

cmakefmt/parser/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Parser entry points for CMake source text.
6//!
7//! The parser is a hand-written recursive-descent implementation over a
8//! streaming scanner. [`crate::parser::ast`] contains the public AST returned by
9//! [`crate::parser::parse()`].
10
11use std::ops::Range;
12
13pub mod ast;
14
15mod cursor;
16mod grammar;
17mod lower;
18mod scanner;
19
20use ast::File;
21
22use crate::error::{Error, ParseDiagnostic, ParseError, Result};
23
24#[derive(Copy, Clone, PartialEq, Eq, Debug)]
25pub(super) struct Span {
26    pub(super) start: u32,
27    pub(super) end: u32,
28}
29
30impl Span {
31    pub(super) fn range(self) -> Range<usize> {
32        self.start as usize..self.end as usize
33    }
34}
35
36#[derive(Copy, Clone, PartialEq, Eq, Debug)]
37pub(super) struct ScanError {
38    pub(super) message: &'static str,
39    pub(super) byte_offset: u32,
40}
41
42impl ScanError {
43    pub(super) fn new(message: &'static str, byte_offset: u32) -> Self {
44        Self {
45            message,
46            byte_offset,
47        }
48    }
49}
50
/// Parse CMake source text into an AST [`File`].
///
/// The returned AST preserves command structure, blank lines, and comments so
/// the formatter can round-trip files with stable semantics.
///
/// # Errors
///
/// Returns whatever error `parse_v2` produces for `source`; this function is a
/// thin public wrapper that adds no error handling of its own.
pub fn parse(source: &str) -> Result<File> {
    // Public entry point; all work is delegated to the crate-internal parser.
    parse_v2(source)
}
58
59pub(crate) fn parse_v2(source: &str) -> Result<File> {
60    if source.len() > u32::MAX as usize {
61        return Err(Error::Parse(ParseError {
62            display_name: "<source>".to_owned(),
63            source_text: source.to_owned().into_boxed_str(),
64            start_line: 1,
65            diagnostic: ParseDiagnostic {
66                message: "source exceeds maximum supported size".into(),
67                line: 1,
68                column: 1,
69            },
70        }));
71    }
72
73    let tree = grammar::parse_file(source).map_err(|e| Error::Parse(to_public_error(e, source)))?;
74    Ok(lower::lower(source, tree))
75}
76
77fn to_public_error(error: ScanError, source: &str) -> ParseError {
78    let (line, column) = line_col_at(source, error.byte_offset);
79    ParseError {
80        display_name: "<source>".to_owned(),
81        source_text: source.to_owned().into_boxed_str(),
82        start_line: 1,
83        diagnostic: ParseDiagnostic {
84            message: error.message.into(),
85            line,
86            column,
87        },
88    }
89}
90
91pub(super) fn line_col_at(source: &str, offset: u32) -> (usize, usize) {
92    let offset = offset as usize;
93    let clamped = offset.min(source.len());
94    let prefix = &source[..clamped];
95    let line = prefix.bytes().filter(|&b| b == b'\n').count() + 1;
96    let line_start = prefix.rfind('\n').map(|idx| idx + 1).unwrap_or(0);
97    let column = source[line_start..clamped].chars().count() + 1;
98    (line, column)
99}
100
// Unit tests for the parser entry points. Most tests feed a small CMake
// snippet through `parse` and inspect the resulting `File`; a few check the
// error diagnostics and `line_col_at` directly.
#[cfg(test)]
mod tests {
    use super::ast::{Argument, Statement};
    use super::*;

    /// Parses `src`, panicking with the offending source and error on failure.
    fn parse_ok(src: &str) -> File {
        parse(src).unwrap_or_else(|e| panic!("parse failed for {src:?}: {e}"))
    }

    // An empty source yields a file with no statements.
    #[test]
    fn empty_file() {
        let f = parse_ok("");
        assert!(f.statements.is_empty());
    }

    // A single command: name, two arguments, no trailing comment.
    #[test]
    fn simple_command() {
        let f = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(f.statements.len(), 1);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    // Empty parentheses produce an empty argument list.
    #[test]
    fn command_no_args() {
        let f = parse_ok("some_command()\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(cmd.arguments.is_empty());
    }

    // A double-quoted string is classified as `Argument::Quoted`.
    #[test]
    fn quoted_argument() {
        let f = parse_ok("message(\"hello world\")\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    // Bracket arguments: `level` is the number of `=` signs in the delimiter.
    #[test]
    fn bracket_argument_zero_equals() {
        let f = parse_ok("set(VAR [[hello]])\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(b.level, 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        let f = parse_ok("set(VAR [=[hello]=])\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(b.level, 1);
    }

    // A lower-level closer (`]=`) inside a level-2 bracket does not end it.
    #[test]
    fn bracket_argument_two_equals() {
        let f = parse_ok("set(VAR [==[contains ]= inside]==])\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(b.level, 2);
    }

    // Opener `[=[` with closer `]==]` never matches, so parsing must fail.
    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Parse(_)));
    }

    // An unclosed command is reported as a `ParseError` carrying the full
    // source; the expected position (line 2, column 1) is just past the
    // trailing newline.
    #[test]
    fn invalid_syntax_returns_parse_error_with_crate_owned_diagnostic() {
        let err = parse("message(\n").unwrap_err();
        let Error::Parse(parse_err) = err else {
            panic!("expected parse error");
        };

        assert_eq!(parse_err.display_name, "<source>");
        assert_eq!(parse_err.source_text.as_ref(), "message(\n");
        assert_eq!(parse_err.start_line, 1);
        assert!(!parse_err.diagnostic.message.is_empty());
        assert_eq!(parse_err.diagnostic.line, 2);
        assert_eq!(parse_err.diagnostic.column, 1);
    }

    // `é` is two bytes in UTF-8 but must count as one column: the expected
    // column 11 is the character position of `$`.
    #[test]
    fn unterminated_genex_reports_char_based_column() {
        let err = parse("message(é $<TARGET_FILE:foo)\n").unwrap_err();
        let Error::Parse(parse_err) = err else {
            panic!("expected parse error");
        };

        assert_eq!(
            parse_err.diagnostic.message.as_ref(),
            "unterminated generator expression"
        );
        assert_eq!(parse_err.diagnostic.line, 1);
        assert_eq!(parse_err.diagnostic.column, 11);
    }

    // Byte offset 11 is the `$` (8 ASCII bytes + 2 for `é` + 1 space), which
    // is the 11th character on the line.
    #[test]
    fn line_col_at_counts_multibyte_chars_as_single_columns() {
        assert_eq!(line_col_at("message(é $<foo", 11), (1, 11));
    }

    // A `#` line on its own becomes a standalone line-comment statement.
    #[test]
    fn line_comment_standalone() {
        let f = parse_ok("# this is a comment\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(ast::Comment::Line(_))
        ));
    }

    // A `#[[ ... ]]` comment may span lines and is a bracket comment.
    #[test]
    fn bracket_comment() {
        let f = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(ast::Comment::Bracket(_))
        ));
    }

    // Variable references stay inside a single unquoted argument.
    #[test]
    fn variable_reference_in_unquoted() {
        let f = parse_ok("message(${MY_VAR})\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let f = parse_ok("message($ENV{PATH})\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    // A terminated generator expression counts as one argument.
    #[test]
    fn generator_expression() {
        let f = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments.len(), 2);
    }

    // Newlines between arguments are separators, not arguments.
    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n    PUBLIC dep1\n    PRIVATE dep2\n)\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.name, "target_link_libraries");
        assert_eq!(cmd.arguments.len(), 5);
    }

    // A bracket comment between arguments is kept in the argument list.
    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(ast::Comment::Bracket(_))
        ));
    }

    // A line comment inside the parentheses is also kept as an argument.
    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n  PRIVATE a.cc # keep grouping\n  b.cc\n)\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    // A comment after the closing parenthesis attaches to the command.
    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(matches!(cmd.trailing_comment, Some(ast::Comment::Line(_))));
    }

    // A comment line aligned under the trailing comment is folded into it,
    // joined with a single space.
    #[test]
    fn aligned_continuation_merges_into_trailing_comment() {
        let src = "set(FOO bar) # first line\n             # second line\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 1);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# first line second line".to_owned()))
        );
    }

    // Several aligned continuation lines all merge into one comment.
    #[test]
    fn multiple_aligned_continuations_merge() {
        let src = "set(FOO bar) # line one\n             # line two\n             # line three\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 1);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line(
                "# line one line two line three".to_owned()
            ))
        );
    }

    // A comment at column 1 on the next line is a separate statement.
    #[test]
    fn non_aligned_comment_stays_standalone() {
        let src = "set(FOO bar) # trailing\n# standalone\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# trailing".to_owned()))
        );
        assert!(matches!(f.statements[1], Statement::Comment(_)));
    }

    // A blank line before the aligned comment stops the merge; three
    // statements are expected instead of one.
    #[test]
    fn blank_line_prevents_continuation_merge() {
        let src = "set(FOO bar) # trailing\n\n             # not a continuation\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 3);
    }

    // A bare `#` continuation line merges but contributes no text.
    #[test]
    fn empty_continuation_line_merges_without_adding_text() {
        let src = "set(FOO bar) # first\n             #\n             # third\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 1);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# first third".to_owned()))
        );
    }

    // The second `#` is indented one column further than the first, so it is
    // not treated as a continuation.
    #[test]
    fn off_by_one_column_prevents_merge() {
        let src = "set(FOO bar) # trailing\n              # off by one\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
        assert!(matches!(f.statements[1], Statement::Comment(_)));
    }

    // A final newline is not required.
    #[test]
    fn file_without_final_newline() {
        let f = parse_ok("project(MyProject)");
        assert_eq!(f.statements.len(), 1);
    }

    // One empty line between commands becomes `BlankLines(1)`.
    #[test]
    fn blank_lines_are_preserved() {
        let f = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(f.statements.len(), 3);
        assert!(matches!(f.statements[1], Statement::BlankLines(1)));
    }

    // A blank line at the very start of the file is recorded too.
    #[test]
    fn leading_blank_lines_are_preserved() {
        let f = parse_ok("\nmessage(foo)\n");
        assert!(matches!(f.statements[0], Statement::BlankLines(1)));
    }

    // Backslash escapes inside a quoted argument do not break parsing.
    #[test]
    fn escape_sequences_in_quoted() {
        let f = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!f.statements.is_empty());
    }

    // Escaped quotes do not terminate the quoted argument; the argument text
    // is kept verbatim, including the surrounding quotes and backslashes.
    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let f = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(args, vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]);
    }

    // Two commands on consecutive lines give two statements.
    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
    }

    // A nested `${${...}}` reference is still a single argument.
    #[test]
    fn nested_variable_reference() {
        let f = parse_ok("message(${${OUTER}})\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments.len(), 1);
    }

    // Command names may start with an underscore.
    #[test]
    fn underscore_command_name_is_valid() {
        let f = parse_ok("_my_command(ARG)\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.name, "_my_command");
    }

    // A parenthesised group is kept whole as one argument's text.
    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(args, vec!["FALSE", "AND", "(FALSE OR TRUE)"]);
    }

    // Same as above, but the group spans two lines; the newline and the
    // indentation inside the group are preserved verbatim.
    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            "          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(
            args,
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    // A leading U+FEFF byte-order mark does not produce a statement or error.
    #[test]
    fn source_file_with_utf8_bom_parses() {
        let f = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(f.statements.len(), 1);
    }

    // `\r\n` line endings do not introduce extra statements.
    #[test]
    fn crlf_line_endings_parse() {
        let f = parse_ok("set(FOO bar)\r\nset(BAZ qux)\r\n");
        assert_eq!(f.statements.len(), 2);
    }

    // A bare `@NAME@` line at top level is kept as a template placeholder.
    #[test]
    fn top_level_template_placeholder_parses() {
        let f = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            f.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    // The `legacy_*` tests cover unquoted arguments that embed quotes,
    // `$(...)` references, or parentheses; each must come back as one
    // argument with its text unchanged.
    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let f = parse_ok("set(x -Da=\"b c\")\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let f = parse_ok("set(x -Da=$(v))\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        let f = parse_ok(r##"set(VERSION_REGEX "#define CLI11_VERSION[ 	]+"(.+)"")"##);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let f = parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }

    // A closer with more `=` signs than the opener (`]====]`) is ordinary
    // content; only the matching `]==]` ends the bracket argument.
    #[test]
    fn bracket_argument_ignores_mismatched_inner_closer() {
        let src = "set(VAR [==[before ]====] after]==])\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments[1].as_str(), "[==[before ]====] after]==]");
    }
}