Skip to main content

cmakefmt/parser/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Parser entry points for CMake source text.
6//!
7//! The parser is a hand-written recursive-descent implementation over a
8//! streaming scanner. [`crate::parser::ast`] contains the public AST returned by
9//! [`crate::parser::parse()`].
10
11use std::ops::Range;
12
13pub mod ast;
14
15mod cursor;
16mod grammar;
17mod lower;
18mod scanner;
19
20use ast::File;
21
22use crate::error::{Error, ParseDiagnostic, ParseError, Result};
23
24#[derive(Copy, Clone, PartialEq, Eq, Debug)]
25pub(super) struct Span {
26    pub(super) start: u32,
27    pub(super) end: u32,
28}
29
30impl Span {
31    pub(super) fn range(self) -> Range<usize> {
32        self.start as usize..self.end as usize
33    }
34}
35
36#[derive(Copy, Clone, PartialEq, Eq, Debug)]
37pub(super) struct ScanError {
38    pub(super) message: &'static str,
39    pub(super) byte_offset: u32,
40}
41
42impl ScanError {
43    pub(super) fn new(message: &'static str, byte_offset: u32) -> Self {
44        Self {
45            message,
46            byte_offset,
47        }
48    }
49}
50
/// Parse CMake source text into an AST [`File`].
///
/// The returned AST preserves command structure, blank lines, and comments so
/// the formatter can round-trip files with stable semantics. CRLF line
/// endings and a UTF-8 BOM at the start of the source are both accepted
/// and normalised internally.
///
/// # Errors
///
/// Returns [`Error::Parse`] when the input is rejected by the grammar or when
/// `source` is longer than `u32::MAX` bytes. The attached diagnostic carries
/// a 1-based line and column, and the error's display name is `"<source>"`.
///
/// # Examples
///
/// ```
/// use cmakefmt::parser::{parse, ast::Statement};
///
/// let file = parse("cmake_minimum_required(VERSION 3.20)\n").unwrap();
/// assert_eq!(file.statements.len(), 1);
/// let Statement::Command(cmd) = &file.statements[0] else {
///     panic!("expected a command");
/// };
/// assert_eq!(cmd.name, "cmake_minimum_required");
/// ```
pub fn parse(source: &str) -> Result<File> {
    // Thin public wrapper: all of the work happens in `parse_v2`.
    parse_v2(source)
}
73
74pub(crate) fn parse_v2(source: &str) -> Result<File> {
75    if source.len() > u32::MAX as usize {
76        return Err(Error::Parse(ParseError {
77            display_name: "<source>".to_owned(),
78            source_text: source.to_owned().into_boxed_str(),
79            start_line: 1,
80            diagnostic: ParseDiagnostic {
81                message: "source exceeds maximum supported size".into(),
82                line: 1,
83                column: 1,
84            },
85        }));
86    }
87
88    let tree = grammar::parse_file(source).map_err(|e| Error::Parse(to_public_error(e, source)))?;
89    Ok(lower::lower(source, tree))
90}
91
92fn to_public_error(error: ScanError, source: &str) -> ParseError {
93    let (line, column) = line_col_at(source, error.byte_offset);
94    ParseError {
95        display_name: "<source>".to_owned(),
96        source_text: source.to_owned().into_boxed_str(),
97        start_line: 1,
98        diagnostic: ParseDiagnostic {
99            message: error.message.into(),
100            line,
101            column,
102        },
103    }
104}
105
106pub(super) fn line_col_at(source: &str, offset: u32) -> (usize, usize) {
107    let offset = offset as usize;
108    let clamped = offset.min(source.len());
109    let prefix = &source[..clamped];
110    let line = prefix.bytes().filter(|&b| b == b'\n').count() + 1;
111    let line_start = prefix.rfind('\n').map(|idx| idx + 1).unwrap_or(0);
112    let column = source[line_start..clamped].chars().count() + 1;
113    (line, column)
114}
115
#[cfg(test)]
mod tests {
    use super::ast::{Argument, Statement};
    use super::*;

    /// Parse `src`, panicking with the offending source and error on failure.
    fn parse_ok(src: &str) -> File {
        parse(src).unwrap_or_else(|e| panic!("parse failed for {src:?}: {e}"))
    }

    // An empty source produces a file with no statements.
    #[test]
    fn empty_file() {
        let f = parse_ok("");
        assert!(f.statements.is_empty());
    }

    // A single command yields one statement with its name, two arguments,
    // and no trailing comment.
    #[test]
    fn simple_command() {
        let f = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(f.statements.len(), 1);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    // Empty parentheses give an empty argument list.
    #[test]
    fn command_no_args() {
        let f = parse_ok("some_command()\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(cmd.arguments.is_empty());
    }

    // A double-quoted argument is classified as `Argument::Quoted`.
    #[test]
    fn quoted_argument() {
        let f = parse_ok("message(\"hello world\")\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    // `[[...]]` is a bracket argument at level 0 (no `=` signs).
    #[test]
    fn bracket_argument_zero_equals() {
        let f = parse_ok("set(VAR [[hello]])\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(b.level, 0);
    }

    // `[=[...]=]` is a bracket argument at level 1.
    #[test]
    fn bracket_argument_one_equals() {
        let f = parse_ok("set(VAR [=[hello]=])\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(b.level, 1);
    }

    // A level-2 bracket argument may contain a lower-level closer (`]=`)
    // without being terminated by it.
    #[test]
    fn bracket_argument_two_equals() {
        let f = parse_ok("set(VAR [==[contains ]= inside]==])\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!()
        };
        assert_eq!(b.level, 2);
    }

    // An opener at level 1 closed only by a level-2 closer is a parse error.
    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Parse(_)));
    }

    // An unclosed command surfaces as `Error::Parse` with the crate's own
    // diagnostic fields filled in; the reported position (2, 1) is the end of
    // the input, just past the newline.
    #[test]
    fn invalid_syntax_returns_parse_error_with_crate_owned_diagnostic() {
        let err = parse("message(\n").unwrap_err();
        let Error::Parse(parse_err) = err else {
            panic!("expected parse error");
        };

        assert_eq!(parse_err.display_name, "<source>");
        assert_eq!(parse_err.source_text.as_ref(), "message(\n");
        assert_eq!(parse_err.start_line, 1);
        assert!(!parse_err.diagnostic.message.is_empty());
        assert_eq!(parse_err.diagnostic.line, 2);
        assert_eq!(parse_err.diagnostic.column, 1);
    }

    // Columns are counted in characters: `é` is two bytes but one column, so
    // the `$` that opens the generator expression is reported at column 11.
    #[test]
    fn unterminated_genex_reports_char_based_column() {
        let err = parse("message(é $<TARGET_FILE:foo)\n").unwrap_err();
        let Error::Parse(parse_err) = err else {
            panic!("expected parse error");
        };

        assert_eq!(
            parse_err.diagnostic.message.as_ref(),
            "unterminated generator expression"
        );
        assert_eq!(parse_err.diagnostic.line, 1);
        assert_eq!(parse_err.diagnostic.column, 11);
    }

    // Direct check of the helper: byte offset 11 maps to character column 11
    // because the preceding `é` occupies two bytes.
    #[test]
    fn line_col_at_counts_multibyte_chars_as_single_columns() {
        assert_eq!(line_col_at("message(é $<foo", 11), (1, 11));
    }

    // A `#` comment on its own line becomes a standalone line-comment statement.
    #[test]
    fn line_comment_standalone() {
        let f = parse_ok("# this is a comment\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(ast::Comment::Line(_))
        ));
    }

    // A `#[[ ... ]]` comment may span lines and is a bracket-comment statement.
    #[test]
    fn bracket_comment() {
        let f = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(ast::Comment::Bracket(_))
        ));
    }

    // `${VAR}` on its own is an unquoted argument.
    #[test]
    fn variable_reference_in_unquoted() {
        let f = parse_ok("message(${MY_VAR})\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    // `$ENV{VAR}` on its own is an unquoted argument.
    #[test]
    fn env_variable_reference() {
        let f = parse_ok("message($ENV{PATH})\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    // A generator expression counts as a single argument.
    #[test]
    fn generator_expression() {
        let f = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments.len(), 2);
    }

    // Arguments may be spread over several lines; newlines only separate them.
    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n    PUBLIC dep1\n    PRIVATE dep2\n)\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.name, "target_link_libraries");
        assert_eq!(cmd.arguments.len(), 5);
    }

    // A bracket comment between arguments is kept in the argument list as an
    // inline comment.
    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(ast::Comment::Bracket(_))
        ));
    }

    // A line comment inside the argument list is retained among the arguments.
    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n  PRIVATE a.cc # keep grouping\n  b.cc\n)\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    // A `#` comment after the closing parenthesis is attached to the command.
    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert!(matches!(cmd.trailing_comment, Some(ast::Comment::Line(_))));
    }

    // A comment on the next line whose `#` sits in the same column as the
    // trailing comment is folded into it, joined with a single space.
    #[test]
    fn aligned_continuation_merges_into_trailing_comment() {
        let src = "set(FOO bar) # first line\n             # second line\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 1);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# first line second line".to_owned()))
        );
    }

    // Several consecutive aligned continuation lines are all merged.
    #[test]
    fn multiple_aligned_continuations_merge() {
        let src = "set(FOO bar) # line one\n             # line two\n             # line three\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 1);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line(
                "# line one line two line three".to_owned()
            ))
        );
    }

    // A following comment at column 1 is not a continuation; it stays a
    // separate comment statement.
    #[test]
    fn non_aligned_comment_stays_standalone() {
        let src = "set(FOO bar) # trailing\n# standalone\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# trailing".to_owned()))
        );
        assert!(matches!(f.statements[1], Statement::Comment(_)));
    }

    // A blank line between the command and an aligned comment stops the merge,
    // leaving three statements.
    #[test]
    fn blank_line_prevents_continuation_merge() {
        let src = "set(FOO bar) # trailing\n\n             # not a continuation\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 3);
    }

    // An aligned bare `#` line is consumed by the merge but contributes no text.
    #[test]
    fn empty_continuation_line_merges_without_adding_text() {
        let src = "set(FOO bar) # first\n             #\n             # third\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 1);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# first third".to_owned()))
        );
    }

    // Alignment must be exact: a comment one column to the right is standalone.
    #[test]
    fn off_by_one_column_prevents_merge() {
        let src = "set(FOO bar) # trailing\n              # off by one\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
        assert!(matches!(f.statements[1], Statement::Comment(_)));
    }

    // A final command without a terminating newline still parses.
    #[test]
    fn file_without_final_newline() {
        let f = parse_ok("project(MyProject)");
        assert_eq!(f.statements.len(), 1);
    }

    // One empty line between commands is recorded as `BlankLines(1)`.
    #[test]
    fn blank_lines_are_preserved() {
        let f = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(f.statements.len(), 3);
        assert!(matches!(f.statements[1], Statement::BlankLines(1)));
    }

    // A blank line at the very start of the file is recorded too.
    #[test]
    fn leading_blank_lines_are_preserved() {
        let f = parse_ok("\nmessage(foo)\n");
        assert!(matches!(f.statements[0], Statement::BlankLines(1)));
    }

    // Backslash escapes inside a quoted argument do not break parsing.
    #[test]
    fn escape_sequences_in_quoted() {
        let f = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!f.statements.is_empty());
    }

    // Escaped quotes do not terminate the quoted argument, and the argument
    // text is kept verbatim (including the surrounding quotes and backslashes).
    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let f = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(args, vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]);
    }

    // Consecutive commands each become their own statement.
    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
    }

    // A nested `${${...}}` reference is a single argument.
    #[test]
    fn nested_variable_reference() {
        let f = parse_ok("message(${${OUTER}})\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments.len(), 1);
    }

    // Command names may start with an underscore.
    #[test]
    fn underscore_command_name_is_valid() {
        let f = parse_ok("_my_command(ARG)\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.name, "_my_command");
    }

    // A parenthesised group inside the argument list is kept as one argument
    // whose text includes the parentheses.
    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(args, vec!["FALSE", "AND", "(FALSE OR TRUE)"]);
    }

    // The same holds when the parenthesised group spans lines: the newline and
    // indentation inside the group are preserved verbatim.
    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok(concat!(
            "IF(NOT (have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            "          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE))\n",
        ));
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(
            args,
            vec![
                "NOT",
                "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n          AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)"
            ]
        );
    }

    // A leading UTF-8 byte-order mark is tolerated.
    #[test]
    fn source_file_with_utf8_bom_parses() {
        let f = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(f.statements.len(), 1);
    }

    // CRLF line endings separate statements just like LF.
    #[test]
    fn crlf_line_endings_parse() {
        let f = parse_ok("set(FOO bar)\r\nset(BAZ qux)\r\n");
        assert_eq!(f.statements.len(), 2);
    }

    // A bare `@NAME@` line (as found in configure-file templates) parses as a
    // template-placeholder statement.
    #[test]
    fn top_level_template_placeholder_parses() {
        let f = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            f.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    // Quotes embedded in the middle of an unquoted argument stay part of that
    // one argument, including the space inside the quotes.
    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let f = parse_ok("set(x -Da=\"b c\")\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments[1].as_str(), "-Da=\"b c\"");
    }

    // A make-style `$(v)` reference inside an unquoted argument is kept intact.
    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let f = parse_ok("set(x -Da=$(v))\n");
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments[1].as_str(), "-Da=$(v)");
    }

    // A quoted segment immediately followed by a parenthesised segment and
    // another quoted segment is read as one argument. The raw string contains
    // a literal tab inside `[ ... ]`.
    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        let f = parse_ok(r##"set(VERSION_REGEX "#define CLI11_VERSION[ 	]+"(.+)"")"##);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.arguments[1].as_str(),
            "\"#define CLI11_VERSION[ \t]+\"(.+)\"\""
        );
    }

    // An argument that starts with a quoted segment and continues with bare
    // text and further quotes is still a single argument.
    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let f = parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(
            cmd.arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }

    // A closer with more `=` signs than the opener (`]====]`) does not end a
    // level-2 bracket argument; only the matching `]==]` does.
    #[test]
    fn bracket_argument_ignores_mismatched_inner_closer() {
        let src = "set(VAR [==[before ]====] after]==])\n";
        let f = parse_ok(src);
        let Statement::Command(cmd) = &f.statements[0] else {
            panic!()
        };
        assert_eq!(cmd.arguments[1].as_str(), "[==[before ]====] after]==]");
    }
}