1use std::ops::Range;
12
13pub mod ast;
14
15mod cursor;
16mod grammar;
17mod lower;
18mod scanner;
19
20use ast::File;
21
22use crate::error::{Error, ParseDiagnostic, ParseError, Result};
23
24#[derive(Copy, Clone, PartialEq, Eq, Debug)]
25pub(super) struct Span {
26 pub(super) start: u32,
27 pub(super) end: u32,
28}
29
30impl Span {
31 pub(super) fn range(self) -> Range<usize> {
32 self.start as usize..self.end as usize
33 }
34}
35
36#[derive(Copy, Clone, PartialEq, Eq, Debug)]
37pub(super) struct ScanError {
38 pub(super) message: &'static str,
39 pub(super) byte_offset: u32,
40}
41
42impl ScanError {
43 pub(super) fn new(message: &'static str, byte_offset: u32) -> Self {
44 Self {
45 message,
46 byte_offset,
47 }
48 }
49}
50
51pub fn parse(source: &str) -> Result<File> {
71 parse_v2(source)
72}
73
74pub(crate) fn parse_v2(source: &str) -> Result<File> {
75 if source.len() > u32::MAX as usize {
76 return Err(Error::Parse(ParseError {
77 display_name: "<source>".to_owned(),
78 source_text: source.to_owned().into_boxed_str(),
79 start_line: 1,
80 diagnostic: ParseDiagnostic {
81 message: "source exceeds maximum supported size".into(),
82 line: 1,
83 column: 1,
84 },
85 }));
86 }
87
88 let tree = grammar::parse_file(source).map_err(|e| Error::Parse(to_public_error(e, source)))?;
89 Ok(lower::lower(source, tree))
90}
91
92fn to_public_error(error: ScanError, source: &str) -> ParseError {
93 let (line, column) = line_col_at(source, error.byte_offset);
94 ParseError {
95 display_name: "<source>".to_owned(),
96 source_text: source.to_owned().into_boxed_str(),
97 start_line: 1,
98 diagnostic: ParseDiagnostic {
99 message: error.message.into(),
100 line,
101 column,
102 },
103 }
104}
105
106pub(super) fn line_col_at(source: &str, offset: u32) -> (usize, usize) {
107 let offset = offset as usize;
108 let clamped = offset.min(source.len());
109 let prefix = &source[..clamped];
110 let line = prefix.bytes().filter(|&b| b == b'\n').count() + 1;
111 let line_start = prefix.rfind('\n').map(|idx| idx + 1).unwrap_or(0);
112 let column = source[line_start..clamped].chars().count() + 1;
113 (line, column)
114}
115
#[cfg(test)]
mod tests {
    use super::ast::{Argument, Statement};
    use super::*;

    /// Command text placed in front of the trailing comment in the
    /// continuation-merge fixtures; its length is the column of that `#`.
    const COMMAND_PREFIX: &str = "set(FOO bar) ";

    /// Binds the first statement of a parsed file as a command, failing the
    /// test with a readable message when it is anything else.
    macro_rules! first_command {
        ($file:expr) => {{
            let Statement::Command(cmd) = &$file.statements[0] else {
                panic!("expected the first statement to be a command");
            };
            cmd
        }};
    }

    /// Parses `src`, panicking with the source and the error on failure.
    fn parse_ok(src: &str) -> File {
        parse(src).unwrap_or_else(|e| panic!("parse failed for {src:?}: {e}"))
    }

    /// Returns the indentation that puts a `#` exactly `extra` columns to the
    /// right of the trailing comment following [`COMMAND_PREFIX`].
    ///
    /// Computing the padding keeps the alignment-sensitive fixtures correct
    /// even if whitespace inside string literals gets reformatted.
    fn continuation_indent(extra: usize) -> String {
        " ".repeat(COMMAND_PREFIX.len() + extra)
    }

    #[test]
    fn empty_file() {
        let f = parse_ok("");
        assert!(f.statements.is_empty());
    }

    #[test]
    fn simple_command() {
        let f = parse_ok("cmake_minimum_required(VERSION 3.20)\n");
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command!(f);
        assert_eq!(cmd.name, "cmake_minimum_required");
        assert_eq!(cmd.arguments.len(), 2);
        assert!(cmd.trailing_comment.is_none());
    }

    #[test]
    fn command_no_args() {
        let f = parse_ok("some_command()\n");
        let cmd = first_command!(f);
        assert!(cmd.arguments.is_empty());
    }

    #[test]
    fn quoted_argument() {
        let f = parse_ok("message(\"hello world\")\n");
        let cmd = first_command!(f);
        assert!(matches!(&cmd.arguments[0], Argument::Quoted(_)));
    }

    #[test]
    fn bracket_argument_zero_equals() {
        let f = parse_ok("set(VAR [[hello]])\n");
        let cmd = first_command!(f);
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!("expected a bracket argument");
        };
        assert_eq!(b.level, 0);
    }

    #[test]
    fn bracket_argument_one_equals() {
        let f = parse_ok("set(VAR [=[hello]=])\n");
        let cmd = first_command!(f);
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!("expected a bracket argument");
        };
        assert_eq!(b.level, 1);
    }

    #[test]
    fn bracket_argument_two_equals() {
        let f = parse_ok("set(VAR [==[contains ]= inside]==])\n");
        let cmd = first_command!(f);
        let Argument::Bracket(b) = &cmd.arguments[1] else {
            panic!("expected a bracket argument");
        };
        assert_eq!(b.level, 2);
    }

    #[test]
    fn invalid_bracket_argument_returns_error() {
        let err = parse("set(VAR [=[hello]==])\n").unwrap_err();
        assert!(matches!(err, Error::Parse(_)));
    }

    #[test]
    fn invalid_syntax_returns_parse_error_with_crate_owned_diagnostic() {
        let err = parse("message(\n").unwrap_err();
        let Error::Parse(parse_err) = err else {
            panic!("expected parse error");
        };

        assert_eq!(parse_err.display_name, "<source>");
        assert_eq!(parse_err.source_text.as_ref(), "message(\n");
        assert_eq!(parse_err.start_line, 1);
        assert!(!parse_err.diagnostic.message.is_empty());
        assert_eq!(parse_err.diagnostic.line, 2);
        assert_eq!(parse_err.diagnostic.column, 1);
    }

    #[test]
    fn unterminated_genex_reports_char_based_column() {
        let err = parse("message(é $<TARGET_FILE:foo)\n").unwrap_err();
        let Error::Parse(parse_err) = err else {
            panic!("expected parse error");
        };

        assert_eq!(
            parse_err.diagnostic.message.as_ref(),
            "unterminated generator expression"
        );
        assert_eq!(parse_err.diagnostic.line, 1);
        assert_eq!(parse_err.diagnostic.column, 11);
    }

    #[test]
    fn line_col_at_counts_multibyte_chars_as_single_columns() {
        assert_eq!(line_col_at("message(é $<foo", 11), (1, 11));
    }

    #[test]
    fn line_comment_standalone() {
        let f = parse_ok("# this is a comment\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(ast::Comment::Line(_))
        ));
    }

    #[test]
    fn bracket_comment() {
        let f = parse_ok("#[[ multi\nline ]]\n");
        assert!(matches!(
            &f.statements[0],
            Statement::Comment(ast::Comment::Bracket(_))
        ));
    }

    #[test]
    fn variable_reference_in_unquoted() {
        let f = parse_ok("message(${MY_VAR})\n");
        let cmd = first_command!(f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn env_variable_reference() {
        let f = parse_ok("message($ENV{PATH})\n");
        let cmd = first_command!(f);
        assert!(matches!(&cmd.arguments[0], Argument::Unquoted(_)));
    }

    #[test]
    fn generator_expression() {
        let f = parse_ok("target_link_libraries(foo $<TARGET_FILE:bar>)\n");
        let cmd = first_command!(f);
        assert_eq!(cmd.arguments.len(), 2);
    }

    #[test]
    fn multiline_argument_list() {
        let src = "target_link_libraries(mylib\n PUBLIC dep1\n PRIVATE dep2\n)\n";
        let f = parse_ok(src);
        let cmd = first_command!(f);
        assert_eq!(cmd.name, "target_link_libraries");
        assert_eq!(cmd.arguments.len(), 5);
    }

    #[test]
    fn inline_bracket_comment_in_arguments() {
        let src = "message(\"First\" #[[inline comment]] \"Second\")\n";
        let f = parse_ok(src);
        let cmd = first_command!(f);
        assert_eq!(cmd.arguments.len(), 3);
        assert!(matches!(
            &cmd.arguments[1],
            Argument::InlineComment(ast::Comment::Bracket(_))
        ));
    }

    #[test]
    fn line_comment_between_arguments() {
        let src = "target_sources(foo\n PRIVATE a.cc # keep grouping\n b.cc\n)\n";
        let f = parse_ok(src);
        let cmd = first_command!(f);
        assert!(cmd.arguments.iter().any(Argument::is_comment));
    }

    #[test]
    fn trailing_comment_after_command() {
        let src = "message(STATUS \"hello\") # trailing\n";
        let f = parse_ok(src);
        let cmd = first_command!(f);
        assert!(matches!(cmd.trailing_comment, Some(ast::Comment::Line(_))));
    }

    #[test]
    fn aligned_continuation_merges_into_trailing_comment() {
        let pad = continuation_indent(0);
        let src = format!("{COMMAND_PREFIX}# first line\n{pad}# second line\n");
        let f = parse_ok(&src);
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command!(f);
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# first line second line".to_owned()))
        );
    }

    #[test]
    fn multiple_aligned_continuations_merge() {
        let pad = continuation_indent(0);
        let src = format!("{COMMAND_PREFIX}# line one\n{pad}# line two\n{pad}# line three\n");
        let f = parse_ok(&src);
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command!(f);
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line(
                "# line one line two line three".to_owned()
            ))
        );
    }

    #[test]
    fn non_aligned_comment_stays_standalone() {
        let src = "set(FOO bar) # trailing\n# standalone\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
        let cmd = first_command!(f);
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# trailing".to_owned()))
        );
        assert!(matches!(f.statements[1], Statement::Comment(_)));
    }

    #[test]
    fn blank_line_prevents_continuation_merge() {
        // The comment is aligned, so only the blank line can stop the merge.
        let pad = continuation_indent(0);
        let src = format!("{COMMAND_PREFIX}# trailing\n\n{pad}# not a continuation\n");
        let f = parse_ok(&src);
        assert_eq!(f.statements.len(), 3);
    }

    #[test]
    fn empty_continuation_line_merges_without_adding_text() {
        let pad = continuation_indent(0);
        let src = format!("{COMMAND_PREFIX}# first\n{pad}#\n{pad}# third\n");
        let f = parse_ok(&src);
        assert_eq!(f.statements.len(), 1);
        let cmd = first_command!(f);
        assert_eq!(
            cmd.trailing_comment,
            Some(ast::Comment::Line("# first third".to_owned()))
        );
    }

    #[test]
    fn off_by_one_column_prevents_merge() {
        // One column to the right of the trailing comment's `#`.
        let pad = continuation_indent(1);
        let src = format!("{COMMAND_PREFIX}# trailing\n{pad}# off by one\n");
        let f = parse_ok(&src);
        assert_eq!(f.statements.len(), 2);
        assert!(matches!(f.statements[1], Statement::Comment(_)));
    }

    #[test]
    fn file_without_final_newline() {
        let f = parse_ok("project(MyProject)");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn blank_lines_are_preserved() {
        let f = parse_ok("message(foo)\n\nproject(bar)\n");
        assert_eq!(f.statements.len(), 3);
        assert!(matches!(f.statements[1], Statement::BlankLines(1)));
    }

    #[test]
    fn leading_blank_lines_are_preserved() {
        let f = parse_ok("\nmessage(foo)\n");
        assert!(matches!(f.statements[0], Statement::BlankLines(1)));
    }

    #[test]
    fn escape_sequences_in_quoted() {
        let f = parse_ok("message(\"tab\\there\\nnewline\")\n");
        assert!(!f.statements.is_empty());
    }

    #[test]
    fn escaped_quotes_in_quoted_argument_parse() {
        let f = parse_ok("message(FATAL_ERROR \"foo \\\"Debug\\\"\")\n");
        let cmd = first_command!(f);
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(args, vec!["FATAL_ERROR", "\"foo \\\"Debug\\\"\""]);
    }

    #[test]
    fn multiple_commands() {
        let src = "cmake_minimum_required(VERSION 3.20)\nproject(MyProject)\n";
        let f = parse_ok(src);
        assert_eq!(f.statements.len(), 2);
    }

    #[test]
    fn nested_variable_reference() {
        let f = parse_ok("message(${${OUTER}})\n");
        let cmd = first_command!(f);
        assert_eq!(cmd.arguments.len(), 1);
    }

    #[test]
    fn underscore_command_name_is_valid() {
        let f = parse_ok("_my_command(ARG)\n");
        let cmd = first_command!(f);
        assert_eq!(cmd.name, "_my_command");
    }

    #[test]
    fn nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        let f = parse_ok("if(FALSE AND (FALSE OR TRUE))\n");
        let cmd = first_command!(f);
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(args, vec!["FALSE", "AND", "(FALSE OR TRUE)"]);
    }

    #[test]
    fn multiline_nested_parentheses_in_arguments_are_preserved_as_unquoted_tokens() {
        // The parenthesised group is written once and reused for both the
        // input and the expectation so the two cannot drift apart.
        let group = concat!(
            "(have_C__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE\n",
            " AND have_CXX__fsanitize_memory__fsanitize_memory_track_origins__U_FORTIFY_SOURCE)",
        );
        let src = format!("IF(NOT {group})\n");
        let f = parse_ok(&src);
        let cmd = first_command!(f);
        let args: Vec<&str> = cmd.arguments.iter().map(Argument::as_str).collect();
        assert_eq!(args, vec!["NOT", group]);
    }

    #[test]
    fn source_file_with_utf8_bom_parses() {
        let f = parse_ok("\u{FEFF}project(MyProject)\n");
        assert_eq!(f.statements.len(), 1);
    }

    #[test]
    fn crlf_line_endings_parse() {
        let f = parse_ok("set(FOO bar)\r\nset(BAZ qux)\r\n");
        assert_eq!(f.statements.len(), 2);
    }

    #[test]
    fn top_level_template_placeholder_parses() {
        let f = parse_ok("@PACKAGE_INIT@\n");
        assert_eq!(
            f.statements,
            vec![Statement::TemplatePlaceholder("@PACKAGE_INIT@".to_owned())]
        );
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_quotes_parses() {
        let f = parse_ok("set(x -Da=\"b c\")\n");
        let cmd = first_command!(f);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=\"b c\"");
    }

    #[test]
    fn legacy_unquoted_argument_with_make_style_reference_parses() {
        let f = parse_ok("set(x -Da=$(v))\n");
        let cmd = first_command!(f);
        assert_eq!(cmd.arguments[1].as_str(), "-Da=$(v)");
    }

    #[test]
    fn legacy_unquoted_argument_with_embedded_parens_parses() {
        // The character class holds a space and a tab. The tab is spelled as
        // an escape, and the same string feeds input and expectation, so the
        // invisible character cannot be lost silently.
        let regex_arg = "\"#define CLI11_VERSION[ \t]+\"(.+)\"\"";
        let src = format!("set(VERSION_REGEX {regex_arg})");
        let f = parse_ok(&src);
        let cmd = first_command!(f);
        assert_eq!(cmd.arguments[1].as_str(), regex_arg);
    }

    #[test]
    fn legacy_unquoted_argument_starting_with_quoted_segment_parses() {
        let f = parse_ok(r##"list(APPEND force-libcxx "CMAKE_CXX_COMPILER_ID STREQUAL "Clang"")"##);
        let cmd = first_command!(f);
        assert_eq!(
            cmd.arguments[2].as_str(),
            "\"CMAKE_CXX_COMPILER_ID STREQUAL \"Clang\"\""
        );
    }

    #[test]
    fn bracket_argument_ignores_mismatched_inner_closer() {
        let src = "set(VAR [==[before ]====] after]==])\n";
        let f = parse_ok(src);
        let cmd = first_command!(f);
        assert_eq!(cmd.arguments[1].as_str(), "[==[before ]====] after]==]");
    }
}