1use std::borrow::Cow;
2
3use nom::{
4 branch::alt,
5 bytes::complete::{is_a, is_not, tag, take_until},
6 character::complete::{alpha1, alphanumeric1, char, not_line_ending, space1},
7 combinator::{consumed, map, not, opt, recognize, value},
8 multi::{many0, many0_count, many1},
9 sequence::{delimited, pair, preceded, tuple},
10};
11
12use crate::Token;
13
14pub fn parse_cmakelists(src: &[u8]) -> Result<CMakeListsTokens<'_>, CMakeListsParseError> {
15 nom_parse_cmakelists(src)
16 .map(|(_, cm)| cm)
17 .map_err(From::from)
18}
19
/// Parsed representation of a CMake listfile, borrowing from the input bytes.
#[derive(Debug)]
pub struct CMakeListsTokens<'cmlist> {
    /// File elements in the order they were parsed.
    file: Vec<FileElement<'cmlist>>,
}
24
25impl<'cmlist> CMakeListsTokens<'cmlist> {
26 pub(crate) fn command_invocations(&self) -> impl Iterator<Item = &CommandInvocation<'cmlist>> {
27 self.file.iter().filter_map(|file_element| {
28 if let CMakeLanguage::CommandInvocation((command_invocation, _)) = &file_element.element
29 {
30 Some(command_invocation)
31 } else {
32 None
33 }
34 })
35 }
36}
37
/// One top-level element of the file together with the raw bytes it was
/// parsed from.
#[allow(dead_code)]
#[derive(Debug)]
struct FileElement<'fe> {
    /// The exact input bytes consumed while parsing this element.
    source: Source<'fe>,
    /// The parsed content of the element.
    element: CMakeLanguage<'fe>,
}
44
/// Borrowed slice of the input; its `Debug` impl renders the bytes as
/// lossily-decoded UTF-8 text.
struct Source<'s>(&'s [u8]);
46
/// Parser result type used throughout this module: like nom's own `IResult`
/// shape, but the error type defaults to `nom::error::VerboseError`.
type IResult<I, O, E = nom::error::VerboseError<I>> = Result<(I, O), nom::Err<E>>;
48
49impl<'s> std::fmt::Debug for Source<'s> {
50 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51 f.debug_tuple("Source")
52 .field(&String::from_utf8_lossy(self.0))
53 .finish()
54 }
55}
56
/// The two kinds of top-level file element produced by `file_element`.
#[allow(dead_code)]
#[derive(Debug)]
enum CMakeLanguage<'cml> {
    /// A command invocation and the line ending that terminates it.
    CommandInvocation((CommandInvocation<'cml>, LineEnding<'cml>)),
    /// A (possibly empty) run of bracket comments and spaces, and the line
    /// ending that terminates it.
    Formatting((Vec<Formatting<'cml>>, LineEnding<'cml>)),
}
63
/// A piece of non-command content on a formatting-only line.
#[allow(dead_code)]
#[derive(Debug)]
enum Formatting<'f> {
    /// A `#[[ ... ]]`-style bracket comment.
    BracketComment(BracketComment<'f>),
    /// A run of horizontal whitespace.
    Spaces(Spaces),
}
70
/// A parsed command invocation: `<spaces> identifier <spaces> ( arguments )`.
#[allow(dead_code)]
#[derive(Debug)]
pub(crate) struct CommandInvocation<'ci> {
    /// Whitespace runs preceding the identifier.
    spaces_before: Vec<Spaces>,
    /// Command name exactly as written in the source (case preserved).
    pub(crate) identifier: &'ci [u8],
    /// Whitespace runs between the identifier and the opening parenthesis.
    spaces_after: Vec<Spaces>,
    /// Everything between the parentheses.
    arguments: Arguments<'ci>,
}
79
80impl<'ci> CommandInvocation<'ci> {
81 pub fn to_text_nodes(&'ci self) -> Vec<Token<'ci>> {
82 self.arguments.to_text_nodes()
83 }
84
85 pub fn identifier(&self) -> Cow<'_, [u8]> {
86 if !self.identifier.iter().any(u8::is_ascii_uppercase) {
87 Cow::Borrowed(self.identifier)
88 } else {
89 Cow::Owned(self.identifier.to_ascii_lowercase())
90 }
91 }
92}
93
/// The contents of one parenthesized argument list.
#[derive(Debug)]
struct Arguments<'a> {
    /// The optional argument that directly follows the opening parenthesis.
    argument: Option<Argument<'a>>,
    /// Everything after it: separated arguments and nested parenthesized
    /// groups, in source order.
    separated_arguments: Vec<SeparatedArguments<'a>>,
}
99
100impl<'a> Arguments<'a> {
101 pub fn to_text_nodes(&'a self) -> Vec<Token<'a>> {
102 let mut text_nodes = vec![];
103 if let Some(arg_tn) = self.argument.as_ref().map(|arg| arg.to_text_node()) {
104 text_nodes.push(arg_tn);
105 }
106 text_nodes.extend(self.separated_arguments.iter().filter_map(|x| {
107 if let SeparatedArguments::Single((_, Some(arg))) = x {
108 Some(arg.to_text_node())
109 } else {
110 None
111 }
112 }));
113 text_nodes
114 }
115}
116
/// One entry following the first argument of an argument list.
#[allow(dead_code)]
#[derive(Debug)]
enum SeparatedArguments<'a> {
    /// One or more separations followed by an optional argument.
    Single((Vec<Separation<'a>>, Option<Argument<'a>>)),
    /// Zero or more separations followed by a nested parenthesized
    /// argument list.
    Multi((Vec<Separation<'a>>, Box<Arguments<'a>>)),
}
123
/// What separates two arguments inside an argument list.
#[allow(dead_code)]
#[derive(Debug)]
enum Separation<'a> {
    /// A run of horizontal whitespace.
    Space(Spaces),
    /// A line ending, optionally preceded by a line comment.
    LineEnding(LineEnding<'a>),
}
130
/// A single command argument in one of the three CMake argument forms.
#[derive(Debug)]
enum Argument<'a> {
    /// `[=[ ... ]=]`-style bracket argument.
    Bracket(BracketArgument<'a>),
    /// Double-quoted argument.
    Quoted(QuotedArgument),
    /// Argument written without quotes or brackets.
    Unquoted(UnquotedArgument<'a>),
}
137
138impl<'a> Argument<'a> {
139 fn to_text_node(&'a self) -> Token<'a> {
140 match self {
141 Argument::Bracket(ba) => Token::text_node(ba.bracket_content, false),
142 Argument::Quoted(qa) => Token::text_node(&qa.0, true),
143 Argument::Unquoted(ua) => ua.to_text_node(),
144 }
145 }
146}
147
/// A bracket comment: a `#` immediately followed by a bracket argument.
#[allow(dead_code)]
#[derive(Debug)]
struct BracketComment<'bc>(BracketArgument<'bc>);
151
/// A bracket argument such as `[==[ ... ]==]`.
#[allow(dead_code)]
#[derive(Debug)]
struct BracketArgument<'ba> {
    /// Number of `=` characters between the two opening `[`.
    len: usize,
    /// Bytes between the opening and closing brackets; a line ending
    /// directly after the opening bracket is not included.
    bracket_content: &'ba [u8],
}
158
/// Content of a double-quoted argument with escape sequences decoded and
/// backslash-newline continuations removed.
#[derive(Debug)]
struct QuotedArgument(Vec<u8>);
161
/// An argument written without quotes or brackets.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum UnquotedArgument<'ua> {
    /// Regular unquoted argument; escape sequences are already decoded.
    Normal(Vec<u8>),
    /// Legacy unquoted argument, kept as the raw source bytes.
    Legacy(&'ua [u8]),
}
167
168impl<'ua> UnquotedArgument<'ua> {
169 fn to_text_node(&'ua self) -> Token<'ua> {
170 match self {
171 UnquotedArgument::Normal(n) => Token::text_node(n, false),
172 UnquotedArgument::Legacy(l) => Token::text_node(l, false),
173 }
174 }
175}
176
/// Text of a `#` line comment: everything after the `#` up to, but not
/// including, the line ending.
#[allow(dead_code)]
#[derive(Debug)]
struct LineComment<'lc>(&'lc [u8]);
180
/// The end of a line; the newline bytes themselves are not stored.
#[allow(dead_code)]
#[derive(Debug)]
struct LineEnding<'le> {
    /// Line comment that preceded the newline, if any.
    line_comment: Option<LineComment<'le>>,
}
186
/// A run of horizontal whitespace; the value is the run's length in bytes.
#[allow(dead_code)]
#[derive(Debug)]
struct Spaces(usize);
190
/// Error returned by [`parse_cmakelists`].
#[derive(Debug, thiserror::Error)]
pub enum CMakeListsParseError {
    /// Unspecified failure. NOTE(review): nothing in this file constructs
    /// this variant — confirm whether other modules use it.
    #[error("unknown")]
    Unknown,
    /// The parser failed; the payload is the nom error rendered as text.
    #[error("parser: {0}")]
    Parser(String),
}
198
199impl From<nom::Err<nom::error::VerboseError<&[u8]>>> for CMakeListsParseError {
200 fn from(value: nom::Err<nom::error::VerboseError<&[u8]>>) -> Self {
201 Self::Parser(value.to_string())
202 }
203}
204
205fn nom_parse_cmakelists(src: &[u8]) -> IResult<&[u8], CMakeListsTokens<'_>> {
206 many0(file_element)(src).map(|(src, file)| (src, CMakeListsTokens { file }))
207}
208
209fn file_element(src: &[u8]) -> IResult<&[u8], FileElement<'_>> {
210 alt((
211 map(
212 consumed(tuple((command_invocation, line_ending))),
213 |(source, command_invocation)| FileElement {
214 source: Source(source),
215 element: CMakeLanguage::CommandInvocation(command_invocation),
216 },
217 ),
218 map(
219 consumed(tuple((
220 many0(alt((
221 map(bracket_comment, Formatting::BracketComment),
222 map(spaces, Formatting::Spaces),
223 ))),
224 line_ending,
225 ))),
226 |(source, formatting)| FileElement {
227 source: Source(source),
228 element: CMakeLanguage::Formatting(formatting),
229 },
230 ),
231 ))(src)
232}
233
234fn command_invocation(src: &[u8]) -> IResult<&[u8], CommandInvocation<'_>> {
235 map(
236 tuple((many0(spaces), identifier, many0(spaces), scoped_arguments)),
237 |(spaces_before, identifier, spaces_after, arguments)| CommandInvocation {
238 spaces_before,
239 identifier,
240 spaces_after,
241 arguments,
242 },
243 )(src)
244}
245
246fn scoped_arguments(src: &[u8]) -> IResult<&[u8], Arguments<'_>> {
247 delimited(char('('), arguments, char(')'))(src)
248}
249
250fn arguments(src: &[u8]) -> IResult<&[u8], Arguments<'_>> {
251 map(
252 pair(opt(argument), many0(separated_arguments)),
253 |(argument, separated_arguments)| Arguments {
254 argument,
255 separated_arguments,
256 },
257 )(src)
258}
259
260fn separated_arguments(src: &[u8]) -> IResult<&[u8], SeparatedArguments<'_>> {
261 alt((
262 map(
263 pair(many1(separation), opt(argument)),
264 SeparatedArguments::Single,
265 ),
266 map(
267 pair(many0(separation), map(scoped_arguments, Box::new)),
268 SeparatedArguments::Multi,
269 ),
270 ))(src)
271}
272
273fn separation(src: &[u8]) -> IResult<&[u8], Separation<'_>> {
274 alt((
275 map(spaces, Separation::Space),
276 map(line_ending, Separation::LineEnding),
277 ))(src)
278}
279
280fn argument(src: &[u8]) -> IResult<&[u8], Argument<'_>> {
281 alt((
282 map(bracket_argument, Argument::Bracket),
283 map(quoted_argument, Argument::Quoted),
284 map(unquoted_argument, Argument::Unquoted),
285 ))(src)
286}
287
288fn bracket_argument(src: &[u8]) -> IResult<&[u8], BracketArgument<'_>> {
289 let (src, _) = char('[')(src)?;
290 let (src, len) = many0_count(char('='))(src)?;
291 let bracket_close = format!("]{}]", "=".repeat(len));
292 let (src, _) = char('[')(src)?;
293 let (src, _) = opt(nom::character::complete::line_ending)(src)?;
294 let (src, bracket_content) = take_until(bracket_close.as_bytes())(src)?;
295 let (src, _) = tag(bracket_close.as_bytes())(src)?;
296 Ok((
297 src,
298 BracketArgument {
299 len,
300 bracket_content,
301 },
302 ))
303}
304
305fn quoted_argument(src: &[u8]) -> IResult<&[u8], QuotedArgument> {
306 map(
307 delimited(tag(b"\""), many0(quoted_element), tag(b"\"")),
308 |x| QuotedArgument(x.into_iter().flatten().collect()),
309 )(src)
310}
311
312fn quoted_element(src: &[u8]) -> IResult<&[u8], Vec<u8>> {
313 alt((
314 map(is_not("\\\""), |x: &[u8]| x.to_vec()),
315 map(escape_sequence, |x| x.to_vec()),
316 value(
317 Vec::default(),
318 pair(char('\\'), nom::character::complete::line_ending),
319 ),
320 ))(src)
321}
322
323fn escape_sequence(src: &[u8]) -> IResult<&[u8], &[u8]> {
324 preceded(
325 char('\\'),
326 alt((
327 is_a("()#\" \\$@^;"),
328 value(&b"\t"[..], char('t')),
329 value(&b"\r"[..], char('r')),
330 value(&b"\n"[..], char('n')),
331 )),
332 )(src)
333}
334
335fn unquoted_argument(src: &[u8]) -> IResult<&[u8], UnquotedArgument<'_>> {
336 alt((
337 map(unquoted_legacy, UnquotedArgument::Legacy),
338 map(many1(unquoted_element), |x| {
339 UnquotedArgument::Normal(x.iter().flat_map(|x| x.to_vec()).collect())
340 }),
341 ))(src)
342}
343
344fn unquoted_element(src: &[u8]) -> IResult<&[u8], &[u8]> {
345 alt((is_not(" \t\r\n()#\"\\"), escape_sequence))(src)
346}
347
348fn unquoted_legacy(src: &[u8]) -> IResult<&[u8], &[u8]> {
349 recognize(pair(
350 alt((
351 value((), is_not(" \t\r\n()#\"\\$")),
352 value((), delimited(tag(b"$("), is_not(")"), tag(b")"))),
353 )),
354 many1(alt((
355 value((), is_not(" \t\r\n()#\"\\$")),
356 value((), delimited(tag(b"$("), is_not(")"), tag(b")"))),
357 value((), delimited(char('"'), is_not("\""), char('"'))),
358 ))),
359 ))(src)
360}
361
362fn identifier(src: &[u8]) -> IResult<&[u8], &[u8]> {
363 recognize(pair(
364 alt((alpha1, tag("_"))),
365 many0_count(alt((alphanumeric1, tag("_")))),
366 ))(src)
367}
368
369fn line_ending(src: &[u8]) -> IResult<&[u8], LineEnding<'_>> {
370 map(
371 tuple((opt(line_comment), nom::character::complete::line_ending)),
372 |(line_comment, _)| LineEnding { line_comment },
373 )(src)
374}
375
376fn line_comment(src: &[u8]) -> IResult<&[u8], LineComment<'_>> {
377 preceded(
378 char('#'),
379 map(
380 recognize(tuple((
381 not(tuple((char('['), many0(char('=')), char('[')))),
382 not_line_ending,
383 ))),
384 LineComment,
385 ),
386 )(src)
387}
388
389fn bracket_comment(src: &[u8]) -> IResult<&[u8], BracketComment<'_>> {
390 map(preceded(char('#'), bracket_argument), BracketComment)(src)
391}
392
393fn spaces(src: &[u8]) -> IResult<&[u8], Spaces> {
394 map(space1, |spaces: &[u8]| Spaces(spaces.len()))(src)
395}
396
// Unit tests for the individual parsers and for whole-file parsing of the
// fixture listfiles.
#[cfg(test)]
mod tests {
    // Test helper: unwraps a parser result, panicking with a readable dump
    // of the nom error stack instead of the default `Debug` output.
    trait CheckNomError<O> {
        fn debug_unwrap(self) -> (&'static [u8], O);
    }

    impl<O> CheckNomError<O> for super::IResult<&'static [u8], O> {
        fn debug_unwrap(self) -> (&'static [u8], O) {
            match self {
                Ok(ok) => ok,
                Err(err) => match err {
                    nom::Err::Incomplete(_e) => panic!("Incomplete: {err}"),
                    nom::Err::Error(e) => {
                        let mut msgs = vec![];
                        for (src, knd) in e.errors {
                            // Show at most the first 50 bytes of the input
                            // at each error site to keep the message short.
                            msgs.push(format!(
                                "{knd:?}: '{}'",
                                String::from_utf8_lossy(&src[..src.len().min(50)])
                            ));
                        }
                        panic!("Error: {}", msgs.join("\n"));
                    }
                    nom::Err::Failure(e) => {
                        let mut msgs = vec![];
                        for (src, knd) in e.errors {
                            // Same truncated rendering as the `Error` arm.
                            msgs.push(format!(
                                "{knd:?}: '{}'",
                                String::from_utf8_lossy(&src[..src.len().min(50)])
                            ));
                        }
                        panic!("Failure: {}", msgs.join("\n"));
                    }
                },
            }
        }
    }

    // Smoke test: every fixture listfile parses without returning an error.
    #[test]
    fn parse_cmakelists() {
        let ex1 = include_bytes!("../../fixture/CMakeLists.txt.ex1");
        let _ = super::parse_cmakelists(ex1).unwrap();

        let ex2 = include_bytes!("../../fixture/CMakeLists.txt.ex2");
        let _ = super::parse_cmakelists(ex2).unwrap();

        let ex3 = include_bytes!("../../fixture/CMakeLists.txt.ex3");
        let _ = super::parse_cmakelists(ex3).unwrap();

        let ex4 = include_bytes!("../../fixture/CMakeLists.txt.ex4");
        let _ = super::parse_cmakelists(ex4).unwrap();
    }

    // The first three elements of fixture `ex2` parse one after another.
    #[test]
    fn file_element() {
        use super::file_element;

        let input = include_bytes!("../../fixture/CMakeLists.txt.ex2");
        let (src, _) = file_element(input).debug_unwrap();
        let (src, _) = file_element(src).unwrap();
        let (_, _) = file_element(src).unwrap();
    }

    // Bracket arguments: matching `=` counts, embedded `]]`, and the
    // skipped line ending right after the opening bracket.
    #[test]
    fn bracket_argument() {
        use super::bracket_argument;
        let (_, ba) = bracket_argument(b"[[hello]]").unwrap();
        assert_eq!(ba.bracket_content, b"hello");
        let (_, ba) = bracket_argument(b"[=[hel]]lo]=]").unwrap();
        assert_eq!(ba.bracket_content, b"hel]]lo");
        let (_, ba) = bracket_argument(b"[=[hel]]\r\nlo]=]").unwrap();
        assert_eq!(ba.bracket_content, b"hel]]\r\nlo");
        let (_, ba) = bracket_argument(b"[=[\r\nhel]]\r\nlo]=]").unwrap();
        assert_eq!(ba.bracket_content, b"hel]]\r\nlo");
        let (_, ba) = bracket_argument(b"[=[\nhel]]\r\nlo]=]").unwrap();
        assert_eq!(ba.bracket_content, b"hel]]\r\nlo");
    }

    // Line comments capture the text after `#` and reject bracket openers.
    #[test]
    fn line_comment() {
        use super::line_comment;

        let (_, lc) = line_comment(b"#").unwrap();
        assert_eq!(lc.0, b"");
        let (_, lc) = line_comment(b"#hello").unwrap();
        assert_eq!(lc.0, b"hello");
        let (_, lc) = line_comment(b"# [[hello").unwrap();
        assert_eq!(lc.0, b" [[hello");
        let (_, lc) = line_comment(b"#\r\n").unwrap();
        assert_eq!(lc.0, b"");

        // `#[[` and `#[=[` start a bracket comment, not a line comment.
        let res = line_comment(b"#[[hello");
        assert!(res.is_err());
        let res = line_comment(b"#[=[hello");
        assert!(res.is_err());
    }

    // Quoted arguments: plain text, backslash-newline continuation, and
    // `\n` escape decoding.
    #[test]
    fn quoted_argument() {
        use super::quoted_argument;

        let (_, qa) = quoted_argument(br#""hello""#).unwrap();
        assert_eq!(&qa.0, b"hello");
        let (_, qa) = quoted_argument(
            br#""hello\
, world""#,
        )
        .unwrap();
        assert_eq!(&qa.0, b"hello, world");
        let (_, qa) = quoted_argument(br#""hello\nworld""#).unwrap();
        assert_eq!(&qa.0, b"hello\nworld");
    }

    // Unquoted arguments: a plain word is `Normal`; forms with embedded
    // quotes or `$(...)` are `Legacy`.
    #[test]
    fn unquoted_argument() {
        use super::{unquoted_argument, UnquotedArgument};

        let (_, ua) = unquoted_argument(b"hello").unwrap();
        assert_eq!(ua, UnquotedArgument::Normal(b"hello".to_vec()));

        let (_, ua) = unquoted_argument(b"a=\"b\"").unwrap();
        assert_eq!(ua, UnquotedArgument::Legacy(b"a=\"b\""));

        let (_, ua) = unquoted_argument(b"-Da=\"b c\"").unwrap();
        assert_eq!(ua, UnquotedArgument::Legacy(b"-Da=\"b c\""));

        let (_, ua) = unquoted_argument(b"-Da=$(v)").unwrap();
        assert_eq!(ua, UnquotedArgument::Legacy(b"-Da=$(v)"));

        let (_, ua) = unquoted_argument(br#"a" "b"c"d"#).unwrap();
        assert_eq!(ua, UnquotedArgument::Legacy(br#"a" "b"c"d"#));
    }

    // The legacy recognizer returns the raw input bytes unchanged.
    #[test]
    fn unquoted_legacy() {
        use super::unquoted_legacy;
        let (_, ua) = unquoted_legacy(b"a=\"b\"").unwrap();
        assert_eq!(ua, b"a=\"b\"");

        let (_, ua) = unquoted_legacy(b"-Da=\"b c\"").unwrap();
        assert_eq!(ua, b"-Da=\"b c\"");

        let (_, ua) = unquoted_legacy(b"-Da=$(v)").unwrap();
        assert_eq!(ua, b"-Da=$(v)");

        let (_, ua) = unquoted_legacy(br#"a" "b"c"d"#).unwrap();
        assert_eq!(ua, br#"a" "b"c"d"#);
    }

    // Parenthesized argument lists parse, including a legacy argument.
    #[test]
    fn scoped_arguments() {
        use super::scoped_arguments;

        let (_, _sa) = scoped_arguments(b"(hello)").debug_unwrap();

        let (_, _sa) = scoped_arguments(b"(hello world)").debug_unwrap();

        let (_, _sa) =
            scoped_arguments(b"(LibXml2 PRIVATE SYSCONFDIR=\"${CMAKE_INSTALL_FULL_SYSCONFDIR}\")")
                .debug_unwrap();
    }

    // Bare argument lists (without parentheses) parse.
    #[test]
    fn arguments() {
        use super::arguments;

        let (_, _) = arguments(b"hello").debug_unwrap();

        let (_, _) = arguments(b"hello world").debug_unwrap();
    }
}