1use std::borrow::Cow;
2
3use nom::{
4 branch::alt,
5 bytes::complete::{is_a, is_not, tag, take_until},
6 character::complete::{alpha1, alphanumeric1, char, not_line_ending, space1},
7 combinator::{consumed, map, not, opt, recognize, value},
8 multi::{many0, many0_count, many1},
9 sequence::{delimited, pair, preceded, tuple},
10};
11
12use crate::Token;
13
14pub fn parse_cmakelists(src: &[u8]) -> Result<CMakeListsTokens, CMakeListsParseError> {
15 nom_parse_cmakelists(src)
16 .map(|(_, cm)| cm)
17 .map_err(From::from)
18}
19
20#[derive(Debug)]
21pub struct CMakeListsTokens<'cmlist> {
22 file: Vec<FileElement<'cmlist>>,
23}
24
25impl<'cmlist> CMakeListsTokens<'cmlist> {
26 pub(crate) fn command_invocations(&self) -> impl Iterator<Item = &CommandInvocation<'cmlist>> {
27 self.file.iter().filter_map(|file_element| {
28 if let CMakeLanguage::CommandInvocation((command_invocation, _)) = &file_element.element
29 {
30 Some(command_invocation)
31 } else {
32 None
33 }
34 })
35 }
36}
37
38#[derive(Debug)]
39struct FileElement<'fe> {
40 source: Source<'fe>,
41 element: CMakeLanguage<'fe>,
42}
43
/// Borrowed slice of the original input that a file element was parsed from.
struct Source<'src>(&'src [u8]);
45
46type IResult<I, O, E = nom::error::VerboseError<I>> = Result<(I, O), nom::Err<E>>;
47
48impl<'s> std::fmt::Debug for Source<'s> {
49 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50 f.debug_tuple("Source")
51 .field(&String::from_utf8_lossy(self.0))
52 .finish()
53 }
54}
55
56#[derive(Debug)]
57enum CMakeLanguage<'cml> {
58 CommandInvocation((CommandInvocation<'cml>, LineEnding<'cml>)),
59 Formatting((Vec<Formatting<'cml>>, LineEnding<'cml>)),
60}
61
62#[derive(Debug)]
63enum Formatting<'f> {
64 BracketComment(BracketComment<'f>),
65 Spaces(Spaces),
66}
67
68#[derive(Debug)]
69pub(crate) struct CommandInvocation<'ci> {
70 spaces_before: Vec<Spaces>,
71 pub(crate) identifier: &'ci [u8],
72 spaces_after: Vec<Spaces>,
73 arguments: Arguments<'ci>,
74}
75
76impl<'ci> CommandInvocation<'ci> {
77 pub fn to_text_nodes(&'ci self) -> Vec<Token<'ci>> {
78 self.arguments.to_text_nodes()
79 }
80
81 pub fn identifier(&self) -> Cow<[u8]> {
82 if !self.identifier.iter().any(u8::is_ascii_uppercase) {
83 Cow::Borrowed(self.identifier)
84 } else {
85 Cow::Owned(self.identifier.to_ascii_lowercase())
86 }
87 }
88}
89
90#[derive(Debug)]
91struct Arguments<'a> {
92 argument: Option<Argument<'a>>,
93 separated_arguments: Vec<SeparatedArguments<'a>>,
94}
95
96impl<'a> Arguments<'a> {
97 pub fn to_text_nodes(&'a self) -> Vec<Token<'a>> {
98 let mut text_nodes = vec![];
99 if let Some(arg_tn) = self.argument.as_ref().map(|arg| arg.to_text_node()) {
100 text_nodes.push(arg_tn);
101 }
102 text_nodes.extend(self.separated_arguments.iter().filter_map(|x| {
103 if let SeparatedArguments::Single((_, Some(arg))) = x {
104 Some(arg.to_text_node())
105 } else {
106 None
107 }
108 }));
109 text_nodes
110 }
111}
112
113#[derive(Debug)]
114enum SeparatedArguments<'a> {
115 Single((Vec<Separation<'a>>, Option<Argument<'a>>)),
116 Multi((Vec<Separation<'a>>, Box<Arguments<'a>>)),
117}
118
119#[derive(Debug)]
120enum Separation<'a> {
121 Space(Spaces),
122 LineEnding(LineEnding<'a>),
123}
124
125#[derive(Debug)]
126enum Argument<'a> {
127 Bracket(BracketArgument<'a>),
128 Quoted(QuotedArgument),
129 Unquoted(UnquotedArgument<'a>),
130}
131
132impl<'a> Argument<'a> {
133 fn to_text_node(&'a self) -> Token<'a> {
134 match self {
135 Argument::Bracket(ba) => Token::text_node(ba.bracket_content, false),
136 Argument::Quoted(qa) => Token::text_node(&qa.0, true),
137 Argument::Unquoted(ua) => ua.to_text_node(),
138 }
139 }
140}
141
142#[derive(Debug)]
143struct BracketComment<'bc>(BracketArgument<'bc>);
144
/// A bracket argument such as `[==[ ... ]==]`.
#[derive(Debug)]
struct BracketArgument<'src> {
    /// Number of `=` characters between the two opening `[`.
    len: usize,
    /// The bytes between the opening and the closing bracket. A line ending
    /// directly after the opening bracket is not part of the content.
    bracket_content: &'src [u8],
}
150
/// The content of a quoted argument, with the surrounding quotes removed,
/// escape sequences replaced and backslash-newline continuations dropped.
#[derive(Debug)]
struct QuotedArgument(Vec<u8>);
153
/// An unquoted argument.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum UnquotedArgument<'src> {
    /// Regular unquoted argument; owned because escape sequences are replaced.
    Normal(Vec<u8>),
    /// Legacy-style unquoted argument, kept as the raw input bytes.
    Legacy(&'src [u8]),
}
159
160impl<'ua> UnquotedArgument<'ua> {
161 fn to_text_node(&'ua self) -> Token<'ua> {
162 match self {
163 UnquotedArgument::Normal(n) => Token::text_node(n, false),
164 UnquotedArgument::Legacy(l) => Token::text_node(l, false),
165 }
166 }
167}
168
/// The text of a `#` line comment: everything after the `#` up to, but not
/// including, the line ending.
#[derive(Debug)]
struct LineComment<'src>(&'src [u8]);
171
172#[derive(Debug)]
173struct LineEnding<'le> {
174 line_comment: Option<LineComment<'le>>,
175}
176
/// A run of horizontal whitespace, stored as its length in bytes.
#[derive(Debug)]
struct Spaces(usize);
179
180#[derive(Debug, thiserror::Error)]
181pub enum CMakeListsParseError {
182 #[error("unknown")]
183 Unknown,
184 #[error("parser: {0}")]
185 Parser(String),
186}
187
188impl From<nom::Err<nom::error::VerboseError<&[u8]>>> for CMakeListsParseError {
189 fn from(value: nom::Err<nom::error::VerboseError<&[u8]>>) -> Self {
190 Self::Parser(value.to_string())
191 }
192}
193
194fn nom_parse_cmakelists(src: &[u8]) -> IResult<&[u8], CMakeListsTokens<'_>> {
195 many0(file_element)(src).map(|(src, file)| (src, CMakeListsTokens { file }))
196}
197
198fn file_element(src: &[u8]) -> IResult<&[u8], FileElement<'_>> {
199 alt((
200 map(
201 consumed(tuple((command_invocation, line_ending))),
202 |(source, command_invocation)| FileElement {
203 source: Source(source),
204 element: CMakeLanguage::CommandInvocation(command_invocation),
205 },
206 ),
207 map(
208 consumed(tuple((
209 many0(alt((
210 map(bracket_comment, Formatting::BracketComment),
211 map(spaces, Formatting::Spaces),
212 ))),
213 line_ending,
214 ))),
215 |(source, formatting)| FileElement {
216 source: Source(source),
217 element: CMakeLanguage::Formatting(formatting),
218 },
219 ),
220 ))(src)
221}
222
223fn command_invocation(src: &[u8]) -> IResult<&[u8], CommandInvocation> {
224 map(
225 tuple((many0(spaces), identifier, many0(spaces), scoped_arguments)),
226 |(spaces_before, identifier, spaces_after, arguments)| CommandInvocation {
227 spaces_before,
228 identifier,
229 spaces_after,
230 arguments,
231 },
232 )(src)
233}
234
235fn scoped_arguments(src: &[u8]) -> IResult<&[u8], Arguments<'_>> {
236 delimited(char('('), arguments, char(')'))(src)
237}
238
239fn arguments(src: &[u8]) -> IResult<&[u8], Arguments<'_>> {
240 map(
241 pair(opt(argument), many0(separated_arguments)),
242 |(argument, separated_arguments)| Arguments {
243 argument,
244 separated_arguments,
245 },
246 )(src)
247}
248
249fn separated_arguments(src: &[u8]) -> IResult<&[u8], SeparatedArguments<'_>> {
250 alt((
251 map(
252 pair(many1(separation), opt(argument)),
253 SeparatedArguments::Single,
254 ),
255 map(
256 pair(many0(separation), map(scoped_arguments, Box::new)),
257 SeparatedArguments::Multi,
258 ),
259 ))(src)
260}
261
262fn separation(src: &[u8]) -> IResult<&[u8], Separation<'_>> {
263 alt((
264 map(spaces, Separation::Space),
265 map(line_ending, Separation::LineEnding),
266 ))(src)
267}
268
269fn argument(src: &[u8]) -> IResult<&[u8], Argument<'_>> {
270 alt((
271 map(bracket_argument, Argument::Bracket),
272 map(quoted_argument, Argument::Quoted),
273 map(unquoted_argument, Argument::Unquoted),
274 ))(src)
275}
276
277fn bracket_argument(src: &[u8]) -> IResult<&[u8], BracketArgument> {
278 let (src, _) = char('[')(src)?;
279 let (src, len) = many0_count(char('='))(src)?;
280 let bracket_close = format!("]{}]", "=".repeat(len));
281 let (src, _) = char('[')(src)?;
282 let (src, _) = opt(nom::character::complete::line_ending)(src)?;
283 let (src, bracket_content) = take_until(bracket_close.as_bytes())(src)?;
284 let (src, _) = tag(bracket_close.as_bytes())(src)?;
285 Ok((
286 src,
287 BracketArgument {
288 len,
289 bracket_content,
290 },
291 ))
292}
293
294fn quoted_argument(src: &[u8]) -> IResult<&[u8], QuotedArgument> {
295 map(
296 delimited(tag(b"\""), many0(quoted_element), tag(b"\"")),
297 |x| QuotedArgument(x.into_iter().flatten().collect()),
298 )(src)
299}
300
301fn quoted_element(src: &[u8]) -> IResult<&[u8], Vec<u8>> {
302 alt((
303 map(is_not("\\\""), |x: &[u8]| x.to_vec()),
304 map(escape_sequence, |x| x.to_vec()),
305 value(
306 Vec::default(),
307 pair(char('\\'), nom::character::complete::line_ending),
308 ),
309 ))(src)
310}
311
312fn escape_sequence(src: &[u8]) -> IResult<&[u8], &[u8]> {
313 preceded(
314 char('\\'),
315 alt((
316 is_a("()#\" \\$@^;"),
317 value(&b"\t"[..], char('t')),
318 value(&b"\r"[..], char('r')),
319 value(&b"\n"[..], char('n')),
320 )),
321 )(src)
322}
323
324fn unquoted_argument(src: &[u8]) -> IResult<&[u8], UnquotedArgument> {
325 alt((
326 map(unquoted_legacy, UnquotedArgument::Legacy),
327 map(many1(unquoted_element), |x| {
328 UnquotedArgument::Normal(x.iter().flat_map(|x| x.to_vec()).collect())
329 }),
330 ))(src)
331}
332
333fn unquoted_element(src: &[u8]) -> IResult<&[u8], &[u8]> {
334 alt((is_not(" \t\r\n()#\"\\"), escape_sequence))(src)
335}
336
337fn unquoted_legacy(src: &[u8]) -> IResult<&[u8], &[u8]> {
338 recognize(pair(
339 alt((
340 value((), is_not(" \t\r\n()#\"\\$")),
341 value((), delimited(tag(b"$("), is_not(")"), tag(b")"))),
342 )),
343 many1(alt((
344 value((), is_not(" \t\r\n()#\"\\$")),
345 value((), delimited(tag(b"$("), is_not(")"), tag(b")"))),
346 value((), delimited(char('"'), is_not("\""), char('"'))),
347 ))),
348 ))(src)
349}
350
351fn identifier(src: &[u8]) -> IResult<&[u8], &[u8]> {
352 recognize(pair(
353 alt((alpha1, tag("_"))),
354 many0_count(alt((alphanumeric1, tag("_")))),
355 ))(src)
356}
357
358fn line_ending(src: &[u8]) -> IResult<&[u8], LineEnding> {
359 map(
360 tuple((opt(line_comment), nom::character::complete::line_ending)),
361 |(line_comment, _)| LineEnding { line_comment },
362 )(src)
363}
364
365fn line_comment(src: &[u8]) -> IResult<&[u8], LineComment> {
366 preceded(
367 char('#'),
368 map(
369 recognize(tuple((
370 not(tuple((char('['), many0(char('=')), char('[')))),
371 not_line_ending,
372 ))),
373 LineComment,
374 ),
375 )(src)
376}
377
378fn bracket_comment(src: &[u8]) -> IResult<&[u8], BracketComment> {
379 map(preceded(char('#'), bracket_argument), BracketComment)(src)
380}
381
382fn spaces(src: &[u8]) -> IResult<&[u8], Spaces> {
383 map(space1, |spaces: &[u8]| Spaces(spaces.len()))(src)
384}
385
#[cfg(test)]
mod tests {
    /// Test helper: unwraps a parser result and, on failure, panics with a
    /// readable dump of the verbose error stack.
    trait CheckNomError<O> {
        fn debug_unwrap(self) -> (&'static [u8], O);
    }

    /// Renders every entry of a verbose error as `kind: 'input prefix'`, one
    /// per line, showing at most the first 50 bytes of the remaining input.
    fn render(error: nom::error::VerboseError<&[u8]>) -> String {
        error
            .errors
            .into_iter()
            .map(|(src, knd)| {
                format!(
                    "{knd:?}: '{}'",
                    String::from_utf8_lossy(&src[..src.len().min(50)])
                )
            })
            .collect::<Vec<_>>()
            .join("\n")
    }

    impl<O> CheckNomError<O> for super::IResult<&'static [u8], O> {
        fn debug_unwrap(self) -> (&'static [u8], O) {
            match self {
                Ok(ok) => ok,
                Err(err) => match err {
                    nom::Err::Incomplete(_) => panic!("Incomplete: {err}"),
                    nom::Err::Error(e) => panic!("Error: {}", render(e)),
                    nom::Err::Failure(e) => panic!("Failure: {}", render(e)),
                },
            }
        }
    }

    #[test]
    fn parse_cmakelists() {
        let ex1 = include_bytes!("../../fixture/CMakeLists.txt.ex1");
        let ex2 = include_bytes!("../../fixture/CMakeLists.txt.ex2");
        let ex3 = include_bytes!("../../fixture/CMakeLists.txt.ex3");
        let ex4 = include_bytes!("../../fixture/CMakeLists.txt.ex4");

        let _tokens = super::parse_cmakelists(ex1).unwrap();
        let _tokens = super::parse_cmakelists(ex2).unwrap();
        let _tokens = super::parse_cmakelists(ex3).unwrap();
        let _tokens = super::parse_cmakelists(ex4).unwrap();
    }

    #[test]
    fn file_element() {
        use super::file_element;

        // The first three elements of the fixture must parse one after another.
        let input = include_bytes!("../../fixture/CMakeLists.txt.ex2");
        let (after_first, _) = file_element(input).debug_unwrap();
        let (after_second, _) = file_element(after_first).unwrap();
        let (_, _) = file_element(after_second).unwrap();
    }

    #[test]
    fn bracket_argument() {
        use super::bracket_argument;

        let content = |input: &'static [u8]| {
            let (_, ba) = bracket_argument(input).unwrap();
            ba.bracket_content
        };

        assert_eq!(content(b"[[hello]]"), b"hello");
        assert_eq!(content(b"[=[hel]]lo]=]"), b"hel]]lo");
        assert_eq!(content(b"[=[hel]]\r\nlo]=]"), b"hel]]\r\nlo");
        // A line ending right after the opening bracket is dropped.
        assert_eq!(content(b"[=[\r\nhel]]\r\nlo]=]"), b"hel]]\r\nlo");
        assert_eq!(content(b"[=[\nhel]]\r\nlo]=]"), b"hel]]\r\nlo");
    }

    #[test]
    fn line_comment() {
        use super::line_comment;

        let text = |input: &'static [u8]| {
            let (_, lc) = line_comment(input).unwrap();
            lc.0
        };

        assert_eq!(text(b"#"), b"");
        assert_eq!(text(b"#hello"), b"hello");
        assert_eq!(text(b"# [[hello"), b" [[hello");
        assert_eq!(text(b"#\r\n"), b"");

        // A bracket opener directly after `#` is not a line comment.
        assert!(line_comment(b"#[[hello").is_err());
        assert!(line_comment(b"#[=[hello").is_err());
    }

    #[test]
    fn quoted_argument() {
        use super::quoted_argument;

        let unquote = |input: &'static [u8]| {
            let (_, qa) = quoted_argument(input).unwrap();
            qa.0
        };

        assert_eq!(unquote(br#""hello""#), b"hello".to_vec());
        // Backslash-newline is a line continuation and leaves no bytes behind.
        assert_eq!(
            unquote(
                br#""hello\
, world""#
            ),
            b"hello, world".to_vec()
        );
        assert_eq!(unquote(br#""hello\nworld""#), b"hello\nworld".to_vec());
    }

    #[test]
    fn unquoted_argument() {
        use super::{unquoted_argument, UnquotedArgument};

        let parse = |input: &'static [u8]| {
            let (_, ua) = unquoted_argument(input).unwrap();
            ua
        };

        assert_eq!(parse(b"hello"), UnquotedArgument::Normal(b"hello".to_vec()));
        assert_eq!(parse(b"a=\"b\""), UnquotedArgument::Legacy(b"a=\"b\""));
        assert_eq!(
            parse(b"-Da=\"b c\""),
            UnquotedArgument::Legacy(b"-Da=\"b c\"")
        );
        assert_eq!(parse(b"-Da=$(v)"), UnquotedArgument::Legacy(b"-Da=$(v)"));
        assert_eq!(
            parse(br#"a" "b"c"d"#),
            UnquotedArgument::Legacy(br#"a" "b"c"d"#)
        );
    }

    #[test]
    fn unquoted_legacy() {
        use super::unquoted_legacy;

        let recognized = |input: &'static [u8]| {
            let (_, ua) = unquoted_legacy(input).unwrap();
            ua
        };

        assert_eq!(recognized(b"a=\"b\""), b"a=\"b\"");
        assert_eq!(recognized(b"-Da=\"b c\""), b"-Da=\"b c\"");
        assert_eq!(recognized(b"-Da=$(v)"), b"-Da=$(v)");
        assert_eq!(recognized(br#"a" "b"c"d"#), br#"a" "b"c"d"#);
    }

    #[test]
    fn scoped_arguments() {
        use super::scoped_arguments;

        let (_, _single) = scoped_arguments(b"(hello)").debug_unwrap();
        let (_, _pair) = scoped_arguments(b"(hello world)").debug_unwrap();
        let (_, _legacy) =
            scoped_arguments(b"(LibXml2 PRIVATE SYSCONFDIR=\"${CMAKE_INSTALL_FULL_SYSCONFDIR}\")")
                .debug_unwrap();
    }

    #[test]
    fn arguments() {
        use super::arguments;

        let (_, _single) = arguments(b"hello").debug_unwrap();
        let (_, _pair) = arguments(b"hello world").debug_unwrap();
    }
}