Skip to main content

ros_cmake_parser/
parser.rs

1use std::borrow::Cow;
2
3use nom::{
4    branch::alt,
5    bytes::complete::{is_a, is_not, tag, take_until},
6    character::complete::{alpha1, alphanumeric1, char, not_line_ending, space1},
7    combinator::{consumed, map, not, opt, recognize, value},
8    multi::{many0, many0_count, many1},
9    sequence::{delimited, pair, preceded, tuple},
10};
11
12use crate::Token;
13
14pub fn parse_cmakelists(src: &[u8]) -> Result<CMakeListsTokens<'_>, CMakeListsParseError> {
15    nom_parse_cmakelists(src)
16        .map(|(_, cm)| cm)
17        .map_err(From::from)
18}
19
/// The parsed contents of a CMakeLists file, as returned by [`parse_cmakelists`].
#[derive(Debug)]
pub struct CMakeListsTokens<'cmlist> {
    /// File elements in the order they were parsed from the input.
    file: Vec<FileElement<'cmlist>>,
}
24
25impl<'cmlist> CMakeListsTokens<'cmlist> {
26    pub(crate) fn command_invocations(&self) -> impl Iterator<Item = &CommandInvocation<'cmlist>> {
27        self.file.iter().filter_map(|file_element| {
28            if let CMakeLanguage::CommandInvocation((command_invocation, _)) = &file_element.element
29            {
30                Some(command_invocation)
31            } else {
32                None
33            }
34        })
35    }
36}
37
/// One parsed file element together with the exact input bytes it was parsed from.
// NOTE(review): `dead_code` is presumably allowed because `source` is only read through the
// derived `Debug` impl — confirm.
#[allow(dead_code)]
#[derive(Debug)]
struct FileElement<'fe> {
    /// The slice of the input consumed while parsing this element.
    source: Source<'fe>,
    /// The parsed representation of that slice.
    element: CMakeLanguage<'fe>,
}
44
/// Borrowed slice of the original input that a file element was parsed from.
struct Source<'s>(&'s [u8]);
46
/// Parser result type used throughout this module; the error type defaults to nom's
/// `VerboseError` over the input type.
type IResult<I, O, E = nom::error::VerboseError<I>> = Result<(I, O), nom::Err<E>>;
48
49impl<'s> std::fmt::Debug for Source<'s> {
50    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
51        f.debug_tuple("Source")
52            .field(&String::from_utf8_lossy(self.0))
53            .finish()
54    }
55}
56
/// The two kinds of file element produced by `file_element`.
#[allow(dead_code)]
#[derive(Debug)]
enum CMakeLanguage<'cml> {
    /// A command invocation and the line ending that terminates it.
    CommandInvocation((CommandInvocation<'cml>, LineEnding<'cml>)),
    /// A line made up only of spaces and bracket comments, plus its line ending.
    Formatting((Vec<Formatting<'cml>>, LineEnding<'cml>)),
}
63
/// A single piece of formatting inside a formatting-only file element.
#[allow(dead_code)]
#[derive(Debug)]
enum Formatting<'f> {
    /// A `#[[ ... ]]` style bracket comment.
    BracketComment(BracketComment<'f>),
    /// A run of horizontal whitespace.
    Spaces(Spaces),
}
70
/// A parsed command invocation: `<spaces> identifier <spaces> ( arguments )`.
#[allow(dead_code)]
#[derive(Debug)]
pub(crate) struct CommandInvocation<'ci> {
    /// Whitespace runs found before the identifier.
    spaces_before: Vec<Spaces>,
    /// The command name exactly as written in the input (case is not normalised here).
    pub(crate) identifier: &'ci [u8],
    /// Whitespace runs found between the identifier and the opening parenthesis.
    spaces_after: Vec<Spaces>,
    /// The arguments found between the parentheses.
    arguments: Arguments<'ci>,
}
79
80impl<'ci> CommandInvocation<'ci> {
81    pub fn to_text_nodes(&'ci self) -> Vec<Token<'ci>> {
82        self.arguments.to_text_nodes()
83    }
84
85    pub fn identifier(&self) -> Cow<'_, [u8]> {
86        if !self.identifier.iter().any(u8::is_ascii_uppercase) {
87            Cow::Borrowed(self.identifier)
88        } else {
89            Cow::Owned(self.identifier.to_ascii_lowercase())
90        }
91    }
92}
93
/// The argument list of a command invocation or of a nested parenthesised group.
#[derive(Debug)]
struct Arguments<'a> {
    /// The optional argument that directly follows the opening parenthesis.
    argument: Option<Argument<'a>>,
    /// Everything after it: separated single arguments and nested parenthesised groups.
    separated_arguments: Vec<SeparatedArguments<'a>>,
}
99
100impl<'a> Arguments<'a> {
101    pub fn to_text_nodes(&'a self) -> Vec<Token<'a>> {
102        let mut text_nodes = vec![];
103        if let Some(arg_tn) = self.argument.as_ref().map(|arg| arg.to_text_node()) {
104            text_nodes.push(arg_tn);
105        }
106        text_nodes.extend(self.separated_arguments.iter().filter_map(|x| {
107            if let SeparatedArguments::Single((_, Some(arg))) = x {
108                Some(arg.to_text_node())
109            } else {
110                None
111            }
112        }));
113        text_nodes
114    }
115}
116
/// One entry of an argument list after the leading argument.
#[allow(dead_code)]
#[derive(Debug)]
enum SeparatedArguments<'a> {
    /// At least one separation, optionally followed by a single argument.
    Single((Vec<Separation<'a>>, Option<Argument<'a>>)),
    /// Zero or more separations followed by a nested, parenthesised argument list.
    Multi((Vec<Separation<'a>>, Box<Arguments<'a>>)),
}
123
/// What may stand between two arguments.
#[allow(dead_code)]
#[derive(Debug)]
enum Separation<'a> {
    /// A run of horizontal whitespace.
    Space(Spaces),
    /// A newline, optionally preceded by a line comment.
    LineEnding(LineEnding<'a>),
}
130
/// A single command argument in one of the three forms the parser recognises.
#[derive(Debug)]
enum Argument<'a> {
    /// `[=[ ... ]=]` style bracket argument.
    Bracket(BracketArgument<'a>),
    /// `"..."` quoted argument.
    Quoted(QuotedArgument),
    /// Unquoted argument (normal or legacy form).
    Unquoted(UnquotedArgument<'a>),
}
137
138impl<'a> Argument<'a> {
139    fn to_text_node(&'a self) -> Token<'a> {
140        match self {
141            Argument::Bracket(ba) => Token::text_node(ba.bracket_content, false),
142            Argument::Quoted(qa) => Token::text_node(&qa.0, true),
143            Argument::Unquoted(ua) => ua.to_text_node(),
144        }
145    }
146}
147
/// A bracket comment: a `#` immediately followed by a bracket argument.
#[allow(dead_code)]
#[derive(Debug)]
struct BracketComment<'bc>(BracketArgument<'bc>);
151
/// A bracket argument such as `[[text]]` or `[=[text]=]`.
#[allow(dead_code)]
#[derive(Debug)]
struct BracketArgument<'ba> {
    /// Number of `=` characters between the two opening brackets.
    len: usize,
    /// The bytes between the opening bracket (and an optional newline right after it) and
    /// the matching closing bracket.
    bracket_content: &'ba [u8],
}
158
/// Content of a quoted argument, without the surrounding quotes; escape sequences are
/// already replaced and backslash-newline continuations removed.
#[derive(Debug)]
struct QuotedArgument(Vec<u8>);
161
/// An unquoted argument.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
enum UnquotedArgument<'ua> {
    /// Regular unquoted argument; the bytes are the concatenated elements with escape
    /// sequences already replaced.
    Normal(Vec<u8>),
    /// Legacy-style unquoted argument, kept as the raw input slice.
    Legacy(&'ua [u8]),
}
167
168impl<'ua> UnquotedArgument<'ua> {
169    fn to_text_node(&'ua self) -> Token<'ua> {
170        match self {
171            UnquotedArgument::Normal(n) => Token::text_node(n, false),
172            UnquotedArgument::Legacy(l) => Token::text_node(l, false),
173        }
174    }
175}
176
/// Text of a line comment: everything after the `#` up to, but not including, the newline.
#[allow(dead_code)]
#[derive(Debug)]
struct LineComment<'lc>(&'lc [u8]);
180
/// The end of a line: an optional line comment followed by a newline.
#[allow(dead_code)]
#[derive(Debug)]
struct LineEnding<'le> {
    /// The comment that preceded the newline, if there was one.
    line_comment: Option<LineComment<'le>>,
}
186
/// A run of horizontal whitespace; the field is the length of the run in bytes.
#[allow(dead_code)]
#[derive(Debug)]
struct Spaces(usize);
190
/// Error returned by [`parse_cmakelists`].
#[derive(Debug, thiserror::Error)]
pub enum CMakeListsParseError {
    /// Unspecified failure.
    // NOTE(review): not constructed anywhere in the visible part of this file.
    #[error("unknown")]
    Unknown,
    /// The nom parser failed; the payload is the string rendering of the nom error.
    #[error("parser: {0}")]
    Parser(String),
}
198
199impl From<nom::Err<nom::error::VerboseError<&[u8]>>> for CMakeListsParseError {
200    fn from(value: nom::Err<nom::error::VerboseError<&[u8]>>) -> Self {
201        Self::Parser(value.to_string())
202    }
203}
204
205fn nom_parse_cmakelists(src: &[u8]) -> IResult<&[u8], CMakeListsTokens<'_>> {
206    many0(file_element)(src).map(|(src, file)| (src, CMakeListsTokens { file }))
207}
208
209fn file_element(src: &[u8]) -> IResult<&[u8], FileElement<'_>> {
210    alt((
211        map(
212            consumed(tuple((command_invocation, line_ending))),
213            |(source, command_invocation)| FileElement {
214                source: Source(source),
215                element: CMakeLanguage::CommandInvocation(command_invocation),
216            },
217        ),
218        map(
219            consumed(tuple((
220                many0(alt((
221                    map(bracket_comment, Formatting::BracketComment),
222                    map(spaces, Formatting::Spaces),
223                ))),
224                line_ending,
225            ))),
226            |(source, formatting)| FileElement {
227                source: Source(source),
228                element: CMakeLanguage::Formatting(formatting),
229            },
230        ),
231    ))(src)
232}
233
234fn command_invocation(src: &[u8]) -> IResult<&[u8], CommandInvocation<'_>> {
235    map(
236        tuple((many0(spaces), identifier, many0(spaces), scoped_arguments)),
237        |(spaces_before, identifier, spaces_after, arguments)| CommandInvocation {
238            spaces_before,
239            identifier,
240            spaces_after,
241            arguments,
242        },
243    )(src)
244}
245
246fn scoped_arguments(src: &[u8]) -> IResult<&[u8], Arguments<'_>> {
247    delimited(char('('), arguments, char(')'))(src)
248}
249
250fn arguments(src: &[u8]) -> IResult<&[u8], Arguments<'_>> {
251    map(
252        pair(opt(argument), many0(separated_arguments)),
253        |(argument, separated_arguments)| Arguments {
254            argument,
255            separated_arguments,
256        },
257    )(src)
258}
259
260fn separated_arguments(src: &[u8]) -> IResult<&[u8], SeparatedArguments<'_>> {
261    alt((
262        map(
263            pair(many1(separation), opt(argument)),
264            SeparatedArguments::Single,
265        ),
266        map(
267            pair(many0(separation), map(scoped_arguments, Box::new)),
268            SeparatedArguments::Multi,
269        ),
270    ))(src)
271}
272
273fn separation(src: &[u8]) -> IResult<&[u8], Separation<'_>> {
274    alt((
275        map(spaces, Separation::Space),
276        map(line_ending, Separation::LineEnding),
277    ))(src)
278}
279
280fn argument(src: &[u8]) -> IResult<&[u8], Argument<'_>> {
281    alt((
282        map(bracket_argument, Argument::Bracket),
283        map(quoted_argument, Argument::Quoted),
284        map(unquoted_argument, Argument::Unquoted),
285    ))(src)
286}
287
288fn bracket_argument(src: &[u8]) -> IResult<&[u8], BracketArgument<'_>> {
289    let (src, _) = char('[')(src)?;
290    let (src, len) = many0_count(char('='))(src)?;
291    let bracket_close = format!("]{}]", "=".repeat(len));
292    let (src, _) = char('[')(src)?;
293    let (src, _) = opt(nom::character::complete::line_ending)(src)?;
294    let (src, bracket_content) = take_until(bracket_close.as_bytes())(src)?;
295    let (src, _) = tag(bracket_close.as_bytes())(src)?;
296    Ok((
297        src,
298        BracketArgument {
299            len,
300            bracket_content,
301        },
302    ))
303}
304
305fn quoted_argument(src: &[u8]) -> IResult<&[u8], QuotedArgument> {
306    map(
307        delimited(tag(b"\""), many0(quoted_element), tag(b"\"")),
308        |x| QuotedArgument(x.into_iter().flatten().collect()),
309    )(src)
310}
311
312fn quoted_element(src: &[u8]) -> IResult<&[u8], Vec<u8>> {
313    alt((
314        map(is_not("\\\""), |x: &[u8]| x.to_vec()),
315        map(escape_sequence, |x| x.to_vec()),
316        value(
317            Vec::default(),
318            pair(char('\\'), nom::character::complete::line_ending),
319        ),
320    ))(src)
321}
322
323fn escape_sequence(src: &[u8]) -> IResult<&[u8], &[u8]> {
324    preceded(
325        char('\\'),
326        alt((
327            is_a("()#\" \\$@^;"),
328            value(&b"\t"[..], char('t')),
329            value(&b"\r"[..], char('r')),
330            value(&b"\n"[..], char('n')),
331        )),
332    )(src)
333}
334
335fn unquoted_argument(src: &[u8]) -> IResult<&[u8], UnquotedArgument<'_>> {
336    alt((
337        map(unquoted_legacy, UnquotedArgument::Legacy),
338        map(many1(unquoted_element), |x| {
339            UnquotedArgument::Normal(x.iter().flat_map(|x| x.to_vec()).collect())
340        }),
341    ))(src)
342}
343
344fn unquoted_element(src: &[u8]) -> IResult<&[u8], &[u8]> {
345    alt((is_not(" \t\r\n()#\"\\"), escape_sequence))(src)
346}
347
348fn unquoted_legacy(src: &[u8]) -> IResult<&[u8], &[u8]> {
349    recognize(pair(
350        alt((
351            value((), is_not(" \t\r\n()#\"\\$")),
352            value((), delimited(tag(b"$("), is_not(")"), tag(b")"))),
353        )),
354        many1(alt((
355            value((), is_not(" \t\r\n()#\"\\$")),
356            value((), delimited(tag(b"$("), is_not(")"), tag(b")"))),
357            value((), delimited(char('"'), is_not("\""), char('"'))),
358        ))),
359    ))(src)
360}
361
362fn identifier(src: &[u8]) -> IResult<&[u8], &[u8]> {
363    recognize(pair(
364        alt((alpha1, tag("_"))),
365        many0_count(alt((alphanumeric1, tag("_")))),
366    ))(src)
367}
368
369fn line_ending(src: &[u8]) -> IResult<&[u8], LineEnding<'_>> {
370    map(
371        tuple((opt(line_comment), nom::character::complete::line_ending)),
372        |(line_comment, _)| LineEnding { line_comment },
373    )(src)
374}
375
376fn line_comment(src: &[u8]) -> IResult<&[u8], LineComment<'_>> {
377    preceded(
378        char('#'),
379        map(
380            recognize(tuple((
381                not(tuple((char('['), many0(char('=')), char('[')))),
382                not_line_ending,
383            ))),
384            LineComment,
385        ),
386    )(src)
387}
388
389fn bracket_comment(src: &[u8]) -> IResult<&[u8], BracketComment<'_>> {
390    map(preceded(char('#'), bracket_argument), BracketComment)(src)
391}
392
393fn spaces(src: &[u8]) -> IResult<&[u8], Spaces> {
394    map(space1, |spaces: &[u8]| Spaces(spaces.len()))(src)
395}
396
// Unit tests for the individual parsers and for the public entry point.
#[cfg(test)]
mod tests {
    /// Test helper: unwraps a parser result and, on failure, panics with a readable message.
    trait CheckNomError<O> {
        /// Returns the `Ok` value or panics, listing every recorded error kind together with
        /// up to the first 50 bytes of the input it was recorded at.
        fn debug_unwrap(self) -> (&'static [u8], O);
    }

    impl<O> CheckNomError<O> for super::IResult<&'static [u8], O> {
        fn debug_unwrap(self) -> (&'static [u8], O) {
            match self {
                Ok(ok) => ok,
                Err(err) => match err {
                    nom::Err::Incomplete(_e) => panic!("Incomplete: {err}"),
                    nom::Err::Error(e) => {
                        let mut msgs = vec![];
                        for (src, knd) in e.errors {
                            // Truncate the offending input so the panic message stays short.
                            msgs.push(format!(
                                "{knd:?}: '{}'",
                                String::from_utf8_lossy(&src[..src.len().min(50)])
                            ));
                        }
                        panic!("Error: {}", msgs.join("\n"));
                    }
                    nom::Err::Failure(e) => {
                        let mut msgs = vec![];
                        for (src, knd) in e.errors {
                            // Same rendering as for `Error`, only the prefix differs.
                            msgs.push(format!(
                                "{knd:?}: '{}'",
                                String::from_utf8_lossy(&src[..src.len().min(50)])
                            ));
                        }
                        panic!("Failure: {}", msgs.join("\n"));
                    }
                },
            }
        }
    }

    // The four fixture files must not make the public entry point return an error.
    // NOTE(review): `parse_cmakelists` drops any unparsed remainder, so these checks do not
    // prove that the fixtures were parsed completely.
    #[test]
    fn parse_cmakelists() {
        let ex1 = include_bytes!("../../fixture/CMakeLists.txt.ex1");
        let _ = super::parse_cmakelists(ex1).unwrap();

        let ex2 = include_bytes!("../../fixture/CMakeLists.txt.ex2");
        let _ = super::parse_cmakelists(ex2).unwrap();

        let ex3 = include_bytes!("../../fixture/CMakeLists.txt.ex3");
        let _ = super::parse_cmakelists(ex3).unwrap();

        let ex4 = include_bytes!("../../fixture/CMakeLists.txt.ex4");
        let _ = super::parse_cmakelists(ex4).unwrap();
    }

    // The first three file elements of the second fixture must parse one after another.
    #[test]
    fn file_element() {
        use super::file_element;

        let input = include_bytes!("../../fixture/CMakeLists.txt.ex2");
        let (src, _) = file_element(input).debug_unwrap();
        let (src, _) = file_element(src).unwrap();
        let (_, _) = file_element(src).unwrap();
    }

    // Bracket arguments: the closing bracket must match the opener's `=` count, and a
    // newline directly after the opener is not part of the content.
    #[test]
    fn bracket_argument() {
        use super::bracket_argument;
        let (_, ba) = bracket_argument(b"[[hello]]").unwrap();
        assert_eq!(ba.bracket_content, b"hello");
        let (_, ba) = bracket_argument(b"[=[hel]]lo]=]").unwrap();
        assert_eq!(ba.bracket_content, b"hel]]lo");
        let (_, ba) = bracket_argument(b"[=[hel]]\r\nlo]=]").unwrap();
        assert_eq!(ba.bracket_content, b"hel]]\r\nlo");
        let (_, ba) = bracket_argument(b"[=[\r\nhel]]\r\nlo]=]").unwrap();
        assert_eq!(ba.bracket_content, b"hel]]\r\nlo");
        let (_, ba) = bracket_argument(b"[=[\nhel]]\r\nlo]=]").unwrap();
        assert_eq!(ba.bracket_content, b"hel]]\r\nlo");
    }

    // Line comments stop before the newline and are rejected when `#` is directly followed
    // by a bracket opener.
    #[test]
    fn line_comment() {
        use super::line_comment;

        let (_, lc) = line_comment(b"#").unwrap();
        assert_eq!(lc.0, b"");
        let (_, lc) = line_comment(b"#hello").unwrap();
        assert_eq!(lc.0, b"hello");
        let (_, lc) = line_comment(b"# [[hello").unwrap();
        assert_eq!(lc.0, b" [[hello");
        let (_, lc) = line_comment(b"#\r\n").unwrap();
        assert_eq!(lc.0, b"");

        let res = line_comment(b"#[[hello");
        assert!(res.is_err());
        let res = line_comment(b"#[=[hello");
        assert!(res.is_err());
    }

    // Quoted arguments: plain content, backslash-newline continuation, and `\n` escape.
    #[test]
    fn quoted_argument() {
        use super::quoted_argument;

        let (_, qa) = quoted_argument(br#""hello""#).unwrap();
        assert_eq!(&qa.0, b"hello");
        // The raw string below deliberately contains a backslash followed by a newline.
        let (_, qa) = quoted_argument(
            br#""hello\
, world""#,
        )
        .unwrap();
        assert_eq!(&qa.0, b"hello, world");
        let (_, qa) = quoted_argument(br#""hello\nworld""#).unwrap();
        assert_eq!(&qa.0, b"hello\nworld");
    }

    // Unquoted arguments: a plain word is `Normal`; the other inputs are `Legacy`.
    #[test]
    fn unquoted_argument() {
        use super::{unquoted_argument, UnquotedArgument};

        let (_, ua) = unquoted_argument(b"hello").unwrap();
        assert_eq!(ua, UnquotedArgument::Normal(b"hello".to_vec()));

        let (_, ua) = unquoted_argument(b"a=\"b\"").unwrap();
        assert_eq!(ua, UnquotedArgument::Legacy(b"a=\"b\""));

        let (_, ua) = unquoted_argument(b"-Da=\"b c\"").unwrap();
        assert_eq!(ua, UnquotedArgument::Legacy(b"-Da=\"b c\""));

        let (_, ua) = unquoted_argument(b"-Da=$(v)").unwrap();
        assert_eq!(ua, UnquotedArgument::Legacy(b"-Da=$(v)"));

        let (_, ua) = unquoted_argument(br#"a" "b"c"d"#).unwrap();
        assert_eq!(ua, UnquotedArgument::Legacy(br#"a" "b"c"d"#));
    }

    // The legacy recogniser must return the whole input slice for each of these inputs.
    #[test]
    fn unquoted_legacy() {
        use super::unquoted_legacy;
        let (_, ua) = unquoted_legacy(b"a=\"b\"").unwrap();
        assert_eq!(ua, b"a=\"b\"");

        let (_, ua) = unquoted_legacy(b"-Da=\"b c\"").unwrap();
        assert_eq!(ua, b"-Da=\"b c\"");

        let (_, ua) = unquoted_legacy(b"-Da=$(v)").unwrap();
        assert_eq!(ua, b"-Da=$(v)");

        let (_, ua) = unquoted_legacy(br#"a" "b"c"d"#).unwrap();
        assert_eq!(ua, br#"a" "b"c"d"#);
    }

    // Parenthesised argument lists with one, two and mixed-form arguments must parse.
    #[test]
    fn scoped_arguments() {
        use super::scoped_arguments;

        let (_, _sa) = scoped_arguments(b"(hello)").debug_unwrap();

        let (_, _sa) = scoped_arguments(b"(hello world)").debug_unwrap();

        let (_, _sa) =
            scoped_arguments(b"(LibXml2 PRIVATE SYSCONFDIR=\"${CMAKE_INSTALL_FULL_SYSCONFDIR}\")")
                .debug_unwrap();
    }

    // Bare argument lists (no parentheses) with one and two arguments must parse.
    #[test]
    fn arguments() {
        use super::arguments;

        let (_, _) = arguments(b"hello").debug_unwrap();

        let (_, _) = arguments(b"hello world").debug_unwrap();
    }
}