cttt_parser/
lib.rs

1// MIT License
2//
3// Copyright (c) 2023 Justin Poehnelt <justin.poehnelt@gmail.com>
4//
5// Permission is hereby granted, free of charge, to any person obtaining a copy
6// of this software and associated documentation files (the "Software"), to deal
7// in the Software without restriction, including without limitation the rights
8// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9// copies of the Software, and to permit persons to whom the Software is
10// furnished to do so, subject to the following conditions:
11//
12// The above copyright notice and this permission notice shall be included in all
13// copies or substantial portions of the Software.
14//
15// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21// SOFTWARE.
22
23#![doc = include_str!("../README.md")]
24
25use pest::Parser;
26use pest_derive::Parser;
27
28#[derive(Parser)]
29#[grammar = "grammar.pest"] // relative to src
30struct ChangeParser;
31
32/// A Comment with a command and arguments
33#[derive(Debug, PartialEq, serde::Serialize, Clone)]
34pub struct Comment {
35    command: Option<String>,
36    debug: CommentDebug,
37    args: Vec<String>,
38}
39
40/// Debug information for a Comment
41#[derive(Debug, PartialEq, serde::Serialize, Clone)]
42pub struct CommentDebug {
43    pub comment: String,
44    pub line: usize,
45    pub col: usize,
46}
47
/// Marker that introduces a command inside a comment.
///
/// Commands are written as `@cttt.<command>(<args>)`; this constant holds the
/// `@cttt` part and is also used to compute reported column positions.
pub static NAMESPACE: &str = "@cttt";
50
51/// Parse a string into a vector of Comments.
52pub fn parse(s: &str) -> Result<Vec<Comment>, pest::error::Error<Rule>> {
53    let parse = ChangeParser::parse(Rule::document, s).unwrap();
54
55    let mut comments: Vec<Comment> = vec![];
56
57    // make an iterator over the pairs in the rule
58    for pair in parse {
59        // match the rule, as the rule is an enum
60        match pair.as_rule() {
61            Rule::EOI => (),
62            Rule::document => {
63                // for each sub-rule, print the inner contents
64                for document in pair.into_inner() {
65                    match document.as_rule() {
66                        Rule::EOI => (),
67                        Rule::comment => {
68                            let mut command = None;
69                            let mut args: Vec<String> = vec![];
70                            let (line, col) = document.as_span().start_pos().line_col();
71
72                            let comment = document
73                                .as_span()
74                                .as_str()
75                                .to_string()
76                                .trim_end()
77                                .to_string();
78
79                            let col = comment.find(NAMESPACE).unwrap_or(0) + col - 1;
80
81                            // match the sub-rule
82                            for part in document.into_inner() {
83                                match part.as_rule() {
84                                    Rule::command => {
85                                        command = Some(part.as_span().as_str().to_string())
86                                    }
87                                    Rule::args => {
88                                        args = match part.as_span().as_str().trim() {
89                                            "" => vec![],
90                                            s => s
91                                                .trim()
92                                                .split(',')
93                                                .map(|s| s.to_string().trim().to_string())
94                                                .filter(|s| !s.is_empty())
95                                                .collect(),
96                                        }
97                                    }
98                                    _ => (),
99                                }
100                            }
101
102                            comments.push(Comment {
103                                args,
104                                command,
105                                debug: CommentDebug { comment, line, col },
106                            });
107                        }
108                        _ => unreachable!(),
109                    }
110                }
111            }
112            _ => unreachable!(),
113        }
114    }
115
116    Ok(comments)
117}
118
119/// Error identifying location of unknown command
120#[derive(Debug, PartialEq, serde::Serialize)]
121pub struct UnknownCommandError {
122    comment: String,
123    command: String,
124    col: usize,
125    line: usize,
126}
127
128/// Error identifying location of unknown command or other parsing error
129#[derive(Debug)]
130pub enum StrictParseError {
131    UnknownCommand(Vec<UnknownCommandError>),
132    Pest(pest::error::Error<Rule>),
133}
134
135/// Parse a string into a vector of Comments, and check for unknown commands.
136pub fn parse_strict(s: &str, commands: Vec<String>) -> Result<Vec<Comment>, StrictParseError> {
137    let comments = parse(s).map_err(StrictParseError::Pest)?;
138
139    let mut unknown_command_errors: Vec<UnknownCommandError> = vec![];
140
141    // check for unknown commands
142    comments.iter().for_each(|c| match &c.command {
143        Some(command) => {
144            if !commands.contains(command) {
145                let col = command.find(NAMESPACE).unwrap_or(0)
146                    + c.debug.col
147                    + NAMESPACE.len()
148                    + ".".len();
149
150                unknown_command_errors.push(UnknownCommandError {
151                    comment: c.debug.comment.clone(),
152                    command: c.command.clone().unwrap(),
153                    line: c.debug.line,
154                    col,
155                });
156            }
157        }
158        None => (),
159    });
160
161    if !unknown_command_errors.is_empty() {
162        return Err(StrictParseError::UnknownCommand(unknown_command_errors));
163    }
164
165    Ok(comments)
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    // Parse `source` and fail the test immediately if parsing returns an error.
    fn parse_ok(source: &str) -> Vec<Comment> {
        parse(source).unwrap()
    }

    // Two block comments on separate lines, each carrying one command.
    #[test]
    fn test_parse_basic() {
        let comments = parse_ok("/* @cttt.named(123) */\n/* @cttt.change(123,abc) */");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: named
          debug:
            comment: /* @cttt.named(123) */
            line: 1
            col: 3
          args:
            - "123"
        - command: change
          debug:
            comment: "/* @cttt.change(123,abc) */"
            line: 2
            col: 3
          args:
            - "123"
            - abc
        "###);
    }

    // A bare namespace yields a comment without a command or arguments.
    #[test]
    fn test_parse_no_command() {
        let comments = parse_ok("// @cttt");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: ~
          debug:
            comment: // @cttt
            line: 1
            col: 3
          args: []
        "###);
    }

    // Comments interleaved with ordinary code keep their own line numbers.
    #[test]
    fn test_parse_nested() {
        let source =
            "// @cttt.named(123)\n// @cttt.named(2)\nx +=1;\n// @cttt.change(3,4,5)\n// @cttt.change(1)";
        let comments = parse_ok(source);
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: named
          debug:
            comment: // @cttt.named(123)
            line: 1
            col: 3
          args:
            - "123"
        - command: named
          debug:
            comment: // @cttt.named(2)
            line: 2
            col: 3
          args:
            - "2"
        - command: change
          debug:
            comment: "// @cttt.change(3,4,5)"
            line: 4
            col: 3
          args:
            - "3"
            - "4"
            - "5"
        - command: change
          debug:
            comment: // @cttt.change(1)
            line: 5
            col: 3
          args:
            - "1"
        "###);
    }

    // Upper-case namespace and command are accepted. The first comment reports
    // col 0 because the column lookup searches for the lower-case namespace.
    #[test]
    fn test_parse_case_insensitive() {
        let comments = parse_ok("// @CTTT.named(SPECIAL_BLOCK)\n// @cttt.CHANGE(./foo.txt,abc)");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: named
          debug:
            comment: // @CTTT.named(SPECIAL_BLOCK)
            line: 1
            col: 0
          args:
            - SPECIAL_BLOCK
        - command: CHANGE
          debug:
            comment: "// @cttt.CHANGE(./foo.txt,abc)"
            line: 2
            col: 3
          args:
            - "./foo.txt"
            - abc
        "###);
    }

    // Command names may contain hyphens.
    #[test]
    fn test_parse_kebab_command() {
        let comments = parse_ok("// @cttt.named-bar-baz()");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: named-bar-baz
          debug:
            comment: // @cttt.named-bar-baz()
            line: 1
            col: 3
          args: []
        "###);
    }

    // Whitespace-only argument lists produce no arguments.
    #[test]
    fn test_parse_args_whitespace() {
        let comments = parse_ok("// @cttt.change( )");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: change
          debug:
            comment: // @cttt.change( )
            line: 1
            col: 3
          args: []
        "###);
    }

    // Whitespace around arguments is trimmed.
    #[test]
    fn test_parse_args_whitespace_separated() {
        let comments = parse_ok("// @cttt.change(foo, bar)");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: change
          debug:
            comment: "// @cttt.change(foo, bar)"
            line: 1
            col: 3
          args:
            - foo
            - bar
        "###);
    }

    // A trailing comma does not add an empty argument.
    #[test]
    fn test_parse_args_whitespace_trailing_comma() {
        let comments = parse_ok("// @cttt.change(foo, bar,)");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: change
          debug:
            comment: "// @cttt.change(foo, bar,)"
            line: 1
            col: 3
          args:
            - foo
            - bar
        "###);
    }

    // Arguments may mix dots, slashes, underscores, hyphens, letters and digits.
    #[test]
    fn test_parse_args_characters() {
        let comments = parse_ok("// @cttt.change(./aFoo_Bar-123)");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: change
          debug:
            comment: // @cttt.change(./aFoo_Bar-123)
            line: 1
            col: 3
          args:
            - "./aFoo_Bar-123"
        "###);
    }

    // Relative and absolute file paths are valid arguments.
    #[test]
    fn test_parse_args_file_path() {
        let comments = parse_ok("// @cttt.change(./foo/README.md, /bar/foo.rs)");
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: change
          debug:
            comment: "// @cttt.change(./foo/README.md, /bar/foo.rs)"
            line: 1
            col: 3
          args:
            - "./foo/README.md"
            - /bar/foo.rs
        "###);
    }

    // The same command is recognised inside a range of comment delimiters.
    #[test]
    fn test_parse_comment_syntax() {
        let delimiters = [
            ("--", ""),
            ("!", ""),
            ("(*", "*)"),
            ("{-", "-}"),
            ("{", "}"),
            ("/*", "*/"),
            ("/**", "*/"),
            ("//", ""),
            ("\"\"\"", "\"\"\""),
            ("#", ""),
            ("<!--", "-->"),
        ];

        for (leading, trailing) in delimiters {
            let source = format!("{} {}.{} {}", leading, NAMESPACE, "foo()", trailing);
            let parsed = parse_ok(&source);

            assert_eq!(parsed[0].command.as_deref(), Some("foo"));
            assert_eq!(parsed[0].debug.comment, source.trim_end());
        }
    }

    // Commands inside multi-line doc comments are found on their own line,
    // with the column reflecting the leading indentation.
    #[test]
    fn test_parse_comment_multiline() {
        let source = "
            /**
             * @cttt.named(123)
             */
            x = 123;
            /**
             * @cttt.noop()
             */";
        let comments = parse_ok(source);
        insta::assert_yaml_snapshot!(comments, @r###"
        ---
        - command: named
          debug:
            comment: "             * @cttt.named(123)"
            line: 3
            col: 15
          args:
            - "123"
        - command: noop
          debug:
            comment: "             * @cttt.noop()"
            line: 7
            col: 15
          args: []
        "###);
    }

    // Strict parsing reports commands missing from the allowed list; the bare
    // namespace on the second line carries no command and is not reported.
    #[test]
    fn test_parse_strict_commands() {
        let source = "// @cttt.unknown()\n// @cttt";
        let allowed = vec!["foo".to_string(), "bar".to_string()];

        let failure = parse_strict(source, allowed).unwrap_err();

        match failure {
            StrictParseError::UnknownCommand(errors) => {
                insta::assert_yaml_snapshot!(errors, @r###"
                ---
                - comment: // @cttt.unknown()
                  command: unknown
                  col: 9
                  line: 1
                "###);
            }
            _ => panic!("unexpected error"),
        }
    }
}