vim_plugin_metadata/
parser.rs

1use crate::{Error, VimNode, VimPlugin, VimPluginSection};
2use itertools::Itertools;
3use std::collections::{HashMap, HashSet};
4use std::path::Path;
5use std::{fs, str};
6use tree_sitter::{Node, Parser, Point, TreeCursor};
7use walkdir::WalkDir;
8
/// Parser that extracts [`VimNode`]s from Vim script sources.
///
/// Wraps a tree-sitter [`Parser`]. [`VimParser::new`] is the constructor that
/// installs the Vim grammar on the wrapped parser.
///
/// NOTE(review): the derived `Default` builds the wrapped parser without the
/// `set_language` call that `new` performs; presumably such an instance cannot
/// parse anything until a language is configured. Confirm before relying on
/// `VimParser::default()`.
#[derive(Default)]
pub struct VimParser {
    /// Underlying tree-sitter parser used for every parse call.
    parser: Parser,
}
13
14impl VimParser {
15    pub fn new() -> crate::Result<Self> {
16        let mut parser = Parser::new();
17        parser.set_language(&tree_sitter_vim::language())?;
18        Ok(Self { parser })
19    }
20
21    pub fn parse_plugin_dir<P: AsRef<Path> + Copy>(&mut self, path: P) -> crate::Result<VimPlugin> {
22        let mut nodes_for_sections: HashMap<String, Vec<VimNode>> = HashMap::new();
23        let section_order = ["instant", "plugin", "syntax", "autoload"];
24        let sections_exclude = HashSet::from(["vroom"]);
25        for entry in WalkDir::new(path) {
26            let entry = entry?;
27            if !(entry.file_type().is_file()
28                && entry.file_name().to_string_lossy().ends_with(".vim"))
29            {
30                continue;
31            }
32            let section_name = entry
33                .path()
34                .strip_prefix(path)
35                .unwrap()
36                .iter()
37                .nth(0)
38                .expect("path should be a strict prefix of path under it")
39                .to_string_lossy();
40            if sections_exclude.contains(section_name.as_ref()) {
41                continue;
42            }
43            let module_contents = fs::read_to_string(entry.path())?;
44            let module_nodes = self.parse_module(module_contents.as_str())?;
45            nodes_for_sections
46                .entry(section_name.into())
47                .or_default()
48                .extend(module_nodes);
49        }
50        let sections = Self::sorted_by_partial_key_order(
51            IntoIterator::into_iter(nodes_for_sections),
52            &section_order,
53        )
54        .map(|(name, nodes)| VimPluginSection { name, nodes })
55        .collect();
56        Ok(VimPlugin { content: sections })
57    }
58
59    pub fn parse_module(&mut self, code: &str) -> crate::Result<Vec<VimNode>> {
60        let tree = self.parser.parse(code, None).ok_or(Error::ParsingFailure)?;
61        let mut tree_cursor = tree.walk();
62        let mut nodes: Vec<VimNode> = Vec::new();
63        let mut last_block_comment: Option<(String, Point)> = None;
64        tree_cursor.goto_first_child();
65        loop {
66            let node = tree_cursor.node();
67            if let Some((finished_comment_text, _)) =
68                last_block_comment.take_if(|(_, next_pos)| *next_pos != node.start_position())
69            {
70                // Block comment wasn't immediately above the next node.
71                // Treat it as bare standalone doc comment.
72                nodes.push(VimNode::StandaloneDocComment(finished_comment_text));
73            }
74            match node.kind() {
75                "comment" => {
76                    if let Some((finished_comment_text, _)) = last_block_comment.take() {
77                        // New comment block after dangling comment block.
78                        nodes.push(VimNode::StandaloneDocComment(finished_comment_text));
79                    }
80                    last_block_comment =
81                        Self::consume_block_comment(&mut tree_cursor, code.as_bytes());
82                }
83                "function_definition" => {
84                    let doc = last_block_comment
85                        .take()
86                        .map(|(comment_text, _)| comment_text);
87                    if let Some(funcname) =
88                        Self::get_funcname_for_def(&mut tree_cursor, code.as_bytes())
89                    {
90                        nodes.push(VimNode::Function {
91                            name: funcname.to_owned(),
92                            doc,
93                        });
94                    } else {
95                        eprintln!(
96                            "Failed to find function name for function_definition at {:?}",
97                            tree_cursor.node().start_position()
98                        );
99                    }
100                }
101                _ => {
102                    // Silently ignore other node kinds.
103                }
104            }
105            if !tree_cursor.goto_next_sibling() {
106                break;
107            }
108        }
109        // Consume any dangling last_block_comment.
110        if let Some((comment_text, _)) = last_block_comment.take() {
111            nodes.push(VimNode::StandaloneDocComment(comment_text));
112        };
113        Ok(nodes)
114    }
115
116    fn sorted_by_partial_key_order<T>(
117        iter: impl Iterator<Item = (String, Vec<T>)>,
118        order: &[&str],
119    ) -> impl Iterator<Item = (String, Vec<T>)> {
120        let order_index: HashMap<_, _> = order
121            .iter()
122            .enumerate()
123            .map(|(i, name)| (*name, i))
124            .collect();
125        iter.sorted_by(|(k1, _), (k2, _)| {
126            Ord::cmp(
127                &(order_index.get(k1.as_str()).unwrap_or(&order.len()), k1),
128                &(order_index.get(k2.as_str()).unwrap_or(&order.len()), k2),
129            )
130        })
131    }
132
133    fn get_funcname_for_def<'a>(tree_cursor: &mut TreeCursor, source: &'a [u8]) -> Option<&'a str> {
134        let node = tree_cursor.node();
135        assert_eq!(node.kind(), "function_definition");
136        let mut sub_cursor = node.walk();
137        let decl = node
138            .children(&mut sub_cursor)
139            .find(|c| c.kind() == "function_declaration");
140        let ident = decl.and_then(|decl| {
141            decl.children(&mut sub_cursor)
142                .find(|c| c.kind() == "identifier" || c.kind() == "scoped_identifier")
143        });
144
145        ident.as_ref().map(|n| Self::get_node_text(n, source))
146    }
147
148    fn consume_block_comment(
149        tree_cursor: &mut TreeCursor,
150        source: &[u8],
151    ) -> Option<(String, Point)> {
152        let node = tree_cursor.node();
153        assert_eq!(node.kind(), "comment");
154        let cur_pos = node.start_position();
155        let mut next_pos = Point {
156            row: cur_pos.row + 1,
157            ..cur_pos
158        };
159
160        let mut comment_lines: Vec<String> = Vec::new();
161        let comment_node_text = Self::get_node_text(&node, source);
162        if let Some(leader_text) = comment_node_text.strip_prefix("\"\"") {
163            // Valid leader, start comment block.
164            if !leader_text.trim().is_empty() {
165                // Treat trailing text after leader as first comment line.
166                comment_lines.push(
167                    leader_text
168                        .strip_prefix(" ")
169                        .unwrap_or(leader_text)
170                        .to_owned(),
171                );
172            }
173        } else {
174            // Regular non-doc comment, ignore and let parsing skip.
175            return None;
176        }
177
178        // Consume remaining comment lines at same indentation.
179        while tree_cursor.goto_next_sibling() {
180            let node = tree_cursor.node();
181            if node.kind() != "comment" || node.start_position() != next_pos {
182                // Back up so cursor still points to last consumed node.
183                tree_cursor.goto_previous_sibling();
184                break;
185            }
186            next_pos = Point {
187                row: next_pos.row + 1,
188                ..next_pos
189            };
190            let node_text = Self::get_node_text(&node, source);
191            let comment_body = match &node_text[1..] {
192                t if t.starts_with(" ") => &t[1..],
193                t => t,
194            };
195            comment_lines.push(comment_body.to_owned());
196        }
197        Some((comment_lines.join("\n"), next_pos))
198    }
199
200    fn get_node_text<'a>(node: &Node, source: &'a [u8]) -> &'a str {
201        str::from_utf8(&source[node.byte_range()]).unwrap()
202    }
203}
204
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::fs;
    use std::path::Path;
    use tempfile::tempdir;

    /// Parses `code` as a single module with a freshly constructed parser.
    fn parse(code: &str) -> Vec<VimNode> {
        let mut parser = VimParser::new().unwrap();
        parser.parse_module(code).unwrap()
    }

    /// Parses the plugin rooted at `root` with a freshly constructed parser.
    fn parse_dir(root: &Path) -> VimPlugin {
        let mut parser = VimParser::new().unwrap();
        parser.parse_plugin_dir(root).unwrap()
    }

    /// Builds the expected node for a standalone doc comment.
    fn standalone_doc(text: &str) -> VimNode {
        VimNode::StandaloneDocComment(text.into())
    }

    /// Builds the expected node for a function with an optional doc comment.
    fn function(name: &str, doc: Option<&str>) -> VimNode {
        VimNode::Function {
            name: name.into(),
            doc: doc.map(str::to_owned),
        }
    }

    /// Builds the expected plugin section with the given name and nodes.
    fn section(name: &str, nodes: Vec<VimNode>) -> VimPluginSection {
        VimPluginSection {
            name: name.into(),
            nodes,
        }
    }

    /// Writes `contents` to `root/subpath`, creating parent directories.
    fn create_plugin_file<P: AsRef<Path>>(root: &Path, subpath: P, contents: &str) {
        let target = root.join(subpath);
        let parent_dir = target.parent().unwrap();
        fs::create_dir_all(parent_dir).unwrap();
        fs::write(&target, contents).unwrap();
    }

    #[test]
    fn parse_module_empty() {
        assert_eq!(parse(""), vec![]);
    }

    #[test]
    fn parse_module_one_nondoc_comment() {
        assert_eq!(parse("\" A comment"), vec![]);
    }

    #[test]
    fn parse_module_one_doc() {
        let code = r#"
""
" Foo
"#;
        assert_eq!(parse(code), vec![standalone_doc("Foo")]);
    }

    #[test]
    fn parse_module_messy_multiline_doc() {
        let code = r#"
"" Foo
"bar
"#;
        assert_eq!(parse(code), vec![standalone_doc("Foo\nbar")]);
    }

    #[test]
    fn parse_module_bare_function() {
        let code = r#"
func MyFunc() abort
  return 1
endfunc
"#;
        assert_eq!(parse(code), vec![function("MyFunc", None)]);
    }

    #[test]
    fn parse_module_doc_and_function() {
        let code = r#"
""
" Does a thing.
"
" Call and enjoy.
func MyFunc() abort
  return 1
endfunc
"#;
        assert_eq!(
            parse(code),
            vec![function(
                "MyFunc",
                Some("Does a thing.\n\nCall and enjoy.")
            )]
        );
    }

    #[test]
    fn parse_module_two_docs() {
        let code = r#"
"" One doc

"" Another doc
"#;
        assert_eq!(
            parse(code),
            vec![standalone_doc("One doc"), standalone_doc("Another doc")]
        );
    }

    #[test]
    fn parse_module_different_doc_indentations() {
        let code = r#"
"" One doc
 " Ignored comment
"#;
        // The comment at a different indentation is treated as a normal
        // non-doc comment and ignored.
        assert_eq!(parse(code), vec![standalone_doc("One doc")]);
    }

    #[test]
    fn parse_module_two_funcs() {
        let code = r#"func FuncOne() | endfunc
func FuncTwo() | endfunc"#;
        assert_eq!(
            parse(code),
            vec![function("FuncOne", None), function("FuncTwo", None)]
        );
    }

    #[test]
    fn parse_module_autoload_funcname() {
        assert_eq!(
            parse("func foo#bar#Baz() | endfunc"),
            vec![function("foo#bar#Baz", None)]
        );
    }

    #[test]
    fn parse_module_scriptlocal_funcname() {
        assert_eq!(
            parse("func s:SomeFunc() | endfunc"),
            vec![function("s:SomeFunc", None)]
        );
    }

    #[test]
    fn parse_module_nested_func() {
        let code = r#"
function! Outer() abort
  let l:thing = {}
  function l:thing.Inner() abort
    return 1
  endfunction
  return l:thing
endfunction
"#;
        assert_eq!(
            parse(code),
            vec![
                function("Outer", None),
                // TODO: Should have more nodes for inner function.
            ]
        );
    }

    #[test]
    fn parse_module_unicode() {
        let code = r#"
""
" Fun stuff 🎈 ( ͡° ͜ʖ ͡°)
"#;
        assert_eq!(
            parse(code),
            vec![standalone_doc("Fun stuff 🎈 ( ͡° ͜ʖ ͡°)")]
        );
    }

    #[test]
    fn parse_plugin_dir_empty() {
        let tmp_dir = tempdir().unwrap();
        assert_eq!(parse_dir(tmp_dir.path()), VimPlugin { content: vec![] });
    }

    #[test]
    fn parse_plugin_dir_one_autoload_func() {
        let tmp_dir = tempdir().unwrap();
        create_plugin_file(
            tmp_dir.path(),
            "autoload/foo.vim",
            r#"
func! foo#Bar() abort
  sleep 1
endfunc
"#,
        );
        assert_eq!(
            parse_dir(tmp_dir.path()),
            VimPlugin {
                content: vec![section("autoload", vec![function("foo#Bar", None)])]
            }
        );
    }

    #[test]
    fn parse_plugin_dir_subdirs_instant_plugin_autoload_others() {
        let tmp_dir = tempdir().unwrap();
        for subdir in ["autoload", "plugin", "instant", "other1", "other2"] {
            create_plugin_file(tmp_dir.path(), format!("{subdir}/x.vim"), "");
        }
        let expected_sections = ["instant", "plugin", "autoload", "other1", "other2"]
            .into_iter()
            .map(|name| section(name, vec![]))
            .collect();
        assert_eq!(
            parse_dir(tmp_dir.path()),
            VimPlugin {
                content: expected_sections
            }
        );
    }
}