Skip to main content

harper_comments/
comment_parser.rs

1use std::path::Path;
2
3use crate::comment_parsers;
4use comment_parsers::{Go, JavaDoc, JsDoc, Lua, Solidity, Unit};
5use harper_core::Token;
6use harper_core::parsers::{self, MarkdownOptions, Parser};
7use harper_core::spell::MutableDictionary;
8use tree_sitter::Node;
9
10use crate::masker::CommentMasker;
11
/// A [`Parser`] for the comments of a source file.
///
/// Wraps a [`parsers::Mask`] that pairs a [`CommentMasker`] with a boxed,
/// language-specific parser chosen in `new_from_language_id`.
pub struct CommentParser {
    /// The masking parser that all calls on this type are forwarded to.
    inner: parsers::Mask<CommentMasker, Box<dyn Parser>>,
}
15
16impl CommentParser {
17    pub fn create_ident_dict(&self, source: &[char]) -> Option<MutableDictionary> {
18        self.inner.masker.create_ident_dict(source)
19    }
20
21    pub fn new_from_language_id(
22        language_id: &str,
23        markdown_options: MarkdownOptions,
24    ) -> Option<Self> {
25        let language = match language_id {
26            "c" => tree_sitter_c::LANGUAGE,
27            "clojure" => tree_sitter_clojure::LANGUAGE,
28            "cmake" => tree_sitter_cmake::LANGUAGE,
29            "cpp" => tree_sitter_cpp::LANGUAGE,
30            "csharp" => tree_sitter_c_sharp::LANGUAGE,
31            "dart" => harper_tree_sitter_dart::LANGUAGE,
32            "go" => tree_sitter_go::LANGUAGE,
33            "groovy" => tree_sitter_groovy::LANGUAGE,
34            "haskell" => tree_sitter_haskell::LANGUAGE,
35            "daml" => tree_sitter_haskell::LANGUAGE,
36            "java" => tree_sitter_java::LANGUAGE,
37            "javascript" => tree_sitter_javascript::LANGUAGE,
38            "javascriptreact" => tree_sitter_typescript::LANGUAGE_TSX,
39            "kotlin" => tree_sitter_kotlin_ng::LANGUAGE,
40            "lua" => tree_sitter_lua::LANGUAGE,
41            "nix" => tree_sitter_nix::LANGUAGE,
42            "php" => tree_sitter_php::LANGUAGE_PHP,
43            "powershell" => tree_sitter_powershell::LANGUAGE,
44            "ruby" => tree_sitter_ruby::LANGUAGE,
45            "rust" => tree_sitter_rust::LANGUAGE,
46            "scala" => tree_sitter_scala::LANGUAGE,
47            "shellscript" => tree_sitter_bash::LANGUAGE,
48            "solidity" => tree_sitter_solidity::LANGUAGE,
49            "swift" => tree_sitter_swift::LANGUAGE,
50            "toml" => tree_sitter_toml_ng::LANGUAGE,
51            "typescript" => tree_sitter_typescript::LANGUAGE_TYPESCRIPT,
52            "typescriptreact" => tree_sitter_typescript::LANGUAGE_TSX,
53            "zig" => tree_sitter_zig::LANGUAGE,
54            _ => return None,
55        };
56
57        let comment_parser: Box<dyn Parser> = match language_id {
58            "go" => Box::new(Go::new_markdown(markdown_options)),
59            "java" => Box::new(JavaDoc::default()),
60            "javascript" | "javascriptreact" | "typescript" | "typescriptreact" => {
61                Box::new(JsDoc::new_markdown(markdown_options))
62            }
63            "lua" => Box::new(Lua::new_markdown(markdown_options)),
64            "solidity" => Box::new(Solidity::new_markdown(markdown_options)),
65            _ => Box::new(Unit::new_markdown(markdown_options)),
66        };
67
68        Some(Self {
69            inner: parsers::Mask::new(
70                CommentMasker::new(language.into(), Self::node_condition),
71                comment_parser,
72            ),
73        })
74    }
75
76    /// Infer the programming language from a provided filename.
77    pub fn new_from_filename(filename: &Path, markdown_options: MarkdownOptions) -> Option<Self> {
78        Self::new_from_language_id(Self::filename_to_filetype(filename)?, markdown_options)
79    }
80
81    /// Convert a provided path to a corresponding Language Server Protocol file
82    /// type.
83    ///
84    /// Note to contributors: try to keep this in sync with
85    /// [`Self::new_from_language_id`]
86    fn filename_to_filetype(path: &Path) -> Option<&'static str> {
87        Some(match path.extension()?.to_str()? {
88            "c" => "c",
89            "bb" | "cljc" | "cljd" | "clj" | "cljs" => "clojure",
90            "cmake" => "cmake",
91            "cpp" | "h" => "cpp",
92            "cs" => "csharp",
93            "dart" => "dart",
94            "go" => "go",
95            "groovy" | "gradle" => "groovy",
96            "hs" => "haskell",
97            "daml" => "daml",
98            "java" => "java",
99            "js" => "javascript",
100            "jsx" => "javascriptreact",
101            "kt" | "kts" => "kotlin",
102            "lua" => "lua",
103            "nix" => "nix",
104            "php" => "php",
105            "ps1" | "psd1" | "psm1" => "powershell",
106            "rb" => "ruby",
107            "rs" => "rust",
108            "sbt" | "sc" | "scala" | "mill" => "scala",
109            "bash" | "sh" => "shellscript",
110            "sol" => "solidity",
111            "swift" => "swift",
112            "toml" => "toml",
113            "ts" => "typescript",
114            "tsx" => "typescriptreact",
115            "zig" => "zig",
116            _ => return None,
117        })
118    }
119
120    fn node_condition(n: &Node) -> bool {
121        n.kind().contains("comment")
122    }
123}
124
125impl Parser for CommentParser {
126    fn parse(&self, source: &[char]) -> Vec<Token> {
127        self.inner.parse(source)
128    }
129}
130
#[cfg(test)]
mod tests {
    use super::CommentParser;
    use harper_core::parsers::{MarkdownOptions, StrParser};

    /// Parsing the Java snippet `//{@j` must finish within the time limit.
    ///
    /// The parse runs on a worker thread so that a non-terminating parse
    /// fails the test instead of blocking the test runner forever.
    #[test]
    fn hang() {
        use std::sync::mpsc::{RecvTimeoutError, channel};
        use std::thread;
        use std::time::Duration;

        let (tx, rx) = channel::<()>();

        let handle = thread::spawn(move || {
            let opts = MarkdownOptions::default();
            let parser = CommentParser::new_from_language_id("java", opts).unwrap();
            let _res = parser.parse_str("//{@j");
            tx.send(()).expect("send failed");
        });

        match rx.recv_timeout(Duration::from_secs(10)) {
            Ok(()) => {}
            Err(RecvTimeoutError::Timeout) => panic!("timed out"),
            // The worker dropped its sender without signalling, which means
            // it panicked. Do not misreport that as a timeout; the `join`
            // below surfaces the failure.
            Err(RecvTimeoutError::Disconnected) => {}
        }

        handle.join().expect("failed to join");
    }
}