Skip to main content

mq_view/
highlighter.rs

1use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter};
2
3/// Syntax highlighter supporting various programming languages and HTML.
4///
5/// This struct uses tree-sitter to provide syntax highlighting with ANSI color codes
6/// for various programming languages and markup formats.
7///
8/// # Examples
9///
10/// ```rust
11/// use mq_view::SyntaxHighlighter;
12///
13/// let mut highlighter = SyntaxHighlighter::new();
14/// let code = "fn main() { println!(\"Hello\"); }";
15/// let highlighted = highlighter.highlight(code, Some("rust"));
16/// println!("{}", highlighted);
17/// ```
18pub struct SyntaxHighlighter {
19    highlighter: Highlighter,
20}
21
22impl SyntaxHighlighter {
23    pub fn new() -> Self {
24        Self {
25            highlighter: Highlighter::new(),
26        }
27    }
28
29    /// Get the appropriate tree-sitter language and highlight configuration for a given language
30    fn get_highlight_config(lang: &str) -> Option<HighlightConfiguration> {
31        let (language, query) = match lang.to_lowercase().as_str() {
32            #[cfg(feature = "lang-rust")]
33            "rust" | "rs" => (
34                tree_sitter_rust::LANGUAGE.into(),
35                tree_sitter_rust::HIGHLIGHTS_QUERY,
36            ),
37            #[cfg(feature = "lang-javascript")]
38            "javascript" | "js" => (
39                tree_sitter_javascript::LANGUAGE.into(),
40                tree_sitter_javascript::HIGHLIGHT_QUERY,
41            ),
42            #[cfg(feature = "lang-typescript")]
43            "typescript" | "ts" => (
44                tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
45                tree_sitter_typescript::HIGHLIGHTS_QUERY,
46            ),
47            #[cfg(feature = "lang-typescript")]
48            "tsx" => (
49                tree_sitter_typescript::LANGUAGE_TSX.into(),
50                tree_sitter_typescript::HIGHLIGHTS_QUERY,
51            ),
52            #[cfg(feature = "lang-python")]
53            "python" | "py" => (
54                tree_sitter_python::LANGUAGE.into(),
55                tree_sitter_python::HIGHLIGHTS_QUERY,
56            ),
57            #[cfg(feature = "lang-go")]
58            "go" => (
59                tree_sitter_go::LANGUAGE.into(),
60                tree_sitter_go::HIGHLIGHTS_QUERY,
61            ),
62            #[cfg(feature = "lang-html")]
63            "html" => (
64                tree_sitter_html::LANGUAGE.into(),
65                tree_sitter_html::HIGHLIGHTS_QUERY,
66            ),
67            #[cfg(feature = "lang-css")]
68            "css" => (
69                tree_sitter_css::LANGUAGE.into(),
70                tree_sitter_css::HIGHLIGHTS_QUERY,
71            ),
72            #[cfg(feature = "lang-json")]
73            "json" => (
74                tree_sitter_json::LANGUAGE.into(),
75                tree_sitter_json::HIGHLIGHTS_QUERY,
76            ),
77            #[cfg(feature = "lang-bash")]
78            "bash" | "sh" => (
79                tree_sitter_bash::LANGUAGE.into(),
80                tree_sitter_bash::HIGHLIGHT_QUERY,
81            ),
82            #[cfg(feature = "lang-c")]
83            "c" => (
84                tree_sitter_c::LANGUAGE.into(),
85                tree_sitter_c::HIGHLIGHT_QUERY,
86            ),
87            #[cfg(feature = "lang-cpp")]
88            "cpp" | "c++" | "cxx" => (
89                tree_sitter_cpp::LANGUAGE.into(),
90                tree_sitter_cpp::HIGHLIGHT_QUERY,
91            ),
92            #[cfg(feature = "lang-java")]
93            "java" => (
94                tree_sitter_java::LANGUAGE.into(),
95                tree_sitter_java::HIGHLIGHTS_QUERY,
96            ),
97            #[cfg(feature = "lang-haskell")]
98            "hs" | "haskell" => (
99                tree_sitter_haskell::LANGUAGE.into(),
100                tree_sitter_haskell::HIGHLIGHTS_QUERY,
101            ),
102            #[cfg(feature = "lang-elm")]
103            "elm" => (
104                tree_sitter_elm::LANGUAGE.into(),
105                tree_sitter_elm::HIGHLIGHTS_QUERY,
106            ),
107            #[cfg(feature = "lang-mq")]
108            "mq" => (
109                tree_sitter_mq::LANGUAGE.into(),
110                tree_sitter_mq::HIGHLIGHTS_QUERY,
111            ),
112            #[cfg(feature = "lang-ocaml")]
113            "ocaml" | "ml" => (
114                tree_sitter_ocaml::LANGUAGE_OCAML.into(),
115                tree_sitter_ocaml::HIGHLIGHTS_QUERY,
116            ),
117            #[cfg(feature = "lang-swift")]
118            "swift" => (
119                tree_sitter_swift::LANGUAGE.into(),
120                tree_sitter_swift::HIGHLIGHTS_QUERY,
121            ),
122            #[cfg(feature = "lang-elixir")]
123            "ex" | "exs" => (
124                tree_sitter_elixir::LANGUAGE.into(),
125                tree_sitter_elixir::HIGHLIGHTS_QUERY,
126            ),
127            _ => return None,
128        };
129
130        let mut config = HighlightConfiguration::new(language, "", query, "", "").ok()?;
131
132        config.configure(&[
133            "attribute",
134            "constant",
135            "function.builtin",
136            "function",
137            "keyword",
138            "operator",
139            "property",
140            "punctuation",
141            "punctuation.bracket",
142            "punctuation.delimiter",
143            "string",
144            "string.special",
145            "tag",
146            "type",
147            "type.builtin",
148            "variable",
149            "variable.builtin",
150            "variable.parameter",
151            "comment",
152            "number",
153            "boolean",
154            "escape",
155            "label",
156            "namespace",
157            "constructor",
158            "embedded",
159        ]);
160
161        Some(config)
162    }
163
164    /// Highlight code and return colored output
165    pub fn highlight(&mut self, code: &str, lang: Option<&str>) -> String {
166        // If no language specified or config not available, return plain text
167        let Some(lang) = lang else {
168            return code.to_string();
169        };
170
171        let Some(config) = Self::get_highlight_config(lang) else {
172            return code.to_string();
173        };
174
175        let highlights = match self
176            .highlighter
177            .highlight(&config, code.as_bytes(), None, |_| None)
178        {
179            Ok(h) => h,
180            Err(_) => return code.to_string(),
181        };
182
183        let mut result = String::new();
184        let mut current_pos = 0;
185
186        for event in highlights {
187            match event {
188                Ok(HighlightEvent::Source { start, end }) => {
189                    if start > current_pos {
190                        // Add unhighlighted text
191                        result.push_str(&code[current_pos..start]);
192                    }
193                    result.push_str(&code[start..end]);
194                    current_pos = end;
195                }
196                Ok(HighlightEvent::HighlightStart(Highlight(idx))) => {
197                    // Apply color based on highlight type
198                    let color_code = Self::get_color_for_highlight(idx);
199                    result.push_str(color_code);
200                }
201                Ok(HighlightEvent::HighlightEnd) => {
202                    // Reset color
203                    result.push_str("\x1b[0m");
204                }
205                Err(_) => {}
206            }
207        }
208
209        // Add any remaining text
210        if current_pos < code.len() {
211            result.push_str(&code[current_pos..]);
212        }
213
214        result
215    }
216
217    /// Map highlight index to ANSI color codes
218    fn get_color_for_highlight(idx: usize) -> &'static str {
219        match idx {
220            0 => "\x1b[36m",  // attribute - cyan
221            1 => "\x1b[35m",  // constant - magenta
222            2 => "\x1b[33m",  // function.builtin - yellow
223            3 => "\x1b[34m",  // function - blue
224            4 => "\x1b[95m",  // keyword - bright magenta
225            5 => "\x1b[37m",  // operator - white
226            6 => "\x1b[36m",  // property - cyan
227            7 => "\x1b[90m",  // punctuation - bright black
228            8 => "\x1b[90m",  // punctuation.bracket - bright black
229            9 => "\x1b[90m",  // punctuation.delimiter - bright black
230            10 => "\x1b[32m", // string - green
231            11 => "\x1b[92m", // string.special - bright green
232            12 => "\x1b[34m", // tag - blue
233            13 => "\x1b[33m", // type - yellow
234            14 => "\x1b[93m", // type.builtin - bright yellow
235            15 => "\x1b[37m", // variable - white
236            16 => "\x1b[35m", // variable.builtin - magenta
237            17 => "\x1b[36m", // variable.parameter - cyan
238            18 => "\x1b[90m", // comment - bright black (gray)
239            19 => "\x1b[35m", // number - magenta
240            20 => "\x1b[35m", // boolean - magenta
241            21 => "\x1b[36m", // escape - cyan
242            22 => "\x1b[33m", // label - yellow
243            23 => "\x1b[36m", // namespace - cyan
244            24 => "\x1b[33m", // constructor - yellow
245            25 => "\x1b[37m", // embedded - white
246            _ => "\x1b[0m",   // default - reset
247        }
248    }
249}
250
251impl Default for SyntaxHighlighter {
252    fn default() -> Self {
253        Self::new()
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    // Each case is only compiled when the matching `lang-*` feature is
    // enabled; the highlighted output must contain an ANSI escape sequence.
    #[rstest]
    #[cfg_attr(
        feature = "lang-rust",
        case::rust("rust", r#"fn main() { println!("Hello, world!"); }"#)
    )]
    #[cfg_attr(
        feature = "lang-python",
        case::python("python", r#"def main(): print("Hello, world!")"#)
    )]
    #[cfg_attr(
        feature = "lang-javascript",
        case::js("javascript", r#"function main() { console.log('Hello, world!'); }"#)
    )]
    #[cfg_attr(
        feature = "lang-typescript",
        case::ts(
            "typescript",
            r#"function main(): void { console.log('Hello, world!'); }"#
        )
    )]
    #[cfg_attr(
        feature = "lang-go",
        case::go("go", r#"func main() { fmt.Println("Hello, world!") }"#)
    )]
    #[cfg_attr(feature = "lang-html", case::html("html", r#"<h1>Hello</h1>"#))]
    #[cfg_attr(feature = "lang-css", case::css("css", r#"body { color: red; }"#))]
    #[cfg_attr(feature = "lang-json", case::json("json", r#"{ "hello": "world" }"#))]
    #[cfg_attr(feature = "lang-bash", case::bash("bash", r#"echo 'Hello, world!'"#))]
    #[cfg_attr(
        feature = "lang-c",
        case::c("c", r#"int main() { printf("Hello, world!"); }"#)
    )]
    #[cfg_attr(feature = "lang-java", case::java("java", r#"public class Main { public static void main(String[] args) { System.out.println("Hello, world!"); } }"#))]
    #[cfg_attr(
        feature = "lang-haskell",
        case::haskell("haskell", r#"main = putStrLn "Hello, world!""#)
    )]
    #[cfg_attr(
        feature = "lang-elm",
        case::elm("elm", r#"main = text "Hello, world!""#)
    )]
    #[cfg_attr(feature = "lang-mq", case::mq("mq", r#"fn(): "Hello, world!""#))]
    #[cfg_attr(feature = "lang-mq", case::bool("mq", r#"fn(): true"#))]
    #[cfg_attr(feature = "lang-mq", case::number("mq", r#"fn(): 42"#))]
    fn test_highlighting_for_supported_languages(#[case] lang: &str, #[case] code: &str) {
        let mut highlighter = SyntaxHighlighter::new();
        let result = highlighter.highlight(code, Some(lang));
        assert!(
            result.contains("\x1b["),
            "Expected ANSI escape codes for language: {}",
            lang
        );
    }

    // Languages without a matching arm must come back untouched.
    #[rstest]
    #[case("unknown", "some code")]
    #[case("unsupported", "another code")]
    fn test_highlighting_for_unsupported_languages(#[case] lang: &str, #[case] code: &str) {
        let mut highlighter = SyntaxHighlighter::new();
        let result = highlighter.highlight(code, Some(lang));
        assert_eq!(
            result, code,
            "Should return original code for unsupported language: {}",
            lang
        );
    }

    // Passing no language at all must also return the input unchanged.
    #[test]
    fn test_highlighting_without_language() {
        let mut highlighter = SyntaxHighlighter::new();
        let code = "fn main() {}";
        let result = highlighter.highlight(code, None);
        assert_eq!(result, code);
    }

    #[test]
    fn test_highlighting_empty_code() {
        let mut highlighter = SyntaxHighlighter::new();
        let result = highlighter.highlight("", Some("rust"));
        assert_eq!(result, "");
    }

    #[test]
    fn test_highlighting_with_invalid_code() {
        let mut highlighter = SyntaxHighlighter::new();
        // Intentionally malformed code for rust
        let code = "fn {";
        let result = highlighter.highlight(code, Some("rust"));
        // Should not panic, may or may not contain ANSI codes
        assert!(!result.is_empty());
    }
}