Skip to main content

mq_view/
highlighter.rs

1use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter};
2
3/// Syntax highlighter supporting various programming languages and HTML.
4///
5/// This struct uses tree-sitter to provide syntax highlighting with ANSI color codes
6/// for various programming languages and markup formats.
7///
8/// # Examples
9///
10/// ```rust
11/// use mq_view::SyntaxHighlighter;
12///
13/// let mut highlighter = SyntaxHighlighter::new();
14/// let code = "fn main() { println!(\"Hello\"); }";
15/// let highlighted = highlighter.highlight(code, Some("rust"));
16/// println!("{}", highlighted);
17/// ```
18pub struct SyntaxHighlighter {
19    highlighter: Highlighter,
20}
21
22impl SyntaxHighlighter {
23    pub fn new() -> Self {
24        Self {
25            highlighter: Highlighter::new(),
26        }
27    }
28
29    /// Get the appropriate tree-sitter language and highlight configuration for a given language
30    fn get_highlight_config(lang: &str) -> Option<HighlightConfiguration> {
31        let (language, query) = match lang.to_lowercase().as_str() {
32            #[cfg(feature = "lang-rust")]
33            "rust" | "rs" => (
34                tree_sitter_rust::LANGUAGE.into(),
35                tree_sitter_rust::HIGHLIGHTS_QUERY,
36            ),
37            #[cfg(feature = "lang-javascript")]
38            "javascript" | "js" => (
39                tree_sitter_javascript::LANGUAGE.into(),
40                tree_sitter_javascript::HIGHLIGHT_QUERY,
41            ),
42            #[cfg(feature = "lang-typescript")]
43            "typescript" | "ts" => (
44                tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
45                tree_sitter_typescript::HIGHLIGHTS_QUERY,
46            ),
47            #[cfg(feature = "lang-typescript")]
48            "tsx" => (
49                tree_sitter_typescript::LANGUAGE_TSX.into(),
50                tree_sitter_typescript::HIGHLIGHTS_QUERY,
51            ),
52            #[cfg(feature = "lang-python")]
53            "python" | "py" => (
54                tree_sitter_python::LANGUAGE.into(),
55                tree_sitter_python::HIGHLIGHTS_QUERY,
56            ),
57            #[cfg(feature = "lang-go")]
58            "go" => (
59                tree_sitter_go::LANGUAGE.into(),
60                tree_sitter_go::HIGHLIGHTS_QUERY,
61            ),
62            #[cfg(feature = "lang-html")]
63            "html" => (
64                tree_sitter_html::LANGUAGE.into(),
65                tree_sitter_html::HIGHLIGHTS_QUERY,
66            ),
67            #[cfg(feature = "lang-css")]
68            "css" => (
69                tree_sitter_css::LANGUAGE.into(),
70                tree_sitter_css::HIGHLIGHTS_QUERY,
71            ),
72            #[cfg(feature = "lang-json")]
73            "json" => (
74                tree_sitter_json::LANGUAGE.into(),
75                tree_sitter_json::HIGHLIGHTS_QUERY,
76            ),
77            #[cfg(feature = "lang-bash")]
78            "bash" | "sh" => (
79                tree_sitter_bash::LANGUAGE.into(),
80                tree_sitter_bash::HIGHLIGHT_QUERY,
81            ),
82            #[cfg(feature = "lang-c")]
83            "c" => (
84                tree_sitter_c::LANGUAGE.into(),
85                tree_sitter_c::HIGHLIGHT_QUERY,
86            ),
87            #[cfg(feature = "lang-cpp")]
88            "cpp" | "c++" | "cxx" => (
89                tree_sitter_cpp::LANGUAGE.into(),
90                tree_sitter_cpp::HIGHLIGHT_QUERY,
91            ),
92            #[cfg(feature = "lang-java")]
93            "java" => (
94                tree_sitter_java::LANGUAGE.into(),
95                tree_sitter_java::HIGHLIGHTS_QUERY,
96            ),
97            #[cfg(feature = "lang-haskell")]
98            "hs" | "haskell" => (
99                tree_sitter_haskell::LANGUAGE.into(),
100                tree_sitter_haskell::HIGHLIGHTS_QUERY,
101            ),
102            #[cfg(feature = "lang-elm")]
103            "elm" => (
104                tree_sitter_elm::LANGUAGE.into(),
105                tree_sitter_elm::HIGHLIGHTS_QUERY,
106            ),
107            #[cfg(feature = "lang-mq")]
108            "mq" => (
109                tree_sitter_mq::LANGUAGE.into(),
110                tree_sitter_mq::HIGHLIGHTS_QUERY,
111            ),
112            #[cfg(feature = "lang-ocaml")]
113            "ocaml" | "ml" => (
114                tree_sitter_ocaml::LANGUAGE_OCAML.into(),
115                tree_sitter_ocaml::HIGHLIGHTS_QUERY,
116            ),
117            #[cfg(feature = "lang-swift")]
118            "swift" => (
119                tree_sitter_swift::LANGUAGE.into(),
120                tree_sitter_swift::HIGHLIGHTS_QUERY,
121            ),
122            #[cfg(feature = "lang-elixir")]
123            "ex" | "exs" => (
124                tree_sitter_elixir::LANGUAGE.into(),
125                tree_sitter_elixir::HIGHLIGHTS_QUERY,
126            ),
127            #[cfg(feature = "lang-toml")]
128            "toml" => (
129                tree_sitter_toml_ng::LANGUAGE.into(),
130                tree_sitter_toml_ng::HIGHLIGHTS_QUERY,
131            ),
132            #[cfg(feature = "lang-clojure")]
133            "clojure" | "clj" => (
134                tree_sitter_clojure::LANGUAGE.into(),
135                include_str!("../queries/clojure_highlights.scm"),
136            ),
137            #[cfg(feature = "lang-yaml")]
138            "yaml" | "yml" => (
139                tree_sitter_yaml::LANGUAGE.into(),
140                tree_sitter_yaml::HIGHLIGHTS_QUERY,
141            ),
142            #[cfg(feature = "lang-ruby")]
143            "ruby" | "rb" => (
144                tree_sitter_ruby::LANGUAGE.into(),
145                tree_sitter_ruby::HIGHLIGHTS_QUERY,
146            ),
147            #[cfg(feature = "lang-php")]
148            "php" => (
149                tree_sitter_php::LANGUAGE_PHP.into(),
150                tree_sitter_php::HIGHLIGHTS_QUERY,
151            ),
152            #[cfg(feature = "lang-lua")]
153            "lua" => (
154                tree_sitter_lua::LANGUAGE.into(),
155                tree_sitter_lua::HIGHLIGHTS_QUERY,
156            ),
157            #[cfg(feature = "lang-kotlin")]
158            "kotlin" | "kt" | "kts" => (
159                tree_sitter_kotlin_ng::LANGUAGE.into(),
160                include_str!("../queries/kotlin_highlights.scm"),
161            ),
162            #[cfg(feature = "lang-scala")]
163            "scala" => (
164                tree_sitter_scala::LANGUAGE.into(),
165                tree_sitter_scala::HIGHLIGHTS_QUERY,
166            ),
167            #[cfg(feature = "lang-make")]
168            "make" | "makefile" => (
169                tree_sitter_make::LANGUAGE.into(),
170                tree_sitter_make::HIGHLIGHTS_QUERY,
171            ),
172            #[cfg(feature = "lang-sql")]
173            "sql" => (
174                tree_sitter_sequel::LANGUAGE.into(),
175                tree_sitter_sequel::HIGHLIGHTS_QUERY,
176            ),
177            #[cfg(feature = "lang-dockerfile")]
178            "dockerfile" | "docker" => (
179                tree_sitter_containerfile::LANGUAGE.into(),
180                tree_sitter_containerfile::HIGHLIGHTS_QUERY,
181            ),
182            _ => return None,
183        };
184
185        let mut config = HighlightConfiguration::new(language, "", query, "", "").ok()?;
186
187        config.configure(&[
188            "attribute",
189            "constant",
190            "function.builtin",
191            "function",
192            "keyword",
193            "operator",
194            "property",
195            "punctuation",
196            "punctuation.bracket",
197            "punctuation.delimiter",
198            "string",
199            "string.special",
200            "tag",
201            "type",
202            "type.builtin",
203            "variable",
204            "variable.builtin",
205            "variable.parameter",
206            "comment",
207            "number",
208            "boolean",
209            "escape",
210            "label",
211            "namespace",
212            "constructor",
213            "embedded",
214        ]);
215
216        Some(config)
217    }
218
219    /// Highlight code and return colored output
220    pub fn highlight(&mut self, code: &str, lang: Option<&str>) -> String {
221        // If no language specified or config not available, return plain text
222        let Some(lang) = lang else {
223            return code.to_string();
224        };
225
226        let Some(config) = Self::get_highlight_config(lang) else {
227            return code.to_string();
228        };
229
230        let highlights = match self
231            .highlighter
232            .highlight(&config, code.as_bytes(), None, |_| None)
233        {
234            Ok(h) => h,
235            Err(_) => return code.to_string(),
236        };
237
238        let mut result = String::new();
239        let mut current_pos = 0;
240
241        for event in highlights {
242            match event {
243                Ok(HighlightEvent::Source { start, end }) => {
244                    if start > current_pos {
245                        // Add unhighlighted text
246                        result.push_str(&code[current_pos..start]);
247                    }
248                    result.push_str(&code[start..end]);
249                    current_pos = end;
250                }
251                Ok(HighlightEvent::HighlightStart(Highlight(idx))) => {
252                    // Apply color based on highlight type
253                    let color_code = Self::get_color_for_highlight(idx);
254                    result.push_str(color_code);
255                }
256                Ok(HighlightEvent::HighlightEnd) => {
257                    // Reset color
258                    result.push_str("\x1b[0m");
259                }
260                Err(_) => {}
261            }
262        }
263
264        // Add any remaining text
265        if current_pos < code.len() {
266            result.push_str(&code[current_pos..]);
267        }
268
269        result
270    }
271
272    /// Map highlight index to ANSI color codes
273    fn get_color_for_highlight(idx: usize) -> &'static str {
274        match idx {
275            0 => "\x1b[36m",  // attribute - cyan
276            1 => "\x1b[35m",  // constant - magenta
277            2 => "\x1b[33m",  // function.builtin - yellow
278            3 => "\x1b[34m",  // function - blue
279            4 => "\x1b[95m",  // keyword - bright magenta
280            5 => "\x1b[37m",  // operator - white
281            6 => "\x1b[36m",  // property - cyan
282            7 => "\x1b[90m",  // punctuation - bright black
283            8 => "\x1b[90m",  // punctuation.bracket - bright black
284            9 => "\x1b[90m",  // punctuation.delimiter - bright black
285            10 => "\x1b[32m", // string - green
286            11 => "\x1b[92m", // string.special - bright green
287            12 => "\x1b[34m", // tag - blue
288            13 => "\x1b[33m", // type - yellow
289            14 => "\x1b[93m", // type.builtin - bright yellow
290            15 => "\x1b[37m", // variable - white
291            16 => "\x1b[35m", // variable.builtin - magenta
292            17 => "\x1b[36m", // variable.parameter - cyan
293            18 => "\x1b[90m", // comment - bright black (gray)
294            19 => "\x1b[35m", // number - magenta
295            20 => "\x1b[35m", // boolean - magenta
296            21 => "\x1b[36m", // escape - cyan
297            22 => "\x1b[33m", // label - yellow
298            23 => "\x1b[36m", // namespace - cyan
299            24 => "\x1b[33m", // constructor - yellow
300            25 => "\x1b[37m", // embedded - white
301            _ => "\x1b[0m",   // default - reset
302        }
303    }
304}
305
306impl Default for SyntaxHighlighter {
307    fn default() -> Self {
308        Self::new()
309    }
310}
311
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Runs a fresh highlighter over `code`, tagged as `lang`.
    fn render(lang: &str, code: &str) -> String {
        SyntaxHighlighter::new().highlight(code, Some(lang))
    }

    /// Each case is compiled in only when its language feature is enabled; the
    /// output for every such language must contain at least one ANSI escape sequence.
    #[rstest]
    #[cfg_attr(
        feature = "lang-rust",
        case::rust("rust", r#"fn main() { println!("Hello, world!"); }"#)
    )]
    #[cfg_attr(
        feature = "lang-python",
        case::python("python", r#"def main(): print("Hello, world!")"#)
    )]
    #[cfg_attr(
        feature = "lang-javascript",
        case::js("javascript", r#"function main() { console.log('Hello, world!'); }"#)
    )]
    #[cfg_attr(
        feature = "lang-typescript",
        case::ts(
            "typescript",
            r#"function main(): void { console.log('Hello, world!'); }"#
        )
    )]
    #[cfg_attr(
        feature = "lang-go",
        case::go("go", r#"func main() { fmt.Println("Hello, world!") }"#)
    )]
    #[cfg_attr(feature = "lang-html", case::html("html", r#"<h1>Hello</h1>"#))]
    #[cfg_attr(feature = "lang-css", case::css("css", r#"body { color: red; }"#))]
    #[cfg_attr(feature = "lang-json", case::json("json", r#"{ "hello": "world" }"#))]
    #[cfg_attr(feature = "lang-bash", case::bash("bash", r#"echo 'Hello, world!'"#))]
    #[cfg_attr(
        feature = "lang-c",
        case::c("c", r#"int main() { printf("Hello, world!"); }"#)
    )]
    #[cfg_attr(
        feature = "lang-java",
        case::java(
            "java",
            r#"public class Main { public static void main(String[] args) { System.out.println("Hello, world!"); } }"#
        )
    )]
    #[cfg_attr(
        feature = "lang-haskell",
        case::haskell("haskell", r#"main = putStrLn "Hello, world!""#)
    )]
    #[cfg_attr(
        feature = "lang-elm",
        case::elm("elm", r#"main = text "Hello, world!""#)
    )]
    #[cfg_attr(feature = "lang-mq", case::mq("mq", r#"fn(): "Hello, world!""#))]
    #[cfg_attr(feature = "lang-mq", case::bool("mq", r#"fn(): true"#))]
    #[cfg_attr(feature = "lang-mq", case::number("mq", r#"fn(): 42"#))]
    #[cfg_attr(
        feature = "lang-toml",
        case::toml("toml", "[package]\nname = \"hello\"\nversion = \"1.0.0\"")
    )]
    #[cfg_attr(
        feature = "lang-clojure",
        case::clojure("clojure", r#"(defn main [] (println "Hello, world!"))"#)
    )]
    #[cfg_attr(
        feature = "lang-yaml",
        case::yaml("yaml", "name: hello\nversion: 1.0.0")
    )]
    #[cfg_attr(
        feature = "lang-ruby",
        case::ruby("ruby", r#"def main; puts "Hello, world!"; end"#)
    )]
    #[cfg_attr(
        feature = "lang-php",
        case::php("php", r#"<?php function main() { echo "Hello, world!"; }"#)
    )]
    #[cfg_attr(feature = "lang-lua", case::lua("lua", r#"print("Hello, world!")"#))]
    #[cfg_attr(
        feature = "lang-kotlin",
        case::kotlin("kotlin", r#"fun main() { println("Hello, world!") }"#)
    )]
    #[cfg_attr(
        feature = "lang-scala",
        case::scala(
            "scala",
            r#"object Main { def main(args: Array[String]): Unit = println("Hello, world!") }"#
        )
    )]
    #[cfg_attr(
        feature = "lang-make",
        case::make("make", "all:\n\techo \"Hello, world!\"")
    )]
    #[cfg_attr(
        feature = "lang-sql",
        case::sql("sql", "SELECT * FROM users WHERE id = 1;")
    )]
    #[cfg_attr(
        feature = "lang-dockerfile",
        case::dockerfile("dockerfile", "FROM rust:latest\nRUN cargo build")
    )]
    fn test_highlighting_for_supported_languages(#[case] lang: &str, #[case] code: &str) {
        let rendered = render(lang, code);
        let has_escape = rendered.contains("\x1b[");
        assert!(
            has_escape,
            "Expected ANSI escape codes for language: {}",
            lang
        );
    }

    /// Language names with no matching configuration leave the text untouched.
    #[rstest]
    #[case("unknown", "some code")]
    #[case("unsupported", "another code")]
    fn test_highlighting_for_unsupported_languages(#[case] lang: &str, #[case] code: &str) {
        assert_eq!(
            render(lang, code),
            code,
            "Should return original code for unsupported language: {}",
            lang
        );
    }

    /// Empty input produces empty output.
    #[test]
    fn test_highlighting_empty_code() {
        assert_eq!(render("rust", ""), "");
    }

    /// Malformed Rust must not panic; the output may or may not contain ANSI codes.
    #[test]
    fn test_highlighting_with_invalid_code() {
        let malformed = "fn {";
        let rendered = render("rust", malformed);
        assert!(!rendered.is_empty());
    }
}
444}