// limit_cli/syntax.rs

1use std::io;
2
3use syntect::easy::HighlightLines;
4use syntect::highlighting::{Theme, ThemeSet};
5use syntect::parsing::{SyntaxReference, SyntaxSet};
6use syntect::util::LinesWithEndings;
7use thiserror::Error;
8
9/// Syntax highlighting errors
10#[derive(Debug, Error)]
11#[allow(dead_code)]
12pub enum HighlightError {
13    #[error("Syntax not found: {0}")]
14    SyntaxNotFound(String),
15
16    #[error("IO error: {0}")]
17    Io(#[from] io::Error),
18
19    #[error("Parsing error: {0}")]
20    Parse(String),
21}
22
23/// Syntax highlighter supporting 100+ languages
24pub struct SyntaxHighlighter {
25    syntax_set: SyntaxSet,
26    theme: Theme,
27}
28
29impl SyntaxHighlighter {
30    /// Load default syntax set and theme
31    pub fn new() -> Result<Self, HighlightError> {
32        let syntax_set = SyntaxSet::load_defaults_newlines();
33        let theme_set = ThemeSet::load_defaults();
34        let theme = theme_set.themes["base16-ocean.dark"].clone();
35
36        Ok(Self { syntax_set, theme })
37    }
38
39    /// Load with a custom theme from the built-in theme set
40    #[allow(dead_code)]
41    pub fn with_theme(theme_name: &str) -> Result<Self, HighlightError> {
42        let syntax_set = SyntaxSet::load_defaults_newlines();
43        let theme_set = ThemeSet::load_defaults();
44
45        let theme = theme_set
46            .themes
47            .get(theme_name)
48            .ok_or_else(|| HighlightError::SyntaxNotFound(theme_name.to_string()))?
49            .clone();
50
51        Ok(Self { syntax_set, theme })
52    }
53
54    /// List available built-in themes
55    #[allow(dead_code)]
56    pub fn list_builtin_themes() -> Vec<&'static str> {
57        vec![
58            "base16-ocean.dark",
59            "base16-ocean.light",
60            "Solarized (dark)",
61            "Solarized (light)",
62            "InspiredGitHub",
63            "Monokai Extended",
64            "Nord",
65        ]
66    }
67
68    /// Detect and return syntax reference for a language identifier
69    pub fn detect_language(&self, lang: &str) -> SyntaxReference {
70        let lang_lower = lang.to_lowercase();
71
72        let token = match lang_lower.as_str() {
73            "rust" | "rs" => "Rust",
74            "python" | "py" => "Python",
75            "typescript" | "ts" => "JavaScript", // TS highlighted as JS (syntect default)
76            "javascript" | "js" => "JavaScript",
77            "javascript react" | "jsx" => "JavaScript (Babel)",
78            "go" | "golang" => "Go",
79            "java" => "Java",
80            "c" => "C",
81            "cpp" | "c++" | "cxx" => "C++",
82            "csharp" | "c#" | "cs" => "C#",
83            "ruby" | "rb" => "Ruby",
84            "php" => "PHP",
85            "html" | "htm" => "HTML",
86            "xml" => "XML",
87            "css" => "CSS",
88            "scss" | "sass" => "SCSS",
89            "sql" => "SQL",
90            "bash" | "sh" | "shell" => "Bash",
91            "zsh" => "Shell Script (zsh)",
92            "fish" => "Fish",
93            "json" => "JSON",
94            "yaml" | "yml" => "YAML",
95            "toml" => "TOML",
96            "ini" => "INI",
97            "markdown" | "md" => "Markdown",
98
99            "lua" => "Lua",
100            "r" => "R",
101            "scala" => "Scala",
102            "kotlin" | "kt" => "Kotlin",
103            "swift" => "Swift",
104            "dart" => "Dart",
105            "elixir" | "ex" => "Elixir",
106            "erlang" | "erl" => "Erlang",
107            "haskell" | "hs" => "Haskell",
108            "clojure" | "clj" => "Clojure",
109            "fsharp" | "fs" => "F#",
110            "ocaml" | "ml" => "OCaml",
111            "elm" => "Elm",
112            "purescript" | "purs" => "PureScript",
113            "reason" | "re" => "Reason",
114            "nix" => "Nix",
115            "dockerfile" => "Dockerfile",
116            "makefile" => "Makefile",
117            "cmake" => "CMake",
118            "gradle" => "Gradle",
119            "groovy" => "Groovy",
120            "powershell" | "ps1" => "PowerShell",
121            "vue" => "Vue",
122            "svelte" => "Svelte",
123            "solidity" | "sol" => "Solidity",
124            "asm" | "assembly" | "nasm" => "Assembly",
125            "verilog" => "Verilog",
126            "vhdl" => "VHDL",
127            "matlab" => "MATLAB",
128            "julia" => "Julia",
129            "nim" => "Nim",
130            "racket" => "Racket",
131            "scheme" => "Scheme",
132            "lisp" | "cl" => "Lisp",
133            "commonlisp" => "Common Lisp",
134            "cobol" => "COBOL",
135            "fortran" => "Fortran",
136            "pascal" => "Pascal",
137            "ada" => "Ada",
138            "crystal" => "Crystal",
139            "wren" => "Wren",
140            "zig" => "Zig",
141            "v" => "V",
142            "odin" => "Odin",
143            "gleam" => "Gleam",
144            _ => {
145                // Try to find by token directly
146                if let Some(syntax) = self.syntax_set.find_syntax_by_token(lang) {
147                    return syntax.clone();
148                }
149                // Fallback to plain text
150                return self.syntax_set.find_syntax_plain_text().clone();
151            }
152        };
153
154        self.syntax_set
155            .find_syntax_by_token(token)
156            .unwrap_or_else(|| self.syntax_set.find_syntax_plain_text())
157            .clone()
158    }
159    /// Highlight code for terminal ANSI output (used in CLI/REPL)
160    pub fn highlight_to_ansi(&self, code: &str, lang: &str) -> Result<String, HighlightError> {
161        use syntect::util::as_24_bit_terminal_escaped;
162
163        let syntax = self.detect_language(lang);
164        let mut highlighter = HighlightLines::new(&syntax, &self.theme);
165
166        let mut output = String::new();
167        for line in LinesWithEndings::from(code) {
168            let ranges = highlighter
169                .highlight_line(line, &self.syntax_set)
170                .map_err(|e| HighlightError::Parse(e.to_string()))?;
171            let escaped = as_24_bit_terminal_escaped(&ranges[..], false);
172            output.push_str(&escaped);
173        }
174
175        Ok(output)
176    }
177
178    /// Get theme name (for debugging/display)
179    #[allow(dead_code)]
180    pub fn theme_name(&self) -> &str {
181        // Theme doesn't store the name, so we return a default
182        "base16-ocean.dark"
183    }
184}
185
186impl Default for SyntaxHighlighter {
187    fn default() -> Self {
188        Self::new().expect("Failed to initialize syntax highlighter")
189    }
190}
191
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a highlighter with the default theme, panicking if construction fails.
    fn default_highlighter() -> SyntaxHighlighter {
        SyntaxHighlighter::new().unwrap()
    }

    /// Reports whether `text` contains at least one ANSI escape introducer.
    fn has_ansi(text: &str) -> bool {
        text.contains("\x1b[")
    }

    /// Asserts that each `(identifier, expected syntax name)` pair resolves as expected.
    fn assert_detects(highlighter: &SyntaxHighlighter, cases: &[(&str, &str)]) {
        for (identifier, expected) in cases {
            assert_eq!(highlighter.detect_language(identifier).name, *expected);
        }
    }

    #[test]
    fn test_highlighter_new() {
        assert!(SyntaxHighlighter::new().is_ok());
    }

    #[test]
    fn test_highlighter_default() {
        let highlighter = SyntaxHighlighter::default();
        assert_eq!(highlighter.theme_name(), "base16-ocean.dark");
    }

    #[test]
    fn test_highlight_rust_code() {
        let code = r#"fn main() {
    println!("Hello, world!");
}"#;

        let result = default_highlighter().highlight_to_ansi(code, "rust");
        assert!(result.is_ok());
        assert!(has_ansi(&result.unwrap())); // ANSI codes present
    }

    #[test]
    fn test_highlight_python_code() {
        let code = "def hello():\n    print('world')";

        let result = default_highlighter().highlight_to_ansi(code, "python");
        assert!(result.is_ok());
        assert!(has_ansi(&result.unwrap())); // ANSI codes present
    }

    #[test]
    fn test_highlight_javascript_code() {
        let code = "const x = 42;\nconsole.log(x);";

        assert!(default_highlighter()
            .highlight_to_ansi(code, "javascript")
            .is_ok());
    }

    #[test]
    fn test_highlight_json() {
        let code = r#"{"key": "value"}"#;

        assert!(default_highlighter().highlight_to_ansi(code, "json").is_ok());
    }

    #[test]
    fn test_detect_languages() {
        let highlighter = default_highlighter();

        assert_detects(
            &highlighter,
            &[
                ("rust", "Rust"),
                ("python", "Python"),
                // TypeScript uses JavaScript syntax (syntect default doesn't have separate TS)
                ("typescript", "JavaScript"),
                ("javascript", "JavaScript"),
                ("go", "Go"),
                ("yaml", "YAML"),
                ("json", "JSON"),
                ("unknown", "Plain Text"),
            ],
        );
    }

    #[test]
    fn test_fallback_on_empty_code() {
        let result = default_highlighter().highlight_to_ansi("", "rust");
        assert!(result.is_ok());
    }

    #[test]
    fn test_list_themes() {
        let themes = SyntaxHighlighter::list_builtin_themes();
        assert!(!themes.is_empty());
        for expected in ["base16-ocean.dark", "Solarized (dark)", "Nord"] {
            assert!(themes.contains(&expected));
        }
    }

    #[test]
    fn test_with_theme_valid() {
        assert!(SyntaxHighlighter::with_theme("Solarized (dark)").is_ok());
    }

    #[test]
    fn test_with_theme_invalid() {
        assert!(SyntaxHighlighter::with_theme("invalid-theme-name").is_err());
    }

    #[test]
    fn test_language_aliases() {
        let highlighter = default_highlighter();

        // Short aliases must resolve to the same syntax as the full names.
        assert_detects(
            &highlighter,
            &[
                ("rs", "Rust"),
                ("py", "Python"),
                ("js", "JavaScript"),
                ("ts", "JavaScript"), // TS -> JS
                ("yml", "YAML"),
                ("sh", "Bourne Again Shell (bash)"),
            ],
        );
    }

    #[test]
    fn test_multiline_code() {
        let code = r#"fn main() {
    // This is a comment
    let x = 42;
    println!("{}", x);
}"#;

        let result = default_highlighter().highlight_to_ansi(code, "rust");
        assert!(result.is_ok());
        assert!(has_ansi(&result.unwrap())); // ANSI codes present
    }

    #[test]
    fn test_special_characters_in_code() {
        let code = r#"let s = "特殊字符 \n\t\r"; println!("{}", s);"#;

        let result = default_highlighter().highlight_to_ansi(code, "rust");
        assert!(result.is_ok());
    }
}