Skip to main content

vtcode_tui/ui/
syntax_highlight.rs

1//! Syntax Highlighting Engine
2//!
3//! Global syntax highlighting using `syntect` with TextMate themes.
4//! Follows the architecture from OpenAI Codex PRs #11447 and #12581.
5//!
6//! # Architecture
7//!
8//! - **SyntaxSet**: Process-global singleton (~250 grammars, loaded once)
9//! - **ThemeSet**: Process-global singleton loaded once
10//! - **Highlighting**: Guardrails skip large inputs (>512KB or >10K lines)
11//!
12//! # Usage
13//!
14//! ```rust
15//! use crate::ui::syntax_highlight::{
16//!     highlight_code_to_segments, get_active_syntax_theme
17//! };
18//! use crate::ui::theme::active_theme_id;
19//!
20//! // Auto-resolve syntax theme from current UI theme
21//! let syntax_theme = get_active_syntax_theme();
22//!
23//! // Highlight code with proper theme
24//! let segments = highlight_code_to_segments(code, Some("rust"), syntax_theme);
25//! ```
26//!
27//! # Performance
28//!
29//! - Single SyntaxSet load (~1MB, ~50ms)
30//! - Single ThemeSet load shared by all highlighters
31//! - Input guardrails prevent highlighting huge files
32//! - Parser state preserved across multiline constructs
33
34use crate::ui::theme::get_syntax_theme_for_ui_theme;
35use anstyle::Style as AnstyleStyle;
36use anstyle_syntect::to_anstyle;
37use once_cell::sync::Lazy;
38use syntect::highlighting::{Theme, ThemeSet};
39use syntect::parsing::{SyntaxReference, SyntaxSet};
40use syntect::util::LinesWithEndings;
41use tracing::warn;
42
/// Name of the syntect theme reported by [`default_theme_name`].
const DEFAULT_THEME_NAME: &str = "base16-ocean.dark";

/// Byte-size guardrail: inputs larger than 512 KiB are not highlighted.
const MAX_INPUT_SIZE_BYTES: usize = 1024 * 512;

/// Line-count guardrail: inputs longer than 10,000 lines are not highlighted.
const MAX_INPUT_LINES: usize = 10_000;
51
52/// Global SyntaxSet singleton (~250 grammars)
53static SHARED_SYNTAX_SET: Lazy<SyntaxSet> = Lazy::new(SyntaxSet::load_defaults_newlines);
54
55/// Global ThemeSet singleton.
56static SHARED_THEME_SET: Lazy<ThemeSet> = Lazy::new(|| match ThemeSet::load_defaults() {
57    defaults if !defaults.themes.is_empty() => defaults,
58    _ => {
59        warn!("Failed to load default syntax highlighting themes");
60        ThemeSet {
61            themes: Default::default(),
62        }
63    }
64});
65
66/// Get the global SyntaxSet reference
67#[inline]
68pub fn syntax_set() -> &'static SyntaxSet {
69    &SHARED_SYNTAX_SET
70}
71
72/// Find syntax by language token (e.g., "rust", "python")
73#[inline]
74pub fn find_syntax_by_token(token: &str) -> &'static SyntaxReference {
75    SHARED_SYNTAX_SET
76        .find_syntax_by_token(token)
77        .unwrap_or_else(|| SHARED_SYNTAX_SET.find_syntax_plain_text())
78}
79
80/// Find syntax by exact name
81#[inline]
82pub fn find_syntax_by_name(name: &str) -> Option<&'static SyntaxReference> {
83    SHARED_SYNTAX_SET.find_syntax_by_name(name)
84}
85
86/// Find syntax by file extension
87#[inline]
88pub fn find_syntax_by_extension(ext: &str) -> Option<&'static SyntaxReference> {
89    SHARED_SYNTAX_SET.find_syntax_by_extension(ext)
90}
91
92/// Get plain text syntax fallback
93#[inline]
94pub fn find_syntax_plain_text() -> &'static SyntaxReference {
95    SHARED_SYNTAX_SET.find_syntax_plain_text()
96}
97
98fn fallback_theme() -> Theme {
99    SHARED_THEME_SET
100        .themes
101        .values()
102        .next()
103        .cloned()
104        .unwrap_or_default()
105}
106
107fn plain_text_line_segments(code: &str) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
108    let mut result = Vec::new();
109    let mut ends_with_newline = false;
110    for line in LinesWithEndings::from(code) {
111        ends_with_newline = line.ends_with('\n');
112        let trimmed = line.trim_end_matches('\n');
113        result.push(vec![(
114            syntect::highlighting::Style::default(),
115            trimmed.to_string(),
116        )]);
117    }
118
119    if ends_with_newline {
120        result.push(Vec::new());
121    }
122
123    result
124}
125
126/// Load a theme from the process-global theme set.
127///
128/// # Arguments
129/// * `theme_name` - Theme identifier (TextMate theme name)
130/// * `cache` - Ignored. Kept for API compatibility.
131///
132/// # Returns
133/// Cloned theme instance (safe for multi-threaded use)
134pub fn load_theme(theme_name: &str, _cache: bool) -> Theme {
135    if let Some(theme) = SHARED_THEME_SET.themes.get(theme_name) {
136        theme.clone()
137    } else {
138        warn!(
139            theme = theme_name,
140            "Unknown syntax highlighting theme, falling back to default"
141        );
142        fallback_theme()
143    }
144}
145
146/// Get the default syntax theme name
147#[inline]
148pub fn default_theme_name() -> String {
149    DEFAULT_THEME_NAME.to_string()
150}
151
152/// Get all available theme names
153pub fn available_themes() -> Vec<String> {
154    SHARED_THEME_SET.themes.keys().cloned().collect()
155}
156
157/// Check if input should be highlighted (guardrails)
158#[inline]
159pub fn should_highlight(code: &str) -> bool {
160    code.len() <= MAX_INPUT_SIZE_BYTES && code.lines().count() <= MAX_INPUT_LINES
161}
162
163/// Get the recommended syntax theme for the current UI theme
164///
165/// This ensures syntax highlighting colors complement the UI theme background.
166/// Based on OpenAI Codex PRs #11447 and #12581.
167#[inline]
168pub fn get_active_syntax_theme() -> &'static str {
169    get_syntax_theme_for_ui_theme(&crate::ui::theme::active_theme_id())
170}
171
/// Get the recommended syntax theme for a specific UI theme.
///
/// Thin wrapper over `get_syntax_theme_for_ui_theme`; `theme` is the UI theme
/// identifier and the return value is the name of the syntax theme to use.
#[inline]
pub fn get_syntax_theme(theme: &str) -> &'static str {
    get_syntax_theme_for_ui_theme(theme)
}
177
178#[inline]
179fn select_syntax(language: Option<&str>) -> &'static SyntaxReference {
180    language
181        .map(find_syntax_by_token)
182        .unwrap_or_else(find_syntax_plain_text)
183}
184
185/// Highlight code and return styled segments per line.
186///
187/// Uses `LinesWithEndings` semantics by preserving an empty trailing line
188/// when the input ends with `\n`.
189pub fn highlight_code_to_line_segments(
190    code: &str,
191    language: Option<&str>,
192    theme_name: &str,
193) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
194    if !should_highlight(code) {
195        return plain_text_line_segments(code);
196    }
197
198    let syntax = select_syntax(language);
199    let theme = load_theme(theme_name, true);
200    let mut highlighter = syntect::easy::HighlightLines::new(syntax, &theme);
201    let mut result = Vec::new();
202    let mut ends_with_newline = false;
203
204    for line in LinesWithEndings::from(code) {
205        ends_with_newline = line.ends_with('\n');
206        let trimmed = line.trim_end_matches('\n');
207        let segments = match highlighter.highlight_line(trimmed, syntax_set()) {
208            Ok(ranges) => ranges
209                .into_iter()
210                .map(|(style, text)| (style, text.to_string()))
211                .collect(),
212            Err(_) => vec![(syntect::highlighting::Style::default(), trimmed.to_string())],
213        };
214        result.push(segments);
215    }
216
217    if ends_with_newline {
218        result.push(Vec::new());
219    }
220
221    result
222}
223
224/// Highlight code and convert to `anstyle` segments with optional bg stripping.
225pub fn highlight_code_to_anstyle_line_segments(
226    code: &str,
227    language: Option<&str>,
228    theme_name: &str,
229    strip_background: bool,
230) -> Vec<Vec<(AnstyleStyle, String)>> {
231    highlight_code_to_line_segments(code, language, theme_name)
232        .into_iter()
233        .map(|ranges| {
234            ranges
235                .into_iter()
236                .filter(|(_, text)| !text.is_empty())
237                .map(|(style, text)| {
238                    let mut anstyle = to_anstyle(style);
239                    if strip_background {
240                        anstyle = anstyle.bg_color(None);
241                    }
242                    (anstyle, text)
243                })
244                .collect()
245        })
246        .collect()
247}
248
249/// Highlight one line and convert to `anstyle` segments with optional bg stripping.
250pub fn highlight_line_to_anstyle_segments(
251    line: &str,
252    language: Option<&str>,
253    theme_name: &str,
254    strip_background: bool,
255) -> Option<Vec<(AnstyleStyle, String)>> {
256    highlight_code_to_anstyle_line_segments(line, language, theme_name, strip_background)
257        .into_iter()
258        .next()
259}
260
261/// Highlight code and return styled segments
262///
263/// # Arguments
264/// * `code` - Source code to highlight
265/// * `language` - Optional language hint (auto-detected if None)
266/// * `theme_name` - Syntax theme name (use `get_active_syntax_theme()` for UI theme sync)
267///
268/// # Returns
269/// Vector of (Style, String) tuples for rendering
270///
271/// # Performance
272/// - Returns None early if input exceeds guardrails
273/// - Uses cached theme when available
274pub fn highlight_code_to_segments(
275    code: &str,
276    language: Option<&str>,
277    theme_name: &str,
278) -> Vec<(syntect::highlighting::Style, String)> {
279    highlight_code_to_line_segments(code, language, theme_name)
280        .into_iter()
281        .flatten()
282        .collect()
283}
284
285/// Highlight a single line (for diff rendering)
286///
287/// Preserves parser state for multiline constructs
288pub fn highlight_line_for_diff(
289    line: &str,
290    language: Option<&str>,
291    theme_name: &str,
292) -> Option<Vec<(syntect::highlighting::Style, String)>> {
293    highlight_code_to_line_segments(line, language, theme_name)
294        .into_iter()
295        .next()
296}
297
298/// Convert code to ANSI escape sequences
299pub fn highlight_code_to_ansi(code: &str, language: Option<&str>, theme_name: &str) -> String {
300    let segments = highlight_code_to_segments(code, language, theme_name);
301    let mut output = String::with_capacity(code.len() + segments.len() * 10);
302
303    for (style, text) in segments {
304        let ansi_style = to_anstyle(style);
305        output.push_str(&ansi_style.to_string());
306        output.push_str(&text);
307        output.push_str("\x1b[0m"); // Reset
308    }
309
310    output
311}
312
#[cfg(test)]
mod tests {
    use super::*;

    // The bundled syntax definitions must load and be non-empty.
    #[test]
    fn test_syntax_set_loaded() {
        let ss = syntax_set();
        assert!(!ss.syntaxes().is_empty());
    }

    #[test]
    fn test_find_syntax_by_token() {
        let rust = find_syntax_by_token("rust");
        assert!(rust.name.contains("Rust"));
    }

    // Byte-size guardrail, including the inclusive boundary.
    #[test]
    fn test_should_highlight_guardrails() {
        assert!(should_highlight("fn main() {}"));
        assert!(should_highlight(&"x".repeat(MAX_INPUT_SIZE_BYTES)));
        assert!(!should_highlight(&"x".repeat(MAX_INPUT_SIZE_BYTES + 1)));
    }

    // Line-count guardrail, including the inclusive boundary.
    #[test]
    fn test_should_highlight_line_guardrail() {
        assert!(should_highlight(&"\n".repeat(MAX_INPUT_LINES)));
        assert!(!should_highlight(&"\n".repeat(MAX_INPUT_LINES + 1)));
    }

    #[test]
    fn test_get_active_syntax_theme() {
        let theme = get_active_syntax_theme();
        assert!(!theme.is_empty());
    }

    #[test]
    fn test_highlight_code_to_segments() {
        let segments =
            highlight_code_to_segments("fn main() {}", Some("rust"), "base16-ocean.dark");
        assert!(!segments.is_empty());
    }

    // Two newline-terminated lines yield two lines plus the empty trailing line.
    #[test]
    fn test_line_segments_preserve_trailing_empty_line() {
        let lines = highlight_code_to_line_segments("a\nb\n", None, "base16-ocean.dark");
        assert_eq!(lines.len(), 3);
        assert!(lines[2].is_empty());
    }

    #[test]
    fn test_theme_loading_stable() {
        let theme1 = load_theme("base16-ocean.dark", true);
        let theme2 = load_theme("base16-ocean.dark", true);
        assert_eq!(theme1.name, theme2.name);
    }
}