Skip to main content

vtcode_tui/ui/
syntax_highlight.rs

1//! Syntax Highlighting Engine
2//!
3//! Global syntax highlighting using `syntect` with TextMate themes.
4//! Follows the architecture from OpenAI Codex PRs #11447 and #12581.
5//!
6//! # Architecture
7//!
8//! - **SyntaxSet**: Process-global singleton (~250 grammars, loaded once)
9//! - **ThemeSet**: Process-global singleton loaded once
10//! - **Highlighting**: Guardrails skip large inputs (>512KB or >10K lines)
11//!
12//! # Usage
13//!
14//! ```rust
15//! use vtcode_tui::ui::syntax_highlight::{
16//!     get_active_syntax_theme, highlight_code_to_segments,
17//! };
18//!
19//! // Auto-resolve syntax theme from current UI theme
20//! let syntax_theme = get_active_syntax_theme();
21//!
22//! // Highlight code with proper theme
23//! let code = "fn main() { println!(\"hi\"); }";
24//! let segments = highlight_code_to_segments(code, Some("rust"), syntax_theme);
25//! assert!(!segments.is_empty());
26//! ```
27//!
28//! # Performance
29//!
30//! - Single SyntaxSet load (~1MB, ~50ms)
31//! - Single ThemeSet load shared by all highlighters
32//! - Input guardrails prevent highlighting huge files
33//! - Parser state preserved across multiline constructs
34
35use crate::ui::theme::get_syntax_theme_for_ui_theme;
36use anstyle::{Ansi256Color, AnsiColor, Effects, RgbColor, Style as AnstyleStyle};
37use once_cell::sync::Lazy;
38use syntect::highlighting::{FontStyle, Highlighter, Theme, ThemeSet};
39use syntect::parsing::{Scope, SyntaxReference, SyntaxSet};
40use syntect::util::LinesWithEndings;
41use tracing::warn;
42use vtcode_commons::ansi_codes::RESET;
43
/// Name of the syntax theme reported by `default_theme_name`.
const DEFAULT_THEME_NAME: &str = "base16-ocean.dark";

/// Byte-size guardrail: inputs longer than this (512 KiB) are not highlighted.
const MAX_INPUT_SIZE_BYTES: usize = 512 * 1024;

/// Line-count guardrail: inputs with more lines than this are not highlighted.
const MAX_INPUT_LINES: usize = 10_000;

// Alpha-channel markers used by syntect/bat style ANSI themes. The alpha byte
// does not describe transparency here; it selects how the colour is decoded:
//   alpha == 0 -> `r` holds an ANSI palette index
//   alpha == 1 -> use the terminal's default colour
const ANSI_ALPHA_INDEX: u8 = 0x00;
const ANSI_ALPHA_DEFAULT: u8 = 0x01;
/// Alpha value of an ordinary, fully opaque RGB colour.
const OPAQUE_ALPHA: u8 = u8::MAX;
58
59/// Global SyntaxSet singleton (~250 grammars)
60static SHARED_SYNTAX_SET: Lazy<SyntaxSet> = Lazy::new(SyntaxSet::load_defaults_newlines);
61
62/// Global ThemeSet singleton.
63static SHARED_THEME_SET: Lazy<ThemeSet> = Lazy::new(|| match ThemeSet::load_defaults() {
64    defaults if !defaults.themes.is_empty() => defaults,
65    _ => {
66        warn!("Failed to load default syntax highlighting themes");
67        ThemeSet {
68            themes: Default::default(),
69        }
70    }
71});
72
73/// Get the global SyntaxSet reference
74#[inline]
75pub fn syntax_set() -> &'static SyntaxSet {
76    &SHARED_SYNTAX_SET
77}
78
79/// Find syntax by language token (e.g., "rust", "python")
80#[inline]
81pub fn find_syntax_by_token(token: &str) -> &'static SyntaxReference {
82    SHARED_SYNTAX_SET
83        .find_syntax_by_token(token)
84        .unwrap_or_else(|| SHARED_SYNTAX_SET.find_syntax_plain_text())
85}
86
87/// Find syntax by exact name
88#[inline]
89pub fn find_syntax_by_name(name: &str) -> Option<&'static SyntaxReference> {
90    SHARED_SYNTAX_SET.find_syntax_by_name(name)
91}
92
93/// Find syntax by file extension
94#[inline]
95pub fn find_syntax_by_extension(ext: &str) -> Option<&'static SyntaxReference> {
96    SHARED_SYNTAX_SET.find_syntax_by_extension(ext)
97}
98
99/// Get plain text syntax fallback
100#[inline]
101pub fn find_syntax_plain_text() -> &'static SyntaxReference {
102    SHARED_SYNTAX_SET.find_syntax_plain_text()
103}
104
105fn fallback_theme() -> Theme {
106    SHARED_THEME_SET
107        .themes
108        .values()
109        .next()
110        .cloned()
111        .unwrap_or_default()
112}
113
114fn plain_text_line_segments(code: &str) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
115    let mut result = Vec::new();
116    let mut ends_with_newline = false;
117    for line in LinesWithEndings::from(code) {
118        ends_with_newline = line.ends_with('\n');
119        let trimmed = line.trim_end_matches('\n');
120        result.push(vec![(
121            syntect::highlighting::Style::default(),
122            trimmed.to_string(),
123        )]);
124    }
125
126    if ends_with_newline {
127        result.push(Vec::new());
128    }
129
130    result
131}
132
133/// Load a theme from the process-global theme set.
134///
135/// # Arguments
136/// * `theme_name` - Theme identifier (TextMate theme name)
137/// * `cache` - Ignored. Kept for API compatibility.
138///
139/// # Returns
140/// Cloned theme instance (safe for multi-threaded use)
141pub fn load_theme(theme_name: &str, _cache: bool) -> Theme {
142    if let Some(theme) = SHARED_THEME_SET.themes.get(theme_name) {
143        theme.clone()
144    } else {
145        warn!(
146            theme = theme_name,
147            "Unknown syntax highlighting theme, falling back to default"
148        );
149        fallback_theme()
150    }
151}
152
153/// Get the default syntax theme name
154#[inline]
155pub fn default_theme_name() -> String {
156    DEFAULT_THEME_NAME.to_string()
157}
158
159/// Get all available theme names
160pub fn available_themes() -> Vec<String> {
161    SHARED_THEME_SET.themes.keys().cloned().collect()
162}
163
164/// Check if input should be highlighted (guardrails)
165#[inline]
166pub fn should_highlight(code: &str) -> bool {
167    code.len() <= MAX_INPUT_SIZE_BYTES && code.lines().count() <= MAX_INPUT_LINES
168}
169
170/// Get the recommended syntax theme for the current UI theme
171///
172/// This ensures syntax highlighting colors complement the UI theme background.
173/// Based on OpenAI Codex PRs #11447 and #12581.
174#[inline]
175pub fn get_active_syntax_theme() -> &'static str {
176    get_syntax_theme_for_ui_theme(&crate::ui::theme::active_theme_id())
177}
178
179/// Get the recommended syntax theme for a specific UI theme
180#[inline]
181pub fn get_syntax_theme(theme: &str) -> &'static str {
182    get_syntax_theme_for_ui_theme(theme)
183}
184
/// Background colours a syntax theme assigns to diff scopes, as raw RGB.
///
/// Each field is looked up under `markup.inserted` / `markup.deleted` first
/// and under `diff.inserted` / `diff.deleted` second; it stays `None` when
/// the theme styles neither scope with a background.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct DiffScopeBackgroundRgbs {
    /// `(r, g, b)` background for inserted lines, if the theme defines one.
    pub inserted: Option<(u8, u8, u8)>,
    /// `(r, g, b)` background for deleted lines, if the theme defines one.
    pub deleted: Option<(u8, u8, u8)>,
}
194
195/// Resolve diff-scope background colors from the currently active syntax theme.
196pub fn diff_scope_background_rgbs() -> DiffScopeBackgroundRgbs {
197    let theme_name = get_active_syntax_theme();
198    let theme = load_theme(theme_name, true);
199    diff_scope_background_rgbs_for_theme(&theme)
200}
201
202fn diff_scope_background_rgbs_for_theme(theme: &Theme) -> DiffScopeBackgroundRgbs {
203    let highlighter = Highlighter::new(theme);
204    let inserted = scope_background_rgb(&highlighter, "markup.inserted")
205        .or_else(|| scope_background_rgb(&highlighter, "diff.inserted"));
206    let deleted = scope_background_rgb(&highlighter, "markup.deleted")
207        .or_else(|| scope_background_rgb(&highlighter, "diff.deleted"));
208    DiffScopeBackgroundRgbs { inserted, deleted }
209}
210
211fn scope_background_rgb(highlighter: &Highlighter<'_>, scope_name: &str) -> Option<(u8, u8, u8)> {
212    let scope = Scope::new(scope_name).ok()?;
213    let background = highlighter.style_mod_for_stack(&[scope]).background?;
214    Some((background.r, background.g, background.b))
215}
216
217fn ansi_palette_color(index: u8) -> anstyle::Color {
218    match index {
219        0x00 => AnsiColor::Black.into(),
220        0x01 => AnsiColor::Red.into(),
221        0x02 => AnsiColor::Green.into(),
222        0x03 => AnsiColor::Yellow.into(),
223        0x04 => AnsiColor::Blue.into(),
224        0x05 => AnsiColor::Magenta.into(),
225        0x06 => AnsiColor::Cyan.into(),
226        0x07 => AnsiColor::White.into(),
227        index => Ansi256Color(index).into(),
228    }
229}
230
231fn convert_syntect_color(color: syntect::highlighting::Color) -> Option<anstyle::Color> {
232    match color.a {
233        // Bat-compatible encoding for ANSI-family themes.
234        ANSI_ALPHA_INDEX => Some(ansi_palette_color(color.r)),
235        // Preserve terminal defaults rather than forcing black.
236        ANSI_ALPHA_DEFAULT => None,
237        // Standard syntect themes use opaque RGB values.
238        OPAQUE_ALPHA => Some(RgbColor(color.r, color.g, color.b).into()),
239        // Some theme dumps use other alpha values; keep them readable as RGB.
240        _ => Some(RgbColor(color.r, color.g, color.b).into()),
241    }
242}
243
244fn convert_syntect_style(style: syntect::highlighting::Style) -> AnstyleStyle {
245    let mut effects = Effects::new();
246    if style.font_style.contains(FontStyle::BOLD) {
247        effects |= Effects::BOLD;
248    }
249    if style.font_style.contains(FontStyle::ITALIC) {
250        effects |= Effects::ITALIC;
251    }
252    if style.font_style.contains(FontStyle::UNDERLINE) {
253        effects |= Effects::UNDERLINE;
254    }
255
256    AnstyleStyle::new()
257        .fg_color(convert_syntect_color(style.foreground))
258        .bg_color(convert_syntect_color(style.background))
259        .effects(effects)
260}
261
262#[inline]
263fn select_syntax(language: Option<&str>) -> &'static SyntaxReference {
264    language
265        .map(find_syntax_by_token)
266        .unwrap_or_else(find_syntax_plain_text)
267}
268
269/// Highlight code and return styled segments per line.
270///
271/// Uses `LinesWithEndings` semantics by preserving an empty trailing line
272/// when the input ends with `\n`.
273pub fn highlight_code_to_line_segments(
274    code: &str,
275    language: Option<&str>,
276    theme_name: &str,
277) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
278    let theme = load_theme(theme_name, true);
279    highlight_code_to_line_segments_with_theme(code, language, &theme)
280}
281
282fn highlight_code_to_line_segments_with_theme(
283    code: &str,
284    language: Option<&str>,
285    theme: &Theme,
286) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
287    if !should_highlight(code) {
288        return plain_text_line_segments(code);
289    }
290
291    let syntax = select_syntax(language);
292    let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
293    let mut result = Vec::new();
294    let mut ends_with_newline = false;
295
296    for line in LinesWithEndings::from(code) {
297        ends_with_newline = line.ends_with('\n');
298        let trimmed = line.trim_end_matches('\n');
299        let segments = match highlighter.highlight_line(trimmed, syntax_set()) {
300            Ok(ranges) => ranges
301                .into_iter()
302                .map(|(style, text)| (style, text.to_string()))
303                .collect(),
304            Err(_) => vec![(syntect::highlighting::Style::default(), trimmed.to_string())],
305        };
306        result.push(segments);
307    }
308
309    if ends_with_newline {
310        result.push(Vec::new());
311    }
312
313    result
314}
315
316fn highlight_code_to_anstyle_line_segments_with_theme(
317    code: &str,
318    language: Option<&str>,
319    theme: &Theme,
320    strip_background: bool,
321) -> Vec<Vec<(AnstyleStyle, String)>> {
322    highlight_code_to_line_segments_with_theme(code, language, theme)
323        .into_iter()
324        .map(|ranges| {
325            ranges
326                .into_iter()
327                .filter(|(_, text)| !text.is_empty())
328                .map(|(style, text)| {
329                    let mut anstyle = convert_syntect_style(style);
330                    if strip_background {
331                        anstyle = anstyle.bg_color(None);
332                    }
333                    (anstyle, text)
334                })
335                .collect()
336        })
337        .collect()
338}
339
340/// Highlight code and convert to `anstyle` segments with optional bg stripping.
341pub fn highlight_code_to_anstyle_line_segments(
342    code: &str,
343    language: Option<&str>,
344    theme_name: &str,
345    strip_background: bool,
346) -> Vec<Vec<(AnstyleStyle, String)>> {
347    let theme = load_theme(theme_name, true);
348    highlight_code_to_anstyle_line_segments_with_theme(code, language, &theme, strip_background)
349}
350
351/// Highlight one line and convert to `anstyle` segments with optional bg stripping.
352pub fn highlight_line_to_anstyle_segments(
353    line: &str,
354    language: Option<&str>,
355    theme_name: &str,
356    strip_background: bool,
357) -> Option<Vec<(AnstyleStyle, String)>> {
358    highlight_code_to_anstyle_line_segments(line, language, theme_name, strip_background)
359        .into_iter()
360        .next()
361}
362
363/// Highlight code and return styled segments
364///
365/// # Arguments
366/// * `code` - Source code to highlight
367/// * `language` - Optional language hint (auto-detected if None)
368/// * `theme_name` - Syntax theme name (use `get_active_syntax_theme()` for UI theme sync)
369///
370/// # Returns
371/// Vector of (Style, String) tuples for rendering
372///
373/// # Performance
374/// - Returns None early if input exceeds guardrails
375/// - Uses cached theme when available
376pub fn highlight_code_to_segments(
377    code: &str,
378    language: Option<&str>,
379    theme_name: &str,
380) -> Vec<(syntect::highlighting::Style, String)> {
381    highlight_code_to_line_segments(code, language, theme_name)
382        .into_iter()
383        .flatten()
384        .collect()
385}
386
387/// Highlight a single line (for diff rendering)
388///
389/// Preserves parser state for multiline constructs
390pub fn highlight_line_for_diff(
391    line: &str,
392    language: Option<&str>,
393    theme_name: &str,
394) -> Option<Vec<(syntect::highlighting::Style, String)>> {
395    highlight_code_to_line_segments(line, language, theme_name)
396        .into_iter()
397        .next()
398}
399
400/// Convert code to ANSI escape sequences
401pub fn highlight_code_to_ansi(code: &str, language: Option<&str>, theme_name: &str) -> String {
402    let segments = highlight_code_to_anstyle_line_segments(code, language, theme_name, false);
403    let mut output = String::with_capacity(code.len() + segments.len() * 10);
404
405    for (ansi_style, text) in segments.into_iter().flatten() {
406        output.push_str(&ansi_style.to_string());
407        output.push_str(&text);
408        output.push_str(RESET);
409    }
410
411    output
412}
413
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;
    use syntect::highlighting::Color as SyntectColor;
    use syntect::highlighting::ScopeSelectors;
    use syntect::highlighting::StyleModifier;
    use syntect::highlighting::ThemeItem;
    use syntect::highlighting::ThemeSettings;

    /// Opaque black background; the style-conversion tests never inspect it.
    const OPAQUE_BLACK: SyntectColor = SyntectColor {
        r: 0,
        g: 0,
        b: 0,
        a: OPAQUE_ALPHA,
    };

    /// Shorthand for a syntect colour literal.
    fn color(r: u8, g: u8, b: u8, a: u8) -> SyntectColor {
        SyntectColor { r, g, b, a }
    }

    /// Convert a syntect style that has the given foreground and font style
    /// over an opaque black background.
    fn convert_with_foreground(foreground: SyntectColor, font_style: FontStyle) -> AnstyleStyle {
        convert_syntect_style(syntect::highlighting::Style {
            foreground,
            background: OPAQUE_BLACK,
            font_style,
        })
    }

    /// Theme rule that only sets an (opaque) background for `scope`.
    fn theme_item(scope: &str, background: Option<(u8, u8, u8)>) -> ThemeItem {
        ThemeItem {
            scope: ScopeSelectors::from_str(scope).expect("scope selector should parse"),
            style: StyleModifier {
                background: background.map(|(r, g, b)| SyntectColor { r, g, b, a: 255 }),
                ..StyleModifier::default()
            },
        }
    }

    /// Theme with default settings and exactly the given scope rules.
    fn theme_with_scopes(scopes: Vec<ThemeItem>) -> Theme {
        Theme {
            settings: ThemeSettings::default(),
            scopes,
            ..Theme::default()
        }
    }

    #[test]
    fn test_syntax_set_loaded() {
        let ss = syntax_set();
        assert!(!ss.syntaxes().is_empty());
    }

    #[test]
    fn test_find_syntax_by_token() {
        let rust = find_syntax_by_token("rust");
        assert!(rust.name.contains("Rust"));
    }

    #[test]
    fn test_should_highlight_guardrails() {
        assert!(should_highlight("fn main() {}"));

        // Byte-size limit.
        assert!(!should_highlight(&"x".repeat(MAX_INPUT_SIZE_BYTES + 1)));

        // Line-count limit: exactly at the limit passes, one more line fails.
        assert!(should_highlight(&"\n".repeat(MAX_INPUT_LINES)));
        assert!(!should_highlight(&"\n".repeat(MAX_INPUT_LINES + 1)));
    }

    #[test]
    fn test_get_active_syntax_theme() {
        let theme = get_active_syntax_theme();
        assert!(!theme.is_empty());
    }

    #[test]
    fn test_highlight_code_to_segments() {
        let segments =
            highlight_code_to_segments("fn main() {}", Some("rust"), "base16-ocean.dark");
        assert!(!segments.is_empty());
    }

    #[test]
    fn test_theme_loading_stable() {
        let theme1 = load_theme("base16-ocean.dark", true);
        let theme2 = load_theme("base16-ocean.dark", true);
        assert_eq!(theme1.name, theme2.name);
    }

    #[test]
    fn convert_syntect_style_uses_named_ansi_for_low_palette_indexes() {
        let style =
            convert_with_foreground(color(0x02, 0, 0, ANSI_ALPHA_INDEX), FontStyle::empty());

        assert_eq!(style.get_fg_color(), Some(AnsiColor::Green.into()));
    }

    #[test]
    fn convert_syntect_style_uses_ansi256_for_high_palette_indexes() {
        let style =
            convert_with_foreground(color(0x9a, 0, 0, ANSI_ALPHA_INDEX), FontStyle::empty());

        assert_eq!(style.get_fg_color(), Some(Ansi256Color(0x9a).into()));
    }

    #[test]
    fn convert_syntect_style_uses_terminal_default_for_alpha_one() {
        let style =
            convert_with_foreground(color(0, 0, 0, ANSI_ALPHA_DEFAULT), FontStyle::empty());

        assert_eq!(style.get_fg_color(), None);
    }

    #[test]
    fn convert_syntect_style_falls_back_to_rgb_for_unexpected_alpha() {
        let style = convert_with_foreground(color(10, 20, 30, 0x80), FontStyle::empty());

        assert_eq!(style.get_fg_color(), Some(RgbColor(10, 20, 30).into()));
    }

    #[test]
    fn convert_syntect_style_preserves_effects() {
        let style = convert_with_foreground(
            color(10, 20, 30, OPAQUE_ALPHA),
            FontStyle::BOLD | FontStyle::ITALIC | FontStyle::UNDERLINE,
        );

        let effects = style.get_effects();
        assert!(effects.contains(Effects::BOLD));
        assert!(effects.contains(Effects::ITALIC));
        assert!(effects.contains(Effects::UNDERLINE));
    }

    #[test]
    fn highlight_pipeline_decodes_alpha_encoded_theme_colors() {
        let theme = Theme {
            settings: ThemeSettings {
                foreground: Some(color(0x02, 0, 0, ANSI_ALPHA_INDEX)),
                background: Some(color(0, 0, 0, ANSI_ALPHA_DEFAULT)),
                ..ThemeSettings::default()
            },
            ..Theme::default()
        };

        let segments =
            highlight_code_to_anstyle_line_segments_with_theme("plain text", None, &theme, false);
        assert_eq!(segments.len(), 1);
        assert_eq!(segments[0].len(), 1);
        assert_eq!(
            segments[0][0].0.get_fg_color(),
            Some(AnsiColor::Green.into())
        );
        assert_eq!(segments[0][0].0.get_bg_color(), None);
        assert_eq!(segments[0][0].1, "plain text");
    }

    #[test]
    fn diff_scope_backgrounds_prefer_markup_scope_then_diff_fallback() {
        let theme = theme_with_scopes(vec![
            theme_item("markup.inserted", Some((10, 20, 30))),
            theme_item("diff.deleted", Some((40, 50, 60))),
        ]);

        let rgbs = diff_scope_background_rgbs_for_theme(&theme);
        assert_eq!(
            rgbs,
            DiffScopeBackgroundRgbs {
                inserted: Some((10, 20, 30)),
                deleted: Some((40, 50, 60)),
            }
        );
    }

    #[test]
    fn diff_scope_backgrounds_return_none_when_scopes_do_not_match() {
        let theme = theme_with_scopes(vec![theme_item("constant.numeric", Some((1, 2, 3)))]);

        let rgbs = diff_scope_background_rgbs_for_theme(&theme);
        assert_eq!(
            rgbs,
            DiffScopeBackgroundRgbs {
                inserted: None,
                deleted: None,
            }
        );
    }

    #[test]
    fn diff_scope_backgrounds_fall_back_to_diff_scopes() {
        let theme = theme_with_scopes(vec![
            theme_item("diff.inserted", Some((16, 32, 48))),
            theme_item("diff.deleted", Some((64, 80, 96))),
        ]);

        let rgbs = diff_scope_background_rgbs_for_theme(&theme);
        assert_eq!(
            rgbs,
            DiffScopeBackgroundRgbs {
                inserted: Some((16, 32, 48)),
                deleted: Some((64, 80, 96)),
            }
        );
    }
}