Skip to main content

vtcode_tui/ui/
syntax_highlight.rs

1//! Syntax Highlighting Engine
2//!
3//! Global syntax highlighting using `syntect` with TextMate themes.
4//! Follows the architecture from OpenAI Codex PRs #11447 and #12581.
5//!
6//! # Architecture
7//!
8//! - **SyntaxSet**: Process-global singleton (~250 grammars, loaded once)
9//! - **ThemeSet**: Process-global singleton loaded once
10//! - **Highlighting**: Guardrails skip large inputs (>512KB or >10K lines)
11//!
12//! # Usage
13//!
14//! ```rust
15//! use vtcode_tui::ui::syntax_highlight::{
16//!     get_active_syntax_theme, highlight_code_to_segments,
17//! };
18//!
19//! // Auto-resolve syntax theme from current UI theme
20//! let syntax_theme = get_active_syntax_theme();
21//!
22//! // Highlight code with proper theme
23//! let code = "fn main() { println!(\"hi\"); }";
24//! let segments = highlight_code_to_segments(code, Some("rust"), syntax_theme);
25//! assert!(!segments.is_empty());
26//! ```
27//!
28//! # Performance
29//!
30//! - Single SyntaxSet load (~1MB, ~50ms)
31//! - Single ThemeSet load shared by all highlighters
32//! - Input guardrails prevent highlighting huge files
33//! - Parser state preserved across multiline constructs
34
35use crate::ui::theme::get_syntax_theme_for_ui_theme;
36use anstyle::{Ansi256Color, AnsiColor, Effects, RgbColor, Style as AnstyleStyle};
37use once_cell::sync::Lazy;
38use syntect::highlighting::{FontStyle, Highlighter, Theme, ThemeSet};
39use syntect::parsing::{Scope, SyntaxReference, SyntaxSet};
40use syntect::util::LinesWithEndings;
41use tracing::warn;
42
/// Name of the syntax theme this module treats as its default.
const DEFAULT_THEME_NAME: &str = "base16-ocean.dark";

/// Byte-size guardrail: inputs larger than 512 KiB are not highlighted.
const MAX_INPUT_SIZE_BYTES: usize = 512 << 10;

/// Line-count guardrail: inputs with more than 10,000 lines are not highlighted.
const MAX_INPUT_LINES: usize = 10000;

// Alpha-channel conventions used by syntect/bat ANSI-family themes:
// alpha 0 means `r` holds an ANSI palette index, alpha 1 means "use the
// terminal default color", and alpha 255 is an ordinary opaque RGB color.
const ANSI_ALPHA_INDEX: u8 = 0;
const ANSI_ALPHA_DEFAULT: u8 = 1;
const OPAQUE_ALPHA: u8 = 0xFF;
57
58/// Global SyntaxSet singleton (~250 grammars)
59static SHARED_SYNTAX_SET: Lazy<SyntaxSet> = Lazy::new(SyntaxSet::load_defaults_newlines);
60
61/// Global ThemeSet singleton.
62static SHARED_THEME_SET: Lazy<ThemeSet> = Lazy::new(|| match ThemeSet::load_defaults() {
63    defaults if !defaults.themes.is_empty() => defaults,
64    _ => {
65        warn!("Failed to load default syntax highlighting themes");
66        ThemeSet {
67            themes: Default::default(),
68        }
69    }
70});
71
72/// Get the global SyntaxSet reference
73#[inline]
74pub fn syntax_set() -> &'static SyntaxSet {
75    &SHARED_SYNTAX_SET
76}
77
78/// Find syntax by language token (e.g., "rust", "python")
79#[inline]
80pub fn find_syntax_by_token(token: &str) -> &'static SyntaxReference {
81    SHARED_SYNTAX_SET
82        .find_syntax_by_token(token)
83        .unwrap_or_else(|| SHARED_SYNTAX_SET.find_syntax_plain_text())
84}
85
86/// Find syntax by exact name
87#[inline]
88pub fn find_syntax_by_name(name: &str) -> Option<&'static SyntaxReference> {
89    SHARED_SYNTAX_SET.find_syntax_by_name(name)
90}
91
92/// Find syntax by file extension
93#[inline]
94pub fn find_syntax_by_extension(ext: &str) -> Option<&'static SyntaxReference> {
95    SHARED_SYNTAX_SET.find_syntax_by_extension(ext)
96}
97
98/// Get plain text syntax fallback
99#[inline]
100pub fn find_syntax_plain_text() -> &'static SyntaxReference {
101    SHARED_SYNTAX_SET.find_syntax_plain_text()
102}
103
104fn fallback_theme() -> Theme {
105    SHARED_THEME_SET
106        .themes
107        .values()
108        .next()
109        .cloned()
110        .unwrap_or_default()
111}
112
113fn plain_text_line_segments(code: &str) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
114    let mut result = Vec::new();
115    let mut ends_with_newline = false;
116    for line in LinesWithEndings::from(code) {
117        ends_with_newline = line.ends_with('\n');
118        let trimmed = line.trim_end_matches('\n');
119        result.push(vec![(
120            syntect::highlighting::Style::default(),
121            trimmed.to_string(),
122        )]);
123    }
124
125    if ends_with_newline {
126        result.push(Vec::new());
127    }
128
129    result
130}
131
132/// Load a theme from the process-global theme set.
133///
134/// # Arguments
135/// * `theme_name` - Theme identifier (TextMate theme name)
136/// * `cache` - Ignored. Kept for API compatibility.
137///
138/// # Returns
139/// Cloned theme instance (safe for multi-threaded use)
140pub fn load_theme(theme_name: &str, _cache: bool) -> Theme {
141    if let Some(theme) = SHARED_THEME_SET.themes.get(theme_name) {
142        theme.clone()
143    } else {
144        warn!(
145            theme = theme_name,
146            "Unknown syntax highlighting theme, falling back to default"
147        );
148        fallback_theme()
149    }
150}
151
152/// Get the default syntax theme name
153#[inline]
154pub fn default_theme_name() -> String {
155    DEFAULT_THEME_NAME.to_string()
156}
157
158/// Get all available theme names
159pub fn available_themes() -> Vec<String> {
160    SHARED_THEME_SET.themes.keys().cloned().collect()
161}
162
163/// Check if input should be highlighted (guardrails)
164#[inline]
165pub fn should_highlight(code: &str) -> bool {
166    code.len() <= MAX_INPUT_SIZE_BYTES && code.lines().count() <= MAX_INPUT_LINES
167}
168
169/// Get the recommended syntax theme for the current UI theme
170///
171/// This ensures syntax highlighting colors complement the UI theme background.
172/// Based on OpenAI Codex PRs #11447 and #12581.
173#[inline]
174pub fn get_active_syntax_theme() -> &'static str {
175    get_syntax_theme_for_ui_theme(&crate::ui::theme::active_theme_id())
176}
177
178/// Get the recommended syntax theme for a specific UI theme
179#[inline]
180pub fn get_syntax_theme(theme: &str) -> &'static str {
181    get_syntax_theme_for_ui_theme(theme)
182}
183
/// Background colors for diff rendering, taken from a syntax theme's scopes.
///
/// Each field is an `(r, g, b)` triple, or `None` when the theme defines no
/// background for the corresponding scope. Lookup prefers `markup.inserted` /
/// `markup.deleted` and falls back to `diff.inserted` / `diff.deleted`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct DiffScopeBackgroundRgbs {
    /// Background for added lines.
    pub inserted: Option<(u8, u8, u8)>,
    /// Background for removed lines.
    pub deleted: Option<(u8, u8, u8)>,
}
193
194/// Resolve diff-scope background colors from the currently active syntax theme.
195pub fn diff_scope_background_rgbs() -> DiffScopeBackgroundRgbs {
196    let theme_name = get_active_syntax_theme();
197    let theme = load_theme(theme_name, true);
198    diff_scope_background_rgbs_for_theme(&theme)
199}
200
201fn diff_scope_background_rgbs_for_theme(theme: &Theme) -> DiffScopeBackgroundRgbs {
202    let highlighter = Highlighter::new(theme);
203    let inserted = scope_background_rgb(&highlighter, "markup.inserted")
204        .or_else(|| scope_background_rgb(&highlighter, "diff.inserted"));
205    let deleted = scope_background_rgb(&highlighter, "markup.deleted")
206        .or_else(|| scope_background_rgb(&highlighter, "diff.deleted"));
207    DiffScopeBackgroundRgbs { inserted, deleted }
208}
209
210fn scope_background_rgb(highlighter: &Highlighter<'_>, scope_name: &str) -> Option<(u8, u8, u8)> {
211    let scope = Scope::new(scope_name).ok()?;
212    let background = highlighter.style_mod_for_stack(&[scope]).background?;
213    Some((background.r, background.g, background.b))
214}
215
216fn ansi_palette_color(index: u8) -> anstyle::Color {
217    match index {
218        0x00 => AnsiColor::Black.into(),
219        0x01 => AnsiColor::Red.into(),
220        0x02 => AnsiColor::Green.into(),
221        0x03 => AnsiColor::Yellow.into(),
222        0x04 => AnsiColor::Blue.into(),
223        0x05 => AnsiColor::Magenta.into(),
224        0x06 => AnsiColor::Cyan.into(),
225        0x07 => AnsiColor::White.into(),
226        index => Ansi256Color(index).into(),
227    }
228}
229
230fn convert_syntect_color(color: syntect::highlighting::Color) -> Option<anstyle::Color> {
231    match color.a {
232        // Bat-compatible encoding for ANSI-family themes.
233        ANSI_ALPHA_INDEX => Some(ansi_palette_color(color.r)),
234        // Preserve terminal defaults rather than forcing black.
235        ANSI_ALPHA_DEFAULT => None,
236        // Standard syntect themes use opaque RGB values.
237        OPAQUE_ALPHA => Some(RgbColor(color.r, color.g, color.b).into()),
238        // Some theme dumps use other alpha values; keep them readable as RGB.
239        _ => Some(RgbColor(color.r, color.g, color.b).into()),
240    }
241}
242
243fn convert_syntect_style(style: syntect::highlighting::Style) -> AnstyleStyle {
244    let mut effects = Effects::new();
245    if style.font_style.contains(FontStyle::BOLD) {
246        effects |= Effects::BOLD;
247    }
248    if style.font_style.contains(FontStyle::ITALIC) {
249        effects |= Effects::ITALIC;
250    }
251    if style.font_style.contains(FontStyle::UNDERLINE) {
252        effects |= Effects::UNDERLINE;
253    }
254
255    AnstyleStyle::new()
256        .fg_color(convert_syntect_color(style.foreground))
257        .bg_color(convert_syntect_color(style.background))
258        .effects(effects)
259}
260
261#[inline]
262fn select_syntax(language: Option<&str>) -> &'static SyntaxReference {
263    language
264        .map(find_syntax_by_token)
265        .unwrap_or_else(find_syntax_plain_text)
266}
267
268/// Highlight code and return styled segments per line.
269///
270/// Uses `LinesWithEndings` semantics by preserving an empty trailing line
271/// when the input ends with `\n`.
272pub fn highlight_code_to_line_segments(
273    code: &str,
274    language: Option<&str>,
275    theme_name: &str,
276) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
277    let theme = load_theme(theme_name, true);
278    highlight_code_to_line_segments_with_theme(code, language, &theme)
279}
280
281fn highlight_code_to_line_segments_with_theme(
282    code: &str,
283    language: Option<&str>,
284    theme: &Theme,
285) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
286    if !should_highlight(code) {
287        return plain_text_line_segments(code);
288    }
289
290    let syntax = select_syntax(language);
291    let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
292    let mut result = Vec::new();
293    let mut ends_with_newline = false;
294
295    for line in LinesWithEndings::from(code) {
296        ends_with_newline = line.ends_with('\n');
297        let trimmed = line.trim_end_matches('\n');
298        let segments = match highlighter.highlight_line(trimmed, syntax_set()) {
299            Ok(ranges) => ranges
300                .into_iter()
301                .map(|(style, text)| (style, text.to_string()))
302                .collect(),
303            Err(_) => vec![(syntect::highlighting::Style::default(), trimmed.to_string())],
304        };
305        result.push(segments);
306    }
307
308    if ends_with_newline {
309        result.push(Vec::new());
310    }
311
312    result
313}
314
315fn highlight_code_to_anstyle_line_segments_with_theme(
316    code: &str,
317    language: Option<&str>,
318    theme: &Theme,
319    strip_background: bool,
320) -> Vec<Vec<(AnstyleStyle, String)>> {
321    highlight_code_to_line_segments_with_theme(code, language, theme)
322        .into_iter()
323        .map(|ranges| {
324            ranges
325                .into_iter()
326                .filter(|(_, text)| !text.is_empty())
327                .map(|(style, text)| {
328                    let mut anstyle = convert_syntect_style(style);
329                    if strip_background {
330                        anstyle = anstyle.bg_color(None);
331                    }
332                    (anstyle, text)
333                })
334                .collect()
335        })
336        .collect()
337}
338
339/// Highlight code and convert to `anstyle` segments with optional bg stripping.
340pub fn highlight_code_to_anstyle_line_segments(
341    code: &str,
342    language: Option<&str>,
343    theme_name: &str,
344    strip_background: bool,
345) -> Vec<Vec<(AnstyleStyle, String)>> {
346    let theme = load_theme(theme_name, true);
347    highlight_code_to_anstyle_line_segments_with_theme(code, language, &theme, strip_background)
348}
349
350/// Highlight one line and convert to `anstyle` segments with optional bg stripping.
351pub fn highlight_line_to_anstyle_segments(
352    line: &str,
353    language: Option<&str>,
354    theme_name: &str,
355    strip_background: bool,
356) -> Option<Vec<(AnstyleStyle, String)>> {
357    highlight_code_to_anstyle_line_segments(line, language, theme_name, strip_background)
358        .into_iter()
359        .next()
360}
361
362/// Highlight code and return styled segments
363///
364/// # Arguments
365/// * `code` - Source code to highlight
366/// * `language` - Optional language hint (auto-detected if None)
367/// * `theme_name` - Syntax theme name (use `get_active_syntax_theme()` for UI theme sync)
368///
369/// # Returns
370/// Vector of (Style, String) tuples for rendering
371///
372/// # Performance
373/// - Returns None early if input exceeds guardrails
374/// - Uses cached theme when available
375pub fn highlight_code_to_segments(
376    code: &str,
377    language: Option<&str>,
378    theme_name: &str,
379) -> Vec<(syntect::highlighting::Style, String)> {
380    highlight_code_to_line_segments(code, language, theme_name)
381        .into_iter()
382        .flatten()
383        .collect()
384}
385
386/// Highlight a single line (for diff rendering)
387///
388/// Preserves parser state for multiline constructs
389pub fn highlight_line_for_diff(
390    line: &str,
391    language: Option<&str>,
392    theme_name: &str,
393) -> Option<Vec<(syntect::highlighting::Style, String)>> {
394    highlight_code_to_line_segments(line, language, theme_name)
395        .into_iter()
396        .next()
397}
398
399/// Convert code to ANSI escape sequences
400pub fn highlight_code_to_ansi(code: &str, language: Option<&str>, theme_name: &str) -> String {
401    let segments = highlight_code_to_anstyle_line_segments(code, language, theme_name, false);
402    let mut output = String::with_capacity(code.len() + segments.len() * 10);
403
404    for (ansi_style, text) in segments.into_iter().flatten() {
405        output.push_str(&ansi_style.to_string());
406        output.push_str(&text);
407        output.push_str("\x1b[0m"); // Reset
408    }
409
410    output
411}
412
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;
    use syntect::highlighting::Color as SyntectColor;
    use syntect::highlighting::ScopeSelectors;
    use syntect::highlighting::StyleModifier;
    use syntect::highlighting::ThemeItem;
    use syntect::highlighting::ThemeSettings;

    /// Build a syntect color from its four channels.
    fn rgba(r: u8, g: u8, b: u8, a: u8) -> SyntectColor {
        SyntectColor { r, g, b, a }
    }

    /// Convert a style with the given foreground and font flags over an
    /// opaque black background.
    fn converted(foreground: SyntectColor, font_style: FontStyle) -> AnstyleStyle {
        convert_syntect_style(syntect::highlighting::Style {
            foreground,
            background: rgba(0, 0, 0, OPAQUE_ALPHA),
            font_style,
        })
    }

    /// Build a theme rule that only sets an (opaque) background for `scope`.
    fn theme_item(scope: &str, background: Option<(u8, u8, u8)>) -> ThemeItem {
        ThemeItem {
            scope: ScopeSelectors::from_str(scope).expect("scope selector should parse"),
            style: StyleModifier {
                background: background.map(|(r, g, b)| SyntectColor { r, g, b, a: 255 }),
                ..StyleModifier::default()
            },
        }
    }

    /// Build a theme with default settings and exactly the given scope rules.
    fn theme_with_scopes(scopes: Vec<ThemeItem>) -> Theme {
        Theme {
            settings: ThemeSettings::default(),
            scopes,
            ..Theme::default()
        }
    }

    #[test]
    fn test_syntax_set_loaded() {
        assert!(!syntax_set().syntaxes().is_empty());
    }

    #[test]
    fn test_find_syntax_by_token() {
        let rust = find_syntax_by_token("rust");
        assert!(rust.name.contains("Rust"));
    }

    #[test]
    fn test_should_highlight_guardrails() {
        assert!(should_highlight("fn main() {}"));
        let oversized = "x".repeat(MAX_INPUT_SIZE_BYTES + 1);
        assert!(!should_highlight(&oversized));
    }

    #[test]
    fn test_get_active_syntax_theme() {
        assert!(!get_active_syntax_theme().is_empty());
    }

    #[test]
    fn test_highlight_code_to_segments() {
        let segments =
            highlight_code_to_segments("fn main() {}", Some("rust"), "base16-ocean.dark");
        assert!(!segments.is_empty());
    }

    #[test]
    fn test_theme_loading_stable() {
        let first = load_theme("base16-ocean.dark", true);
        let second = load_theme("base16-ocean.dark", true);
        assert_eq!(first.name, second.name);
    }

    #[test]
    fn convert_syntect_style_uses_named_ansi_for_low_palette_indexes() {
        let style = converted(rgba(0x02, 0, 0, ANSI_ALPHA_INDEX), FontStyle::empty());

        assert_eq!(style.get_fg_color(), Some(AnsiColor::Green.into()));
    }

    #[test]
    fn convert_syntect_style_uses_ansi256_for_high_palette_indexes() {
        let style = converted(rgba(0x9a, 0, 0, ANSI_ALPHA_INDEX), FontStyle::empty());

        assert_eq!(style.get_fg_color(), Some(Ansi256Color(0x9a).into()));
    }

    #[test]
    fn convert_syntect_style_uses_terminal_default_for_alpha_one() {
        let style = converted(rgba(0, 0, 0, ANSI_ALPHA_DEFAULT), FontStyle::empty());

        assert_eq!(style.get_fg_color(), None);
    }

    #[test]
    fn convert_syntect_style_falls_back_to_rgb_for_unexpected_alpha() {
        let style = converted(rgba(10, 20, 30, 0x80), FontStyle::empty());

        assert_eq!(style.get_fg_color(), Some(RgbColor(10, 20, 30).into()));
    }

    #[test]
    fn convert_syntect_style_preserves_effects() {
        let style = converted(
            rgba(10, 20, 30, OPAQUE_ALPHA),
            FontStyle::BOLD | FontStyle::ITALIC | FontStyle::UNDERLINE,
        );

        let effects = style.get_effects();
        for expected in [Effects::BOLD, Effects::ITALIC, Effects::UNDERLINE].iter() {
            assert!(effects.contains(*expected));
        }
    }

    #[test]
    fn highlight_pipeline_decodes_alpha_encoded_theme_colors() {
        let theme = Theme {
            settings: ThemeSettings {
                foreground: Some(rgba(0x02, 0, 0, ANSI_ALPHA_INDEX)),
                background: Some(rgba(0, 0, 0, ANSI_ALPHA_DEFAULT)),
                ..ThemeSettings::default()
            },
            ..Theme::default()
        };

        let segments =
            highlight_code_to_anstyle_line_segments_with_theme("plain text", None, &theme, false);
        assert_eq!(segments.len(), 1);
        assert_eq!(segments[0].len(), 1);

        let (style, text) = &segments[0][0];
        assert_eq!(style.get_fg_color(), Some(AnsiColor::Green.into()));
        assert_eq!(style.get_bg_color(), None);
        assert_eq!(text, "plain text");
    }

    #[test]
    fn diff_scope_backgrounds_prefer_markup_scope_then_diff_fallback() {
        let theme = theme_with_scopes(vec![
            theme_item("markup.inserted", Some((10, 20, 30))),
            theme_item("diff.deleted", Some((40, 50, 60))),
        ]);

        let expected = DiffScopeBackgroundRgbs {
            inserted: Some((10, 20, 30)),
            deleted: Some((40, 50, 60)),
        };
        assert_eq!(diff_scope_background_rgbs_for_theme(&theme), expected);
    }

    #[test]
    fn diff_scope_backgrounds_return_none_when_scopes_do_not_match() {
        let theme = theme_with_scopes(vec![theme_item("constant.numeric", Some((1, 2, 3)))]);

        let expected = DiffScopeBackgroundRgbs {
            inserted: None,
            deleted: None,
        };
        assert_eq!(diff_scope_background_rgbs_for_theme(&theme), expected);
    }

    #[test]
    fn diff_scope_backgrounds_fall_back_to_diff_scopes() {
        let theme = theme_with_scopes(vec![
            theme_item("diff.inserted", Some((16, 32, 48))),
            theme_item("diff.deleted", Some((64, 80, 96))),
        ]);

        let expected = DiffScopeBackgroundRgbs {
            inserted: Some((16, 32, 48)),
            deleted: Some((64, 80, 96)),
        };
        assert_eq!(diff_scope_background_rgbs_for_theme(&theme), expected);
    }
}