Skip to main content

vtcode_tui/ui/
syntax_highlight.rs

1//! Syntax Highlighting Engine
2//!
3//! Global syntax highlighting using `syntect` with TextMate themes.
4//! Follows the architecture from OpenAI Codex PRs #11447 and #12581.
5//!
6//! # Architecture
7//!
8//! - **SyntaxSet**: Process-global singleton (~250 grammars, loaded once)
9//! - **ThemeSet**: Process-global singleton loaded once
10//! - **Highlighting**: Guardrails skip large inputs (>512KB or >10K lines)
11//!
12//! # Usage
13//!
14//! ```rust
15//! use crate::ui::syntax_highlight::{
16//!     highlight_code_to_segments, get_active_syntax_theme
17//! };
18//! use crate::ui::theme::active_theme_id;
19//!
20//! // Auto-resolve syntax theme from current UI theme
21//! let syntax_theme = get_active_syntax_theme();
22//!
23//! // Highlight code with proper theme
24//! let segments = highlight_code_to_segments(code, Some("rust"), syntax_theme);
25//! ```
26//!
27//! # Performance
28//!
29//! - Single SyntaxSet load (~1MB, ~50ms)
30//! - Single ThemeSet load shared by all highlighters
31//! - Input guardrails prevent highlighting huge files
32//! - Parser state preserved across multiline constructs
33
34use crate::ui::theme::get_syntax_theme_for_ui_theme;
35use anstyle::{Ansi256Color, AnsiColor, Effects, RgbColor, Style as AnstyleStyle};
36use once_cell::sync::Lazy;
37use syntect::highlighting::{FontStyle, Highlighter, Theme, ThemeSet};
38use syntect::parsing::{Scope, SyntaxReference, SyntaxSet};
39use syntect::util::LinesWithEndings;
40use tracing::warn;
41
/// Name of the syntax theme reported by `default_theme_name`.
const DEFAULT_THEME_NAME: &str = "base16-ocean.dark";

/// Byte-size guardrail: inputs larger than 512 KiB are not highlighted.
const MAX_INPUT_SIZE_BYTES: usize = 512 * 1024;

/// Line-count guardrail: inputs with more than 10,000 lines are not highlighted.
const MAX_INPUT_LINES: usize = 10_000;

// Syntect/bat overload the alpha channel to carry ANSI palette semantics:
//   alpha 0 -> the red channel holds an ANSI palette index,
//   alpha 1 -> use the terminal's default color,
//   alpha 255 -> ordinary opaque RGB.
const ANSI_ALPHA_INDEX: u8 = 0x00;
const ANSI_ALPHA_DEFAULT: u8 = 0x01;
const OPAQUE_ALPHA: u8 = u8::MAX;
56
57/// Global SyntaxSet singleton (~250 grammars)
58static SHARED_SYNTAX_SET: Lazy<SyntaxSet> = Lazy::new(SyntaxSet::load_defaults_newlines);
59
60/// Global ThemeSet singleton.
61static SHARED_THEME_SET: Lazy<ThemeSet> = Lazy::new(|| match ThemeSet::load_defaults() {
62    defaults if !defaults.themes.is_empty() => defaults,
63    _ => {
64        warn!("Failed to load default syntax highlighting themes");
65        ThemeSet {
66            themes: Default::default(),
67        }
68    }
69});
70
71/// Get the global SyntaxSet reference
72#[inline]
73pub fn syntax_set() -> &'static SyntaxSet {
74    &SHARED_SYNTAX_SET
75}
76
77/// Find syntax by language token (e.g., "rust", "python")
78#[inline]
79pub fn find_syntax_by_token(token: &str) -> &'static SyntaxReference {
80    SHARED_SYNTAX_SET
81        .find_syntax_by_token(token)
82        .unwrap_or_else(|| SHARED_SYNTAX_SET.find_syntax_plain_text())
83}
84
85/// Find syntax by exact name
86#[inline]
87pub fn find_syntax_by_name(name: &str) -> Option<&'static SyntaxReference> {
88    SHARED_SYNTAX_SET.find_syntax_by_name(name)
89}
90
91/// Find syntax by file extension
92#[inline]
93pub fn find_syntax_by_extension(ext: &str) -> Option<&'static SyntaxReference> {
94    SHARED_SYNTAX_SET.find_syntax_by_extension(ext)
95}
96
97/// Get plain text syntax fallback
98#[inline]
99pub fn find_syntax_plain_text() -> &'static SyntaxReference {
100    SHARED_SYNTAX_SET.find_syntax_plain_text()
101}
102
103fn fallback_theme() -> Theme {
104    SHARED_THEME_SET
105        .themes
106        .values()
107        .next()
108        .cloned()
109        .unwrap_or_default()
110}
111
112fn plain_text_line_segments(code: &str) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
113    let mut result = Vec::new();
114    let mut ends_with_newline = false;
115    for line in LinesWithEndings::from(code) {
116        ends_with_newline = line.ends_with('\n');
117        let trimmed = line.trim_end_matches('\n');
118        result.push(vec![(
119            syntect::highlighting::Style::default(),
120            trimmed.to_string(),
121        )]);
122    }
123
124    if ends_with_newline {
125        result.push(Vec::new());
126    }
127
128    result
129}
130
131/// Load a theme from the process-global theme set.
132///
133/// # Arguments
134/// * `theme_name` - Theme identifier (TextMate theme name)
135/// * `cache` - Ignored. Kept for API compatibility.
136///
137/// # Returns
138/// Cloned theme instance (safe for multi-threaded use)
139pub fn load_theme(theme_name: &str, _cache: bool) -> Theme {
140    if let Some(theme) = SHARED_THEME_SET.themes.get(theme_name) {
141        theme.clone()
142    } else {
143        warn!(
144            theme = theme_name,
145            "Unknown syntax highlighting theme, falling back to default"
146        );
147        fallback_theme()
148    }
149}
150
151/// Get the default syntax theme name
152#[inline]
153pub fn default_theme_name() -> String {
154    DEFAULT_THEME_NAME.to_string()
155}
156
157/// Get all available theme names
158pub fn available_themes() -> Vec<String> {
159    SHARED_THEME_SET.themes.keys().cloned().collect()
160}
161
162/// Check if input should be highlighted (guardrails)
163#[inline]
164pub fn should_highlight(code: &str) -> bool {
165    code.len() <= MAX_INPUT_SIZE_BYTES && code.lines().count() <= MAX_INPUT_LINES
166}
167
168/// Get the recommended syntax theme for the current UI theme
169///
170/// This ensures syntax highlighting colors complement the UI theme background.
171/// Based on OpenAI Codex PRs #11447 and #12581.
172#[inline]
173pub fn get_active_syntax_theme() -> &'static str {
174    get_syntax_theme_for_ui_theme(&crate::ui::theme::active_theme_id())
175}
176
177/// Get the recommended syntax theme for a specific UI theme
178#[inline]
179pub fn get_syntax_theme(theme: &str) -> &'static str {
180    get_syntax_theme_for_ui_theme(theme)
181}
182
/// Background colors for diff rendering, as raw `(r, g, b)` triples taken
/// from a syntax theme's scopes.
///
/// The resolver in this module looks at `markup.inserted` / `markup.deleted`
/// first and then at `diff.inserted` / `diff.deleted`. A field is `None`
/// when no background was found for that side of the diff.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct DiffScopeBackgroundRgbs {
    /// Background for added lines, if the theme provides one.
    pub inserted: Option<(u8, u8, u8)>,
    /// Background for removed lines, if the theme provides one.
    pub deleted: Option<(u8, u8, u8)>,
}
192
193/// Resolve diff-scope background colors from the currently active syntax theme.
194pub fn diff_scope_background_rgbs() -> DiffScopeBackgroundRgbs {
195    let theme_name = get_active_syntax_theme();
196    let theme = load_theme(theme_name, true);
197    diff_scope_background_rgbs_for_theme(&theme)
198}
199
200fn diff_scope_background_rgbs_for_theme(theme: &Theme) -> DiffScopeBackgroundRgbs {
201    let highlighter = Highlighter::new(theme);
202    let inserted = scope_background_rgb(&highlighter, "markup.inserted")
203        .or_else(|| scope_background_rgb(&highlighter, "diff.inserted"));
204    let deleted = scope_background_rgb(&highlighter, "markup.deleted")
205        .or_else(|| scope_background_rgb(&highlighter, "diff.deleted"));
206    DiffScopeBackgroundRgbs { inserted, deleted }
207}
208
209fn scope_background_rgb(highlighter: &Highlighter<'_>, scope_name: &str) -> Option<(u8, u8, u8)> {
210    let scope = Scope::new(scope_name).ok()?;
211    let background = highlighter.style_mod_for_stack(&[scope]).background?;
212    Some((background.r, background.g, background.b))
213}
214
215fn ansi_palette_color(index: u8) -> anstyle::Color {
216    match index {
217        0x00 => AnsiColor::Black.into(),
218        0x01 => AnsiColor::Red.into(),
219        0x02 => AnsiColor::Green.into(),
220        0x03 => AnsiColor::Yellow.into(),
221        0x04 => AnsiColor::Blue.into(),
222        0x05 => AnsiColor::Magenta.into(),
223        0x06 => AnsiColor::Cyan.into(),
224        0x07 => AnsiColor::White.into(),
225        index => Ansi256Color(index).into(),
226    }
227}
228
229fn convert_syntect_color(color: syntect::highlighting::Color) -> Option<anstyle::Color> {
230    match color.a {
231        // Bat-compatible encoding for ANSI-family themes.
232        ANSI_ALPHA_INDEX => Some(ansi_palette_color(color.r)),
233        // Preserve terminal defaults rather than forcing black.
234        ANSI_ALPHA_DEFAULT => None,
235        // Standard syntect themes use opaque RGB values.
236        OPAQUE_ALPHA => Some(RgbColor(color.r, color.g, color.b).into()),
237        // Some theme dumps use other alpha values; keep them readable as RGB.
238        _ => Some(RgbColor(color.r, color.g, color.b).into()),
239    }
240}
241
242fn convert_syntect_style(style: syntect::highlighting::Style) -> AnstyleStyle {
243    let mut effects = Effects::new();
244    if style.font_style.contains(FontStyle::BOLD) {
245        effects |= Effects::BOLD;
246    }
247    if style.font_style.contains(FontStyle::ITALIC) {
248        effects |= Effects::ITALIC;
249    }
250    if style.font_style.contains(FontStyle::UNDERLINE) {
251        effects |= Effects::UNDERLINE;
252    }
253
254    AnstyleStyle::new()
255        .fg_color(convert_syntect_color(style.foreground))
256        .bg_color(convert_syntect_color(style.background))
257        .effects(effects)
258}
259
260#[inline]
261fn select_syntax(language: Option<&str>) -> &'static SyntaxReference {
262    language
263        .map(find_syntax_by_token)
264        .unwrap_or_else(find_syntax_plain_text)
265}
266
267/// Highlight code and return styled segments per line.
268///
269/// Uses `LinesWithEndings` semantics by preserving an empty trailing line
270/// when the input ends with `\n`.
271pub fn highlight_code_to_line_segments(
272    code: &str,
273    language: Option<&str>,
274    theme_name: &str,
275) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
276    let theme = load_theme(theme_name, true);
277    highlight_code_to_line_segments_with_theme(code, language, &theme)
278}
279
280fn highlight_code_to_line_segments_with_theme(
281    code: &str,
282    language: Option<&str>,
283    theme: &Theme,
284) -> Vec<Vec<(syntect::highlighting::Style, String)>> {
285    if !should_highlight(code) {
286        return plain_text_line_segments(code);
287    }
288
289    let syntax = select_syntax(language);
290    let mut highlighter = syntect::easy::HighlightLines::new(syntax, theme);
291    let mut result = Vec::new();
292    let mut ends_with_newline = false;
293
294    for line in LinesWithEndings::from(code) {
295        ends_with_newline = line.ends_with('\n');
296        let trimmed = line.trim_end_matches('\n');
297        let segments = match highlighter.highlight_line(trimmed, syntax_set()) {
298            Ok(ranges) => ranges
299                .into_iter()
300                .map(|(style, text)| (style, text.to_string()))
301                .collect(),
302            Err(_) => vec![(syntect::highlighting::Style::default(), trimmed.to_string())],
303        };
304        result.push(segments);
305    }
306
307    if ends_with_newline {
308        result.push(Vec::new());
309    }
310
311    result
312}
313
314fn highlight_code_to_anstyle_line_segments_with_theme(
315    code: &str,
316    language: Option<&str>,
317    theme: &Theme,
318    strip_background: bool,
319) -> Vec<Vec<(AnstyleStyle, String)>> {
320    highlight_code_to_line_segments_with_theme(code, language, theme)
321        .into_iter()
322        .map(|ranges| {
323            ranges
324                .into_iter()
325                .filter(|(_, text)| !text.is_empty())
326                .map(|(style, text)| {
327                    let mut anstyle = convert_syntect_style(style);
328                    if strip_background {
329                        anstyle = anstyle.bg_color(None);
330                    }
331                    (anstyle, text)
332                })
333                .collect()
334        })
335        .collect()
336}
337
338/// Highlight code and convert to `anstyle` segments with optional bg stripping.
339pub fn highlight_code_to_anstyle_line_segments(
340    code: &str,
341    language: Option<&str>,
342    theme_name: &str,
343    strip_background: bool,
344) -> Vec<Vec<(AnstyleStyle, String)>> {
345    let theme = load_theme(theme_name, true);
346    highlight_code_to_anstyle_line_segments_with_theme(code, language, &theme, strip_background)
347}
348
349/// Highlight one line and convert to `anstyle` segments with optional bg stripping.
350pub fn highlight_line_to_anstyle_segments(
351    line: &str,
352    language: Option<&str>,
353    theme_name: &str,
354    strip_background: bool,
355) -> Option<Vec<(AnstyleStyle, String)>> {
356    highlight_code_to_anstyle_line_segments(line, language, theme_name, strip_background)
357        .into_iter()
358        .next()
359}
360
361/// Highlight code and return styled segments
362///
363/// # Arguments
364/// * `code` - Source code to highlight
365/// * `language` - Optional language hint (auto-detected if None)
366/// * `theme_name` - Syntax theme name (use `get_active_syntax_theme()` for UI theme sync)
367///
368/// # Returns
369/// Vector of (Style, String) tuples for rendering
370///
371/// # Performance
372/// - Returns None early if input exceeds guardrails
373/// - Uses cached theme when available
374pub fn highlight_code_to_segments(
375    code: &str,
376    language: Option<&str>,
377    theme_name: &str,
378) -> Vec<(syntect::highlighting::Style, String)> {
379    highlight_code_to_line_segments(code, language, theme_name)
380        .into_iter()
381        .flatten()
382        .collect()
383}
384
385/// Highlight a single line (for diff rendering)
386///
387/// Preserves parser state for multiline constructs
388pub fn highlight_line_for_diff(
389    line: &str,
390    language: Option<&str>,
391    theme_name: &str,
392) -> Option<Vec<(syntect::highlighting::Style, String)>> {
393    highlight_code_to_line_segments(line, language, theme_name)
394        .into_iter()
395        .next()
396}
397
398/// Convert code to ANSI escape sequences
399pub fn highlight_code_to_ansi(code: &str, language: Option<&str>, theme_name: &str) -> String {
400    let segments = highlight_code_to_anstyle_line_segments(code, language, theme_name, false);
401    let mut output = String::with_capacity(code.len() + segments.len() * 10);
402
403    for (ansi_style, text) in segments.into_iter().flatten() {
404        output.push_str(&ansi_style.to_string());
405        output.push_str(&text);
406        output.push_str("\x1b[0m"); // Reset
407    }
408
409    output
410}
411
#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;
    use syntect::highlighting::{
        Color as SyntectColor, ScopeSelectors, StyleModifier, ThemeItem, ThemeSettings,
    };

    /// Shorthand for a syntect color built from its four channels.
    fn rgba(r: u8, g: u8, b: u8, a: u8) -> SyntectColor {
        SyntectColor { r, g, b, a }
    }

    /// Syntect style with the given foreground and font style over an opaque
    /// black background.
    fn style_with_foreground(
        foreground: SyntectColor,
        font_style: FontStyle,
    ) -> syntect::highlighting::Style {
        syntect::highlighting::Style {
            foreground,
            background: rgba(0, 0, 0, OPAQUE_ALPHA),
            font_style,
        }
    }

    /// Theme rule that assigns an optional opaque background to `scope`.
    fn theme_item(scope: &str, background: Option<(u8, u8, u8)>) -> ThemeItem {
        let selector = ScopeSelectors::from_str(scope).expect("scope selector should parse");
        ThemeItem {
            scope: selector,
            style: StyleModifier {
                background: background.map(|(r, g, b)| rgba(r, g, b, 255)),
                ..StyleModifier::default()
            },
        }
    }

    /// Theme with default settings and exactly the given scope rules.
    fn theme_with_scopes(scopes: Vec<ThemeItem>) -> Theme {
        Theme {
            settings: ThemeSettings::default(),
            scopes,
            ..Theme::default()
        }
    }

    #[test]
    fn test_syntax_set_loaded() {
        assert!(!syntax_set().syntaxes().is_empty());
    }

    #[test]
    fn test_find_syntax_by_token() {
        let syntax = find_syntax_by_token("rust");
        assert!(syntax.name.contains("Rust"));
    }

    #[test]
    fn test_should_highlight_guardrails() {
        let oversized = "x".repeat(MAX_INPUT_SIZE_BYTES + 1);
        assert!(should_highlight("fn main() {}"));
        assert!(!should_highlight(&oversized));
    }

    #[test]
    fn test_get_active_syntax_theme() {
        assert!(!get_active_syntax_theme().is_empty());
    }

    #[test]
    fn test_highlight_code_to_segments() {
        let segments =
            highlight_code_to_segments("fn main() {}", Some("rust"), "base16-ocean.dark");
        assert!(!segments.is_empty());
    }

    #[test]
    fn test_theme_loading_stable() {
        let first = load_theme("base16-ocean.dark", true);
        let second = load_theme("base16-ocean.dark", true);
        assert_eq!(first.name, second.name);
    }

    #[test]
    fn convert_syntect_style_uses_named_ansi_for_low_palette_indexes() {
        let input = style_with_foreground(rgba(0x02, 0, 0, ANSI_ALPHA_INDEX), FontStyle::empty());
        let style = convert_syntect_style(input);

        assert_eq!(style.get_fg_color(), Some(AnsiColor::Green.into()));
    }

    #[test]
    fn convert_syntect_style_uses_ansi256_for_high_palette_indexes() {
        let input = style_with_foreground(rgba(0x9a, 0, 0, ANSI_ALPHA_INDEX), FontStyle::empty());
        let style = convert_syntect_style(input);

        assert_eq!(style.get_fg_color(), Some(Ansi256Color(0x9a).into()));
    }

    #[test]
    fn convert_syntect_style_uses_terminal_default_for_alpha_one() {
        let input = style_with_foreground(rgba(0, 0, 0, ANSI_ALPHA_DEFAULT), FontStyle::empty());
        let style = convert_syntect_style(input);

        assert_eq!(style.get_fg_color(), None);
    }

    #[test]
    fn convert_syntect_style_falls_back_to_rgb_for_unexpected_alpha() {
        let input = style_with_foreground(rgba(10, 20, 30, 0x80), FontStyle::empty());
        let style = convert_syntect_style(input);

        assert_eq!(style.get_fg_color(), Some(RgbColor(10, 20, 30).into()));
    }

    #[test]
    fn convert_syntect_style_preserves_effects() {
        let input = style_with_foreground(
            rgba(10, 20, 30, OPAQUE_ALPHA),
            FontStyle::BOLD | FontStyle::ITALIC | FontStyle::UNDERLINE,
        );
        let effects = convert_syntect_style(input).get_effects();

        assert!(effects.contains(Effects::BOLD));
        assert!(effects.contains(Effects::ITALIC));
        assert!(effects.contains(Effects::UNDERLINE));
    }

    #[test]
    fn highlight_pipeline_decodes_alpha_encoded_theme_colors() {
        let theme = Theme {
            settings: ThemeSettings {
                foreground: Some(rgba(0x02, 0, 0, ANSI_ALPHA_INDEX)),
                background: Some(rgba(0, 0, 0, ANSI_ALPHA_DEFAULT)),
                ..ThemeSettings::default()
            },
            ..Theme::default()
        };

        let segments =
            highlight_code_to_anstyle_line_segments_with_theme("plain text", None, &theme, false);
        assert_eq!(segments.len(), 1);
        assert_eq!(segments[0].len(), 1);

        let (style, text) = &segments[0][0];
        assert_eq!(style.get_fg_color(), Some(AnsiColor::Green.into()));
        assert_eq!(style.get_bg_color(), None);
        assert_eq!(text, "plain text");
    }

    #[test]
    fn diff_scope_backgrounds_prefer_markup_scope_then_diff_fallback() {
        let theme = theme_with_scopes(vec![
            theme_item("markup.inserted", Some((10, 20, 30))),
            theme_item("diff.deleted", Some((40, 50, 60))),
        ]);

        let expected = DiffScopeBackgroundRgbs {
            inserted: Some((10, 20, 30)),
            deleted: Some((40, 50, 60)),
        };
        assert_eq!(diff_scope_background_rgbs_for_theme(&theme), expected);
    }

    #[test]
    fn diff_scope_backgrounds_return_none_when_scopes_do_not_match() {
        let theme = theme_with_scopes(vec![theme_item("constant.numeric", Some((1, 2, 3)))]);

        let expected = DiffScopeBackgroundRgbs {
            inserted: None,
            deleted: None,
        };
        assert_eq!(diff_scope_background_rgbs_for_theme(&theme), expected);
    }

    #[test]
    fn diff_scope_backgrounds_fall_back_to_diff_scopes() {
        let theme = theme_with_scopes(vec![
            theme_item("diff.inserted", Some((16, 32, 48))),
            theme_item("diff.deleted", Some((64, 80, 96))),
        ]);

        let expected = DiffScopeBackgroundRgbs {
            inserted: Some((16, 32, 48)),
            deleted: Some((64, 80, 96)),
        };
        assert_eq!(diff_scope_background_rgbs_for_theme(&theme), expected);
    }
}