Skip to main content

atomcode_tuix/highlight/
mod.rs

1// crates/atomcode-tuix/src/highlight/mod.rs
2//
3// Public entry for fenced-code-block syntax highlight. Dispatch:
4//
5//   caps.colors == false             -> plain indented passthrough (no ANSI)
6//   lang_hint == None                -> plain indented passthrough
7//   syntect doesn't know the lang    -> plain indented passthrough
8//   syntect highlights successfully  -> tinted ANSI with 2-space left indent
9//
10// Output is a multi-line string where every line is prefixed with 2 spaces
11// (matches the pre-existing CC-style code-block indent). Caller (`markdown.rs`)
12// inserts it verbatim into the body stream.
13
14use std::str::FromStr;
15use std::sync::OnceLock;
16
17use syntect::easy::HighlightLines;
18use syntect::highlighting::{
19    Color, FontStyle, ScopeSelectors, StyleModifier, Theme, ThemeItem, ThemeSettings,
20};
21use syntect::parsing::SyntaxSet;
22
23use crate::terminal::TerminalCaps;
24
25pub mod theme;
26
/// Lazily-built syntect syntax set (covers the ~120 default Sublime syntaxes
/// that ship with syntect). Loaded once on first use; the cost is ~5-10ms and
/// is paid while the first tinted code block is being highlighted.
///
/// Read it through `syntax_set()`, which performs the one-time load.
static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
31
/// Lazily-built syntect themes — one per palette. `theme.rs`'s runtime
/// `MODE` selects which is returned per highlight call. Each theme is built
/// independently, on the first call that selects it, so the variant that is
/// never selected is never built (effectively always the case in
/// single-session use, where the mode does not flip).
static ATOMCODE_THEME_DARK: OnceLock<Theme> = OnceLock::new();
static ATOMCODE_THEME_LIGHT: OnceLock<Theme> = OnceLock::new();
38
/// Returns the process-wide syntax set, populating `SYNTAX_SET` with
/// `SyntaxSet::load_defaults_newlines` on the first call. The highlight loop
/// feeds syntect newline-terminated lines, which is what this loader variant
/// is paired with.
fn syntax_set() -> &'static SyntaxSet {
    SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines)
}
42
/// RGB tuple (`red, green, blue`) for a single syntect scope. Kept in
/// lockstep with the SGR strings emitted by `theme.rs`'s accessor fns — the
/// test `palette_rgbs_match_theme_sgr_strings` is meant to pin this invariant
/// so any future drift fails the test suite.
///
/// NOTE(review): that test is not part of this module's `tests` — presumably
/// it lives in `theme.rs`; confirm it still exists.
///
/// The derives keep the type cheap to pass around (`Copy`) and let tests
/// compare and print palette entries directly (`PartialEq`, `Debug`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Rgb(u8, u8, u8);
48
/// One colour per token category painted by the highlighter.
///
/// `build_atomcode_theme` binds each field to a group of TextMate scope
/// selectors; the selectors are listed on the fields below.
struct CodePalette {
    /// Scopes `keyword` and `storage`.
    keyword: Rgb,
    /// Scope `string`.
    string: Rgb,
    /// Scopes `constant.numeric` and `constant.language`.
    number: Rgb,
    /// Scope `comment` (the only category rendered italic).
    comment: Rgb,
    /// Scopes `entity.name.function` and `support.function`.
    function: Rgb,
    /// Scopes `entity.name.type`, `support.type` and `support.class`.
    type_: Rgb,
}
57
/// Palette used when `theme::is_light_for_highlight()` is false (see
/// `atomcode_theme`). Like `LIGHT`, these values are expected to stay in
/// lockstep with `theme.rs`'s per-token accessor SGR strings.
const DARK: CodePalette = CodePalette {
    keyword:  Rgb(198, 120, 221), // #C678DD
    string:   Rgb(152, 195, 121), // #98C379
    number:   Rgb(209, 154, 102), // #D19A66
    comment:  Rgb(124, 132, 153), // #7C8499
    function: Rgb(97, 175, 239),  // #61AFEF
    type_:    Rgb(229, 192, 123), // #E5C07B
};
66
/// `Light` palette: dark, saturated variants hitting ≥ 11:1 contrast
/// on `#FFFFFF` (overshoots WCAG AA 4.5:1 by a wide margin). Chosen
/// after a Mac Terminal user reported that the earlier 8-9:1 values read
/// "soft" against the white background: those slightly-softer colours
/// stayed AA-compliant but didn't feel "saturated" the way users
/// expect light-theme code highlighting to look.
///
/// Reproduces the same scope→colour mapping as `DARK` so syntect's TextMate
/// selectors don't need re-tuning. Must stay in lockstep with `theme.rs`'s
/// per-token accessor SGR strings.
///
/// NOTE(review): the 11:1 figure does not appear to hold for every slot —
/// `comment` is deliberately moderate (see its inline note) and `string`
/// (#006400) looks closer to ~7:1 on white. Re-measure before relying on
/// the number above.
const LIGHT: CodePalette = CodePalette {
    keyword:  Rgb(74, 0, 114),    // #4A0072
    string:   Rgb(0, 100, 0),     // #006400
    number:   Rgb(102, 51, 0),    // #663300
    comment:  Rgb(74, 80, 96),    // #4A5060 (kept moderate — comments stay secondary)
    function: Rgb(0, 33, 113),    // #002171
    type_:    Rgb(91, 58, 0),     // #5B3A00
};
84
85fn atomcode_theme() -> &'static Theme {
86    // `theme::set_theme_mode(true)` flips `MODE`; we read it here to
87    // pick the right OnceLock-cached Theme. Each variant is built at
88    // most once per process.
89    if theme::is_light_for_highlight() {
90        ATOMCODE_THEME_LIGHT.get_or_init(|| build_atomcode_theme(&LIGHT))
91    } else {
92        ATOMCODE_THEME_DARK.get_or_init(|| build_atomcode_theme(&DARK))
93    }
94}
95
96/// Build the syntect Theme from a palette. Uses TextMate scope
97/// selectors that match across most syntect-bundled syntaxes:
98///
99///   keyword / storage              -> KEYWORD (purple)
100///   string                         -> STRING (green)
101///   constant.numeric / .language   -> NUMBER (amber)
102///   comment                        -> COMMENT (italic slate gray)
103///   entity.name.function / support.function -> FUNCTION (blue)
104///   entity.name.type / support.type / support.class -> TYPE (sand)
105///
106/// Default foreground is set to the sentinel Color { a: 0 } so that chunks
107/// not matching any scope above can be detected and emitted without ANSI.
108fn build_atomcode_theme(p: &CodePalette) -> Theme {
109    let item = |scope_str: &str, c: &Rgb, italic: bool| ThemeItem {
110        scope: ScopeSelectors::from_str(scope_str).expect("valid scope selector"),
111        style: StyleModifier {
112            foreground: Some(Color { r: c.0, g: c.1, b: c.2, a: 0xFF }),
113            background: None,
114            font_style: if italic {
115                Some(FontStyle::ITALIC)
116            } else {
117                None
118            },
119        },
120    };
121    Theme {
122        name: Some("atomcode-mid-lightness".into()),
123        author: None,
124        settings: ThemeSettings {
125            // Sentinel default fg. alpha=0 means "we don't paint this chunk."
126            // The highlight loop reads style.foreground.a to distinguish
127            // matched-scope text from passthrough.
128            foreground: Some(Color { r: 0, g: 0, b: 0, a: 0 }),
129            ..ThemeSettings::default()
130        },
131        scopes: vec![
132            item("keyword, storage", &p.keyword, false),
133            item("string", &p.string, false),
134            item("constant.numeric, constant.language", &p.number, false),
135            item("comment", &p.comment, true),
136            item("entity.name.function, support.function", &p.function, false),
137            item(
138                "entity.name.type, support.type, support.class",
139                &p.type_,
140                false,
141            ),
142        ],
143    }
144}
145
146/// Highlight a complete fenced code block and return the indented, ANSI-tinted
147/// multi-line string ready for `push_markdown_body`.
148pub fn highlight_block(
149    lang_hint: Option<&str>,
150    source: &str,
151    caps: TerminalCaps,
152) -> String {
153    if !caps.colors {
154        return indent_plain(source);
155    }
156    if let Some(lang) = lang_hint {
157        if let Some(tinted) = highlight_with_syntect(source, lang) {
158            return indent_lines(&tinted);
159        }
160    }
161    indent_plain(source)
162}
163
164/// Highlight `source` using syntect's regex-based highlighter. Returns
165/// `Some(tinted)` on success or `None` if the language isn't recognized.
166/// Panics inside syntect are caught and converted to `None` so the renderer
167/// can't crash a streaming reply.
168fn highlight_with_syntect(source: &str, lang: &str) -> Option<String> {
169    use std::panic::{catch_unwind, AssertUnwindSafe};
170
171    let source_owned = source.to_string();
172    let lang_owned = lang.to_string();
173    let result = catch_unwind(AssertUnwindSafe(move || -> Option<String> {
174        let ps = syntax_set();
175        let syntax = ps
176            .find_syntax_by_token(&lang_owned)
177            .or_else(|| ps.find_syntax_by_token(&lang_owned.to_lowercase()))?;
178        let theme = atomcode_theme();
179        let mut h = HighlightLines::new(syntax, theme);
180
181        let mut out = String::with_capacity(source_owned.len() + 64);
182        for (i, line) in source_owned.split('\n').enumerate() {
183            if i > 0 {
184                out.push('\n');
185            }
186            // syntect expects newline-terminated input for context; add one
187            // and strip from the per-chunk text so we control line breaks.
188            let line_with_nl = format!("{}\n", line);
189            let ranges = h.highlight_line(&line_with_nl, ps).ok()?;
190            for (style, text) in ranges {
191                let text = text.trim_end_matches('\n');
192                if text.is_empty() {
193                    continue;
194                }
195                let c = style.foreground;
196                if c.a == 0 {
197                    // Sentinel default fg -> emit text without ANSI wrapping.
198                    out.push_str(text);
199                } else {
200                    let italic = style.font_style.contains(FontStyle::ITALIC);
201                    if italic {
202                        out.push_str("\x1b[3m");
203                    }
204                    out.push_str(&format!("\x1b[38;2;{};{};{}m", c.r, c.g, c.b));
205                    out.push_str(text);
206                    out.push_str(theme::RESET);
207                }
208            }
209        }
210        Some(out)
211    }));
212    match result {
213        Ok(opt) => opt,
214        Err(_) => {
215            crate::tuix_trace!("HL", "syntect panicked while highlighting lang={}", lang);
216            None
217        }
218    }
219}
220
/// Per-line "  " indent for the no-color / unknown-lang path (matches the
/// pre-existing `format!("  {}", line)` behavior in `markdown.rs`).
///
/// The input is split on `'\n'` and every segment — including an empty
/// trailing one — receives the prefix, so `""` becomes `"  "` and `"a\n"`
/// becomes `"  a\n  "`.
fn indent_plain(source: &str) -> String {
    // One segment per '\n' plus one; each segment grows by exactly two
    // bytes, so the buffer can be sized exactly up front.
    let segments = source.matches('\n').count() + 1;
    let mut out = String::with_capacity(source.len() + 2 * segments);
    for (i, line) in source.split('\n').enumerate() {
        if i > 0 {
            out.push('\n');
        }
        out.push_str("  ");
        out.push_str(line);
    }
    out
}

/// Per-line "  " indent for tinted output. ANSI escapes ride along inside
/// each line — terminals don't count escape bytes as columns.
///
/// The indent rule is the same as for plain text, so this delegates to
/// `indent_plain` rather than keeping a second copy of the loop.
fn indent_lines(tinted: &str) -> String {
    indent_plain(tinted)
}
252
#[cfg(test)]
mod tests {
    use super::*;
    use crate::terminal::{EnvView, TerminalCaps};

    /// Caps built from a truecolor-TTY environment view; the colour-path
    /// tests below rely on this yielding `colors == true`.
    fn caps_color() -> TerminalCaps {
        TerminalCaps::from_env(EnvView {
            is_stdout_tty: true,
            term: Some("xterm-256color".to_string()),
            colorterm: Some("truecolor".to_string()),
            lang: Some("en_US.UTF-8".to_string()),
            ..Default::default()
        })
    }

    /// Caps built from an environment view with `no_color` set; the
    /// plain-path tests below rely on this yielding `colors == false`.
    fn caps_nocolor() -> TerminalCaps {
        TerminalCaps::from_env(EnvView {
            is_stdout_tty: true,
            no_color: true,
            term: Some("xterm".to_string()),
            ..Default::default()
        })
    }

    #[test]
    fn no_color_bypasses_highlight_returns_plain_indented() {
        let out = highlight_block(Some("rust"), "let x = 1;", caps_nocolor());
        assert_eq!(out, "  let x = 1;");
        assert!(!out.contains('\x1b'), "no_color path must emit zero ANSI");
    }

    #[test]
    fn no_color_multiline_each_line_indented() {
        let out = highlight_block(Some("rust"), "let x = 1;\nlet y = 2;", caps_nocolor());
        assert_eq!(out, "  let x = 1;\n  let y = 2;");
    }

    #[test]
    fn missing_lang_tag_falls_back_to_plain_indent() {
        let out = highlight_block(None, "x = 42", caps_color());
        assert_eq!(out, "  x = 42");
        assert!(!out.contains('\x1b'), "no lang tag means no ANSI for now");
    }

    #[test]
    fn unknown_lang_via_stub_falls_back_to_plain_indent() {
        // An unrecognized language token makes `highlight_with_syntect`
        // return `None`, so dispatch lands in plain-indent. This is the
        // proof that unknown / unrecognized langs always degrade safely.
        // (The `via_stub` in the name appears to date from when
        // `highlight_with_syntect` was still a stub returning `None`.)
        let out = highlight_block(Some("frobnicate"), "x = 42", caps_color());
        assert_eq!(out, "  x = 42");
    }

    #[test]
    fn empty_source_returns_indent_only() {
        let out = highlight_block(None, "", caps_nocolor());
        assert_eq!(out, "  ");
    }

    #[test]
    fn trailing_newline_preserved_in_output() {
        // source "a\n" -> "  a\n  " (split on \n yields ["a", ""]).
        // This pins the per-line indent contract for stream-formed input.
        let out = highlight_block(None, "a\n", caps_nocolor());
        assert_eq!(out, "  a\n  ");
    }

    #[test]
    fn rust_keyword_gets_keyword_color() {
        // `fn` and `let` should be highlighted as keywords via syntect.
        // Theme maps `keyword` AND `storage` scopes to KEYWORD color, so
        // both flow-control keywords and storage keywords get the same tint.
        let out = highlight_block(Some("rust"), "fn main() { let x = 1; }", caps_color());
        assert!(
            out.contains(theme::keyword()),
            "expected keyword color in tinted rust output, got: {:?}",
            out
        );
    }

    #[test]
    fn python_keyword_gets_keyword_color() {
        let out = highlight_block(Some("python"), "def foo():\n    return 1", caps_color());
        assert!(
            out.contains(theme::keyword()),
            "expected keyword color in tinted python output, got: {:?}",
            out
        );
    }

    #[test]
    fn rust_string_literal_gets_string_color() {
        let out = highlight_block(Some("rust"), r#"let s = "hello";"#, caps_color());
        assert!(
            out.contains(theme::string()),
            "expected string color: {:?}",
            out
        );
    }

    #[test]
    fn rust_number_gets_number_color() {
        let out = highlight_block(Some("rust"), "let n = 42;", caps_color());
        assert!(
            out.contains(theme::number()),
            "expected number color: {:?}",
            out
        );
    }

    #[test]
    fn rust_comment_gets_comment_color() {
        let out = highlight_block(Some("rust"), "// a comment\nlet x = 1;", caps_color());
        // The highlighter emits italic as its own "\x1b[3m" sequence followed
        // by a separate truecolor sequence, whereas the COMMENT constant is
        // the combined form "\x1b[3;38;2;124;132;153m". Accept either: the
        // bare truecolor body or the full constant.
        let comment_body = "\x1b[38;2;124;132;153m";
        let comment_full = theme::comment();
        assert!(
            out.contains(comment_body) || out.contains(comment_full),
            "expected comment color in some form: {:?}",
            out
        );
    }

    #[test]
    fn rust_multiline_string_classified_as_single_string() {
        // syntect must keep multi-line context — both lines of a string
        // literal that spans a line break should carry the string color.
        let src = "let s = \"line1\nline2\";";
        let out = highlight_block(Some("rust"), src, caps_color());
        let lines: Vec<_> = out.split('\n').collect();
        assert_eq!(lines.len(), 2, "expected 2 output lines, got: {:?}", out);
        assert!(lines[0].contains(theme::string()), "line0 missing string color: {:?}", lines[0]);
        assert!(lines[1].contains(theme::string()), "line1 missing string color: {:?}", lines[1]);
    }

    #[test]
    fn malformed_input_does_not_panic_returns_plain_indent() {
        // Deeply nested / garbage input historically tripped some highlighters.
        // Our catch_unwind wrapper must keep us alive. Only the indent prefix
        // is asserted: syntect may well highlight this input successfully, in
        // which case the output is tinted rather than plain.
        let nasty = "(".repeat(10_000);
        let out = highlight_block(Some("rust"), &nasty, caps_color());
        assert!(out.starts_with("  "), "must still produce indented output: {:?}", &out[..50.min(out.len())]);
    }

    #[test]
    fn unknown_lang_after_syntect_returns_plain_indent() {
        // syntect's find_syntax_by_token returns None for unknown -> plain indent.
        let out = highlight_block(Some("frobnicate-xyz-not-a-language"), "x = 42", caps_color());
        assert_eq!(out, "  x = 42");
    }
}
405}