facet_showcase/
highlighter.rs

1//! Syntax highlighting support for showcases.
2
3use std::cell::RefCell;
4use std::collections::HashMap;
5use std::future::Future;
6use std::sync::LazyLock;
7use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker};
8
9use arborium::highlights::{HIGHLIGHTS, tag_for_capture};
10use arborium::theme::{self, Theme};
11use arborium::{Grammar, GrammarProvider, HighlightConfig, Injection, Span, StaticProvider};
12use miette_arborium::MietteHighlighter;
13use owo_colors::OwoColorize;
14
/// Four-space prefix put in front of every rendered line when no line numbers are shown.
const INDENT: &str = "    ";
16
/// Supported languages for syntax highlighting.
///
/// The enum is a plain `Copy` tag; it also derives `Hash` so it can be used as a
/// key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// JSON format
    Json,
    /// YAML format
    Yaml,
    /// XML format
    Xml,
    /// KDL format
    Kdl,
    /// Rust code (for type definitions)
    Rust,
}
31
32impl Language {
33    /// Returns the file extension used to look up the syntax.
34    pub fn extension(self) -> &'static str {
35        match self {
36            Language::Json => "json",
37            Language::Yaml => "yaml",
38            Language::Xml => "xml",
39            Language::Kdl => "kdl",
40            Language::Rust => "rs",
41        }
42    }
43
44    /// Returns a human-readable name for the language.
45    pub fn name(self) -> &'static str {
46        match self {
47            Language::Json => "JSON",
48            Language::Yaml => "YAML",
49            Language::Xml => "XML",
50            Language::Kdl => "KDL",
51            Language::Rust => "Rust",
52        }
53    }
54
55    fn arborium_name(self) -> &'static str {
56        match self {
57            Language::Json => "json",
58            Language::Yaml => "yaml",
59            Language::Xml => "xml",
60            Language::Kdl => "kdl",
61            Language::Rust => "rust",
62        }
63    }
64}
65
66/// Syntax highlighter using Tokyo Night theme powered by arborium.
67pub struct Highlighter {
68    engine: RefCell<ArboriumEngine>,
69    theme: Theme,
70}
71
72impl Default for Highlighter {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
78impl Highlighter {
79    /// Create a new highlighter with the Tokyo Night theme.
80    pub fn new() -> Self {
81        Self {
82            engine: RefCell::new(ArboriumEngine::new()),
83            theme: theme::builtin::tokyo_night().clone(),
84        }
85    }
86
87    /// KDL grammars ship with arborium, so this is a no-op retained for API compatibility.
88    pub fn with_kdl_syntaxes(self, _syntax_dir: &str) -> Self {
89        self
90    }
91
92    /// Get a reference to the theme.
93    pub fn theme(&self) -> &Theme {
94        &self.theme
95    }
96
97    /// Highlight code and return terminal-escaped string.
98    pub fn highlight_to_terminal(&self, code: &str, lang: Language) -> String {
99        match self.collect_segments(code, lang) {
100            Some(segments) => {
101                render_segments_to_terminal(&segments, &self.theme, LineNumberMode::None)
102            }
103            None => self.plain_text_with_indent(code),
104        }
105    }
106
107    /// Highlight code with line numbers for terminal output.
108    pub fn highlight_to_terminal_with_line_numbers(&self, code: &str, lang: Language) -> String {
109        match self.collect_segments(code, lang) {
110            Some(segments) => {
111                render_segments_to_terminal(&segments, &self.theme, LineNumberMode::Numbers)
112            }
113            None => self.plain_text_with_line_numbers(code),
114        }
115    }
116
117    /// Build a miette highlighter using arborium.
118    pub fn build_miette_highlighter(&self, _lang: Language) -> MietteHighlighter {
119        MietteHighlighter::new()
120    }
121
122    /// Highlight code and return HTML with inline styles.
123    pub fn highlight_to_html(&self, code: &str, lang: Language) -> String {
124        match self.collect_segments(code, lang) {
125            Some(segments) => render_segments_to_html(&segments, &self.theme),
126            None => wrap_plain_text_html(code, &self.theme),
127        }
128    }
129
130    fn collect_segments<'a>(&'a self, code: &'a str, lang: Language) -> Option<Vec<Segment<'a>>> {
131        let mut engine = self.engine.borrow_mut();
132        let spans = engine.collect_spans(lang.arborium_name(), code)?;
133        Some(segments_from_spans(code, spans))
134    }
135
136    fn plain_text_with_indent(&self, code: &str) -> String {
137        let mut output = String::new();
138        for line in code.lines() {
139            output.push_str(INDENT);
140            output.push_str(line);
141            output.push('\n');
142        }
143        output
144    }
145
146    fn plain_text_with_line_numbers(&self, code: &str) -> String {
147        use owo_colors::OwoColorize;
148
149        let mut output = String::new();
150        for (i, line) in code.lines().enumerate() {
151            output.push_str(&format!(
152                "{} {} {}\n",
153                format!("{:3}", i + 1).dimmed(),
154                "│".dimmed(),
155                line
156            ));
157        }
158        output
159    }
160}
161
/// Escape HTML special characters.
///
/// Replaces `&`, `<`, `>` and `"` with their entity forms; every other character
/// is copied through unchanged.
pub fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
169
170/// Convert ANSI escape codes to HTML spans with inline styles.
171/// Uses non-breaking spaces to preserve alignment in monospace output.
172pub fn ansi_to_html(input: &str) -> String {
173    let mut output = String::new();
174    let mut chars = input.chars().peekable();
175    let mut in_span = false;
176
177    while let Some(c) = chars.next() {
178        if c == '\x1b' && chars.peek() == Some(&'[') {
179            chars.next(); // consume '['
180
181            // Parse the escape sequence
182            let mut seq = String::new();
183            while let Some(&ch) = chars.peek() {
184                if ch.is_ascii_digit() || ch == ';' {
185                    seq.push(chars.next().unwrap());
186                } else {
187                    break;
188                }
189            }
190
191            // Consume the final character (usually 'm')
192            let final_char = chars.next();
193
194            if final_char == Some('m') {
195                // Close any existing span
196                if in_span {
197                    output.push_str("</span>");
198                    in_span = false;
199                }
200
201                // Parse the style
202                if let Some(style) = parse_ansi_style(&seq)
203                    && !style.is_empty()
204                {
205                    output.push_str(&format!("<span style=\"{style}\">"));
206                    in_span = true;
207                }
208            }
209        } else if c == '<' {
210            output.push_str("&lt;");
211        } else if c == '>' {
212            output.push_str("&gt;");
213        } else if c == '&' {
214            output.push_str("&amp;");
215        } else if c == '`' {
216            // Escape backticks to prevent markdown interpretation
217            output.push_str("&#96;");
218        } else if c == ' ' {
219            // Use non-breaking space to preserve alignment
220            output.push('\u{00A0}');
221        } else {
222            output.push(c);
223        }
224    }
225
226    if in_span {
227        output.push_str("</span>");
228    }
229
230    output
231}
232
233/// Parse ANSI style codes and return CSS style string.
234fn parse_ansi_style(seq: &str) -> Option<String> {
235    if seq.is_empty() || seq == "0" {
236        return Some(String::new()); // Reset
237    }
238
239    let parts: Vec<&str> = seq.split(';').collect();
240    let mut styles = Vec::new();
241
242    let mut i = 0;
243    while i < parts.len() {
244        match parts[i] {
245            "0" => return Some(String::new()), // Reset
246            "1" => styles.push("font-weight:bold".to_string()),
247            "2" => styles.push("opacity:0.7".to_string()), // Dim
248            "3" => styles.push("font-style:italic".to_string()),
249            "4" => styles.push("text-decoration:underline".to_string()),
250            "30" => styles.push("color:#000".to_string()),
251            "31" => styles.push("color:#e06c75".to_string()), // Red
252            "32" => styles.push("color:#98c379".to_string()), // Green
253            "33" => styles.push("color:#e5c07b".to_string()), // Yellow
254            "34" => styles.push("color:#61afef".to_string()), // Blue
255            "35" => styles.push("color:#c678dd".to_string()), // Magenta
256            "36" => styles.push("color:#56b6c2".to_string()), // Cyan
257            "37" => styles.push("color:#abb2bf".to_string()), // White
258            "38" => {
259                // Extended color
260                if i + 1 < parts.len() && parts[i + 1] == "2" {
261                    // 24-bit RGB
262                    if i + 4 < parts.len() {
263                        let r = parts[i + 2];
264                        let g = parts[i + 3];
265                        let b = parts[i + 4];
266                        styles.push(format!("color:rgb({r},{g},{b})"));
267                        i += 4;
268                    }
269                } else if i + 1 < parts.len()
270                    && parts[i + 1] == "5"
271                    && i + 2 < parts.len()
272                    && let Ok(n) = parts[i + 2].parse::<u8>()
273                {
274                    let color = ansi_256_to_rgb(n);
275                    styles.push(format!("color:{color}"));
276                    i += 2;
277                }
278            }
279            "39" => styles.push("color:inherit".to_string()),
280            "40" => styles.push("background-color:#000".to_string()),
281            "41" => styles.push("background-color:#e06c75".to_string()),
282            "42" => styles.push("background-color:#98c379".to_string()),
283            "43" => styles.push("background-color:#e5c07b".to_string()),
284            "44" => styles.push("background-color:#61afef".to_string()),
285            "45" => styles.push("background-color:#c678dd".to_string()),
286            "46" => styles.push("background-color:#56b6c2".to_string()),
287            "47" => styles.push("background-color:#abb2bf".to_string()),
288            "48" => {
289                if i + 1 < parts.len() && parts[i + 1] == "2" {
290                    if i + 4 < parts.len() {
291                        let r = parts[i + 2];
292                        let g = parts[i + 3];
293                        let b = parts[i + 4];
294                        styles.push(format!("background-color:rgb({r},{g},{b})"));
295                        i += 4;
296                    }
297                } else if i + 1 < parts.len()
298                    && parts[i + 1] == "5"
299                    && i + 2 < parts.len()
300                    && let Ok(n) = parts[i + 2].parse::<u8>()
301                {
302                    let color = ansi_256_to_rgb(n);
303                    styles.push(format!("background-color:{color}"));
304                    i += 2;
305                }
306            }
307            "49" => styles.push("background-color:transparent".to_string()),
308            "90" => styles.push("color:#5c6370".to_string()), // Bright black (dim)
309            "91" => styles.push("color:#e06c75".to_string()), // Bright red
310            "92" => styles.push("color:#98c379".to_string()),
311            "93" => styles.push("color:#e5c07b".to_string()), // Bright yellow
312            "94" => styles.push("color:#61afef".to_string()),
313            "95" => styles.push("color:#c678dd".to_string()), // Bright magenta
314            "96" => styles.push("color:#56b6c2".to_string()),
315            "97" => styles.push("color:#fff".to_string()), // Bright white
316            _ => {}
317        }
318        i += 1;
319    }
320
321    if styles.is_empty() {
322        None
323    } else {
324        Some(styles.join(";"))
325    }
326}
327
/// Maps an index of the xterm 256-colour palette to a `#rrggbb` hex string.
///
/// * `0..=15`: the 16 base colours (same values as before).
/// * `16..=231`: the 6×6×6 colour cube, where each channel level `v` in `0..=5`
///   maps to `0` for `v == 0` and `55 + 40 * v` otherwise.
/// * `232..=255`: the 24-step grey ramp, `8 + 10 * step`.
///
/// The computed entries are built once on first use and live for the rest of the
/// program, which is what allows returning `&'static str`.
fn ansi_256_to_rgb(n: u8) -> &'static str {
    const BASE: [&str; 16] = [
        "#000000", "#800000", "#008000", "#808000", "#000080", "#800080", "#008080", "#c0c0c0",
        "#808080", "#ff0000", "#00ff00", "#ffff00", "#0000ff", "#ff00ff", "#00ffff", "#ffffff",
    ];
    // Entries for indices 16..=255, stored at `index - 16`.
    static EXTENDED: LazyLock<Vec<String>> = LazyLock::new(|| {
        let cube = (0u8..216).map(|i| {
            let level = |v: u8| if v == 0 { 0u8 } else { 55 + 40 * v };
            format!(
                "#{:02x}{:02x}{:02x}",
                level(i / 36),
                level(i / 6 % 6),
                level(i % 6)
            )
        });
        let grays = (0u8..24).map(|step| {
            let v = 8 + 10 * step;
            format!("#{v:02x}{v:02x}{v:02x}")
        });
        cube.chain(grays).collect()
    });

    match n {
        0..=15 => BASE[usize::from(n)],
        _ => EXTENDED[usize::from(n) - 16].as_str(),
    }
}
349
#[cfg(test)]
mod tests {
    use super::Language;

    /// The XML variant reports both its display name and its file extension.
    #[test]
    fn xml_language_metadata_is_exposed() {
        let xml = Language::Xml;
        assert_eq!(xml.name(), "XML");
        assert_eq!(xml.extension(), "xml");
    }
}
360
361// ============================================================================
362// Internal helpers
363// ============================================================================
364
365struct ArboriumEngine {
366    provider: StaticProvider,
367    config: HighlightConfig,
368}
369
370impl ArboriumEngine {
371    fn new() -> Self {
372        Self {
373            provider: StaticProvider::new(),
374            config: HighlightConfig::default(),
375        }
376    }
377
378    fn collect_spans(&mut self, language: &str, source: &str) -> Option<Vec<Span>> {
379        let grammar = self.get_grammar(language)?;
380        let result = grammar.parse(source);
381        let mut spans = result.spans;
382        if !result.injections.is_empty() {
383            self.process_injections(
384                source,
385                result.injections,
386                0,
387                self.config.max_injection_depth,
388                &mut spans,
389            );
390        }
391        Some(spans)
392    }
393
394    fn process_injections(
395        &mut self,
396        source: &str,
397        injections: Vec<Injection>,
398        base_offset: u32,
399        remaining_depth: u32,
400        spans: &mut Vec<Span>,
401    ) {
402        if remaining_depth == 0 {
403            return;
404        }
405
406        for injection in injections {
407            let start = injection.start as usize;
408            let end = injection.end as usize;
409
410            if start >= end || end > source.len() {
411                continue;
412            }
413
414            let injected_text = &source[start..end];
415            let Some(grammar) = self.get_grammar_optional(&injection.language) else {
416                continue;
417            };
418
419            let result = grammar.parse(injected_text);
420            spans.extend(result.spans.into_iter().map(|mut span| {
421                span.start += base_offset + injection.start;
422                span.end += base_offset + injection.start;
423                span
424            }));
425
426            if !result.injections.is_empty() {
427                self.process_injections(
428                    injected_text,
429                    result.injections,
430                    base_offset + injection.start,
431                    remaining_depth - 1,
432                    spans,
433                );
434            }
435        }
436    }
437
438    fn get_grammar(
439        &mut self,
440        language: &str,
441    ) -> Option<&mut <StaticProvider as arborium::GrammarProvider>::Grammar> {
442        self.poll_provider(language)
443    }
444
445    fn get_grammar_optional(
446        &mut self,
447        language: &str,
448    ) -> Option<&mut <StaticProvider as arborium::GrammarProvider>::Grammar> {
449        self.poll_provider(language)
450    }
451
452    fn poll_provider(
453        &mut self,
454        language: &str,
455    ) -> Option<&mut <StaticProvider as arborium::GrammarProvider>::Grammar> {
456        let future = self.provider.get(language);
457        let mut future = std::pin::pin!(future);
458        let waker = noop_waker();
459        let mut cx = Context::from_waker(&waker);
460        match future.as_mut().poll(&mut cx) {
461            Poll::Ready(result) => result,
462            Poll::Pending => None,
463        }
464    }
465}
466
/// Selects the prefix written at the start of every rendered terminal line.
#[derive(Clone, Copy, PartialEq, Eq)]
enum LineNumberMode {
    /// Plain indentation, no line numbers.
    None,
    /// Dimmed line number followed by a vertical bar.
    Numbers,
}
472
/// A contiguous slice of the source together with the highlight tag that applies to it.
struct Segment<'a> {
    /// The covered source text.
    text: &'a str,
    /// Highlight tag for the slice, or `None` for unstyled text.
    tag: Option<&'static str>,
}
477
478fn render_segments_to_terminal(
479    segments: &[Segment<'_>],
480    theme: &Theme,
481    mode: LineNumberMode,
482) -> String {
483    let mut output = String::new();
484    let mut active_code: Option<String> = None;
485    let mut line = 1usize;
486    let mut at_line_start = true;
487
488    for segment in segments {
489        let target_code = segment
490            .tag
491            .and_then(|tag| ansi_for_tag(theme, tag))
492            .filter(|s| !s.is_empty());
493
494        if target_code != active_code {
495            output.push_str(Theme::ANSI_RESET);
496            if let Some(code) = &target_code {
497                output.push_str(code);
498            }
499            active_code = target_code;
500        }
501
502        for ch in segment.text.chars() {
503            if at_line_start {
504                output.push_str(Theme::ANSI_RESET);
505                output.push_str(&line_prefix(mode, line));
506                if let Some(code) = &active_code {
507                    output.push_str(code);
508                }
509                at_line_start = false;
510            }
511            output.push(ch);
512            if ch == '\n' {
513                at_line_start = true;
514                line += 1;
515            }
516        }
517    }
518
519    output.push_str(Theme::ANSI_RESET);
520    if !output.ends_with('\n') {
521        output.push('\n');
522    }
523    output
524}
525
526fn render_segments_to_html(segments: &[Segment<'_>], theme: &Theme) -> String {
527    let mut body = String::new();
528    for segment in segments {
529        let escaped = html_escape(segment.text);
530        if let Some(tag) = segment.tag {
531            if let Some(style) = css_for_tag(theme, tag) {
532                body.push_str("<span style=\"");
533                body.push_str(&style);
534                body.push_str("\">");
535                body.push_str(&escaped);
536                body.push_str("</span>");
537            } else {
538                body.push_str(&escaped);
539            }
540        } else {
541            body.push_str(&escaped);
542        }
543    }
544    wrap_with_pre(body, theme)
545}
546
547fn wrap_plain_text_html(code: &str, theme: &Theme) -> String {
548    wrap_with_pre(html_escape(code), theme)
549}
550
551fn wrap_with_pre(content: String, theme: &Theme) -> String {
552    let mut styles = Vec::new();
553    if let Some(bg) = theme.background {
554        styles.push(format!("background-color:{};", bg.to_hex()));
555    }
556    if let Some(fg) = theme.foreground {
557        styles.push(format!("color:{};", fg.to_hex()));
558    }
559    styles.push("padding:12px;".to_string());
560    styles.push("border-radius:8px;".to_string());
561    styles.push(
562        "font-family:var(--facet-mono, SFMono-Regular, Consolas, 'Liberation Mono', monospace);"
563            .to_string(),
564    );
565    styles.push("font-size:0.9rem;".to_string());
566    styles.push("overflow:auto;".to_string());
567    format!(
568        "<pre style=\"{}\"><code>{}</code></pre>",
569        styles.join(" "),
570        content
571    )
572}
573
574fn line_prefix(mode: LineNumberMode, line: usize) -> String {
575    match mode {
576        LineNumberMode::None => INDENT.to_string(),
577        LineNumberMode::Numbers => format!("{} {} ", format!("{:3}", line).dimmed(), "│".dimmed()),
578    }
579}
580
581fn segments_from_spans<'a>(source: &'a str, spans: Vec<Span>) -> Vec<Segment<'a>> {
582    if source.is_empty() {
583        return vec![Segment {
584            text: "",
585            tag: None,
586        }];
587    }
588
589    let normalized = normalize_and_coalesce(dedup_spans(spans));
590    if normalized.is_empty() {
591        return vec![Segment {
592            text: source,
593            tag: None,
594        }];
595    }
596
597    let mut events: Vec<(u32, bool, usize)> = Vec::new();
598    for (idx, span) in normalized.iter().enumerate() {
599        events.push((span.start, true, idx));
600        events.push((span.end, false, idx));
601    }
602    events.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
603
604    let mut segments = Vec::new();
605    let mut last_pos = 0usize;
606    let mut stack: Vec<usize> = Vec::new();
607
608    for (pos, is_start, idx) in events {
609        let pos = pos as usize;
610        if pos > last_pos && pos <= source.len() {
611            let text = &source[last_pos..pos];
612            let tag = stack.last().map(|&active| normalized[active].tag);
613            segments.push(Segment { text, tag });
614            last_pos = pos;
615        }
616
617        if is_start {
618            stack.push(idx);
619        } else if let Some(position) = stack.iter().rposition(|&active| active == idx) {
620            stack.remove(position);
621        }
622    }
623
624    if last_pos < source.len() {
625        let tag = stack.last().map(|&active| normalized[active].tag);
626        segments.push(Segment {
627            text: &source[last_pos..],
628            tag,
629        });
630    }
631
632    segments
633}
634
635fn dedup_spans(mut spans: Vec<Span>) -> Vec<Span> {
636    spans.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end)));
637    let mut deduped = HashMap::new();
638    for span in spans {
639        let key = (span.start, span.end);
640        let new_has_style = tag_for_capture(&span.capture).is_some();
641        deduped
642            .entry(key)
643            .and_modify(|existing: &mut Span| {
644                let existing_has_style = tag_for_capture(&existing.capture).is_some();
645                if new_has_style || !existing_has_style {
646                    *existing = span.clone();
647                }
648            })
649            .or_insert(span);
650    }
651    deduped.into_values().collect()
652}
653
/// A highlight span whose capture name has already been resolved to a highlight tag.
struct NormalizedSpan {
    /// Start offset of the span in the source.
    start: u32,
    /// End offset of the span in the source.
    end: u32,
    /// Highlight tag resolved from the span's capture.
    tag: &'static str,
}
659
660fn normalize_and_coalesce(spans: Vec<Span>) -> Vec<NormalizedSpan> {
661    let mut normalized: Vec<NormalizedSpan> = spans
662        .into_iter()
663        .filter_map(|span| {
664            let tag = tag_for_capture(&span.capture)?;
665            Some(NormalizedSpan {
666                start: span.start,
667                end: span.end,
668                tag,
669            })
670        })
671        .collect();
672
673    if normalized.is_empty() {
674        return normalized;
675    }
676
677    normalized.sort_by_key(|s| (s.start, s.end));
678    let mut coalesced: Vec<NormalizedSpan> = Vec::with_capacity(normalized.len());
679
680    for span in normalized {
681        if let Some(last) = coalesced.last_mut()
682            && span.tag == last.tag
683            && span.start <= last.end
684        {
685            last.end = last.end.max(span.end);
686            continue;
687        }
688        coalesced.push(span);
689    }
690
691    coalesced
692}
693
694static TAG_TO_INDEX: LazyLock<HashMap<&'static str, usize>> = LazyLock::new(|| {
695    let mut map = HashMap::new();
696    for (idx, highlight) in HIGHLIGHTS.iter().enumerate() {
697        if !highlight.tag.is_empty() {
698            map.insert(highlight.tag, idx);
699        }
700    }
701    map
702});
703
704fn css_for_tag(theme: &Theme, tag: &str) -> Option<String> {
705    let index = find_style_index(theme, tag)?;
706    let style = theme.style(index)?;
707    if style.is_empty() {
708        return None;
709    }
710
711    let mut parts = Vec::new();
712    if let Some(fg) = style.fg {
713        parts.push(format!("color:{};", fg.to_hex()));
714    }
715    if let Some(bg) = style.bg {
716        parts.push(format!("background-color:{};", bg.to_hex()));
717    }
718    if style.modifiers.bold {
719        parts.push("font-weight:bold;".to_string());
720    }
721    if style.modifiers.italic {
722        parts.push("font-style:italic;".to_string());
723    }
724    let mut decorations = Vec::new();
725    if style.modifiers.underline {
726        decorations.push("underline");
727    }
728    if style.modifiers.strikethrough {
729        decorations.push("line-through");
730    }
731    if !decorations.is_empty() {
732        parts.push(format!("text-decoration:{};", decorations.join(" ")));
733    }
734
735    if parts.is_empty() {
736        None
737    } else {
738        Some(parts.join(" "))
739    }
740}
741
742fn ansi_for_tag(theme: &Theme, tag: &str) -> Option<String> {
743    let index = find_style_index(theme, tag)?;
744    let ansi = theme.ansi_style(index);
745    if ansi.is_empty() { None } else { Some(ansi) }
746}
747
748fn find_style_index(theme: &Theme, tag: &str) -> Option<usize> {
749    let mut current = tag.strip_prefix("a-").unwrap_or(tag);
750    loop {
751        let &idx = TAG_TO_INDEX.get(current)?;
752        if theme
753            .style(idx)
754            .map(|style| !style.is_empty())
755            .unwrap_or(false)
756        {
757            return Some(idx);
758        }
759        let parent = HIGHLIGHTS[idx].parent_tag;
760        if parent.is_empty() {
761            return None;
762        }
763        current = parent;
764    }
765}
766
/// Creates a [`Waker`] whose wake, wake-by-ref and drop operations do nothing and
/// whose clone produces another such waker.
fn noop_waker() -> Waker {
    fn clone_noop(_: *const ()) -> RawWaker {
        RawWaker::new(std::ptr::null(), &NOOP_VTABLE)
    }
    fn ignore(_: *const ()) {}

    static NOOP_VTABLE: RawWakerVTable = RawWakerVTable::new(clone_noop, ignore, ignore, ignore);

    // SAFETY: none of the vtable functions ever dereference the data pointer, so the
    // null pointer is never read, and there is no resource to manage, which makes
    // every function trivially thread-safe and upholds the `RawWaker` contract.
    unsafe { Waker::from_raw(RawWaker::new(std::ptr::null(), &NOOP_VTABLE)) }
}