1use std::cell::RefCell;
4use std::collections::HashMap;
5use std::future::Future;
6use std::sync::LazyLock;
7use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker};
8
9use arborium::highlights::{HIGHLIGHTS, tag_for_capture};
10use arborium::theme::{self, Theme};
11use arborium::{Grammar, GrammarProvider, HighlightConfig, Injection, Span, StaticProvider};
12use miette_arborium::MietteHighlighter;
13use owo_colors::OwoColorize;
14
/// Prefix written in front of every rendered line when line numbers are not shown.
const INDENT: &str = " ";
16
/// Source languages this module can be asked to highlight.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Language {
    /// JSON documents.
    Json,
    /// YAML documents.
    Yaml,
    /// XML documents.
    Xml,
    /// KDL documents.
    Kdl,
    /// Rust source code.
    Rust,
}

impl Language {
    /// Returns the file extension associated with the language, without a leading dot.
    pub fn extension(self) -> &'static str {
        match self {
            Self::Json => "json",
            Self::Yaml => "yaml",
            Self::Xml => "xml",
            Self::Kdl => "kdl",
            Self::Rust => "rs",
        }
    }

    /// Returns the human-readable display name of the language.
    pub fn name(self) -> &'static str {
        match self {
            Self::Json => "JSON",
            Self::Yaml => "YAML",
            Self::Xml => "XML",
            Self::Kdl => "KDL",
            Self::Rust => "Rust",
        }
    }

    /// Returns the identifier under which the grammar is requested from the provider.
    fn arborium_name(self) -> &'static str {
        match self {
            Self::Json => "json",
            Self::Yaml => "yaml",
            Self::Xml => "xml",
            Self::Kdl => "kdl",
            Self::Rust => "rust",
        }
    }
}
65
/// Syntax highlighter that renders source code for terminal or HTML output.
pub struct Highlighter {
    /// Highlighting engine; held in a `RefCell` so `&self` methods can borrow it mutably.
    engine: RefCell<ArboriumEngine>,
    /// Theme supplying the colors and styles used when rendering.
    theme: Theme,
}
71
72impl Default for Highlighter {
73 fn default() -> Self {
74 Self::new()
75 }
76}
77
78impl Highlighter {
79 pub fn new() -> Self {
81 Self {
82 engine: RefCell::new(ArboriumEngine::new()),
83 theme: theme::builtin::tokyo_night().clone(),
84 }
85 }
86
87 pub fn with_kdl_syntaxes(self, _syntax_dir: &str) -> Self {
89 self
90 }
91
92 pub fn theme(&self) -> &Theme {
94 &self.theme
95 }
96
97 pub fn highlight_to_terminal(&self, code: &str, lang: Language) -> String {
99 match self.collect_segments(code, lang) {
100 Some(segments) => {
101 render_segments_to_terminal(&segments, &self.theme, LineNumberMode::None)
102 }
103 None => self.plain_text_with_indent(code),
104 }
105 }
106
107 pub fn highlight_to_terminal_with_line_numbers(&self, code: &str, lang: Language) -> String {
109 match self.collect_segments(code, lang) {
110 Some(segments) => {
111 render_segments_to_terminal(&segments, &self.theme, LineNumberMode::Numbers)
112 }
113 None => self.plain_text_with_line_numbers(code),
114 }
115 }
116
117 pub fn build_miette_highlighter(&self, _lang: Language) -> MietteHighlighter {
119 MietteHighlighter::new()
120 }
121
122 pub fn highlight_to_html(&self, code: &str, lang: Language) -> String {
124 match self.collect_segments(code, lang) {
125 Some(segments) => render_segments_to_html(&segments, &self.theme),
126 None => wrap_plain_text_html(code, &self.theme),
127 }
128 }
129
130 fn collect_segments<'a>(&'a self, code: &'a str, lang: Language) -> Option<Vec<Segment<'a>>> {
131 let mut engine = self.engine.borrow_mut();
132 let spans = engine.collect_spans(lang.arborium_name(), code)?;
133 Some(segments_from_spans(code, spans))
134 }
135
136 fn plain_text_with_indent(&self, code: &str) -> String {
137 let mut output = String::new();
138 for line in code.lines() {
139 output.push_str(INDENT);
140 output.push_str(line);
141 output.push('\n');
142 }
143 output
144 }
145
146 fn plain_text_with_line_numbers(&self, code: &str) -> String {
147 use owo_colors::OwoColorize;
148
149 let mut output = String::new();
150 for (i, line) in code.lines().enumerate() {
151 output.push_str(&format!(
152 "{} {} {}\n",
153 format!("{:3}", i + 1).dimmed(),
154 "│".dimmed(),
155 line
156 ));
157 }
158 output
159 }
160}
161
/// Escapes the characters that are significant in HTML text and double-quoted attributes.
///
/// `&`, `<`, `>` and `"` become `&amp;`, `&lt;`, `&gt;` and `&quot;`; every other
/// character is copied unchanged. The input is scanned once, so an ampersand that is
/// produced by an earlier replacement can never be escaped a second time.
pub fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
169
170pub fn ansi_to_html(input: &str) -> String {
173 let mut output = String::new();
174 let mut chars = input.chars().peekable();
175 let mut in_span = false;
176
177 while let Some(c) = chars.next() {
178 if c == '\x1b' && chars.peek() == Some(&'[') {
179 chars.next(); let mut seq = String::new();
183 while let Some(&ch) = chars.peek() {
184 if ch.is_ascii_digit() || ch == ';' {
185 seq.push(chars.next().unwrap());
186 } else {
187 break;
188 }
189 }
190
191 let final_char = chars.next();
193
194 if final_char == Some('m') {
195 if in_span {
197 output.push_str("</span>");
198 in_span = false;
199 }
200
201 if let Some(style) = parse_ansi_style(&seq)
203 && !style.is_empty()
204 {
205 output.push_str(&format!("<span style=\"{style}\">"));
206 in_span = true;
207 }
208 }
209 } else if c == '<' {
210 output.push_str("<");
211 } else if c == '>' {
212 output.push_str(">");
213 } else if c == '&' {
214 output.push_str("&");
215 } else if c == '`' {
216 output.push_str("`");
218 } else if c == ' ' {
219 output.push('\u{00A0}');
221 } else {
222 output.push(c);
223 }
224 }
225
226 if in_span {
227 output.push_str("</span>");
228 }
229
230 output
231}
232
233fn parse_ansi_style(seq: &str) -> Option<String> {
235 if seq.is_empty() || seq == "0" {
236 return Some(String::new()); }
238
239 let parts: Vec<&str> = seq.split(';').collect();
240 let mut styles = Vec::new();
241
242 let mut i = 0;
243 while i < parts.len() {
244 match parts[i] {
245 "0" => return Some(String::new()), "1" => styles.push("font-weight:bold".to_string()),
247 "2" => styles.push("opacity:0.7".to_string()), "3" => styles.push("font-style:italic".to_string()),
249 "4" => styles.push("text-decoration:underline".to_string()),
250 "30" => styles.push("color:#000".to_string()),
251 "31" => styles.push("color:#e06c75".to_string()), "32" => styles.push("color:#98c379".to_string()), "33" => styles.push("color:#e5c07b".to_string()), "34" => styles.push("color:#61afef".to_string()), "35" => styles.push("color:#c678dd".to_string()), "36" => styles.push("color:#56b6c2".to_string()), "37" => styles.push("color:#abb2bf".to_string()), "38" => {
259 if i + 1 < parts.len() && parts[i + 1] == "2" {
261 if i + 4 < parts.len() {
263 let r = parts[i + 2];
264 let g = parts[i + 3];
265 let b = parts[i + 4];
266 styles.push(format!("color:rgb({r},{g},{b})"));
267 i += 4;
268 }
269 } else if i + 1 < parts.len()
270 && parts[i + 1] == "5"
271 && i + 2 < parts.len()
272 && let Ok(n) = parts[i + 2].parse::<u8>()
273 {
274 let color = ansi_256_to_rgb(n);
275 styles.push(format!("color:{color}"));
276 i += 2;
277 }
278 }
279 "39" => styles.push("color:inherit".to_string()),
280 "40" => styles.push("background-color:#000".to_string()),
281 "41" => styles.push("background-color:#e06c75".to_string()),
282 "42" => styles.push("background-color:#98c379".to_string()),
283 "43" => styles.push("background-color:#e5c07b".to_string()),
284 "44" => styles.push("background-color:#61afef".to_string()),
285 "45" => styles.push("background-color:#c678dd".to_string()),
286 "46" => styles.push("background-color:#56b6c2".to_string()),
287 "47" => styles.push("background-color:#abb2bf".to_string()),
288 "48" => {
289 if i + 1 < parts.len() && parts[i + 1] == "2" {
290 if i + 4 < parts.len() {
291 let r = parts[i + 2];
292 let g = parts[i + 3];
293 let b = parts[i + 4];
294 styles.push(format!("background-color:rgb({r},{g},{b})"));
295 i += 4;
296 }
297 } else if i + 1 < parts.len()
298 && parts[i + 1] == "5"
299 && i + 2 < parts.len()
300 && let Ok(n) = parts[i + 2].parse::<u8>()
301 {
302 let color = ansi_256_to_rgb(n);
303 styles.push(format!("background-color:{color}"));
304 i += 2;
305 }
306 }
307 "49" => styles.push("background-color:transparent".to_string()),
308 "90" => styles.push("color:#5c6370".to_string()), "91" => styles.push("color:#e06c75".to_string()), "92" => styles.push("color:#98c379".to_string()),
311 "93" => styles.push("color:#e5c07b".to_string()), "94" => styles.push("color:#61afef".to_string()),
313 "95" => styles.push("color:#c678dd".to_string()), "96" => styles.push("color:#56b6c2".to_string()),
315 "97" => styles.push("color:#fff".to_string()), _ => {}
317 }
318 i += 1;
319 }
320
321 if styles.is_empty() {
322 None
323 } else {
324 Some(styles.join(";"))
325 }
326}
327
328fn ansi_256_to_rgb(n: u8) -> &'static str {
329 match n {
330 0 => "#000000",
331 1 => "#800000",
332 2 => "#008000",
333 3 => "#808000",
334 4 => "#000080",
335 5 => "#800080",
336 6 => "#008080",
337 7 => "#c0c0c0",
338 8 => "#808080",
339 9 => "#ff0000",
340 10 => "#00ff00",
341 11 => "#ffff00",
342 12 => "#0000ff",
343 13 => "#ff00ff",
344 14 => "#00ffff",
345 15 => "#ffffff",
346 _ => "#888888",
347 }
348}
349
#[cfg(test)]
mod tests {
    use super::Language;

    /// The XML variant reports its display name and file extension.
    #[test]
    fn xml_language_metadata_is_exposed() {
        let xml = Language::Xml;
        assert_eq!(xml.name(), "XML");
        assert_eq!(xml.extension(), "xml");
    }
}
360
/// Bundles the grammar provider and the highlight configuration used to collect spans.
struct ArboriumEngine {
    /// Source of grammars, looked up by language name.
    provider: StaticProvider,
    /// Highlighting options; `max_injection_depth` bounds nested injection processing.
    config: HighlightConfig,
}
369
370impl ArboriumEngine {
371 fn new() -> Self {
372 Self {
373 provider: StaticProvider::new(),
374 config: HighlightConfig::default(),
375 }
376 }
377
378 fn collect_spans(&mut self, language: &str, source: &str) -> Option<Vec<Span>> {
379 let grammar = self.get_grammar(language)?;
380 let result = grammar.parse(source);
381 let mut spans = result.spans;
382 if !result.injections.is_empty() {
383 self.process_injections(
384 source,
385 result.injections,
386 0,
387 self.config.max_injection_depth,
388 &mut spans,
389 );
390 }
391 Some(spans)
392 }
393
394 fn process_injections(
395 &mut self,
396 source: &str,
397 injections: Vec<Injection>,
398 base_offset: u32,
399 remaining_depth: u32,
400 spans: &mut Vec<Span>,
401 ) {
402 if remaining_depth == 0 {
403 return;
404 }
405
406 for injection in injections {
407 let start = injection.start as usize;
408 let end = injection.end as usize;
409
410 if start >= end || end > source.len() {
411 continue;
412 }
413
414 let injected_text = &source[start..end];
415 let Some(grammar) = self.get_grammar_optional(&injection.language) else {
416 continue;
417 };
418
419 let result = grammar.parse(injected_text);
420 spans.extend(result.spans.into_iter().map(|mut span| {
421 span.start += base_offset + injection.start;
422 span.end += base_offset + injection.start;
423 span
424 }));
425
426 if !result.injections.is_empty() {
427 self.process_injections(
428 injected_text,
429 result.injections,
430 base_offset + injection.start,
431 remaining_depth - 1,
432 spans,
433 );
434 }
435 }
436 }
437
438 fn get_grammar(
439 &mut self,
440 language: &str,
441 ) -> Option<&mut <StaticProvider as arborium::GrammarProvider>::Grammar> {
442 self.poll_provider(language)
443 }
444
445 fn get_grammar_optional(
446 &mut self,
447 language: &str,
448 ) -> Option<&mut <StaticProvider as arborium::GrammarProvider>::Grammar> {
449 self.poll_provider(language)
450 }
451
452 fn poll_provider(
453 &mut self,
454 language: &str,
455 ) -> Option<&mut <StaticProvider as arborium::GrammarProvider>::Grammar> {
456 let future = self.provider.get(language);
457 let mut future = std::pin::pin!(future);
458 let waker = noop_waker();
459 let mut cx = Context::from_waker(&waker);
460 match future.as_mut().poll(&mut cx) {
461 Poll::Ready(result) => result,
462 Poll::Pending => None,
463 }
464 }
465}
466
/// Selects the prefix that terminal rendering writes at the start of each line.
#[derive(Clone, Copy, PartialEq, Eq)]
enum LineNumberMode {
    /// Lines are prefixed with the plain indent.
    None,
    /// Lines are prefixed with a dimmed line number and a `│` separator.
    Numbers,
}
472
/// A contiguous piece of the source text together with the style tag that applies to it.
struct Segment<'a> {
    /// Slice of the source covered by this segment.
    text: &'a str,
    /// Tag used to look up the style; `None` when the text is unstyled.
    tag: Option<&'static str>,
}
477
478fn render_segments_to_terminal(
479 segments: &[Segment<'_>],
480 theme: &Theme,
481 mode: LineNumberMode,
482) -> String {
483 let mut output = String::new();
484 let mut active_code: Option<String> = None;
485 let mut line = 1usize;
486 let mut at_line_start = true;
487
488 for segment in segments {
489 let target_code = segment
490 .tag
491 .and_then(|tag| ansi_for_tag(theme, tag))
492 .filter(|s| !s.is_empty());
493
494 if target_code != active_code {
495 output.push_str(Theme::ANSI_RESET);
496 if let Some(code) = &target_code {
497 output.push_str(code);
498 }
499 active_code = target_code;
500 }
501
502 for ch in segment.text.chars() {
503 if at_line_start {
504 output.push_str(Theme::ANSI_RESET);
505 output.push_str(&line_prefix(mode, line));
506 if let Some(code) = &active_code {
507 output.push_str(code);
508 }
509 at_line_start = false;
510 }
511 output.push(ch);
512 if ch == '\n' {
513 at_line_start = true;
514 line += 1;
515 }
516 }
517 }
518
519 output.push_str(Theme::ANSI_RESET);
520 if !output.ends_with('\n') {
521 output.push('\n');
522 }
523 output
524}
525
526fn render_segments_to_html(segments: &[Segment<'_>], theme: &Theme) -> String {
527 let mut body = String::new();
528 for segment in segments {
529 let escaped = html_escape(segment.text);
530 if let Some(tag) = segment.tag {
531 if let Some(style) = css_for_tag(theme, tag) {
532 body.push_str("<span style=\"");
533 body.push_str(&style);
534 body.push_str("\">");
535 body.push_str(&escaped);
536 body.push_str("</span>");
537 } else {
538 body.push_str(&escaped);
539 }
540 } else {
541 body.push_str(&escaped);
542 }
543 }
544 wrap_with_pre(body, theme)
545}
546
547fn wrap_plain_text_html(code: &str, theme: &Theme) -> String {
548 wrap_with_pre(html_escape(code), theme)
549}
550
551fn wrap_with_pre(content: String, theme: &Theme) -> String {
552 let mut styles = Vec::new();
553 if let Some(bg) = theme.background {
554 styles.push(format!("background-color:{};", bg.to_hex()));
555 }
556 if let Some(fg) = theme.foreground {
557 styles.push(format!("color:{};", fg.to_hex()));
558 }
559 styles.push("padding:12px;".to_string());
560 styles.push("border-radius:8px;".to_string());
561 styles.push(
562 "font-family:var(--facet-mono, SFMono-Regular, Consolas, 'Liberation Mono', monospace);"
563 .to_string(),
564 );
565 styles.push("font-size:0.9rem;".to_string());
566 styles.push("overflow:auto;".to_string());
567 format!(
568 "<pre style=\"{}\"><code>{}</code></pre>",
569 styles.join(" "),
570 content
571 )
572}
573
574fn line_prefix(mode: LineNumberMode, line: usize) -> String {
575 match mode {
576 LineNumberMode::None => INDENT.to_string(),
577 LineNumberMode::Numbers => format!("{} {} ", format!("{:3}", line).dimmed(), "│".dimmed()),
578 }
579}
580
581fn segments_from_spans<'a>(source: &'a str, spans: Vec<Span>) -> Vec<Segment<'a>> {
582 if source.is_empty() {
583 return vec![Segment {
584 text: "",
585 tag: None,
586 }];
587 }
588
589 let normalized = normalize_and_coalesce(dedup_spans(spans));
590 if normalized.is_empty() {
591 return vec![Segment {
592 text: source,
593 tag: None,
594 }];
595 }
596
597 let mut events: Vec<(u32, bool, usize)> = Vec::new();
598 for (idx, span) in normalized.iter().enumerate() {
599 events.push((span.start, true, idx));
600 events.push((span.end, false, idx));
601 }
602 events.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
603
604 let mut segments = Vec::new();
605 let mut last_pos = 0usize;
606 let mut stack: Vec<usize> = Vec::new();
607
608 for (pos, is_start, idx) in events {
609 let pos = pos as usize;
610 if pos > last_pos && pos <= source.len() {
611 let text = &source[last_pos..pos];
612 let tag = stack.last().map(|&active| normalized[active].tag);
613 segments.push(Segment { text, tag });
614 last_pos = pos;
615 }
616
617 if is_start {
618 stack.push(idx);
619 } else if let Some(position) = stack.iter().rposition(|&active| active == idx) {
620 stack.remove(position);
621 }
622 }
623
624 if last_pos < source.len() {
625 let tag = stack.last().map(|&active| normalized[active].tag);
626 segments.push(Segment {
627 text: &source[last_pos..],
628 tag,
629 });
630 }
631
632 segments
633}
634
635fn dedup_spans(mut spans: Vec<Span>) -> Vec<Span> {
636 spans.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end)));
637 let mut deduped = HashMap::new();
638 for span in spans {
639 let key = (span.start, span.end);
640 let new_has_style = tag_for_capture(&span.capture).is_some();
641 deduped
642 .entry(key)
643 .and_modify(|existing: &mut Span| {
644 let existing_has_style = tag_for_capture(&existing.capture).is_some();
645 if new_has_style || !existing_has_style {
646 *existing = span.clone();
647 }
648 })
649 .or_insert(span);
650 }
651 deduped.into_values().collect()
652}
653
/// A span whose capture has been resolved to a style tag.
struct NormalizedSpan {
    /// Start offset, copied from the originating span.
    start: u32,
    /// End offset, copied from the originating span.
    end: u32,
    /// Style tag returned by `tag_for_capture` for the span's capture.
    tag: &'static str,
}
659
660fn normalize_and_coalesce(spans: Vec<Span>) -> Vec<NormalizedSpan> {
661 let mut normalized: Vec<NormalizedSpan> = spans
662 .into_iter()
663 .filter_map(|span| {
664 let tag = tag_for_capture(&span.capture)?;
665 Some(NormalizedSpan {
666 start: span.start,
667 end: span.end,
668 tag,
669 })
670 })
671 .collect();
672
673 if normalized.is_empty() {
674 return normalized;
675 }
676
677 normalized.sort_by_key(|s| (s.start, s.end));
678 let mut coalesced: Vec<NormalizedSpan> = Vec::with_capacity(normalized.len());
679
680 for span in normalized {
681 if let Some(last) = coalesced.last_mut()
682 && span.tag == last.tag
683 && span.start <= last.end
684 {
685 last.end = last.end.max(span.end);
686 continue;
687 }
688 coalesced.push(span);
689 }
690
691 coalesced
692}
693
694static TAG_TO_INDEX: LazyLock<HashMap<&'static str, usize>> = LazyLock::new(|| {
695 let mut map = HashMap::new();
696 for (idx, highlight) in HIGHLIGHTS.iter().enumerate() {
697 if !highlight.tag.is_empty() {
698 map.insert(highlight.tag, idx);
699 }
700 }
701 map
702});
703
704fn css_for_tag(theme: &Theme, tag: &str) -> Option<String> {
705 let index = find_style_index(theme, tag)?;
706 let style = theme.style(index)?;
707 if style.is_empty() {
708 return None;
709 }
710
711 let mut parts = Vec::new();
712 if let Some(fg) = style.fg {
713 parts.push(format!("color:{};", fg.to_hex()));
714 }
715 if let Some(bg) = style.bg {
716 parts.push(format!("background-color:{};", bg.to_hex()));
717 }
718 if style.modifiers.bold {
719 parts.push("font-weight:bold;".to_string());
720 }
721 if style.modifiers.italic {
722 parts.push("font-style:italic;".to_string());
723 }
724 let mut decorations = Vec::new();
725 if style.modifiers.underline {
726 decorations.push("underline");
727 }
728 if style.modifiers.strikethrough {
729 decorations.push("line-through");
730 }
731 if !decorations.is_empty() {
732 parts.push(format!("text-decoration:{};", decorations.join(" ")));
733 }
734
735 if parts.is_empty() {
736 None
737 } else {
738 Some(parts.join(" "))
739 }
740}
741
742fn ansi_for_tag(theme: &Theme, tag: &str) -> Option<String> {
743 let index = find_style_index(theme, tag)?;
744 let ansi = theme.ansi_style(index);
745 if ansi.is_empty() { None } else { Some(ansi) }
746}
747
748fn find_style_index(theme: &Theme, tag: &str) -> Option<usize> {
749 let mut current = tag.strip_prefix("a-").unwrap_or(tag);
750 loop {
751 let &idx = TAG_TO_INDEX.get(current)?;
752 if theme
753 .style(idx)
754 .map(|style| !style.is_empty())
755 .unwrap_or(false)
756 {
757 return Some(idx);
758 }
759 let parent = HIGHLIGHTS[idx].parent_tag;
760 if parent.is_empty() {
761 return None;
762 }
763 current = parent;
764 }
765}
766
/// Creates a waker whose wake, wake-by-ref and drop operations do nothing and whose clone
/// yields another such waker.
fn noop_waker() -> Waker {
    fn clone_raw(_: *const ()) -> RawWaker {
        RawWaker::new(std::ptr::null(), &NOOP_VTABLE)
    }

    fn do_nothing(_: *const ()) {}

    static NOOP_VTABLE: RawWakerVTable =
        RawWakerVTable::new(clone_raw, do_nothing, do_nothing, do_nothing);

    // SAFETY: the vtable functions never dereference the (null) data pointer, own no
    // resources and are safe to call from any thread, which satisfies the `RawWaker`
    // contract.
    unsafe { Waker::from_raw(clone_raw(std::ptr::null())) }
}
771}