streamdown_syntax/
lib.rs

1//! Streamdown Syntax
2//!
3//! This crate provides syntax highlighting for code blocks using the syntect library.
4//! It's designed to work with streaming input (line-by-line) for real-time rendering.
5//!
6//! # Features
7//!
8//! - **Streaming highlighting** - Maintain state across lines for multi-line tokens
9//! - **Language aliases** - Map common names (py, js, ts) to proper syntax definitions
10//! - **Background override** - Override theme background for consistent code block styling
11//! - **ANSI output** - Generate 24-bit true color terminal escape codes
12//!
13//! # Example
14//!
15//! ```
16//! use streamdown_syntax::Highlighter;
17//!
18//! let highlighter = Highlighter::new();
19//!
20//! // Highlight a complete code block
21//! let code = "fn main() {\n    println!(\"Hello!\");\n}";
22//! let highlighted = highlighter.highlight_block(code, "rust");
23//!
//! // For streaming, create a HighlightState once per code block and feed it
//! // one line at a time so multi-line tokens are tracked across calls
//! use streamdown_syntax::HighlightState;
//! let hl = Highlighter::new();
//! let mut state = hl.new_highlight_state("rust");
//! let line1 = hl.highlight_line_with_state("fn main() {", &mut state);
//! let line2 = hl.highlight_line_with_state("    println!(\"Hello!\");", &mut state);
30//! ```
31
32mod languages;
33
34pub use languages::{all_aliases, aliases_for, language_alias, LANGUAGE_ALIASES};
35
36use syntect::easy::HighlightLines;
37use syntect::highlighting::{Color, FontStyle, Style, Theme, ThemeSet};
38use syntect::parsing::{SyntaxReference, SyntaxSet};
39use syntect::util::as_24_bit_terminal_escaped;
40
/// ANSI SGR reset sequence (`ESC[0m`).
///
/// Appended to the end of every highlighted line so that colors and font
/// attributes emitted for the line do not carry over into following output.
const RESET: &str = "\x1b[0m";
43
/// Syntax highlighter for code blocks.
///
/// Wraps syntect to provide a streaming-friendly API with language aliases
/// and background color override support. The syntax and theme sets are
/// loaded once at construction time and owned by this value.
pub struct Highlighter {
    /// Syntax definitions used to resolve languages and parse lines.
    syntax_set: SyntaxSet,
    /// Color themes available for lookup by name.
    theme_set: ThemeSet,
    /// Name of the currently selected theme; it is not validated when set,
    /// so it may not exist in `theme_set`.
    theme_name: String,
    /// Optional background color override as an `(r, g, b)` triple.
    /// When `Some`, line highlighting switches to the custom ANSI renderer.
    background_override: Option<(u8, u8, u8)>,
}
58
59impl std::fmt::Debug for Highlighter {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        f.debug_struct("Highlighter")
62            .field("theme_name", &self.theme_name)
63            .field("background_override", &self.background_override)
64            .finish()
65    }
66}
67
68impl Default for Highlighter {
69    fn default() -> Self {
70        Self::new()
71    }
72}
73
74impl Highlighter {
75    /// Create a new highlighter with the default theme (base16-ocean.dark).
76    pub fn new() -> Self {
77        Self::with_theme("base16-ocean.dark")
78    }
79
80    /// Create a highlighter with a specific theme.
81    ///
82    /// Available built-in themes:
83    /// - "base16-ocean.dark"
84    /// - "base16-ocean.light"
85    /// - "base16-eighties.dark"
86    /// - "base16-mocha.dark"
87    /// - "InspiredGitHub"
88    /// - "Solarized (dark)"
89    /// - "Solarized (light)"
90    pub fn with_theme(theme_name: &str) -> Self {
91        Self {
92            syntax_set: SyntaxSet::load_defaults_newlines(),
93            theme_set: ThemeSet::load_defaults(),
94            theme_name: theme_name.to_string(),
95            background_override: None,
96        }
97    }
98
99    /// Get a reference to the syntax set.
100    pub fn syntax_set(&self) -> &SyntaxSet {
101        &self.syntax_set
102    }
103
104    /// Get a reference to the theme set.
105    pub fn theme_set(&self) -> &ThemeSet {
106        &self.theme_set
107    }
108
109    /// Set the current theme.
110    pub fn set_theme(&mut self, theme_name: &str) {
111        self.theme_name = theme_name.to_string();
112    }
113
114    /// Get the current theme name.
115    pub fn theme_name(&self) -> &str {
116        &self.theme_name
117    }
118
119    /// Get the current theme.
120    pub fn theme(&self) -> &Theme {
121        self.theme_set
122            .themes
123            .get(&self.theme_name)
124            .unwrap_or_else(|| {
125                self.theme_set
126                    .themes
127                    .values()
128                    .next()
129                    .expect("No themes available")
130            })
131    }
132
133    /// Override the background color for highlighted output.
134    ///
135    /// This removes all token background colors and uses the specified
136    /// color for the entire code block. Pass `None` to use theme defaults.
137    ///
138    /// # Example
139    /// ```
140    /// use streamdown_syntax::Highlighter;
141    ///
142    /// let mut highlighter = Highlighter::new();
143    /// // Set a dark grey background
144    /// highlighter.set_background(Some((30, 30, 30)));
145    /// ```
146    pub fn set_background(&mut self, color: Option<(u8, u8, u8)>) {
147        self.background_override = color;
148    }
149
150    /// Get the background override color.
151    pub fn background(&self) -> Option<(u8, u8, u8)> {
152        self.background_override
153    }
154
155    /// Find syntax definition for a language name.
156    ///
157    /// This first checks for common aliases (py→Python, js→JavaScript, etc.)
158    /// and then falls back to syntect's built-in matching.
159    pub fn syntax_for_language(&self, language: &str) -> Option<&SyntaxReference> {
160        // First try our alias mapping
161        let canonical = language_alias(language);
162
163        // Try exact match first
164        if let Some(syntax) = self.syntax_set.find_syntax_by_name(canonical) {
165            return Some(syntax);
166        }
167
168        // Try token match (handles extensions like "rs", "py")
169        if let Some(syntax) = self.syntax_set.find_syntax_by_token(canonical) {
170            return Some(syntax);
171        }
172
173        // Try extension match
174        if let Some(syntax) = self.syntax_set.find_syntax_by_extension(canonical) {
175            return Some(syntax);
176        }
177
178        // Try original input
179        self.syntax_set.find_syntax_by_token(language)
180    }
181
182    /// Get the plain text syntax (for unknown languages).
183    pub fn plain_text(&self) -> &SyntaxReference {
184        self.syntax_set.find_syntax_plain_text()
185    }
186
187    /// Create a new highlight state for streaming.
188    ///
189    /// This is the preferred way to do line-by-line highlighting.
190    pub fn new_highlight_state(&self, language: &str) -> HighlightState<'_> {
191        let syntax = self
192            .syntax_for_language(language)
193            .unwrap_or_else(|| self.plain_text());
194        HighlightState::new(syntax, self.theme())
195    }
196
197    /// Highlight a single line with streaming state.
198    ///
199    /// This is the preferred method for streaming use cases. It maintains
200    /// parse state across calls to correctly handle multi-line tokens.
201    ///
202    /// # Returns
203    /// The highlighted line as an ANSI-escaped string (without trailing newline).
204    pub fn highlight_line_with_state(&self, line: &str, state: &mut HighlightState) -> String {
205        match state.highlighter.highlight_line(line, &self.syntax_set) {
206            Ok(ranges) => {
207                if self.background_override.is_some() {
208                    // Custom rendering without background colors
209                    self.styles_to_ansi(&ranges)
210                } else {
211                    // Use syntect's built-in terminal escaping
212                    let escaped = as_24_bit_terminal_escaped(&ranges, false);
213                    format!("{}{}", escaped, RESET)
214                }
215            }
216            Err(_) => line.to_string(), // Fallback on error
217        }
218    }
219
220    /// Convert syntect styles to ANSI escape codes.
221    fn styles_to_ansi(&self, ranges: &[(Style, &str)]) -> String {
222        let mut output = String::new();
223
224        for (style, text) in ranges {
225            // Skip empty text
226            if text.is_empty() {
227                continue;
228            }
229
230            let mut codes = Vec::new();
231
232            // Foreground color
233            let fg = style.foreground;
234            codes.push(format!("38;2;{};{};{}", fg.r, fg.g, fg.b));
235
236            // Skip background (we're overriding it)
237
238            // Font style
239            if style.font_style.contains(FontStyle::BOLD) {
240                codes.push("1".to_string());
241            }
242            if style.font_style.contains(FontStyle::ITALIC) {
243                codes.push("3".to_string());
244            }
245            if style.font_style.contains(FontStyle::UNDERLINE) {
246                codes.push("4".to_string());
247            }
248
249            // Build escape sequence
250            if !codes.is_empty() {
251                output.push_str(&format!("\x1b[{}m", codes.join(";")));
252            }
253
254            output.push_str(text);
255        }
256
257        // Reset at end of line
258        if !output.is_empty() {
259            output.push_str(RESET);
260        }
261
262        output
263    }
264
265    /// Highlight a complete code block.
266    ///
267    /// This is a convenience method for non-streaming use cases.
268    /// Each line is highlighted and joined with newlines.
269    pub fn highlight_block(&self, code: &str, language: &str) -> String {
270        let mut state = self.new_highlight_state(language);
271        let mut output = String::new();
272
273        for line in code.lines() {
274            output.push_str(&self.highlight_line_with_state(line, &mut state));
275            output.push('\n');
276        }
277
278        output
279    }
280
281    /// Simple highlight method (backward compatible).
282    ///
283    /// Highlights code and returns ANSI-formatted string.
284    pub fn highlight(&self, code: &str, language: Option<&str>) -> String {
285        let lang = language.unwrap_or("text");
286        self.highlight_block(code, lang)
287    }
288
289    /// List available theme names.
290    pub fn themes(&self) -> Vec<&str> {
291        self.theme_set.themes.keys().map(|s| s.as_str()).collect()
292    }
293
294    /// List available language names.
295    pub fn languages(&self) -> Vec<&str> {
296        self.syntax_set
297            .syntaxes()
298            .iter()
299            .map(|s| s.name.as_str())
300            .collect()
301    }
302
303    /// Check if a theme exists.
304    pub fn has_theme(&self, name: &str) -> bool {
305        self.theme_set.themes.contains_key(name)
306    }
307
308    /// Check if a language is supported.
309    pub fn has_language(&self, name: &str) -> bool {
310        self.syntax_for_language(name).is_some()
311    }
312}
313
/// State for streaming syntax highlighting.
///
/// This maintains the parse state across lines to correctly handle
/// multi-line tokens like block comments and strings. The lifetime `'a`
/// ties the state to the syntax and theme references it was created from.
pub struct HighlightState<'a> {
    /// Syntect's HighlightLines for stateful line-by-line highlighting
    highlighter: HighlightLines<'a>,
}
322
323impl<'a> HighlightState<'a> {
324    /// Create a new highlight state for a syntax and theme.
325    pub fn new(syntax: &'a SyntaxReference, theme: &'a Theme) -> Self {
326        Self {
327            highlighter: HighlightLines::new(syntax, theme),
328        }
329    }
330}
331
332/// Create a theme with overridden background color.
333///
334/// This is equivalent to Python's `override_background()` function.
335/// It modifies a theme to use a custom background color and removes
336/// all token-specific background colors.
337pub fn override_theme_background(theme: &Theme, bg: (u8, u8, u8)) -> Theme {
338    let mut new_theme = theme.clone();
339
340    // Override settings background
341    new_theme.settings.background = Some(Color {
342        r: bg.0,
343        g: bg.1,
344        b: bg.2,
345        a: 255,
346    });
347
348    // Clear all scope backgrounds
349    for item in &mut new_theme.scopes {
350        item.style.background = None;
351    }
352
353    new_theme
354}
355
#[cfg(test)]
mod tests {
    use super::*;

    /// `new()` selects the documented default theme.
    #[test]
    fn test_new_highlighter() {
        let h = Highlighter::new();
        assert_eq!(h.theme_name(), "base16-ocean.dark");
    }

    /// `with_theme()` stores the requested theme name.
    #[test]
    fn test_with_theme() {
        let h = Highlighter::with_theme("Solarized (dark)");
        assert_eq!(h.theme_name(), "Solarized (dark)");
    }

    /// An unknown theme name must not panic and must still highlight.
    #[test]
    fn test_unknown_theme_falls_back() {
        let h = Highlighter::with_theme("nonexistent-theme");
        assert_eq!(h.theme_name(), "nonexistent-theme");

        let result = h.highlight_block("let x = 1;", "rust");
        assert!(result.contains("\x1b["));
    }

    /// The background override round-trips through its setter and getter.
    #[test]
    fn test_set_background() {
        let mut h = Highlighter::new();
        assert!(h.background().is_none());

        h.set_background(Some((30, 30, 30)));
        assert_eq!(h.background(), Some((30, 30, 30)));

        h.set_background(None);
        assert!(h.background().is_none());
    }

    /// Exact names and common aliases both resolve to a syntax.
    #[test]
    fn test_syntax_for_language() {
        let h = Highlighter::new();

        // Test exact names
        assert!(h.syntax_for_language("Rust").is_some());
        assert!(h.syntax_for_language("Python").is_some());

        // Test aliases
        assert!(h.syntax_for_language("rust").is_some());
        assert!(h.syntax_for_language("py").is_some());
        assert!(h.syntax_for_language("js").is_some());
        assert!(h.syntax_for_language("sh").is_some());
        assert!(h.syntax_for_language("bash").is_some());
        assert!(h.syntax_for_language("c").is_some());
        assert!(h.syntax_for_language("cpp").is_some());

        // Unknown names resolve to nothing
        assert!(h.syntax_for_language("unknown-lang-xyz").is_none());
    }

    /// Block highlighting keeps the code and emits one output line per input line.
    #[test]
    fn test_highlight_block() {
        let h = Highlighter::new();
        let code = "fn main() {\n    println!(\"Hello\");\n}";
        let result = h.highlight_block(code, "rust");

        // Should contain ANSI escape codes
        assert!(result.contains("\x1b["));
        // Should contain the code
        assert!(result.contains("main"));
        assert!(result.contains("println"));
        // One newline-terminated output line per input line
        assert_eq!(result.lines().count(), 3);
        assert!(result.ends_with('\n'));
    }

    /// Empty input produces empty output.
    #[test]
    fn test_highlight_block_empty() {
        let h = Highlighter::new();
        assert_eq!(h.highlight_block("", "rust"), "");
    }

    /// Streaming highlighting returns styled lines without line terminators.
    #[test]
    fn test_highlight_line_streaming() {
        let h = Highlighter::new();
        let mut state = h.new_highlight_state("rust");

        let line1 = h.highlight_line_with_state("fn main() {", &mut state);
        let line2 = h.highlight_line_with_state("    println!(\"Hello\");", &mut state);
        let line3 = h.highlight_line_with_state("}", &mut state);

        // All should contain ANSI codes
        assert!(line1.contains("\x1b["));
        assert!(line2.contains("\x1b["));
        assert!(line3.contains("\x1b["));

        // Lines passed without a newline must come back without one
        assert!(!line1.contains('\n'));
        assert!(!line2.contains('\n'));
        assert!(!line3.contains('\n'));
    }

    /// The theme list is non-empty and includes the default theme.
    #[test]
    fn test_themes() {
        let h = Highlighter::new();
        let themes = h.themes();

        assert!(!themes.is_empty());
        assert!(themes.contains(&"base16-ocean.dark"));
    }

    /// The language list is non-empty and includes well-known syntaxes.
    #[test]
    fn test_languages() {
        let h = Highlighter::new();
        let langs = h.languages();

        assert!(!langs.is_empty());
        assert!(langs.contains(&"Rust"));
        assert!(langs.contains(&"Python"));
    }

    /// `has_theme` distinguishes known from unknown theme names.
    #[test]
    fn test_has_theme() {
        let h = Highlighter::new();
        assert!(h.has_theme("base16-ocean.dark"));
        assert!(!h.has_theme("nonexistent-theme"));
    }

    /// `has_language` accepts names and aliases and rejects unknown input.
    #[test]
    fn test_has_language() {
        let h = Highlighter::new();
        assert!(h.has_language("rust"));
        assert!(h.has_language("python"));
        assert!(h.has_language("py")); // alias
        assert!(!h.has_language("unknown-lang-xyz"));
    }

    /// The derived theme gets the new background and loses scope backgrounds.
    #[test]
    fn test_override_theme_background() {
        let h = Highlighter::new();
        let theme = h.theme();
        let new_theme = override_theme_background(theme, (10, 20, 30));

        assert_eq!(
            new_theme.settings.background,
            Some(Color { r: 10, g: 20, b: 30, a: 255 })
        );
        // Every scope-specific background must have been cleared
        assert!(new_theme
            .scopes
            .iter()
            .all(|item| item.style.background.is_none()));
        // The rest of the theme is carried over
        assert_eq!(new_theme.scopes.len(), theme.scopes.len());
    }

    /// Unknown languages are rendered through the plain text syntax.
    #[test]
    fn test_plain_text_fallback() {
        let h = Highlighter::new();
        let result = h.highlight_block("just some text", "unknown-lang-xyz");

        // Should still produce output (plain text fallback)
        assert!(result.contains("just some text"));
    }

    /// Parse state survives across lines of a multi-line comment.
    #[test]
    fn test_multiline_token() {
        let h = Highlighter::new();
        let mut state = h.new_highlight_state("rust");

        // Start a block comment
        let line1 = h.highlight_line_with_state("/* this is a", &mut state);
        let line2 = h.highlight_line_with_state("   multi-line comment */", &mut state);
        let line3 = h.highlight_line_with_state("let x = 1;", &mut state);

        // All lines should produce output
        assert!(!line1.is_empty());
        assert!(!line2.is_empty());
        assert!(!line3.is_empty());
    }

    /// With an override set, output has foreground colors and no background codes.
    #[test]
    fn test_background_override_styling() {
        let mut h = Highlighter::new();
        h.set_background(Some((30, 30, 30)));

        let code = "let x = 1;";
        let result = h.highlight_block(code, "rust");

        // Should have foreground colors but no background in escape codes
        assert!(result.contains("38;2;")); // Foreground
        // Background sequences must NOT be present when override is set:
        // the custom renderer skips backgrounds entirely
        assert!(!result.contains("\x1b[48;2;"));
        // The code itself must survive
        assert!(result.contains("let"));
    }
}