arborium_highlight/lib.rs
1//! Unified syntax highlighting for arborium.
2//!
3//! This crate provides the core highlighting engine that works with both:
4//! - **Statically linked Rust grammars**: For CLI tools and servers
5//! - **Dynamically loaded WASM plugins**: For browser contexts
6//!
7//! # Why Async in a Highlighting Library?
8//!
9//! You might wonder why a syntax highlighting library has async code. The answer
10//! is **browser support**.
11//!
12//! - **Parsing is synchronous**: Tree-sitter parsing cannot be async—it's a
13//! fundamentally synchronous operation that walks the syntax tree.
14//!
15//! - **Getting a grammar can be async**: In browser contexts, grammar plugins
16//! are loaded from a CDN via JavaScript's dynamic `import()`. This is
17//! inherently async since it involves network requests and WASM instantiation.
18//!
19//! In native Rust, grammars are statically linked, so the provider returns
20//! immediately. But the trait is async to support both use cases with the same
21//! code.
22//!
23//! # Architecture
24//!
25//! The highlighting system is built around two key traits:
26//!
27//! - [`Grammar`]: What a grammar can do — parse text and return spans
28//! - [`GrammarProvider`]: How grammars are obtained — this is where sync vs async differs
29//!
30//! ## The Sync-in-Async-Clothing Pattern
31//!
32//! The core highlighting logic (including injection handling) is written **once**
33//! as async code in `HighlighterCore`. Two wrappers provide the sync and async APIs:
34//!
35//! - [`SyncHighlighter`]: Polls the async future **once** and panics if it yields.
36//! This is safe for native Rust where providers return immediately.
37//!
38//! - [`AsyncHighlighter`]: Actually awaits provider calls. Use this for browser/WASM
39//! contexts where grammar loading involves network requests.
40//!
41//! This design ensures both environments share the exact same injection-handling
42//! logic, avoiding subtle bugs from duplicated code.
43//!
44//! ## When to Use Which
45//!
46//! | Context | Highlighter | Provider Example |
47//! |---------|-------------|------------------|
48//! | Native Rust | [`SyncHighlighter`] | `StaticProvider` (grammars compiled in) |
49//! | Browser WASM | [`AsyncHighlighter`] | `JsGrammarProvider` (loads from CDN) |
50//!
51//! # Quick Start
52//!
53//! ```rust,ignore
54//! use arborium_highlight::{SyncHighlighter, Grammar, GrammarProvider, ParseResult, Span};
55//! use arborium_highlight::{HighlightConfig, HtmlFormat};
56//!
57//! // Define your grammar (implements Grammar trait)
58//! struct MyGrammar { /* ... */ }
59//! impl Grammar for MyGrammar {
60//! fn parse(&mut self, text: &str) -> ParseResult {
61//! // Parse and return spans + injections
62//! ParseResult::default()
63//! }
64//! }
65//!
66//! // Define your provider (implements GrammarProvider trait)
67//! struct MyProvider { /* ... */ }
68//! impl GrammarProvider for MyProvider {
69//! type Grammar = MyGrammar;
70//! async fn get(&mut self, language: &str) -> Option<&mut Self::Grammar> {
71//! // Return grammar for language
72//! None
73//! }
74//! }
75//!
76//! // Use with default configuration (custom elements: <a-k>, <a-f>, etc.)
77//! let mut highlighter = SyncHighlighter::new(MyProvider { /* ... */ });
//! let html = highlighter.highlight("rust", "fn main() {}").unwrap();
79//! // Output: <a-k>fn</a-k> <a-f>main</a-f>() {}
80//!
81//! // Or use class-based output for compatibility with existing CSS
82//! let config = HighlightConfig {
83//! html_format: HtmlFormat::ClassNames,
84//! ..Default::default()
85//! };
86//! let mut highlighter = SyncHighlighter::with_config(MyProvider { /* ... */ }, config);
//! let html = highlighter.highlight("rust", "fn main() {}").unwrap();
88//! // Output: <span class="keyword">fn</span> <span class="function">main</span>() {}
89//! ```
90//!
91//! # HTML Output Formats
92//!
93//! Arborium supports multiple HTML output formats via [`HtmlFormat`]:
94//!
95//! - **`CustomElements`** (default): Compact custom elements like `<a-k>`, `<a-f>`, etc.
96//! - **`CustomElementsWithPrefix(prefix)`**: Custom elements with your prefix, e.g., `<code-k>`
97//! - **`ClassNames`**: Traditional `<span class="keyword">` for compatibility
98//! - **`ClassNamesWithPrefix(prefix)`**: Namespaced classes like `<span class="arb-keyword">`
99//!
100//! See [`HtmlFormat`] for examples and use cases.
101
102mod render;
103mod types;
104
105#[cfg(feature = "tree-sitter")]
106pub mod tree_sitter;
107
108pub use render::{
109 AnsiOptions, ThemedSpan, html_escape, spans_to_ansi, spans_to_ansi_with_options, spans_to_html,
110 spans_to_themed, write_spans_as_ansi, write_spans_as_html,
111};
112pub use types::{HighlightError, Injection, ParseResult, Span};
113
114#[cfg(feature = "tree-sitter")]
115pub use tree_sitter::{CompiledGrammar, GrammarConfig, GrammarError, ParseContext};
116
117// Backward compatibility aliases
118#[cfg(feature = "tree-sitter")]
119#[doc(hidden)]
120pub use tree_sitter::{TreeSitterGrammarConfig, TreeSitterGrammarError};
121
122use std::future::Future;
123use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker};
124
125/// A grammar that can parse text and produce highlight spans.
126///
127/// This is implemented by:
128/// - Tree-sitter based parsers (for Rust)
129/// - WASM plugin wrappers (for browser)
130/// - Mock implementations (for testing)
131///
132/// # Implementation Notes
133///
134/// Parsing is always synchronous. The async part of highlighting is *getting* the grammar,
135/// not using it. This is because tree-sitter parsing is inherently synchronous.
136pub trait Grammar {
137 /// Parse text and return spans + injection points.
138 ///
139 /// This is always synchronous - the async part is *getting* the grammar,
140 /// not using it.
141 fn parse(&mut self, text: &str) -> ParseResult;
142}
143
144/// Provides grammars for languages.
145///
146/// This trait abstracts over how grammars are obtained:
147///
148/// - **Static (Rust)**: Grammars are statically linked. `get()` returns
149/// immediately without awaiting.
150///
151/// - **Dynamic (WASM)**: Grammars are loaded as WASM plugins. `get()` may
152/// need to fetch and instantiate a plugin, which is async.
153///
154/// # Implementation Notes
155///
156/// For sync contexts (Rust CLI tools, servers), implement `get()` to return
157/// immediately. The `SyncHighlighter` wrapper will panic if `get()` yields.
158///
159/// For async contexts (WASM/browser), `get()` can await plugin loading.
160/// Use `AsyncHighlighter` wrapper.
161pub trait GrammarProvider {
162 /// The grammar type this provider returns.
163 type Grammar: Grammar;
164
165 /// Get a grammar for a language.
166 ///
167 /// Returns `None` if the language is not supported.
168 ///
169 /// # Sync vs Async
170 ///
171 /// This is an async method, but for sync providers (static Rust grammars),
172 /// it should return `Ready` immediately without yielding. The caller
173 /// (SyncHighlighter) will poll once and panic if it gets `Pending`.
174 ///
175 /// # Send Bound
176 ///
177 /// On native targets, the future must be `Send` for compatibility with
178 /// async runtimes. On WASM, `Send` is not required (single-threaded).
179 #[cfg(not(target_arch = "wasm32"))]
180 fn get(&mut self, language: &str) -> impl Future<Output = Option<&mut Self::Grammar>> + Send;
181
182 /// Get a grammar for a language (WASM version without Send bound).
183 #[cfg(target_arch = "wasm32")]
184 fn get(&mut self, language: &str) -> impl Future<Output = Option<&mut Self::Grammar>>;
185}
186
/// HTML output format for syntax highlighting.
///
/// The default is [`HtmlFormat::CustomElements`]; `Default` is derived and the
/// default variant is marked with `#[default]`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub enum HtmlFormat {
    /// Custom elements with default prefix: `<a-k>`, `<a-f>`, etc. (default)
    ///
    /// This is the most compact format and leverages custom HTML elements.
    ///
    /// # Example
    /// ```html
    /// <a-k>fn</a-k> <a-f>main</a-f>()
    /// ```
    #[default]
    CustomElements,

    /// Custom elements with custom prefix: `<prefix-k>`, `<prefix-f>`, etc.
    ///
    /// Useful for branding or avoiding conflicts with other custom elements.
    ///
    /// # Example
    /// ```html
    /// <!-- With prefix "code" -->
    /// <code-k>fn</code-k> <code-f>main</code-f>()
    /// ```
    CustomElementsWithPrefix(String),

    /// Traditional class-based spans: `<span class="keyword">`, etc.
    ///
    /// Compatible with existing tooling that expects class-based markup.
    ///
    /// # Example
    /// ```html
    /// <span class="keyword">fn</span> <span class="function">main</span>()
    /// ```
    ClassNames,

    /// Class-based spans with custom prefix: `<span class="prefix-keyword">`, etc.
    ///
    /// Useful for namespacing CSS classes.
    ///
    /// # Example
    /// ```html
    /// <!-- With prefix "arb" -->
    /// <span class="arb-keyword">fn</span> <span class="arb-function">main</span>()
    /// ```
    ClassNamesWithPrefix(String),
}
238
239/// Configuration for highlighting.
240#[derive(Debug, Clone)]
241pub struct HighlightConfig {
242 /// Maximum depth for processing language injections.
243 ///
244 /// - `0`: No injections (just primary language)
245 /// - `3`: Default, handles most cases
246 /// - Higher: For deeply nested content
247 pub max_injection_depth: u32,
248
249 /// HTML output format (custom elements vs class-based spans).
250 pub html_format: HtmlFormat,
251}
252
253impl Default for HighlightConfig {
254 fn default() -> Self {
255 Self {
256 max_injection_depth: 3,
257 html_format: HtmlFormat::default(),
258 }
259 }
260}
261
262/// Internal async implementation - handles all the hard work.
263///
264/// The core logic is written once as async, then wrapped by `SyncHighlighter`
265/// (which polls once and panics if it yields) and `AsyncHighlighter` (which
266/// actually awaits).
267struct HighlighterCore<P: GrammarProvider> {
268 provider: P,
269 config: HighlightConfig,
270}
271
272impl<P: GrammarProvider> HighlighterCore<P> {
273 fn new(provider: P) -> Self {
274 Self {
275 provider,
276 config: HighlightConfig::default(),
277 }
278 }
279
280 fn with_config(provider: P, config: HighlightConfig) -> Self {
281 Self { provider, config }
282 }
283
284 /// Highlight and return raw spans for the full document,
285 /// including any recursively processed injections.
286 async fn highlight_spans(
287 &mut self,
288 language: &str,
289 source: &str,
290 ) -> Result<Vec<Span>, HighlightError> {
291 // 1. Get the primary grammar
292 let grammar = self
293 .provider
294 .get(language)
295 .await
296 .ok_or_else(|| HighlightError::UnsupportedLanguage(language.into()))?;
297
298 // 2. Parse the primary language
299 let result = grammar.parse(source);
300
301 // 3. Collect all spans (including from injections)
302 let mut all_spans = result.spans;
303
304 // 4. Process injections recursively
305 if self.config.max_injection_depth > 0 {
306 self.process_injections(
307 source,
308 result.injections,
309 0,
310 self.config.max_injection_depth,
311 &mut all_spans,
312 )
313 .await;
314 }
315
316 Ok(all_spans)
317 }
318
319 /// The main highlight function - written once, used by both wrappers.
320 async fn highlight(&mut self, language: &str, source: &str) -> Result<String, HighlightError> {
321 let spans = self.highlight_spans(language, source).await?;
322 Ok(spans_to_html(source, spans, &self.config.html_format))
323 }
324
325 /// Process injections recursively.
326 async fn process_injections(
327 &mut self,
328 source: &str,
329 injections: Vec<Injection>,
330 base_offset: u32,
331 remaining_depth: u32,
332 all_spans: &mut Vec<Span>,
333 ) {
334 if remaining_depth == 0 {
335 return;
336 }
337
338 for injection in injections {
339 let start = injection.start as usize;
340 let end = injection.end as usize;
341
342 if end <= source.len() && start < end {
343 // Try to get grammar for injected language
344 if let Some(inj_grammar) = self.provider.get(&injection.language).await {
345 let injected_text = &source[start..end];
346 let result = inj_grammar.parse(injected_text);
347
348 // Adjust offsets and add spans
349 let adjusted_spans: Vec<Span> = result
350 .spans
351 .into_iter()
352 .map(|mut s| {
353 s.start += base_offset + injection.start;
354 s.end += base_offset + injection.start;
355 s
356 })
357 .collect();
358 all_spans.extend(adjusted_spans);
359
360 // Recurse into nested injections
361 if !result.injections.is_empty() {
362 // Box the recursive call to avoid infinite type size
363 Box::pin(self.process_injections(
364 injected_text,
365 result.injections,
366 base_offset + injection.start,
367 remaining_depth - 1,
368 all_spans,
369 ))
370 .await;
371 }
372 }
373 // If grammar not available, skip this injection silently
374 }
375 }
376 }
377}
378
379/// Synchronous highlighter for Rust contexts.
380///
381/// Uses a sync provider where `get()` returns immediately.
382/// Panics if the provider ever yields (returns Pending).
383///
384/// # Example
385///
386/// ```rust,ignore
387/// use arborium_highlight::{SyncHighlighter, StaticProvider};
388///
389/// let mut highlighter = SyncHighlighter::new(StaticProvider::new());
390/// let html = highlighter.highlight("rust", "fn main() {}")?;
391/// ```
392pub struct SyncHighlighter<P: GrammarProvider> {
393 core: HighlighterCore<P>,
394}
395
396impl<P: GrammarProvider> SyncHighlighter<P> {
397 /// Create a new synchronous highlighter with default configuration.
398 pub fn new(provider: P) -> Self {
399 Self {
400 core: HighlighterCore::new(provider),
401 }
402 }
403
404 /// Create a new synchronous highlighter with custom configuration.
405 pub fn with_config(provider: P, config: HighlightConfig) -> Self {
406 Self {
407 core: HighlighterCore::with_config(provider, config),
408 }
409 }
410
411 /// Get a mutable reference to the underlying provider.
412 pub fn provider_mut(&mut self) -> &mut P {
413 &mut self.core.provider
414 }
415
416 /// Highlight source code synchronously and return HTML.
417 ///
418 /// # Panics
419 ///
420 /// Panics if the provider's `get()` method yields (returns Pending).
421 /// This indicates a bug - sync providers should never yield.
422 pub fn highlight(&mut self, language: &str, source: &str) -> Result<String, HighlightError> {
423 let future = self.core.highlight(language, source);
424
425 // Pin the future on the stack
426 let mut future = std::pin::pin!(future);
427
428 // Create a no-op waker (we're not actually async)
429 let waker = noop_waker();
430 let mut cx = Context::from_waker(&waker);
431
432 // Poll once - sync providers complete immediately
433 match future.as_mut().poll(&mut cx) {
434 Poll::Ready(result) => result,
435 Poll::Pending => {
436 panic!(
437 "SyncHighlighter: provider yielded. Use AsyncHighlighter for async providers."
438 )
439 }
440 }
441 }
442
443 /// Highlight source code synchronously and return ANSI-colored text
444 /// using the provided theme.
445 ///
446 /// This uses the same span computation as HTML output but renders
447 /// with ANSI escape sequences.
448 pub fn highlight_to_ansi(
449 &mut self,
450 language: &str,
451 source: &str,
452 theme: &arborium_theme::Theme,
453 ) -> Result<String, HighlightError> {
454 self.highlight_to_ansi_with_options(language, source, theme, &AnsiOptions::default())
455 }
456
457 /// Highlight source code synchronously and return ANSI-colored text
458 /// using the provided theme and explicit ANSI rendering options.
459 pub fn highlight_to_ansi_with_options(
460 &mut self,
461 language: &str,
462 source: &str,
463 theme: &arborium_theme::Theme,
464 options: &AnsiOptions,
465 ) -> Result<String, HighlightError> {
466 let future = self.core.highlight_spans(language, source);
467
468 let mut future = std::pin::pin!(future);
469 let waker = noop_waker();
470 let mut cx = Context::from_waker(&waker);
471
472 match future.as_mut().poll(&mut cx) {
473 Poll::Ready(Ok(spans)) => Ok(spans_to_ansi_with_options(source, spans, theme, options)),
474 Poll::Ready(Err(e)) => Err(e),
475 Poll::Pending => {
476 panic!(
477 "SyncHighlighter: provider yielded. Use AsyncHighlighter for async providers."
478 )
479 }
480 }
481 }
482}
483
484/// Asynchronous highlighter for WASM/browser contexts.
485///
486/// Uses an async provider where `get()` may need to load plugins.
487///
488/// # Example
489///
490/// ```rust,ignore
491/// use arborium_highlight::{AsyncHighlighter, WasmPluginProvider};
492///
493/// let mut highlighter = AsyncHighlighter::new(WasmPluginProvider::new());
494/// let html = highlighter.highlight("rust", "fn main() {}").await?;
495/// ```
496pub struct AsyncHighlighter<P: GrammarProvider> {
497 core: HighlighterCore<P>,
498}
499
500impl<P: GrammarProvider> AsyncHighlighter<P> {
501 /// Create a new asynchronous highlighter with default configuration.
502 pub fn new(provider: P) -> Self {
503 Self {
504 core: HighlighterCore::new(provider),
505 }
506 }
507
508 /// Create a new asynchronous highlighter with custom configuration.
509 pub fn with_config(provider: P, config: HighlightConfig) -> Self {
510 Self {
511 core: HighlighterCore::with_config(provider, config),
512 }
513 }
514
515 /// Get a mutable reference to the underlying provider.
516 pub fn provider_mut(&mut self) -> &mut P {
517 &mut self.core.provider
518 }
519
520 /// Highlight source code asynchronously.
521 pub async fn highlight(
522 &mut self,
523 language: &str,
524 source: &str,
525 ) -> Result<String, HighlightError> {
526 self.core.highlight(language, source).await
527 }
528}
529
/// Builds a [`Waker`] whose every operation does nothing.
///
/// Used for the single synchronous poll performed by the sync wrapper, where
/// nobody will ever need to be woken.
fn noop_waker() -> Waker {
    /// `clone` entry: hands out another null-data waker on the same vtable.
    fn clone_raw(_data: *const ()) -> RawWaker {
        RawWaker::new(std::ptr::null(), &NOOP_VTABLE)
    }

    /// Shared `wake` / `wake_by_ref` / `drop` entry: nothing to do.
    fn ignore(_data: *const ()) {}

    static NOOP_VTABLE: RawWakerVTable = RawWakerVTable::new(clone_raw, ignore, ignore, ignore);

    // SAFETY: the data pointer is null and no vtable function ever reads it;
    // `clone_raw` returns a waker with the same contract and the remaining
    // entries are no-ops, so there is no resource to manage and every
    // function is safe to call from any thread.
    unsafe { Waker::from_raw(RawWaker::new(std::ptr::null(), &NOOP_VTABLE)) }
}
542
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Grammar stub that ignores its input and always returns a canned result.
    struct MockGrammar {
        result: ParseResult,
    }

    impl Grammar for MockGrammar {
        fn parse(&mut self, _text: &str) -> ParseResult {
            self.result.clone()
        }
    }

    /// Sync provider stub: looks grammars up in a map and never yields.
    struct MockProvider {
        grammars: HashMap<&'static str, MockGrammar>,
    }

    impl GrammarProvider for MockProvider {
        type Grammar = MockGrammar;

        async fn get(&mut self, language: &str) -> Option<&mut Self::Grammar> {
            self.grammars.get_mut(language)
        }
    }

    /// Builds a span with pattern index 0.
    fn span(start: u32, end: u32, capture: &'static str) -> Span {
        Span {
            start,
            end,
            capture: capture.into(),
            pattern_index: 0,
        }
    }

    /// Builds a provider that maps each language to a canned parse result.
    fn provider_with(entries: Vec<(&'static str, ParseResult)>) -> MockProvider {
        let grammars = entries
            .into_iter()
            .map(|(language, result)| (language, MockGrammar { result }))
            .collect();
        MockProvider { grammars }
    }

    /// Provider with a single "test" language yielding one keyword span 0..2.
    fn keyword_provider() -> MockProvider {
        provider_with(vec![(
            "test",
            ParseResult {
                spans: vec![span(0, 2, "keyword")],
                injections: vec![],
            },
        )])
    }

    #[test]
    fn test_basic_highlighting() {
        let mut highlighter = SyncHighlighter::new(keyword_provider());

        let html = highlighter.highlight("test", "fn").unwrap();

        assert_eq!(html, "<a-k>fn</a-k>");
    }

    #[test]
    fn test_injection() {
        // "outer" produces no spans itself but injects "inner" over 0..5;
        // "inner" marks that whole range as a string.
        let outer = ParseResult {
            spans: vec![],
            injections: vec![Injection {
                start: 0,
                end: 5,
                language: "inner".into(),
                include_children: false,
            }],
        };
        let inner = ParseResult {
            spans: vec![span(0, 5, "string")],
            injections: vec![],
        };
        let provider = provider_with(vec![("outer", outer), ("inner", inner)]);

        let mut highlighter = SyncHighlighter::new(provider);
        let html = highlighter.highlight("outer", "hello").unwrap();

        assert_eq!(html, "<a-s>hello</a-s>");
    }

    #[test]
    fn test_unsupported_language() {
        let provider = MockProvider {
            grammars: HashMap::new(),
        };
        let mut highlighter = SyncHighlighter::new(provider);

        let outcome = highlighter.highlight("unknown", "code");

        assert!(matches!(
            outcome,
            Err(HighlightError::UnsupportedLanguage(_))
        ));
    }

    #[test]
    fn test_reuse_with_shorter_text() {
        // Regression test: highlighting a shorter string after a longer one
        // with the same highlighter must not panic with slice bounds errors.
        // With the mock grammar this only checks that back-to-back calls on
        // one highlighter complete; results are intentionally ignored.
        let mut highlighter = SyncHighlighter::new(keyword_provider());

        // First: longer string
        let _ = highlighter.highlight("test", "longer string here");

        // Second: shorter string - should not panic
        let _ = highlighter.highlight("test", "short");
    }

    #[test]
    fn test_span_coalescing() {
        // Adjacent spans whose captures render to the same tag are expected
        // to come out as a single element.
        let spans = vec![span(0, 3, "keyword"), span(3, 7, "keyword.function")];

        let html = spans_to_html("keyword", spans, &HtmlFormat::default());

        assert_eq!(html, "<a-k>keyword</a-k>");
    }
}