Skip to main content

oxidoc_highlight/
lib.rs

1pub mod escape;
2mod lang;
3pub mod scanner;
4pub mod token;
5
6use token::render;
7
8/// Tokenize `code` in the given language, returning structured tokens with byte ranges.
9///
10/// Unknown languages return an empty token list. Use [`highlight`] for HTML output instead.
11pub fn scan(code: &str, language: &str) -> Vec<token::Token> {
12    lang::scan(code, language)
13}
14
15/// Highlight `code` in the given language, returning HTML with `<span class="tok-*">` tokens.
16///
17/// Unknown languages return HTML-escaped plain text (no spans).
18/// Empty input returns an empty string.
19pub fn highlight(code: &str, lang: &str) -> String {
20    if code.is_empty() {
21        return String::new();
22    }
23    let tokens = lang::scan(code, lang);
24    render(code, &tokens)
25}
26
27/// List all supported language identifiers.
28pub fn supported_languages() -> Vec<&'static str> {
29    lang::supported()
30}
31
32/// Check if a language is supported.
33pub fn is_supported(lang: &str) -> bool {
34    lang::get_scanner(lang).is_some()
35}
36
37// ── Wasm bindings ────────────────────────────────────────────────────
38
/// JavaScript-facing wrappers around the crate's public API.
///
/// Only compiled when the `wasm` feature is enabled. Each function is exported
/// under the camelCase name given in its `js_name` attribute.
#[cfg(feature = "wasm")]
mod wasm {
    use wasm_bindgen::prelude::*;

    /// Exported as `highlight`; forwards to [`crate::highlight`].
    #[wasm_bindgen(js_name = "highlight")]
    pub fn highlight_wasm(code: &str, lang: &str) -> String {
        crate::highlight(code, lang)
    }

    /// Exported as `supportedLanguages`; forwards to [`crate::supported_languages`].
    ///
    /// The static string slices are copied into owned `String`s for the return value.
    #[wasm_bindgen(js_name = "supportedLanguages")]
    pub fn supported_languages_wasm() -> Vec<String> {
        let names = crate::supported_languages();
        names.into_iter().map(str::to_owned).collect()
    }

    /// Exported as `isSupported`; forwards to [`crate::is_supported`].
    #[wasm_bindgen(js_name = "isSupported")]
    pub fn is_supported_wasm(lang: &str) -> bool {
        crate::is_supported(lang)
    }
}
61
#[cfg(test)]
mod tests {
    use super::*;

    /// Drop every `<span …>` opening tag and `</span>` closing tag from `html`,
    /// leaving the text between them untouched.
    ///
    /// Every piece after a `<span` split begins with the remainder of the opening
    /// tag, so everything up to and including the first `>` is discarded. The
    /// leading piece is treated the same way; this presumes highlighted text never
    /// contains a raw `>` (the escaping test below shows `>` rendered as `&gt;`).
    fn strip_spans(html: &str) -> String {
        html.split("<span")
            .map(|piece| match piece.find('>') {
                Some(pos) => &piece[pos + 1..],
                None => piece,
            })
            .collect::<String>()
            .replace("</span>", "")
    }

    /// Empty input short-circuits to an empty string.
    #[test]
    fn empty_input() {
        assert_eq!(highlight("", "rust"), "");
    }

    /// An unrecognised language still escapes HTML metacharacters.
    #[test]
    fn unknown_language_escapes_html() {
        assert_eq!(highlight("<b>hi</b>", "unknown"), "&lt;b&gt;hi&lt;/b&gt;");
    }

    /// Removing the span markup from highlighted output must give back exactly the
    /// HTML-escaped source, i.e. highlighting neither drops nor duplicates text.
    #[test]
    fn round_trip_strip_spans() {
        let code = "let x = 42; // test\nfn foo() {}";
        let html = highlight(code, "rust");
        let stripped = strip_spans(&html);

        // The stripped text should equal the HTML-escaped original.
        let mut expected = String::new();
        crate::escape::escape_html(code, &mut expected);
        assert_eq!(stripped, expected);
    }

    /// Sanity check that the language registry is populated.
    #[test]
    fn supported_languages_nonempty() {
        assert!(supported_languages().len() > 10);
    }

    /// Both canonical names and aliases resolve; unknown names do not.
    #[test]
    fn is_supported_works() {
        assert!(is_supported("rust"));
        assert!(is_supported("rs"));
        assert!(is_supported("javascript"));
        assert!(!is_supported("brainfuck"));
    }

    /// Multi-byte characters must not cause a panic.
    #[test]
    fn unicode_no_panic() {
        // Only the absence of a panic is checked; the output is ignored.
        let _ = highlight("let 变量 = \"你好世界\";", "rust");
        let _ = highlight("const 🎉 = true;", "javascript");
    }

    /// Windows line endings are passed through unchanged and do not stop tokenization.
    #[test]
    fn crlf_handling() {
        let code = "let x = 1;\r\nlet y = 2;\r\n";
        let html = highlight(code, "rust");
        assert!(html.contains("tok-keyword"));
        assert!(html.contains("\r\n"));
    }

    /// Pins the exact span markup for one token of each basic kind.
    #[test]
    fn spec_example() {
        let out = highlight("let x = 42;", "rust");
        assert!(out.contains("<span class=\"tok-keyword\">let</span>"));
        assert!(out.contains("<span class=\"tok-operator\">=</span>"));
        assert!(out.contains("<span class=\"tok-number\">42</span>"));
        assert!(out.contains("<span class=\"tok-punctuation\">;</span>"));
    }
}