cmark_syntax/
lib.rs

1// This file is part of cmark-syntax. This program comes with ABSOLUTELY NO WARRANTY;
2// This is free software, and you are welcome to redistribute it under the
3// conditions of the GNU General Public License version 3.0.
4//
5// You should have received a copy of the GNU General Public License
6// along with cmark-syntax.  If not, see <http://www.gnu.org/licenses/>
7#![doc = include_str!("../README.md")]
8use logos::Logos;
9use pulldown_cmark::{CodeBlockKind, CowStr, Event, Tag, TagEnd};
10
11/// Definition of syntaxes of various languages.
12pub mod languages;
13
14/// A type of token that can be highlighted.
15pub trait Highlight: Sized + for<'a> Logos<'a, Source = str> {
16    /// Name of the language of this highlighter.
17    const LANG: &'static str;
18
19    /// The token denoting the start, before input.
20    const START: Self;
21
22    /// Determine the kind of a token from the current and the previous token.
23    fn kind(tokens: &[Self; 2]) -> Kind;
24}
25
/// Possible kind of a token in the highlighted syntax.
///
/// Every kind except [`Kind::None`] is rendered as a `<span>` element whose
/// `class` attribute names the kind; tokens of kind [`Kind::None`] are emitted
/// as plain (escaped) text.
// NOTE: the discriminant (`kind as usize`) is used as an index into the CSS
// class table, so the number of variants must stay in sync with that table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Kind {
    /// Not wrapped in any element.
    None,
    /// Rendered as `<span class="glyph">`.
    Glyph,
    /// Rendered as `<span class="literal">`.
    Literal,
    /// Rendered as `<span class="identifier">`.
    Identifier,
    /// Rendered as `<span class="special-identifier">`.
    SpecialIdentifier,
    /// Rendered as `<span class="strong-identifier">`.
    StrongIdentifier,
    /// Rendered as `<span class="keyword">`.
    Keyword,
    /// Rendered as `<span class="comment">`.
    Comment,
}
46
47static HIGHLIGHT_CLASS: [Option<&'static str>; 8] = {
48    let mut classes = [None; 8];
49
50    classes[Kind::Glyph as usize] = Some("glyph");
51    classes[Kind::Literal as usize] = Some("literal");
52    classes[Kind::Identifier as usize] = Some("identifier");
53    classes[Kind::SpecialIdentifier as usize] = Some("special-identifier");
54    classes[Kind::StrongIdentifier as usize] = Some("strong-identifier");
55    classes[Kind::Keyword as usize] = Some("keyword");
56    classes[Kind::Comment as usize] = Some("comment");
57
58    classes
59};
60
61/// A preprocessor that highlights syntax in `pulldown_cmark` events.
62#[derive(Debug, Default)]
63pub struct SyntaxPreprocessor<'a, I: Iterator<Item = Event<'a>>> {
64    parent: I,
65}
66
67impl<'a, I: Iterator<Item = Event<'a>>> SyntaxPreprocessor<'a, I> {
68    /// Create a new syntax preprocessor from `parent`.
69    pub fn new(parent: I) -> Self {
70        Self { parent }
71    }
72}
73
74impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SyntaxPreprocessor<'a, I> {
75    type Item = Event<'a>;
76
77    #[inline]
78    fn next(&mut self) -> Option<Self::Item> {
79        let lang = match self.parent.next()? {
80            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang))) if !lang.is_empty() => lang,
81            #[cfg(feature = "latex2mathml")]
82            Event::InlineMath(c) => {
83                return Some(Event::Html(
84                    latex2mathml::latex_to_mathml(
85                        c.as_ref(),
86                        latex2mathml::DisplayStyle::Inline,
87                    )
88                    .unwrap_or_else(|e| e.to_string())
89                    .into(),
90                ));
91            }
92            #[cfg(feature = "latex2mathml")]
93            Event::DisplayMath(c) => {
94                return Some(Event::Html(
95                    latex2mathml::latex_to_mathml(
96                        c.as_ref(),
97                        latex2mathml::DisplayStyle::Block,
98                    )
99                    .unwrap_or_else(|e| e.to_string())
100                    .into(),
101                ));
102            }
103            other => return Some(other),
104        };
105
106        let next = self.parent.next();
107        let code = match next {
108            Some(Event::Text(c)) => {
109                let mut code = c;
110                loop {
111                    match self.parent.next() {
112                        Some(Event::Text(ref c)) => {
113                            code = {
114                                let mut s = code.into_string();
115                                s.push_str(c);
116                                CowStr::Boxed(s.into())
117                            }
118                        }
119                        Some(Event::End(TagEnd::CodeBlock)) | None => break,
120                        Some(e) => {
121                            return Some(Event::Text(
122                                format!("Unexpected markdown event {:#?}", e).into(),
123                            ))
124                        }
125                    }
126                }
127                code
128            }
129            Some(Event::End(TagEnd::CodeBlock)) | None => CowStr::Borrowed(""),
130            Some(e) => {
131                return Some(Event::Text(
132                    format!("Unexpected markdown event {:#?}", e).into(),
133                ))
134            }
135        };
136
137        let mut html = String::with_capacity(code.len() + code.len() / 4 + 60);
138        html.push_str("<pre><code class=\"language-");
139        html.push_str(lang.as_ref());
140        html.push_str("\">");
141
142        match lang.as_ref() {
143            "rust" | "rs" => highlight::<languages::Rust>(&code, &mut html),
144            "js" | "javascript" => highlight::<languages::JavaScript>(&code, &mut html),
145            "toml" => highlight::<languages::Toml>(&code, &mut html),
146            "sh" | "shell" | "bash" => highlight::<languages::Sh>(&code, &mut html),
147            _ => write_escaped(&mut html, &code),
148        }
149
150        html.push_str("</code></pre>");
151
152        Some(Event::Html(html.into()))
153    }
154}
155
/// Append `part` to `s`, replacing HTML-significant characters by entities.
///
/// `<`, `>`, `&` and `"` become `&lt;`, `&gt;`, `&amp;` and `&quot;`; all other
/// characters are copied unchanged.
#[inline]
fn write_escaped(s: &mut String, part: &str) {
    let mut rest = part;

    while let Some(pos) = rest.find(|c: char| matches!(c, '<' | '>' | '&' | '"')) {
        let (plain, tail) = rest.split_at(pos);
        s.push_str(plain);

        let entity = match tail.as_bytes()[0] {
            b'<' => "&lt;",
            b'>' => "&gt;",
            b'&' => "&amp;",
            _ => "&quot;",
        };
        s.push_str(entity);

        // Each escaped character is a single ASCII byte.
        rest = &tail[1..];
    }

    s.push_str(rest);
}
177
178/// Highlight the code in `source`, placing the output into `buf`.
179#[inline]
180pub fn highlight<'a, Token>(source: &'a str, buf: &mut String)
181where
182    Token: Highlight + Eq + Copy,
183    <Token as Logos<'a>>::Extras: Default,
184{
185    let mut lex = Token::lexer(source);
186    let mut open = Kind::None;
187    let mut last = 0usize;
188    let mut tokens = [Token::START; 2];
189
190    while let Some(token) = lex.next() {
191        if tokens[1] != Token::START {
192            tokens[0] = tokens[1];
193        }
194        tokens[1] = token.unwrap_or(Token::START);
195
196        let kind = Token::kind(&tokens);
197
198        if open != kind {
199            // Close previous tag
200            if open != Kind::None {
201                buf.push_str("</span>");
202            }
203
204            // Include trivia
205            write_escaped(buf, &source[last..lex.span().start]);
206
207            // Open new tag
208            if let Some(tag) = HIGHLIGHT_CLASS[kind as usize] {
209                buf.push_str("<span class=\"");
210                buf.push_str(tag);
211                buf.push_str("\">");
212            }
213
214            open = kind;
215
216            write_escaped(buf, lex.slice());
217        } else {
218            // Include trivia
219            write_escaped(buf, &source[last..lex.span().end]);
220        }
221
222        last = lex.span().end;
223    }
224
225    // Close tail tag
226    if open != Kind::None {
227        buf.push_str("</span>");
228    }
229}