markdown_it/parser/inline/builtin/
skip_text.rs

//! Skips plain-text characters for the text token: appends them to the
//! pending text buffer and advances the current position.
//!
4use regex::{self, Regex};
5
6use crate::parser::inline::{InlineRule, InlineState};
7use crate::{MarkdownIt, Node, NodeValue, Renderer};
8
/// AST node holding a run of plain text.
#[derive(Debug)]
pub struct Text {
    /// The text carried by this node.
    pub content: String,
}
14
15impl NodeValue for Text {
16    fn render(&self, _: &Node, fmt: &mut dyn Renderer) {
17        fmt.text(&self.content);
18    }
19}
20
/// AST node for escaped text (backslash escapes and entities).
#[derive(Debug)]
pub struct TextSpecial {
    /// The text carried by this node.
    pub content: String,
    /// NOTE(review): presumably the original source markup that produced
    /// `content` — confirm against the rules that construct this node.
    pub markup: String,
    /// NOTE(review): presumably a static tag naming the kind of escape —
    /// confirm against the rules that construct this node.
    pub info: &'static str,
}
28
29impl NodeValue for TextSpecial {
30    fn render(&self, _: &Node, fmt: &mut dyn Renderer) {
31        fmt.text(&self.content);
32    }
33}
34
35pub fn add(md: &mut MarkdownIt) {
36    md.inline.add_rule::<TextScanner>()
37        .before_all();
38}
39
40#[derive(Debug)]
41pub(crate) enum TextScannerImpl {
42    SkipPunct,
43    SkipRegex(Regex),
44}
45
/// Inline rule that skips over pure text.
///
/// The characters `{}$%@~+=:` are reserved for extensions.
///
/// The fast scanner stops at a newline or at any of:
/// `!`, `#`, `$`, `%`, `&`, `*`, `+`, `-`, `:`, `<`, `=`, `>`, `@`, `[`, `\`,
/// `]`, `^`, `_`, `` ` ``, `{`, `}`, `~`.
///
/// Note: this is NOT the same set as the "Markdown ASCII punctuation"
/// characters, see
/// http://spec.commonmark.org/0.15/#ascii-punctuation-character
///
pub struct TextScanner;
55
56impl TextScanner {
57    fn choose_text_impl(charmap: Vec<char>) -> TextScannerImpl {
58        let mut can_use_punct = true;
59        for ch in charmap.iter() {
60            match ch {
61                '\n' | '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' |
62                ':' | '<' | '=' | '>' | '@' | '[' | '\\' | ']' | '^' |
63                '_' | '`' | '{' | '}' | '~' => {},
64                _ => {
65                    can_use_punct = false;
66                    break;
67                }
68            }
69        }
70
71        if can_use_punct {
72            TextScannerImpl::SkipPunct
73        } else {
74            TextScannerImpl::SkipRegex(
75                Regex::new(
76                    // [] panics on "unclosed character class", but it cannot happen here
77                    // (we'd use punct rule instead)
78                    &format!("^[^{}]+", charmap.into_iter().map(
79                        |c| regex::escape(&c.to_string())
80                    ).collect::<String>())
81                ).unwrap()
82            )
83        }
84    }
85
86    fn find_text_length(state: &mut InlineState) -> usize {
87        let text_impl = state.md.inline.text_impl.get_or_init(
88            || Self::choose_text_impl(state.md.inline.text_charmap.keys().copied().collect())
89        );
90
91        let mut len = 0;
92
93        match text_impl {
94            TextScannerImpl::SkipPunct => {
95                let mut chars = state.src[state.pos..state.pos_max].chars();
96
97                loop {
98                    match chars.next() {
99                        Some(
100                            '\n' | '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' |
101                            ':' | '<' | '=' | '>' | '@' | '[' | '\\' | ']' | '^' |
102                            '_' | '`' | '{' | '}' | '~'
103                        ) => {
104                            break;
105                        }
106                        Some(chr) => {
107                            len += chr.len_utf8();
108                        }
109                        None => { break; }
110                    }
111                }
112            }
113            TextScannerImpl::SkipRegex(re) => {
114                if let Some(capture) = re.find(&state.src[state.pos..state.pos_max]) {
115                    len = capture.end();
116                }
117            }
118        }
119
120        len
121    }
122}
123
124impl InlineRule for TextScanner {
125    const MARKER: char = '\0';
126
127    fn check(state: &mut InlineState) -> Option<usize> {
128        let len = Self::find_text_length(state);
129        if len == 0 { return None; }
130        Some(len)
131    }
132
133    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
134        let len = Self::find_text_length(state);
135        if len == 0 { return None; }
136        state.trailing_text_push(state.pos, state.pos + len);
137        state.pos += len;
138        Some((Node::default(), 0))
139    }
140}