markdown_that/parser/inline/builtin/
skip_text.rs

//! Skips plain text characters for the text token: places them into the
//! pending text buffer and advances the current position.
//!
4use regex::{self, Regex};
5
6use crate::parser::inline::{InlineRule, InlineState};
7use crate::{MarkdownThat, Node, NodeValue, Renderer};
8
/// Plain text AST node.
#[derive(Debug)]
pub struct Text {
    /// Text passed to the renderer when this node is rendered.
    pub content: String,
}
14
15impl NodeValue for Text {
16    fn render(&self, _: &Node, fmt: &mut dyn Renderer) {
17        fmt.text(&self.content);
18    }
19}
20
/// Escaped text AST node (backslash escapes and entities).
#[derive(Debug)]
pub struct TextSpecial {
    /// Text passed to the renderer when this node is rendered.
    pub content: String,
    /// NOTE(review): presumably the original source markup this node was
    /// created from — confirm against the rules that construct it.
    pub markup: String,
    /// NOTE(review): presumably a static tag naming the kind of special text —
    /// confirm against the rules that construct it.
    pub info: &'static str,
}
28
29impl NodeValue for TextSpecial {
30    fn render(&self, _: &Node, fmt: &mut dyn Renderer) {
31        fmt.text(&self.content);
32    }
33}
34
35pub fn add(md: &mut MarkdownThat) {
36    md.inline.add_rule::<TextScanner>().before_all();
37}
38
39#[derive(Debug)]
40pub(crate) enum TextScannerImpl {
41    SkipPunct,
42    SkipRegex(Regex),
43}
44
/// Inline rule that skips over pure text.
///
/// The characters `{}$%@~+=:` are reserved for extensions.
///
/// For reference, the ASCII punctuation characters are:
///
/// ```text
/// !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
/// ```
///
/// Do not confuse the characters this rule stops at with the
/// "Markdown ASCII Punctuation" characters, see
/// <http://spec.commonmark.org/0.15/#ascii-punctuation-character>.
pub struct TextScanner;
54
55impl TextScanner {
56    fn choose_text_impl(charmap: Vec<char>) -> TextScannerImpl {
57        let mut can_use_punct = true;
58        for ch in charmap.iter() {
59            match ch {
60                '\n' | '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' | ':' | '<' | '=' | '>'
61                | '@' | '[' | '\\' | ']' | '^' | '_' | '`' | '{' | '}' | '~' => {}
62                _ => {
63                    can_use_punct = false;
64                    break;
65                }
66            }
67        }
68
69        if can_use_punct {
70            TextScannerImpl::SkipPunct
71        } else {
72            TextScannerImpl::SkipRegex(
73                Regex::new(
74                    // [] panics on "unclosed character class", but it cannot happen here
75                    // (we'd use punct rule instead)
76                    &format!(
77                        "^[^{}]+",
78                        charmap
79                            .into_iter()
80                            .map(|c| regex::escape(&c.to_string()))
81                            .collect::<String>()
82                    ),
83                )
84                .unwrap(),
85            )
86        }
87    }
88
89    fn find_text_length(state: &mut InlineState) -> usize {
90        let text_impl = state.md.inline.text_impl.get_or_init(|| {
91            Self::choose_text_impl(state.md.inline.text_charmap.keys().copied().collect())
92        });
93
94        let mut len = 0;
95
96        match text_impl {
97            TextScannerImpl::SkipPunct => {
98                let mut chars = state.src[state.pos..state.pos_max].chars();
99
100                loop {
101                    match chars.next() {
102                        Some(
103                            '\n' | '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' | ':' | '<' | '='
104                            | '>' | '@' | '[' | '\\' | ']' | '^' | '_' | '`' | '{' | '}' | '~',
105                        ) => {
106                            break;
107                        }
108                        Some(chr) => {
109                            len += chr.len_utf8();
110                        }
111                        None => {
112                            break;
113                        }
114                    }
115                }
116            }
117            TextScannerImpl::SkipRegex(re) => {
118                if let Some(capture) = re.find(&state.src[state.pos..state.pos_max]) {
119                    len = capture.end();
120                }
121            }
122        }
123
124        len
125    }
126}
127
128impl InlineRule for TextScanner {
129    const MARKER: char = '\0';
130
131    fn check(state: &mut InlineState) -> Option<usize> {
132        let len = Self::find_text_length(state);
133        if len == 0 {
134            return None;
135        }
136        Some(len)
137    }
138
139    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
140        let len = Self::find_text_length(state);
141        if len == 0 {
142            return None;
143        }
144        state.trailing_text_push(state.pos, state.pos + len);
145        state.pos += len;
146        Some((Node::default(), 0))
147    }
148}