1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Skip text characters for the text token: place them in the pending buffer
// and advance the current position.
//
use regex::{self, Regex};
use crate::{MarkdownIt, Node, NodeValue, Renderer};
use crate::parser::inline::{InlineRule, InlineState};

/// Plain text AST node.
#[derive(Debug)]
pub struct Text {
    /// The text carried by this node.
    pub content: String,
}

impl NodeValue for Text {
    /// Renders this node by handing its `content` to the renderer's `text`
    /// method. The node handle itself is not consulted.
    fn render(&self, _node: &Node, renderer: &mut dyn Renderer) {
        renderer.text(&self.content);
    }
}

/// Escaped text AST node (backslash escapes and entities).
#[derive(Debug)]
pub struct TextSpecial {
    /// The text carried by this node.
    pub content: String,
    /// NOTE(review): presumably the original source markup that produced
    /// `content` — confirm against the rules that create this node.
    pub markup: String,
    /// Static label attached to the node.
    /// NOTE(review): presumably identifies the kind of escape — confirm
    /// against the rules that create this node.
    pub info: &'static str,
}

impl NodeValue for TextSpecial {
    /// Renders this node by handing its `content` to the renderer's `text`
    /// method; `markup` and `info` are not used for rendering here.
    fn render(&self, _node: &Node, renderer: &mut dyn Renderer) {
        renderer.text(&self.content);
    }
}

/// Registers the [`TextScanner`] inline rule on `md` and calls `before_all()`
/// on the handle returned by the registration.
pub fn add(md: &mut MarkdownIt) {
    // Kept as a single expression: the registration handle is only needed
    // long enough to set the rule's ordering.
    md.inline.add_rule::<TextScanner>().before_all();
}

/// Strategy used by the text rule to find the end of a run of plain text.
#[derive(Debug)]
pub(crate) enum TextScannerImpl {
    /// Scan char by char and stop at a fixed, hard-coded set of characters.
    SkipPunct,
    /// Consume whatever the contained regex matches in the remaining input.
    SkipRegex(Regex),
}

// Rule to skip pure text
// '{}$%@~+=:' reserved for extensions
//
// !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
//
// !!!! Don't confuse with "Markdown ASCII Punctuation" chars
// http://spec.commonmark.org/0.15/#ascii-punctuation-character
//
/// Inline rule that consumes a run of plain text in one step.
pub struct TextScanner;

impl InlineRule for TextScanner {
    // NOTE(review): '\0' presumably means this rule is not bound to a
    // specific marker character — confirm against `InlineRule`.
    const MARKER: char = '\0';

    /// Consumes plain text starting at `state.pos`, never going past
    /// `state.pos_max`.
    ///
    /// Returns `false` without touching `state` when nothing can be consumed.
    /// Otherwise advances `state.pos` past the consumed text and returns
    /// `true`; unless `silent` is set, the consumed range is also handed to
    /// `state.trailing_text_push`.
    fn run(state: &mut InlineState, silent: bool) -> bool {
        // The scanning strategy is chosen once, on first use, from the
        // characters registered in `text_charmap`.
        let scanner = state.md.inline.text_impl.get_or_init(
            || choose_text_impl(state.md.inline.text_charmap.keys().copied().collect())
        );

        let remaining = &state.src[state.pos..state.pos_max];

        // Number of bytes of plain text at the start of `remaining`.
        let consumed = match scanner {
            TextScannerImpl::SkipPunct => remaining
                .find(|ch: char| matches!(
                    ch,
                    '\n' | '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' |
                    ':' | '<' | '=' | '>' | '@' | '[' | '\\' | ']' | '^' |
                    '_' | '`' | '{' | '}' | '~'
                ))
                // No terminator found: the whole remainder is plain text.
                .unwrap_or(remaining.len()),
            TextScannerImpl::SkipRegex(re) => {
                re.find(remaining).map_or(0, |found| found.end())
            }
        };

        if consumed == 0 {
            return false;
        }

        let end = state.pos + consumed;
        if !silent {
            state.trailing_text_push(state.pos, end);
        }
        state.pos = end;

        true
    }
}

/// Picks the scanning strategy for the text rule from the characters in
/// `charmap`.
///
/// Returns [`TextScannerImpl::SkipPunct`] when every character in `charmap`
/// belongs to the hard-coded set below (this includes an empty `charmap`).
/// Otherwise returns [`TextScannerImpl::SkipRegex`] holding a regex of the
/// form `^[^…]+`, whose negated class lists every character of `charmap`.
///
/// # Panics
///
/// Panics if the generated pattern fails to compile.
fn choose_text_impl(charmap: Vec<char>) -> TextScannerImpl {
    let covered_by_punct = charmap.iter().all(|&ch| matches!(
        ch,
        '\n' | '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' |
        ':' | '<' | '=' | '>' | '@' | '[' | '\\' | ']' | '^' |
        '_' | '`' | '{' | '}' | '~'
    ));

    if covered_by_punct {
        return TextScannerImpl::SkipPunct;
    }

    // Each character is escaped individually before being placed inside the
    // negated class.
    let excluded: String = charmap
        .into_iter()
        .map(|ch| regex::escape(&ch.to_string()))
        .collect();

    // An empty class would fail with "unclosed character class", but an empty
    // `charmap` has already returned `SkipPunct` above.
    let leading_run = Regex::new(&format!("^[^{}]+", excluded)).unwrap();

    TextScannerImpl::SkipRegex(leading_run)
}