1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Skip over plain-text characters: append them to the pending text buffer
// and advance the current position.
//
use regex::{self, Regex};

use crate::parser::inline::{InlineRule, InlineState};
use crate::{MarkdownIt, Node, NodeValue, Renderer};

/// Plain text AST node.
#[derive(Debug)]
pub struct Text {
    /// The text passed to the renderer when this node is rendered.
    pub content: String,
}

impl NodeValue for Text {
    /// Emits `content` through the renderer's `text` method; the node argument
    /// is not used.
    fn render(&self, _node: &Node, fmt: &mut dyn Renderer) {
        let Self { content } = self;
        fmt.text(content);
    }
}

/// Escaped text AST node (backslash escapes and entities).
#[derive(Debug)]
pub struct TextSpecial {
    /// The text passed to the renderer when this node is rendered.
    pub content: String,
    /// NOTE(review): presumably the original source markup that produced
    /// `content` — confirm against the rules that build this node.
    pub markup: String,
    /// NOTE(review): presumably a static tag naming the kind of escape —
    /// confirm against the rules that build this node.
    pub info: &'static str,
}

impl NodeValue for TextSpecial {
    /// Emits only `content` through the renderer's `text` method; `markup`,
    /// `info` and the node argument are not used here.
    fn render(&self, _node: &Node, fmt: &mut dyn Renderer) {
        let Self { content, .. } = self;
        fmt.text(content);
    }
}

/// Registers [`TextScanner`] with the inline parser of `md` and calls
/// `before_all()` on the resulting rule handle (presumably ordering it ahead
/// of every other inline rule — confirm against the rule builder).
pub fn add(md: &mut MarkdownIt) {
    md.inline.add_rule::<TextScanner>().before_all();
}

/// Strategy used by the text scanner to find where a plain-text run ends.
#[derive(Debug)]
pub(crate) enum TextScannerImpl {
    /// Stop at the first character from a fixed, hard-coded set.
    SkipPunct,
    /// Match the plain-text run with a compiled regular expression.
    SkipRegex(Regex),
}

// Rule to skip plain text.
// The characters '{}$%@~+=:' are reserved for extensions.
//
// ASCII punctuation, for reference:
// !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, :, ;, <, =, >, ?, @, [, \, ], ^, _, `, {, |, }, or ~
//
// Do NOT confuse the set used here with the "Markdown ASCII punctuation" characters:
// http://spec.commonmark.org/0.15/#ascii-punctuation-character
//
/// Inline rule that consumes a run of plain text and appends it to the
/// parser's trailing text.
pub struct TextScanner;

impl TextScanner {
    /// Returns `true` for the characters that end a plain-text run in the
    /// hard-coded (`SkipPunct`) scanner.
    fn is_punct_terminator(ch: char) -> bool {
        matches!(
            ch,
            '\n' | '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' |
            ':' | '<' | '=' | '>' | '@' | '[' | '\\' | ']' | '^' |
            '_' | '`' | '{' | '}' | '~'
        )
    }

    /// Picks the scanning strategy for the given set of marker characters.
    ///
    /// If every character in `charmap` belongs to the hard-coded terminator
    /// set (this includes an empty `charmap`), the hard-coded scanner is used.
    /// Otherwise a regex of the form `^[^…]+` is compiled from the escaped
    /// characters.
    ///
    /// # Panics
    ///
    /// Panics if the generated regex fails to compile.
    fn choose_text_impl(charmap: Vec<char>) -> TextScannerImpl {
        if charmap.iter().all(|&ch| Self::is_punct_terminator(ch)) {
            return TextScannerImpl::SkipPunct;
        }

        // An empty class `[^]` would be rejected as an unclosed character
        // class, but an empty charmap never reaches this point: it is handled
        // by the early return above.
        let escaped: String = charmap
            .into_iter()
            .map(|ch| regex::escape(&ch.to_string()))
            .collect();
        let pattern = format!("^[^{}]+", escaped);

        TextScannerImpl::SkipRegex(Regex::new(&pattern).unwrap())
    }

    /// Returns the byte length of the plain-text run starting at `state.pos`
    /// and bounded by `state.pos_max`; `0` means no plain text at this position.
    ///
    /// The scanning strategy is created on first use and cached in
    /// `state.md.inline.text_impl`.
    fn find_text_length(state: &mut InlineState) -> usize {
        let scanner = state.md.inline.text_impl.get_or_init(|| {
            let charmap: Vec<char> = state.md.inline.text_charmap.keys().copied().collect();
            Self::choose_text_impl(charmap)
        });
        let remaining = &state.src[state.pos..state.pos_max];

        match scanner {
            // Byte offset of the first terminator, or the whole slice if none.
            TextScannerImpl::SkipPunct => remaining
                .find(Self::is_punct_terminator)
                .unwrap_or(remaining.len()),
            // The pattern is anchored at the start, so the match end is the length.
            TextScannerImpl::SkipRegex(re) => re.find(remaining).map_or(0, |found| found.end()),
        }
    }
}

impl InlineRule for TextScanner {
    // NOTE(review): '\0' presumably means "no specific marker character" —
    // confirm against the `InlineRule` trait documentation.
    const MARKER: char = '\0';

    /// Returns the length of the plain-text run at the current position, or
    /// `None` when that run is empty. Does not modify `state`.
    fn check(state: &mut InlineState) -> Option<usize> {
        match Self::find_text_length(state) {
            0 => None,
            len => Some(len),
        }
    }

    /// Pushes the plain-text run at the current position onto the trailing
    /// text and advances `state.pos` past it.
    ///
    /// Returns `None` when the run is empty. Otherwise returns a default
    /// `Node` with a length of `0`, since `state.pos` has already been moved
    /// here.
    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
        let start = state.pos;
        let end = start + Self::find_text_length(state);
        if end == start {
            return None;
        }

        state.trailing_text_push(start, end);
        state.pos = end;
        Some((Node::default(), 0))
    }
}