markdown_it/parser/inline/mod.rs

1//! Inline rule chain
2use once_cell::sync::OnceCell;
3use std::collections::HashMap;
4
5mod state;
6pub use state::*;
7
8mod rule;
9pub use rule::*;
10
11#[doc(hidden)]
12pub mod builtin;
13
14pub use builtin::inline_parser::InlineRoot;
15pub use builtin::skip_text::{Text, TextSpecial};
16use builtin::skip_text::TextScannerImpl;
17
18use crate::{MarkdownIt, Node};
19use crate::common::TypeKey;
20use crate::common::ruler::Ruler;
21use crate::parser::extset::{InlineRootExtSet, RootExtSet};
22
23use super::node::NodeEmpty;
24
/// Pair of entry points stored per rule (see `add_rule`, which stores
/// `(T::check, T::run)`):
///  - `.0` — validation mode (`check`): returns how many bytes the rule
///    would consume, without producing a node (used by `skip_token`);
///  - `.1` — tokenizing mode (`run`): returns the produced `Node` plus the
///    number of bytes consumed (used by `tokenize`).
type RuleFns = (
    fn (&mut InlineState) -> Option<usize>,
    fn (&mut InlineState) -> Option<(Node, usize)>,
);
29
#[derive(Debug, Default)]
/// Inline-level tokenizer.
pub struct InlineParser {
    // Ordered collection of inline rules, keyed by rule type;
    // maintained by `add_rule`/`remove_rule`.
    ruler: Ruler<TypeKey, RuleFns>,
    // Index from a rule's marker character to the rules registered for it.
    // Rules whose `MARKER` is '\0' are not indexed (see `add_rule`).
    text_charmap: HashMap<char, Vec<TypeKey>>,
    // Lazily-initialized text scanner. NOTE(review): never written in this
    // file — presumably populated by the `skip_text` builtin; confirm there.
    text_impl: OnceCell<TextScannerImpl>,
}
37
38impl InlineParser {
39    pub fn new() -> Self {
40        Self::default()
41    }
42
43    /// Skip single token by running all rules in validation mode;
44    /// returns `true` if any rule reported success
45    ///
46    pub fn skip_token(&self, state: &mut InlineState) {
47        stacker::maybe_grow(64*1024, 1024*1024, || {
48            let mut ok = None;
49
50            if state.level < state.md.max_nesting {
51                for rule in self.ruler.iter() {
52                    ok = rule.0(state);
53                    if ok.is_some() {
54                        break;
55                    }
56                }
57            } else {
58                // Too much nesting, just skip until the end of the paragraph.
59                //
60                // NOTE: this will cause links to behave incorrectly in the following case,
61                //       when an amount of `[` is exactly equal to `maxNesting + 1`:
62                //
63                //       [[[[[[[[[[[[[[[[[[[[[foo]()
64                //
65                // TODO: remove this workaround when CM standard will allow nested links
66                //       (we can replace it by preventing links from being parsed in
67                //       validation mode)
68                //
69                state.pos = state.pos_max;
70            }
71
72            if let Some(len) = ok {
73                state.pos += len;
74            } else {
75                let ch = state.src[state.pos..state.pos_max].chars().next().unwrap();
76                state.pos += ch.len_utf8();
77            }
78        });
79    }
80
81    /// Generate tokens for input range
82    ///
83    pub fn tokenize(&self, state: &mut InlineState) {
84        stacker::maybe_grow(64*1024, 1024*1024, || {
85            let end = state.pos_max;
86
87            while state.pos < end {
88                // Try all possible rules.
89                // On success, rule should:
90                //
91                // - update `state.pos`
92                // - update `state.tokens`
93                // - return true
94                let mut ok = None;
95
96                if state.level < state.md.max_nesting {
97                    for rule in self.ruler.iter() {
98                        ok = rule.1(state);
99                        if ok.is_some() {
100                            break;
101                        }
102                    }
103                }
104
105                if let Some((mut node, len)) = ok {
106                    state.pos += len;
107                    if !node.is::<NodeEmpty>() {
108                        node.srcmap = state.get_map(state.pos - len, state.pos);
109                        state.node.children.push(node);
110                        if state.pos >= end { break; }
111                    }
112                    continue;
113                }
114
115                let ch = state.src[state.pos..state.pos_max].chars().next().unwrap();
116                let len = ch.len_utf8();
117                state.trailing_text_push(state.pos, state.pos + len);
118                state.pos += len;
119            }
120        });
121    }
122
123    /// Process input string and push inline tokens into `out_tokens`
124    ///
125    pub fn parse(
126        &self,
127        src: String,
128        srcmap: Vec<(usize, usize)>,
129        node: Node,
130        md: &MarkdownIt,
131        root_ext: &mut RootExtSet,
132        inline_ext: &mut InlineRootExtSet,
133    ) -> Node {
134        let mut state = InlineState::new(src, srcmap, md, root_ext, inline_ext, node);
135        self.tokenize(&mut state);
136        state.node
137    }
138
139    pub fn add_rule<T: InlineRule>(&mut self) -> RuleBuilder<RuleFns> {
140        if T::MARKER != '\0' {
141            let charvec = self.text_charmap.entry(T::MARKER).or_default();
142            charvec.push(TypeKey::of::<T>());
143        }
144
145        let item = self.ruler.add(TypeKey::of::<T>(), (T::check, T::run));
146        RuleBuilder::new(item)
147    }
148
149    pub fn has_rule<T: InlineRule>(&mut self) -> bool {
150        self.ruler.contains(TypeKey::of::<T>())
151    }
152
153    pub fn remove_rule<T: InlineRule>(&mut self) {
154        if T::MARKER != '\0' {
155            let mut charvec = self.text_charmap.remove(&T::MARKER).unwrap_or_default();
156            charvec.retain(|x| *x != TypeKey::of::<T>());
157            self.text_charmap.insert(T::MARKER, charvec);
158        }
159
160        self.ruler.remove(TypeKey::of::<T>());
161    }
162}