markdown_that/parser/inline/mod.rs

1//! Inline rule chain
2
3use std::collections::HashMap;
4use std::sync::OnceLock;
5
6mod state;
7pub use state::*;
8
9mod rule;
10pub use rule::*;
11
12#[doc(hidden)]
13pub mod builtin;
14
15pub use builtin::inline_parser::InlineRoot;
16use builtin::skip_text::TextScannerImpl;
17pub use builtin::skip_text::{Text, TextSpecial};
18
19use crate::common::TypeKey;
20use crate::common::ruler::Ruler;
21use crate::parser::extset::{InlineRootExtSet, RootExtSet};
22use crate::{MarkdownThat, Node};
23
24use super::node::NodeEmpty;
25
/// Callback pair registered per inline rule:
/// - `.0` — validation-mode scan: returns the byte length the token would
///   consume without building a node (used by [`InlineParser::skip_token`]);
/// - `.1` — parse mode: returns the produced [`Node`] plus the byte length
///   consumed (used by [`InlineParser::tokenize`]).
type RuleFns = (
    fn(&mut InlineState) -> Option<usize>,
    fn(&mut InlineState) -> Option<(Node, usize)>,
);
30
#[derive(Debug, Default)]
/// Inline-level tokenizer.
pub struct InlineParser {
    // Ordered chain of inline rules, keyed by each rule's type.
    ruler: Ruler<TypeKey, RuleFns>,
    // Start-marker character -> rules registered for it; maintained by
    // `add_rule`/`remove_rule` (rules with MARKER == '\0' are not indexed).
    text_charmap: HashMap<char, Vec<TypeKey>>,
    // Lazily-initialized text scanner; presumably built from `text_charmap`
    // on first use — initialization happens outside this file, TODO confirm.
    text_impl: OnceLock<TextScannerImpl>,
}
38
39impl InlineParser {
40    pub fn new() -> Self {
41        Self::default()
42    }
43
44    /// Skip a single token by running all rules in validation mode;
45    /// returns `true` if any rule reported success
46    ///
47    pub fn skip_token(&self, state: &mut InlineState) {
48        stacker::maybe_grow(64 * 1024, 1024 * 1024, || {
49            let mut ok = None;
50
51            if state.level < state.md.max_nesting {
52                for rule in self.ruler.iter() {
53                    ok = rule.0(state);
54                    if ok.is_some() {
55                        break;
56                    }
57                }
58            } else {
59                // Too much nesting, just skip until the end of the paragraph.
60                //
61                // NOTE: this will cause links to behave incorrectly in the following case,
62                //       when an amount of `[` is exactly equal to `maxNesting + 1`:
63                //
64                //       [[[[[[[[[[[[[[[[[[[[[foo]()
65                //
66                // TODO: remove this workaround when CM standard will allow nested links
67                //       (we can replace it by preventing links from being parsed in
68                //       validation mode)
69                //
70                state.pos = state.pos_max;
71            }
72
73            if let Some(len) = ok {
74                state.pos += len;
75            } else {
76                let ch = state.src[state.pos..state.pos_max].chars().next().unwrap();
77                state.pos += ch.len_utf8();
78            }
79        });
80    }
81
82    /// Generate tokens for input range
83    ///
84    pub fn tokenize(&self, state: &mut InlineState) {
85        stacker::maybe_grow(64 * 1024, 1024 * 1024, || {
86            let end = state.pos_max;
87
88            while state.pos < end {
89                // Try all possible rules.
90                // On success, the rule should:
91                //
92                // - update `state.pos`
93                // - update `state.tokens`
94                // - return true
95                let mut ok = None;
96
97                if state.level < state.md.max_nesting {
98                    for rule in self.ruler.iter() {
99                        ok = rule.1(state);
100                        if ok.is_some() {
101                            break;
102                        }
103                    }
104                }
105
106                if let Some((mut node, len)) = ok {
107                    state.pos += len;
108                    if !node.is::<NodeEmpty>() {
109                        node.srcmap = state.get_map(state.pos - len, state.pos);
110                        state.node.children.push(node);
111                        if state.pos >= end {
112                            break;
113                        }
114                    }
115                    continue;
116                }
117
118                let ch = state.src[state.pos..state.pos_max].chars().next().unwrap();
119                let len = ch.len_utf8();
120                state.trailing_text_push(state.pos, state.pos + len);
121                state.pos += len;
122            }
123        });
124    }
125
126    /// Process input string and push inline tokens into `out_tokens`
127    ///
128    pub fn parse(
129        &self,
130        src: String,
131        srcmap: Vec<(usize, usize)>,
132        node: Node,
133        md: &MarkdownThat,
134        root_ext: &mut RootExtSet,
135        inline_ext: &mut InlineRootExtSet,
136    ) -> Node {
137        let mut state = InlineState::new(src, srcmap, md, root_ext, inline_ext, node);
138        self.tokenize(&mut state);
139        state.node
140    }
141
142    pub fn add_rule<T: InlineRule>(&mut self) -> RuleBuilder<RuleFns> {
143        if T::MARKER != '\0' {
144            let charvec = self.text_charmap.entry(T::MARKER).or_default();
145            charvec.push(TypeKey::of::<T>());
146        }
147
148        let item = self.ruler.add(TypeKey::of::<T>(), (T::check, T::run));
149        RuleBuilder::new(item)
150    }
151
152    pub fn has_rule<T: InlineRule>(&mut self) -> bool {
153        self.ruler.contains(TypeKey::of::<T>())
154    }
155
156    pub fn remove_rule<T: InlineRule>(&mut self) {
157        if T::MARKER != '\0' {
158            let mut charvec = self.text_charmap.remove(&T::MARKER).unwrap_or_default();
159            charvec.retain(|x| *x != TypeKey::of::<T>());
160            self.text_charmap.insert(T::MARKER, charvec);
161        }
162
163        self.ruler.remove(TypeKey::of::<T>());
164    }
165}