markdown_it/parser/inline/
state.rs

1// Inline parser state
2//
3use crate::common::sourcemap::SourcePos;
4use crate::common::utils::is_punct_char;
5use crate::parser::extset::{InlineRootExtSet, RootExtSet};
6use crate::parser::inline::Text;
7use crate::{MarkdownIt, Node};
8
/// Information about emphasis delimiter run returned from [InlineState::scan_delims].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DelimiterRun {
    /// Starting marker character (the character the run consists of).
    pub marker: char,

    /// Boolean flag that determines if this delimiter could open an emphasis.
    pub can_open: bool,

    /// Boolean flag that determines if this delimiter could close an emphasis.
    pub can_close: bool,

    /// Total length of scanned delimiters, counted in marker characters.
    pub length: usize,
}
24
25#[derive(Debug)]
26#[readonly::make]
27/// Sandbox object containing data required to parse inline structures.
28pub struct InlineState<'a, 'b> where 'b: 'a {
29    /// Markdown source.
30    #[readonly]
31    pub src: String,
32
33    /// Link to parser instance.
34    #[readonly]
35    pub md: &'a MarkdownIt,
36
37    /// Current node, your rule is supposed to add children to it.
38    pub node: Node,
39
40    /// For each line, it holds offset of the start of the line in original
41    /// markdown source and offset of the start of the line in `src`.
42    pub srcmap: Vec<(usize, usize)>,
43    pub root_ext: &'b mut RootExtSet,
44    pub inline_ext: &'b mut InlineRootExtSet,
45
46    /// Current byte offset in `src`, it must respect char boundaries.
47    pub pos: usize,
48
49    /// Maximum allowed byte offset in `src`, it must respect char boundaries.
50    pub pos_max: usize,
51
52    /// Counter used to disable inline linkifier execution
53    /// inside raw html and markdown links.
54    pub link_level: i32,
55
56    /// Counter used to prevent recursion by image and link rules.
57    pub level: u32,
58}
59
60impl<'a, 'b> InlineState<'a, 'b> {
61    pub fn new(
62        src: String,
63        srcmap: Vec<(usize, usize)>,
64        md: &'a MarkdownIt,
65        root_ext: &'b mut RootExtSet,
66        inline_ext: &'b mut InlineRootExtSet,
67        node: Node,
68    ) -> Self {
69        let mut result = Self {
70            pos:        0,
71            pos_max:    src.len(),
72            src,
73            srcmap,
74            root_ext,
75            inline_ext,
76            md,
77            node,
78            link_level: 0,
79            level:      0,
80        };
81
82        result.trim_src();
83        result
84    }
85
86    fn trim_src(&mut self) {
87        let mut chars = self.src.as_bytes().iter();
88        while let Some(b' ' | b'\t') = chars.next_back() {
89            self.pos_max -= 1;
90        }
91        while let Some(b' ' | b'\t') = chars.next() {
92            self.pos += 1;
93        }
94    }
95
96    pub fn trailing_text_push(&mut self, start: usize, end: usize) {
97        if let Some(text) = self.node.children.last_mut()
98                                       .and_then(|t| t.cast_mut::<Text>()) {
99            text.content.push_str(&self.src[start..end]);
100
101            if let Some(map) = self.node.children.last_mut().unwrap().srcmap {
102                let (map_start, _) = map.get_byte_offsets();
103                let map_end = self.get_source_pos_for(end);
104                self.node.children.last_mut().unwrap().srcmap = Some(SourcePos::new(map_start, map_end));
105            }
106        } else {
107            let mut node = Node::new(Text { content: self.src[start..end].to_owned() });
108            node.srcmap = self.get_map(start, end);
109            self.node.children.push(node);
110        }
111    }
112
113    pub fn trailing_text_pop(&mut self, count: usize) {
114        if count == 0 { return; }
115
116        let mut node = self.node.children.pop().unwrap();
117        let text = node.cast_mut::<Text>().unwrap();
118        if text.content.len() == count {
119            // do nothing, just remove the node
120            drop(node);
121        } else {
122            // modify the token and reinsert it later
123            text.content.truncate(text.content.len() - count);
124            if let Some(map) = node.srcmap {
125                let (map_start, map_end) = map.get_byte_offsets();
126                let map_end = self.get_source_pos_for(map_end - count);
127                node.srcmap = Some(SourcePos::new(map_start, map_end));
128            }
129            self.node.children.push(node);
130        }
131    }
132
133    #[must_use]
134    pub fn trailing_text_get(&self) -> &str {
135        if let Some(text) = self.node.children.last()
136                                .and_then(|t| t.cast::<Text>()) {
137            text.content.as_str()
138        } else {
139            ""
140        }
141    }
142
143    /// Scan a sequence of emphasis-like markers, and determine whether
144    /// it can start an emphasis sequence or end an emphasis sequence.
145    ///
146    ///  - start - position to scan from (it should point at a valid marker);
147    ///  - can_split_word - determine if these markers can be found inside a word
148    ///
149    #[must_use]
150    pub fn scan_delims(&self, start: usize, can_split_word: bool) -> DelimiterRun {
151        let mut left_flanking = true;
152        let mut right_flanking = true;
153
154        let last_char = if start > 0 {
155            self.src[..start].chars().next_back().unwrap()
156        } else {
157            // treat beginning of the line as a whitespace
158            ' '
159        };
160
161        let mut chars = self.src[start..self.pos_max].chars();
162        let marker = chars.next().unwrap();
163        let next_char;
164        let mut count = 1;
165
166        loop {
167            match chars.next() {
168                None => {
169                    next_char = ' ';
170                    break;
171                }
172                Some(x) => {
173                    if x != marker {
174                        // treat end of the line as a whitespace
175                        next_char = x;
176                        break;
177                    }
178                }
179            }
180            count += 1;
181        }
182
183        let is_last_punct_char = last_char.is_ascii_punctuation() || is_punct_char(last_char);
184        let is_next_punct_char = next_char.is_ascii_punctuation() || is_punct_char(next_char);
185
186        let is_last_whitespace = last_char.is_whitespace();
187        let is_next_whitespace = next_char.is_whitespace();
188
189        #[allow(clippy::collapsible_if)]
190        if is_next_whitespace {
191            left_flanking = false;
192        } else if is_next_punct_char {
193            if !(is_last_whitespace || is_last_punct_char) {
194                left_flanking = false;
195            }
196        }
197
198        #[allow(clippy::collapsible_if)]
199        if is_last_whitespace {
200            right_flanking = false;
201        } else if is_last_punct_char {
202            if !(is_next_whitespace || is_next_punct_char) {
203                right_flanking = false;
204            }
205        }
206
207        let can_open;
208        let can_close;
209
210        if !can_split_word {
211            can_open  = left_flanking  && (!right_flanking || is_last_punct_char);
212            can_close = right_flanking && (!left_flanking  || is_next_punct_char);
213        } else {
214            can_open  = left_flanking;
215            can_close = right_flanking;
216        }
217
218        DelimiterRun {
219            marker,
220            can_open,
221            can_close,
222            length: count
223        }
224    }
225
226    #[must_use]
227    fn get_source_pos_for(&self, pos: usize) -> usize {
228        let line = match self.srcmap.binary_search_by(|x| x.0.cmp(&pos)) {
229            Ok(x) => x,
230            Err(x) => x - 1,
231        };
232        self.srcmap[line].1 + (pos - self.srcmap[line].0)
233    }
234
235    #[must_use]
236    pub fn get_map(&self, start_pos: usize, end_pos: usize) -> Option<SourcePos> {
237        debug_assert!(start_pos <= end_pos);
238
239        Some(SourcePos::new(
240            self.get_source_pos_for(start_pos),
241            self.get_source_pos_for(end_pos)
242        ))
243    }
244}