markdown_that/parser/inline/
state.rs

// Inline parser state
//
3use crate::common::sourcemap::SourcePos;
4use crate::common::utils::is_punct_char;
5use crate::parser::extset::{InlineRootExtSet, RootExtSet};
6use crate::parser::inline::Text;
7use crate::{MarkdownThat, Node};
8
/// Information about an emphasis delimiter run returned from [InlineState::scan_delims].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DelimiterRun {
    /// Marker character the run is made of (the character found at the scan start).
    pub marker: char,

    /// Boolean flag that determines if this delimiter run could open an emphasis.
    pub can_open: bool,

    /// Boolean flag that determines if this delimiter run could close an emphasis.
    pub can_close: bool,

    /// Total length of the run, counted in marker characters.
    pub length: usize,
}
24
#[derive(Debug)]
#[readonly::make]
/// Sandbox object containing data required to parse inline structures.
///
/// The `'b: 'a` bound requires the extension-set borrows (`'b`) to outlive the
/// parser borrow (`'a`).
pub struct InlineState<'a, 'b>
where
    'b: 'a,
{
    /// Markdown source.
    // NOTE(review): `#[readonly]` comes from the `readonly` crate; presumably it keeps the
    // field readable but not writable outside the defining module — confirm against its docs.
    #[readonly]
    pub src: String,

    /// Link to parser instance.
    #[readonly]
    pub md: &'a MarkdownThat,

    /// Current node, your rule is supposed to add children to it.
    pub node: Node,

    /// For each line, it holds a pair of offsets of the start of that line:
    /// `.0` is the offset in `src`, `.1` is the offset in the original Markdown source.
    /// `get_source_pos_for` searches on `.0` and uses `.1` as the base of its result.
    pub srcmap: Vec<(usize, usize)>,
    /// Mutable borrow of a `RootExtSet`.
    // NOTE(review): presumably extension storage scoped to the whole document — confirm.
    pub root_ext: &'b mut RootExtSet,
    /// Mutable borrow of an `InlineRootExtSet`.
    // NOTE(review): presumably extension storage scoped to this inline root — confirm.
    pub inline_ext: &'b mut InlineRootExtSet,

    /// Current byte offset in `src`, it must respect char boundaries.
    /// `InlineState::new` starts it at `0` and then moves it past leading spaces and tabs.
    pub pos: usize,

    /// Maximum allowed byte offset in `src`, it must respect char boundaries.
    /// `InlineState::new` starts it at `src.len()` and then moves it back over trailing
    /// spaces and tabs.
    pub pos_max: usize,

    /// Counter used to disable inline linkifier execution
    /// inside raw HTML and Markdown links.
    pub link_level: i32,

    /// Counter used to prevent recursion by image and link rules.
    pub level: u32,
}
62
63impl<'a, 'b> InlineState<'a, 'b> {
64    pub fn new(
65        src: String,
66        srcmap: Vec<(usize, usize)>,
67        md: &'a MarkdownThat,
68        root_ext: &'b mut RootExtSet,
69        inline_ext: &'b mut InlineRootExtSet,
70        node: Node,
71    ) -> Self {
72        let mut result = Self {
73            pos: 0,
74            pos_max: src.len(),
75            src,
76            srcmap,
77            root_ext,
78            inline_ext,
79            md,
80            node,
81            link_level: 0,
82            level: 0,
83        };
84
85        result.trim_src();
86        result
87    }
88
89    fn trim_src(&mut self) {
90        let mut chars = self.src.as_bytes().iter();
91        while let Some(b' ' | b'\t') = chars.next_back() {
92            self.pos_max -= 1;
93        }
94        while let Some(b' ' | b'\t') = chars.next() {
95            self.pos += 1;
96        }
97    }
98
99    pub fn trailing_text_push(&mut self, start: usize, end: usize) {
100        if let Some(text) = self
101            .node
102            .children
103            .last_mut()
104            .and_then(|t| t.cast_mut::<Text>())
105        {
106            text.content.push_str(&self.src[start..end]);
107
108            if let Some(map) = self.node.children.last_mut().unwrap().srcmap {
109                let (map_start, _) = map.get_byte_offsets();
110                let map_end = self.get_source_pos_for(end);
111                self.node.children.last_mut().unwrap().srcmap =
112                    Some(SourcePos::new(map_start, map_end));
113            }
114        } else {
115            let mut node = Node::new(Text {
116                content: self.src[start..end].to_owned(),
117            });
118            node.srcmap = self.get_map(start, end);
119            self.node.children.push(node);
120        }
121    }
122
123    pub fn trailing_text_pop(&mut self, count: usize) {
124        if count == 0 {
125            return;
126        }
127
128        let mut node = self.node.children.pop().unwrap();
129        let text = node.cast_mut::<Text>().unwrap();
130        if text.content.len() == count {
131            // do nothing, just remove the node
132            drop(node);
133        } else {
134            // modify the token and reinsert it later
135            text.content.truncate(text.content.len() - count);
136            if let Some(map) = node.srcmap {
137                let (map_start, map_end) = map.get_byte_offsets();
138                let map_end = self.get_source_pos_for(map_end - count);
139                node.srcmap = Some(SourcePos::new(map_start, map_end));
140            }
141            self.node.children.push(node);
142        }
143    }
144
145    #[must_use]
146    pub fn trailing_text_get(&self) -> &str {
147        if let Some(text) = self.node.children.last().and_then(|t| t.cast::<Text>()) {
148            text.content.as_str()
149        } else {
150            ""
151        }
152    }
153
154    /// Scan a sequence of emphasis-like markers and determine whether
155    /// it can start an emphasis sequence or end an emphasis sequence.
156    ///
157    ///  - Start - position to scan from (it should point at a valid marker);
158    ///  - can_split_word - determine if these markers can be found inside a word
159    ///
160    #[must_use]
161    pub fn scan_delims(&self, start: usize, can_split_word: bool) -> DelimiterRun {
162        let mut left_flanking = true;
163        let mut right_flanking = true;
164
165        let last_char = if start > 0 {
166            self.src[..start].chars().next_back().unwrap()
167        } else {
168            // treat the beginning of the line as whitespace
169            ' '
170        };
171
172        let mut chars = self.src[start..self.pos_max].chars();
173        let marker = chars.next().unwrap();
174        let next_char;
175        let mut count = 1;
176
177        loop {
178            match chars.next() {
179                None => {
180                    next_char = ' ';
181                    break;
182                }
183                Some(x) => {
184                    if x != marker {
185                        // treat the end of the line as whitespace
186                        next_char = x;
187                        break;
188                    }
189                }
190            }
191            count += 1;
192        }
193
194        let is_last_punct_char = last_char.is_ascii_punctuation() || is_punct_char(last_char);
195        let is_next_punct_char = next_char.is_ascii_punctuation() || is_punct_char(next_char);
196
197        let is_last_whitespace = last_char.is_whitespace();
198        let is_next_whitespace = next_char.is_whitespace();
199
200        #[allow(clippy::collapsible_if)]
201        if is_next_whitespace {
202            left_flanking = false;
203        } else if is_next_punct_char {
204            if !(is_last_whitespace || is_last_punct_char) {
205                left_flanking = false;
206            }
207        }
208
209        #[allow(clippy::collapsible_if)]
210        if is_last_whitespace {
211            right_flanking = false;
212        } else if is_last_punct_char {
213            if !(is_next_whitespace || is_next_punct_char) {
214                right_flanking = false;
215            }
216        }
217
218        let can_open;
219        let can_close;
220
221        if !can_split_word {
222            can_open = left_flanking && (!right_flanking || is_last_punct_char);
223            can_close = right_flanking && (!left_flanking || is_next_punct_char);
224        } else {
225            can_open = left_flanking;
226            can_close = right_flanking;
227        }
228
229        DelimiterRun {
230            marker,
231            can_open,
232            can_close,
233            length: count,
234        }
235    }
236
237    #[must_use]
238    fn get_source_pos_for(&self, pos: usize) -> usize {
239        let line = self
240            .srcmap
241            .binary_search_by(|x| x.0.cmp(&pos))
242            .unwrap_or_else(|x| x - 1);
243        self.srcmap[line].1 + (pos - self.srcmap[line].0)
244    }
245
246    #[must_use]
247    pub fn get_map(&self, start_pos: usize, end_pos: usize) -> Option<SourcePos> {
248        debug_assert!(start_pos <= end_pos);
249
250        Some(SourcePos::new(
251            self.get_source_pos_for(start_pos),
252            self.get_source_pos_for(end_pos),
253        ))
254    }
255}