markdown_it/parser/block/
state.rs

1// Parser state class
2//
3use crate::common::sourcemap::SourcePos;
4use crate::common::utils::calc_right_whitespace_with_tabstops;
5use crate::parser::extset::RootExtSet;
6use crate::{MarkdownIt, Node};
7
8#[derive(Debug)]
9#[readonly::make]
10/// Sandbox object containing data required to parse block structures.
11pub struct BlockState<'a, 'b> where 'b: 'a {
12    /// Markdown source.
13    #[readonly]
14    pub src: &'b str,
15
16    /// Link to parser instance.
17    #[readonly]
18    pub md: &'a MarkdownIt,
19
20    pub root_ext: &'b mut RootExtSet,
21
22    /// Current node, your rule is supposed to add children to it.
23    pub node: Node,
24
25    pub line_offsets: Vec<LineOffset>,
26
27    /// Current block content indent (for example, if we are
28    /// inside a list, it would be positioned after list marker).
29    pub blk_indent: usize,
30
31    /// Current line in src.
32    pub line: usize,
33
34    /// Maximum allowed line in src.
35    pub line_max: usize,
36
37    /// True if there are no empty lines between paragraphs, used to
38    /// toggle loose/tight mode for lists.
39    pub tight: bool,
40
41    /// indent of the current list block.
42    pub list_indent: Option<u32>,
43
44    pub level: u32,
45}
46
/// Holds start/end/etc. positions for a specific source text line.
///
/// All positions are byte offsets into the source string.
#[derive(Debug, Clone)]
pub struct LineOffset {
    /// `line_start` is the actual start of the line.
    ///
    ///     # const IGNORE : &str = stringify! {
    ///     "  >  blockquote\r\n"
    ///      ^-- it will always point here (must not be modified by rules)
    ///     # };
    pub line_start: usize,

    /// `line_end` is the position of the first newline character (`\r` or `\n`)
    /// terminating the line, or the length of the source string if there
    /// aren't any newlines left.
    ///
    ///     # const IGNORE : &str = stringify! {
    ///     "  >  blockquote\r\n"
    ///                     ^-- it will point here
    ///     # };
    pub line_end: usize,

    /// `first_nonspace` is the byte offset of the first non-space character in
    /// the current line.
    ///
    ///     # const IGNORE : &str = stringify! {
    ///     "   >  blockquote\r\n"
    ///            ^-- it will point here when paragraph is parsed
    ///         ^----- it is initially pointed here
    ///     # };
    ///
    /// It will be modified by rules (list and blockquote), chars before it
    /// must be treated as whitespaces.
    ///
    pub first_nonspace: usize,

    /// `indent_nonspace` is the indent (amount of virtual spaces from start)
    /// of first non-space character in the current line, taking into account
    /// tab expansion.
    ///
    /// For example, in case of ` \t foo`, indent is 5 (tab ends at multiple of 4,
    /// then one space after it). Only tabs and spaces are counted for it,
    /// so no funny unicode business (if cmark supported unicode spaces, they'd
    /// be counted as 1 each regardless of utf8 width).
    ///
    /// You should compare `indent_nonspace` with the state's `blk_indent` when
    /// determining real indent after taking into account lists.
    ///
    /// Most block rules in commonmark are indented 0..=3, and >=4 is code block.
    /// Special value of `indent_nonspace = -1` is used by this library as a sign
    /// that this rule can only be a paragraph continuation (used in blockquotes),
    /// so you must take into account that any math can end up negative.
    ///
    pub indent_nonspace: i32,
}
100
101impl<'a, 'b> BlockState<'a, 'b> {
102    pub fn new(src: &'b str, md: &'a MarkdownIt, root_ext: &'b mut RootExtSet, node: Node) -> Self {
103        let mut result = Self {
104            src,
105            md,
106            root_ext,
107            node,
108            line_offsets: Vec::new(),
109            blk_indent: 0,
110            line: 0,
111            line_max: 0,
112            tight: false,
113            list_indent: None,
114            level: 0,
115        };
116
117        result.generate_caches();
118        result
119    }
120
121    fn generate_caches(&mut self) {
122        // Create caches
123        // Generate markers.
124        let mut chars = self.src.chars().peekable();
125        let mut indent_found = false;
126        let mut indent = 0;
127        let mut offset = 0;
128        let mut start = 0;
129        let mut pos = 0;
130
131        loop {
132            match chars.next() {
133                Some(ch @ (' ' | '\t')) if !indent_found => {
134                    indent += 1;
135                    offset += if ch == '\t' { 4 - offset % 4 } else { 1 };
136                    pos += 1;
137                }
138                ch @ (Some('\n' | '\r') | None) => {
139                    self.line_offsets.push(LineOffset {
140                        line_start: start,
141                        line_end: pos,
142                        first_nonspace: start + indent,
143                        indent_nonspace: offset,
144                    });
145
146                    if ch == Some('\r') && chars.peek() == Some(&'\n') {
147                        // treat CR+LF as one linebreak
148                        chars.next();
149                        pos += 1;
150                    }
151
152                    indent_found = false;
153                    indent = 0;
154                    offset = 0;
155                    start = pos + 1;
156                    pos += 1;
157
158                    if ch.is_none() || chars.peek().is_none() {
159                        break;
160                    }
161                }
162                Some(ch) => {
163                    indent_found = true;
164                    pos += ch.len_utf8();
165                }
166            }
167        }
168
169        self.line_max = self.line_offsets.len();
170    }
171
172    #[must_use]
173    pub fn test_rules_at_line(&mut self) -> bool {
174        for rule in self.md.block.ruler.iter() {
175            if rule.0(self).is_some() {
176                return true;
177            }
178        }
179        false
180    }
181
182    #[must_use]
183    #[inline]
184    pub fn is_empty(&self, line: usize) -> bool {
185        if let Some(offsets) = self.line_offsets.get(line) {
186            offsets.first_nonspace >= offsets.line_end
187        } else {
188            false
189        }
190    }
191
192    pub fn skip_empty_lines(&self, from: usize) -> usize {
193        let mut line = from;
194        while line != self.line_max && self.is_empty(line) {
195            line += 1;
196        }
197        line
198    }
199
200    /// return line indent of specific line, taking into account blockquotes and lists;
201    /// it may be negative if a text has less indentation than current list item
202    #[must_use]
203    #[inline]
204    pub fn line_indent(&self, line: usize) -> i32 {
205        if line < self.line_max {
206            self.line_offsets[line].indent_nonspace - self.blk_indent as i32
207        } else {
208            0
209        }
210    }
211
212    /// return a single line, trimming initial spaces
213    #[must_use]
214    #[inline]
215    pub fn get_line(&self, line: usize) -> &str {
216        if line < self.line_max {
217            let pos = self.line_offsets[line].first_nonspace;
218            let max = self.line_offsets[line].line_end;
219            &self.src[pos..max]
220        } else {
221            ""
222        }
223    }
224
225    /// Cut a range of lines begin..end (not including end) from the source without preceding indent.
226    /// Returns a string (lines) plus a mapping (start of each line in result -> start of each line in source).
227    pub fn get_lines(&self, begin: usize, end: usize, indent: usize, keep_last_lf: bool) -> (String, Vec<(usize, usize)>) {
228        debug_assert!(begin <= end);
229
230        let mut line = begin;
231        let mut result = String::new();
232        let mut mapping = Vec::new();
233
234        while line < end {
235            let offsets = &self.line_offsets[line];
236            let last = offsets.line_end;
237            let add_last_lf = line + 1 < end || keep_last_lf;
238
239            let (num_spaces, first) = calc_right_whitespace_with_tabstops(
240                &self.src[offsets.line_start..offsets.first_nonspace],
241                offsets.indent_nonspace - indent as i32
242            );
243
244            mapping.push(( result.len(), offsets.line_start+first ));
245            result += &" ".repeat(num_spaces);
246            result += &self.src[offsets.line_start+first..last];
247            if add_last_lf { result.push('\n'); }
248            line += 1;
249        }
250
251        ( result, mapping )
252    }
253
254    #[must_use]
255    #[inline]
256    pub fn get_map(&self, start_line: usize, end_line: usize) -> Option<SourcePos> {
257        debug_assert!(start_line <= end_line);
258
259        Some(SourcePos::new(
260            self.line_offsets[start_line].first_nonspace,
261            self.line_offsets[end_line].line_end
262        ))
263    }
264
265    #[must_use]
266    #[inline]
267    pub fn get_map_from_offsets(&self, start_pos: usize, end_pos: usize) -> Option<SourcePos> {
268        debug_assert!(start_pos <= end_pos);
269
270        Some(SourcePos::new(start_pos, end_pos))
271    }
272}