markdown_it/parser/block/state.rs
// Parser state class
//
3use crate::common::sourcemap::SourcePos;
4use crate::common::utils::calc_right_whitespace_with_tabstops;
5use crate::parser::extset::RootExtSet;
6use crate::{MarkdownIt, Node};
7
8#[derive(Debug)]
9#[readonly::make]
10/// Sandbox object containing data required to parse block structures.
11pub struct BlockState<'a, 'b> where 'b: 'a {
12 /// Markdown source.
13 #[readonly]
14 pub src: &'b str,
15
16 /// Link to parser instance.
17 #[readonly]
18 pub md: &'a MarkdownIt,
19
20 pub root_ext: &'b mut RootExtSet,
21
22 /// Current node, your rule is supposed to add children to it.
23 pub node: Node,
24
25 pub line_offsets: Vec<LineOffset>,
26
27 /// Current block content indent (for example, if we are
28 /// inside a list, it would be positioned after list marker).
29 pub blk_indent: usize,
30
31 /// Current line in src.
32 pub line: usize,
33
34 /// Maximum allowed line in src.
35 pub line_max: usize,
36
37 /// True if there are no empty lines between paragraphs, used to
38 /// toggle loose/tight mode for lists.
39 pub tight: bool,
40
41 /// indent of the current list block.
42 pub list_indent: Option<u32>,
43
44 pub level: u32,
45}
46
/// Holds start/end/etc. positions for a specific source text line.
///
/// All positions are byte offsets into the source string; `indent_nonspace`
/// is a column count instead.
#[derive(Debug, Clone)]
pub struct LineOffset {
    /// `line_start` is the actual start of the line.
    ///
    /// ```text
    /// "  >  blockquote\r\n"
    ///  ^-- it will always point here (must not be modified by rules)
    /// ```
    pub line_start: usize,

    /// `line_end` is first newline character after the line,
    /// or position after string length if there aren't any newlines left.
    ///
    /// ```text
    /// "  >  blockquote\r\n"
    ///                 ^-- it will point here
    /// ```
    pub line_end: usize,

    /// `first_nonspace` is the byte offset of the first non-space character in
    /// the current line.
    ///
    /// ```text
    /// "  >  blockquote\r\n"
    ///       ^-- it will point here when paragraph is parsed
    ///    ^----- it is initially pointed here
    /// ```
    ///
    /// It will be modified by rules (list and blockquote), chars before it
    /// must be treated as whitespaces.
    pub first_nonspace: usize,

    /// `indent_nonspace` is the indent (amount of virtual spaces from start)
    /// of first non-space character in the current line, taking into account
    /// tab expansion.
    ///
    /// For example, in case of ` \t foo`, indent is 5 (tab ends at multiple of 4,
    /// then one space after it). Only tabs and spaces are counted for it,
    /// so no funny unicode business (if cmark supported unicode spaces, they'd
    /// be counted as 1 each regardless of utf8 width).
    ///
    /// You should compare `indent_nonspace` with `BlockState::blk_indent` when
    /// determining real indent after taking into account lists.
    ///
    /// Most block rules in commonmark are indented 0..=3, and >=4 is code block.
    /// Special value of `indent_nonspace = -1` is used by this library as a sign
    /// that this rule can only be a paragraph continuation (used in blockquotes),
    /// so you must take into account that any math can end up negative.
    pub indent_nonspace: i32,
}
100
101impl<'a, 'b> BlockState<'a, 'b> {
102 pub fn new(src: &'b str, md: &'a MarkdownIt, root_ext: &'b mut RootExtSet, node: Node) -> Self {
103 let mut result = Self {
104 src,
105 md,
106 root_ext,
107 node,
108 line_offsets: Vec::new(),
109 blk_indent: 0,
110 line: 0,
111 line_max: 0,
112 tight: false,
113 list_indent: None,
114 level: 0,
115 };
116
117 result.generate_caches();
118 result
119 }
120
121 fn generate_caches(&mut self) {
122 // Create caches
123 // Generate markers.
124 let mut chars = self.src.chars().peekable();
125 let mut indent_found = false;
126 let mut indent = 0;
127 let mut offset = 0;
128 let mut start = 0;
129 let mut pos = 0;
130
131 loop {
132 match chars.next() {
133 Some(ch @ (' ' | '\t')) if !indent_found => {
134 indent += 1;
135 offset += if ch == '\t' { 4 - offset % 4 } else { 1 };
136 pos += 1;
137 }
138 ch @ (Some('\n' | '\r') | None) => {
139 self.line_offsets.push(LineOffset {
140 line_start: start,
141 line_end: pos,
142 first_nonspace: start + indent,
143 indent_nonspace: offset,
144 });
145
146 if ch == Some('\r') && chars.peek() == Some(&'\n') {
147 // treat CR+LF as one linebreak
148 chars.next();
149 pos += 1;
150 }
151
152 indent_found = false;
153 indent = 0;
154 offset = 0;
155 start = pos + 1;
156 pos += 1;
157
158 if ch.is_none() || chars.peek().is_none() {
159 break;
160 }
161 }
162 Some(ch) => {
163 indent_found = true;
164 pos += ch.len_utf8();
165 }
166 }
167 }
168
169 self.line_max = self.line_offsets.len();
170 }
171
172 #[must_use]
173 pub fn test_rules_at_line(&mut self) -> bool {
174 for rule in self.md.block.ruler.iter() {
175 if rule.0(self).is_some() {
176 return true;
177 }
178 }
179 false
180 }
181
182 #[must_use]
183 #[inline]
184 pub fn is_empty(&self, line: usize) -> bool {
185 if let Some(offsets) = self.line_offsets.get(line) {
186 offsets.first_nonspace >= offsets.line_end
187 } else {
188 false
189 }
190 }
191
192 pub fn skip_empty_lines(&self, from: usize) -> usize {
193 let mut line = from;
194 while line != self.line_max && self.is_empty(line) {
195 line += 1;
196 }
197 line
198 }
199
200 /// return line indent of specific line, taking into account blockquotes and lists;
201 /// it may be negative if a text has less indentation than current list item
202 #[must_use]
203 #[inline]
204 pub fn line_indent(&self, line: usize) -> i32 {
205 if line < self.line_max {
206 self.line_offsets[line].indent_nonspace - self.blk_indent as i32
207 } else {
208 0
209 }
210 }
211
212 /// return a single line, trimming initial spaces
213 #[must_use]
214 #[inline]
215 pub fn get_line(&self, line: usize) -> &str {
216 if line < self.line_max {
217 let pos = self.line_offsets[line].first_nonspace;
218 let max = self.line_offsets[line].line_end;
219 &self.src[pos..max]
220 } else {
221 ""
222 }
223 }
224
225 /// Cut a range of lines begin..end (not including end) from the source without preceding indent.
226 /// Returns a string (lines) plus a mapping (start of each line in result -> start of each line in source).
227 pub fn get_lines(&self, begin: usize, end: usize, indent: usize, keep_last_lf: bool) -> (String, Vec<(usize, usize)>) {
228 debug_assert!(begin <= end);
229
230 let mut line = begin;
231 let mut result = String::new();
232 let mut mapping = Vec::new();
233
234 while line < end {
235 let offsets = &self.line_offsets[line];
236 let last = offsets.line_end;
237 let add_last_lf = line + 1 < end || keep_last_lf;
238
239 let (num_spaces, first) = calc_right_whitespace_with_tabstops(
240 &self.src[offsets.line_start..offsets.first_nonspace],
241 offsets.indent_nonspace - indent as i32
242 );
243
244 mapping.push(( result.len(), offsets.line_start+first ));
245 result += &" ".repeat(num_spaces);
246 result += &self.src[offsets.line_start+first..last];
247 if add_last_lf { result.push('\n'); }
248 line += 1;
249 }
250
251 ( result, mapping )
252 }
253
254 #[must_use]
255 #[inline]
256 pub fn get_map(&self, start_line: usize, end_line: usize) -> Option<SourcePos> {
257 debug_assert!(start_line <= end_line);
258
259 Some(SourcePos::new(
260 self.line_offsets[start_line].first_nonspace,
261 self.line_offsets[end_line].line_end
262 ))
263 }
264
265 #[must_use]
266 #[inline]
267 pub fn get_map_from_offsets(&self, start_pos: usize, end_pos: usize) -> Option<SourcePos> {
268 debug_assert!(start_pos <= end_pos);
269
270 Some(SourcePos::new(start_pos, end_pos))
271 }
272}