marco_core/parser/inlines/
mod.rs1pub mod shared;
10
11pub mod cm_autolink_parser;
13pub mod cm_backslash_escape_parser;
14pub mod cm_code_span_parser;
15pub mod cm_emphasis_parser;
16pub mod cm_entity_reference_parser;
17pub mod cm_image_parser;
18pub mod cm_inline_html_parser;
19pub mod cm_line_breaks_parser;
20pub mod cm_link_parser;
21pub mod cm_reference_link_parser;
22pub mod cm_strong_emphasis_parser;
23pub mod cm_strong_parser;
24pub mod gfm_autolink_literal_parser;
25pub mod gfm_footnote_reference_parser;
26pub mod gfm_strikethrough_parser;
27pub mod marco_dash_strikethrough_parser;
28pub mod marco_emoji_shortcode_parser;
29pub mod marco_inline_footnote_parser;
30pub mod marco_mark_parser;
31pub mod marco_platform_mentions_parser;
32pub mod marco_subscript_arrow_parser;
33pub mod marco_subscript_parser;
34pub mod marco_superscript_parser;
35pub mod marco_task_checkbox_inline_parser;
36pub mod math_display_parser;
37pub mod math_inline_parser;
38pub mod text_parser;
39
40pub use cm_autolink_parser::parse_autolink;
42pub use cm_backslash_escape_parser::parse_backslash_escape;
43pub use cm_code_span_parser::parse_code_span;
44pub use cm_emphasis_parser::parse_emphasis;
45pub use cm_entity_reference_parser::parse_entity_reference;
46pub use cm_image_parser::parse_image;
47pub use cm_inline_html_parser::parse_inline_html;
48pub use cm_line_breaks_parser::{parse_hard_line_break, parse_soft_line_break};
49pub use cm_link_parser::parse_link;
50pub use cm_reference_link_parser::parse_reference_link;
51pub use cm_strong_emphasis_parser::parse_strong_emphasis;
52pub use cm_strong_parser::parse_strong;
53pub use gfm_autolink_literal_parser::parse_gfm_autolink_literal;
54pub use gfm_footnote_reference_parser::parse_footnote_reference;
55pub use gfm_strikethrough_parser::parse_strikethrough;
56pub use marco_dash_strikethrough_parser::parse_dash_strikethrough;
57pub use marco_emoji_shortcode_parser::parse_emoji_shortcode;
58pub use marco_inline_footnote_parser::parse_inline_footnote;
59pub use marco_mark_parser::parse_mark;
60pub use marco_platform_mentions_parser::parse_platform_mention;
61pub use marco_subscript_arrow_parser::parse_subscript_arrow;
62pub use marco_subscript_parser::parse_subscript;
63pub use marco_superscript_parser::parse_superscript;
64pub use marco_task_checkbox_inline_parser::parse_task_checkbox_inline;
65pub use math_display_parser::parse_display_math;
66pub use math_inline_parser::parse_inline_math;
67pub use text_parser::{parse_special_as_text, parse_text};
68
69use super::ast::{Node, NodeKind};
70use nom::bytes::complete::take;
71use shared::{opt_span, GrammarSpan};
72
73pub fn parse_inlines_from_span(span: GrammarSpan) -> Result<Vec<Node>, Box<dyn std::error::Error>> {
77 log::debug!(
78 "Parsing inline elements in span at line {}: {:?}",
79 span.location_line(),
80 span.fragment()
81 );
82
83 let mut nodes = Vec::with_capacity(8);
84 let mut remaining = span;
85
86 const MAX_ITERATIONS: usize = 1000;
88 let mut iteration_count = 0;
89 let mut last_offset = 0;
90
91 while !remaining.fragment().is_empty() {
92 iteration_count += 1;
93 if iteration_count > MAX_ITERATIONS {
94 log::error!("Inline parser exceeded MAX_ITERATIONS ({})", MAX_ITERATIONS);
95 break;
96 }
97
98 let start_pos = remaining.location_offset();
99
100 if start_pos == last_offset && iteration_count > 1 {
102 log::error!(
103 "Inline parser not making progress at offset {}, forcing skip",
104 start_pos
105 );
106 let skip = remaining
108 .fragment()
109 .chars()
110 .next()
111 .map(|c| c.len_utf8())
112 .unwrap_or(1);
113 if let Ok((rest, _)) = take::<_, _, nom::error::Error<_>>(skip)(remaining) {
114 remaining = rest;
115 last_offset = remaining.location_offset();
116 continue;
117 } else {
118 break;
119 }
120 }
121 last_offset = start_pos;
122
123 let first_byte = remaining.fragment().as_bytes()[0];
136 let is_non_special_ascii = first_byte < 0x80
137 && !matches!(
138 first_byte,
139 b'*' | b'_'
140 | b'`'
141 | b'['
142 | b'<'
143 | b'!'
144 | b'&'
145 | b'\n'
146 | b'\\'
147 | b'$'
148 | b'^'
149 | b'~'
150 | b'='
151 | b'-'
152 );
153 let safe_to_fast_path = is_non_special_ascii
155 && if first_byte == b' ' {
156 let frag = remaining.fragment().as_bytes();
157 let sp = frag.iter().take_while(|&&b| b == b' ').count();
158 !(sp >= 2 && frag.get(sp) == Some(&b'\n'))
159 } else {
160 true
161 };
162 if safe_to_fast_path {
163 if let Ok((rest, node)) = parse_text(remaining) {
164 nodes.push(node);
165 remaining = rest;
166 continue;
167 }
168 }
172
173 if let Ok((rest, node)) = parse_code_span(remaining) {
175 nodes.push(node);
176 remaining = rest;
177 continue;
178 }
179
180 if crate::parser::shared::parse_math_enabled() {
182 if let Ok((rest, node)) = parse_display_math(remaining) {
183 nodes.push(node);
184 remaining = rest;
185 continue;
186 }
187
188 if let Ok((rest, node)) = parse_inline_math(remaining) {
190 nodes.push(node);
191 remaining = rest;
192 continue;
193 }
194 }
195
196 if let Ok((rest, node)) = parse_backslash_escape(remaining) {
198 nodes.push(node);
199 remaining = rest;
200 continue;
201 }
202
203 if let Ok((rest, node)) = parse_strikethrough(remaining) {
206 nodes.push(node);
207 remaining = rest;
208 continue;
209 }
210
211 if let Ok((rest, node)) = parse_dash_strikethrough(remaining) {
212 nodes.push(node);
213 remaining = rest;
214 continue;
215 }
216
217 if let Ok((rest, node)) = parse_mark(remaining) {
218 nodes.push(node);
219 remaining = rest;
220 continue;
221 }
222
223 if let Some(run_len) = intraword_underscore_run_len(&nodes, remaining.fragment()) {
232 if let Ok((rest, consumed)) = take::<_, _, nom::error::Error<_>>(run_len)(remaining) {
233 nodes.push(Node {
234 kind: NodeKind::Text("_".repeat(run_len)),
235 span: opt_span(consumed),
236 children: Vec::new(),
237 });
238 remaining = rest;
239 continue;
240 }
241 }
242
243 if let Ok((rest, node)) = parse_strong_emphasis(remaining) {
247 nodes.push(node);
248 remaining = rest;
249 continue;
250 }
251
252 if let Ok((rest, node)) = parse_strong(remaining) {
254 nodes.push(node);
255 remaining = rest;
256 continue;
257 }
258
259 if let Ok((rest, node)) = parse_emphasis(remaining) {
261 nodes.push(node);
262 remaining = rest;
263 continue;
264 }
265
266 if let Ok((rest, (ref_node, def_node))) = parse_inline_footnote(remaining) {
269 nodes.push(ref_node);
270 nodes.push(def_node);
271 remaining = rest;
272 continue;
273 }
274
275 if let Ok((rest, node)) = parse_superscript(remaining) {
276 nodes.push(node);
277 remaining = rest;
278 continue;
279 }
280
281 if let Ok((rest, node)) = parse_subscript_arrow(remaining) {
282 nodes.push(node);
283 remaining = rest;
284 continue;
285 }
286
287 if let Ok((rest, node)) = parse_subscript(remaining) {
288 nodes.push(node);
289 remaining = rest;
290 continue;
291 }
292
293 if let Ok((rest, node)) = parse_gfm_autolink_literal(remaining) {
295 nodes.push(node);
296 remaining = rest;
297 continue;
298 }
299
300 if let Ok((rest, node)) = parse_autolink(remaining) {
302 nodes.push(node);
303 remaining = rest;
304 continue;
305 }
306
307 if let Ok((rest, node)) = parse_footnote_reference(remaining) {
310 nodes.push(node);
311 remaining = rest;
312 continue;
313 }
314
315 if is_task_checkbox_inline_start_boundary_ok(&nodes, remaining.fragment()) {
318 if let Ok((rest, node)) = parse_task_checkbox_inline(remaining) {
319 nodes.push(node);
320 remaining = rest;
321 continue;
322 }
323 }
324
325 if let Ok((rest, node)) = parse_image(remaining) {
327 nodes.push(node);
328 remaining = rest;
329 continue;
330 }
331
332 if let Ok((rest, node)) = parse_link(remaining) {
334 nodes.push(node);
335 remaining = rest;
336 continue;
337 }
338
339 if let Ok((rest, node)) = parse_reference_link(remaining) {
341 nodes.push(node);
342 remaining = rest;
343 continue;
344 }
345
346 if let Ok((rest, node)) = parse_inline_html(remaining) {
348 nodes.push(node);
349 remaining = rest;
350 continue;
351 }
352
353 if let Ok((rest, node)) = parse_hard_line_break(remaining) {
355 log::debug!(
356 "Parsed hard line break at offset {}",
357 remaining.location_offset()
358 );
359 nodes.push(node);
360 remaining = rest;
361 continue;
362 }
363
364 if let Ok((rest, node)) = parse_soft_line_break(remaining) {
366 nodes.push(node);
367 remaining = rest;
368 continue;
369 }
370
371 if let Ok((rest, node)) = parse_entity_reference(remaining) {
373 nodes.push(node);
374 remaining = rest;
375 continue;
376 }
377
378 if let Ok((rest, node)) = parse_emoji_shortcode(remaining) {
380 nodes.push(node);
381 remaining = rest;
382 continue;
383 }
384
385 if let Ok((rest, node)) = parse_platform_mention(remaining) {
387 nodes.push(node);
388 remaining = rest;
389 continue;
390 }
391
392 if let Ok((rest, node)) = parse_text(remaining) {
394 nodes.push(node);
395 remaining = rest;
396 continue;
397 }
398
399 if let Ok((rest, node)) = parse_special_as_text(remaining) {
401 nodes.push(node);
402 remaining = rest;
403 continue;
404 }
405
406 log::error!(
409 "Inline parser unable to make progress at offset {}",
410 start_pos
411 );
412 break;
413 }
414
415 log::debug!("Parsed {} inline nodes", nodes.len());
416 Ok(nodes)
417}
418
419fn intraword_underscore_run_len(nodes: &[Node], fragment: &str) -> Option<usize> {
420 if !fragment.starts_with('_') {
421 return None;
422 }
423
424 let prev = last_emitted_char(nodes)?;
425 if !prev.is_alphanumeric() {
426 return None;
427 }
428
429 let run_len = fragment.chars().take_while(|&c| c == '_').count();
430 let after = fragment.chars().nth(run_len)?;
431 if !after.is_alphanumeric() {
432 return None;
433 }
434
435 Some(run_len)
436}
437
438fn is_task_checkbox_inline_start_boundary_ok(nodes: &[Node], fragment: &str) -> bool {
439 if !fragment.starts_with('[') {
440 return false;
441 }
442
443 match last_emitted_char(nodes) {
446 None => true,
447 Some(prev) => !(prev.is_alphanumeric() || prev == '_'),
448 }
449}
450
451fn last_emitted_char(nodes: &[Node]) -> Option<char> {
452 nodes.iter().rev().find_map(last_char_in_node)
453}
454
455fn last_char_in_node(node: &Node) -> Option<char> {
456 match &node.kind {
457 NodeKind::Text(t) => t.chars().last(),
458 _ => node.children.iter().rev().find_map(last_char_in_node),
460 }
461}
462
463pub fn parse_inlines(text: &str) -> Result<Vec<Node>, Box<dyn std::error::Error>> {
467 parse_inlines_from_span(GrammarSpan::new(text))
468}
469
#[cfg(test)]
mod tests {
    use super::*;

    /// `***hi***` must come back as exactly one `StrongEmphasis` node.
    #[test]
    fn smoke_test_triple_delimiter_parses_as_single_node() {
        let parsed = parse_inlines("***hi***").expect("inline parse failed");

        assert_eq!(parsed.len(), 1);
        let only = &parsed[0];
        assert!(matches!(
            only.kind,
            crate::parser::ast::NodeKind::StrongEmphasis
        ));
    }

    /// Extension inlines embedded in the middle of a line must each produce
    /// a node of the corresponding kind somewhere in the top-level output.
    #[test]
    fn smoke_test_extension_inlines_parse_mid_line() {
        use crate::parser::ast::NodeKind;

        let input = "This is ^sup^ and ~sub~ and ˅sub2˅ and ==mark== and ~~del~~ and --del2--.";
        let parsed = parse_inlines(input).expect("inline parse failed");

        // True when any top-level node's kind satisfies the predicate.
        let contains = |pred: fn(&NodeKind) -> bool| parsed.iter().any(|n| pred(&n.kind));

        assert!(contains(|k| matches!(k, NodeKind::Superscript)));
        assert!(contains(|k| matches!(k, NodeKind::Subscript)));
        assert!(contains(|k| matches!(k, NodeKind::Mark)));
        assert!(contains(|k| matches!(k, NodeKind::Strikethrough)));
    }
}