1use crate::options::{Dialect, ParserOptions};
13use crate::syntax::SyntaxKind;
14use rowan::GreenNodeBuilder;
15
16use super::inline_ir::{
17 BracketPlan, ConstructDispo, ConstructPlan, DelimChar, EmphasisKind, EmphasisPlan,
18};
19
20use super::bookdown::{
22 try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
23};
24use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
25use super::citations::{
26 emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
27 try_parse_bracketed_citation,
28};
29use super::code_spans::{emit_code_span, try_parse_code_span};
30use super::emoji::{emit_emoji, try_parse_emoji};
31use super::escapes::{EscapeType, emit_escape, try_parse_escape};
32use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
33use super::inline_footnotes::{
34 emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
35 try_parse_inline_footnote,
36};
37use super::inline_html::{emit_inline_html, try_parse_inline_html};
38use super::latex::{parse_latex_command, try_parse_latex_command};
39use super::links::{
40 LinkScanContext, emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link,
41 emit_reference_image, emit_reference_link, emit_unresolved_reference, try_parse_autolink,
42 try_parse_bare_uri, try_parse_inline_image, try_parse_inline_link, try_parse_reference_image,
43 try_parse_reference_link,
44};
45use super::mark::{emit_mark, try_parse_mark};
46use super::math::{
47 emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
48 emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
49 emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
50 try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
51 try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
52 try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
53};
54use super::native_spans::{emit_native_span, try_parse_native_span};
55use super::raw_inline::is_raw_inline;
56use super::shortcodes::{emit_shortcode, try_parse_shortcode};
57use super::strikeout::{emit_strikeout, try_parse_strikeout};
58use super::subscript::{emit_subscript, try_parse_subscript};
59use super::superscript::{emit_superscript, try_parse_superscript};
60
61pub fn parse_inline_text_recursive(
76 builder: &mut GreenNodeBuilder,
77 text: &str,
78 config: &ParserOptions,
79) {
80 log::trace!(
81 "Recursive inline parsing: {:?} ({} bytes)",
82 &text[..text.len().min(40)],
83 text.len()
84 );
85
86 let mask = structural_byte_mask(config);
87 if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
88 log::trace!("Recursive inline parsing complete (plain-text fast path)");
89 return;
90 }
91
92 let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
93 parse_inline_range_impl(
94 text,
95 0,
96 text.len(),
97 config,
98 builder,
99 false,
100 &plans.emphasis,
101 &plans.brackets,
102 &plans.constructs,
103 false,
104 &mask,
105 );
106
107 log::trace!("Recursive inline parsing complete");
108}
109
110pub fn parse_inline_text(
125 builder: &mut GreenNodeBuilder,
126 text: &str,
127 config: &ParserOptions,
128 suppress_inner_links: bool,
129) {
130 log::trace!(
131 "Parsing inline text (nested in link): {:?} ({} bytes)",
132 &text[..text.len().min(40)],
133 text.len()
134 );
135
136 let mask = structural_byte_mask(config);
137 if try_emit_plain_text_fast_path_with_mask(builder, text, &mask) {
138 return;
139 }
140
141 let plans = super::inline_ir::build_full_plans(text, 0, text.len(), config);
142 parse_inline_range_impl(
143 text,
144 0,
145 text.len(),
146 config,
147 builder,
148 true,
149 &plans.emphasis,
150 &plans.brackets,
151 &plans.constructs,
152 suppress_inner_links,
153 &mask,
154 );
155}
156
157fn try_emit_plain_text_fast_path_with_mask(
171 builder: &mut GreenNodeBuilder,
172 text: &str,
173 mask: &[bool; 256],
174) -> bool {
175 if text.is_empty() {
176 return false;
177 }
178 for &b in text.as_bytes() {
179 if mask[b as usize] {
180 return false;
181 }
182 }
183 builder.token(SyntaxKind::TEXT.into(), text);
184 true
185}
186
187fn structural_byte_mask(config: &ParserOptions) -> [bool; 256] {
192 let mut mask = [false; 256];
193 let exts = &config.extensions;
194 let pandoc = config.dialect == Dialect::Pandoc;
195
196 mask[b'\n' as usize] = true;
202 mask[b'\r' as usize] = true;
203 mask[b'\\' as usize] = true;
204 mask[b'`' as usize] = true;
205 mask[b'*' as usize] = true;
206 mask[b'_' as usize] = true;
207
208 if exts.inline_links
214 || exts.reference_links
215 || exts.inline_images
216 || exts.bracketed_spans
217 || exts.footnotes
218 || exts.citations
219 {
220 mask[b'[' as usize] = true;
221 mask[b']' as usize] = true;
222 }
223 if exts.inline_images || exts.reference_links {
224 mask[b'!' as usize] = true;
225 }
226
227 if exts.autolinks || exts.raw_html || exts.native_spans {
229 mask[b'<' as usize] = true;
230 }
231
232 if exts.inline_footnotes || exts.superscript {
235 mask[b'^' as usize] = true;
236 }
237
238 if exts.citations || exts.quarto_crossrefs {
245 mask[b'@' as usize] = true;
246 if pandoc {
247 mask[b'-' as usize] = true;
248 }
249 }
250
251 if exts.tex_math_dollars || exts.tex_math_gfm {
253 mask[b'$' as usize] = true;
254 }
255
256 if exts.subscript || exts.strikeout {
258 mask[b'~' as usize] = true;
259 }
260
261 if exts.mark {
262 mask[b'=' as usize] = true;
263 }
264 if exts.emoji {
265 mask[b':' as usize] = true;
266 }
267 if exts.bookdown_references {
268 mask[b'(' as usize] = true;
269 }
270 mask[b'{' as usize] = true;
274
275 if exts.autolink_bare_uris {
284 for b in b'a'..=b'z' {
285 mask[b as usize] = true;
286 }
287 for b in b'A'..=b'Z' {
288 mask[b as usize] = true;
289 }
290 }
291
292 mask
293}
294
/// Reports whether an emoji alias may start at byte offset `pos` of `text`.
///
/// The start of the string is always a boundary. Elsewhere, the byte
/// immediately before `pos` must be neither an ASCII letter/digit nor `_`.
/// Non-ASCII bytes therefore count as boundaries.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    let Some(before) = pos.checked_sub(1) else {
        return true;
    };
    let prev = text.as_bytes()[before];
    !(prev.is_ascii_alphanumeric() || prev == b'_')
}
304
/// Steps `pos` forward by the UTF-8 length of the character starting there,
/// without going beyond `end`.
///
/// `pos` is returned unchanged when it is already at or past `end`, or at or
/// past the end of `text`. `pos` must lie on a char boundary, since the
/// function slices `text` at `pos`.
#[inline]
fn advance_char_boundary(text: &str, pos: usize, end: usize) -> usize {
    if pos < end && pos < text.len() {
        let step = match text[pos..].chars().next() {
            Some(ch) => ch.len_utf8(),
            None => 1,
        };
        std::cmp::min(pos + step, end)
    } else {
        pos
    }
}
316
317#[allow(clippy::too_many_arguments)]
318fn parse_inline_range_impl(
319 text: &str,
320 start: usize,
321 end: usize,
322 config: &ParserOptions,
323 builder: &mut GreenNodeBuilder,
324 nested_in_link: bool,
325 plan: &EmphasisPlan,
326 bracket_plan: &BracketPlan,
327 construct_plan: &ConstructPlan,
328 suppress_inner_links: bool,
329 mask: &[bool; 256],
330) {
331 log::trace!(
332 "parse_inline_range: start={}, end={}, text={:?}",
333 start,
334 end,
335 &text[start..end]
336 );
337 let mut pos = start;
338 let mut text_start = start;
339 let bytes = text.as_bytes();
340
341 while pos < end {
342 if !mask[bytes[pos] as usize] {
349 let mut next = pos + 1;
350 while next < end && !mask[bytes[next] as usize] {
351 next += 1;
352 }
353 pos = next;
354 if pos >= end {
355 break;
356 }
357 }
358 if let Some(dispo) = construct_plan.lookup(pos) {
367 match *dispo {
368 ConstructDispo::InlineFootnote { end: dispo_end } => {
369 if dispo_end <= end
370 && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
371 && pos + len == dispo_end
372 {
373 if pos > text_start {
374 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
375 }
376 log::trace!("IR: matched inline footnote at pos {}", pos);
377 emit_inline_footnote(builder, content, config);
378 pos += len;
379 text_start = pos;
380 continue;
381 }
382 }
383 ConstructDispo::NativeSpan { end: dispo_end } => {
384 if dispo_end <= end
385 && let Some((len, content, _attributes)) =
386 try_parse_native_span(&text[pos..])
387 && pos + len == dispo_end
388 {
389 if pos > text_start {
390 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
391 }
392 log::trace!("IR: matched native span at pos {}", pos);
393 emit_native_span(builder, &text[pos..pos + len], content, config);
394 pos += len;
395 text_start = pos;
396 continue;
397 }
398 }
399 ConstructDispo::FootnoteReference { end: dispo_end } => {
400 if dispo_end <= end
401 && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
402 && pos + len == dispo_end
403 {
404 if pos > text_start {
405 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
406 }
407 log::trace!("IR: matched footnote reference at pos {}", pos);
408 emit_footnote_reference(builder, &id);
409 pos += len;
410 text_start = pos;
411 continue;
412 }
413 }
414 ConstructDispo::BracketedCitation { end: dispo_end } => {
415 if dispo_end <= end
416 && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
417 && pos + len == dispo_end
418 {
419 if pos > text_start {
420 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
421 }
422 log::trace!("IR: matched bracketed citation at pos {}", pos);
423 emit_bracketed_citation(builder, content);
424 pos += len;
425 text_start = pos;
426 continue;
427 }
428 }
429 ConstructDispo::BareCitation { end: dispo_end } => {
430 if dispo_end <= end
431 && let Some((len, key, has_suppress)) =
432 try_parse_bare_citation(&text[pos..])
433 && pos + len == dispo_end
434 {
435 let is_crossref = config.extensions.quarto_crossrefs
436 && super::citations::is_quarto_crossref_key(key);
437 if is_crossref || config.extensions.citations {
438 if pos > text_start {
439 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
440 }
441 if is_crossref {
442 log::trace!("IR: matched Quarto crossref at pos {}: {}", pos, key);
443 super::citations::emit_crossref(builder, key, has_suppress);
444 } else {
445 log::trace!("IR: matched bare citation at pos {}: {}", pos, key);
446 emit_bare_citation(builder, key, has_suppress);
447 }
448 pos += len;
449 text_start = pos;
450 continue;
451 }
452 }
453 }
454 ConstructDispo::BracketedSpan { end: dispo_end } => {
455 if dispo_end <= end
456 && let Some((len, content, attrs)) = try_parse_bracketed_span(&text[pos..])
457 && pos + len == dispo_end
458 {
459 if pos > text_start {
460 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
461 }
462 log::trace!("IR: matched bracketed span at pos {}", pos);
463 emit_bracketed_span(builder, &content, &attrs, config);
464 pos += len;
465 text_start = pos;
466 continue;
467 }
468 }
469 }
470 }
471
472 if let Some(super::inline_ir::BracketDispo::UnresolvedReference {
501 is_image,
502 text_start: ref_text_start,
503 text_end: ref_text_end,
504 end: ref_end,
505 }) = bracket_plan.lookup(pos)
506 {
507 let is_image = *is_image;
508 let dispo_suffix_end = *ref_end;
509 let suppress = suppress_inner_links && !is_image;
510 if !suppress {
511 let ctx = LinkScanContext::from_options(config);
512 let is_commonmark = config.dialect == Dialect::CommonMark;
513 if is_image {
514 if config.extensions.inline_images
515 && let Some((len, alt_text, dest, attributes)) =
516 try_parse_inline_image(&text[pos..], ctx)
517 && pos + len >= dispo_suffix_end
518 && pos + len <= end
519 {
520 if pos > text_start {
521 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
522 }
523 log::trace!(
524 "IR: dispatcher overrode UnresolvedReference with inline image at pos {}",
525 pos
526 );
527 emit_inline_image(
528 builder,
529 &text[pos..pos + len],
530 alt_text,
531 dest,
532 attributes,
533 config,
534 );
535 pos += len;
536 text_start = pos;
537 continue;
538 }
539 } else if config.extensions.inline_links
540 && let Some((len, link_text, dest, attributes)) =
541 try_parse_inline_link(&text[pos..], is_commonmark, ctx)
542 && pos + len >= dispo_suffix_end
543 && pos + len <= end
544 {
545 if pos > text_start {
546 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
547 }
548 log::trace!(
549 "IR: dispatcher overrode UnresolvedReference with inline link at pos {}",
550 pos
551 );
552 emit_inline_link(
553 builder,
554 &text[pos..pos + len],
555 link_text,
556 dest,
557 attributes,
558 config,
559 );
560 pos += len;
561 text_start = pos;
562 continue;
563 }
564 }
565
566 let inner_text = &text[*ref_text_start..*ref_text_end];
568 let suffix_start = *ref_text_end + 1;
569 let label_suffix = if suffix_start < *ref_end {
570 Some(&text[suffix_start..*ref_end])
571 } else {
572 None
573 };
574 if pos > text_start {
575 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
576 }
577 log::trace!(
578 "IR: unresolved Pandoc reference shape at pos {}..{}",
579 pos,
580 ref_end
581 );
582 emit_unresolved_reference(builder, is_image, inner_text, label_suffix, config);
583 pos = *ref_end;
584 text_start = pos;
585 continue;
586 }
587
588 if let Some(super::inline_ir::BracketDispo::Open {
589 is_image,
590 suffix_end,
591 ..
592 }) = bracket_plan.lookup(pos)
593 {
594 let is_image = *is_image;
595 let dispo_suffix_end = *suffix_end;
596 let suppress = suppress_inner_links && !is_image;
597 if !suppress {
598 let ctx = LinkScanContext::from_options(config);
599 let allow_shortcut = config.extensions.shortcut_reference_links;
600 let is_commonmark = config.dialect == Dialect::CommonMark;
601 if is_image {
602 if config.extensions.inline_images
603 && let Some((len, alt_text, dest, attributes)) =
604 try_parse_inline_image(&text[pos..], ctx)
605 && pos + len >= dispo_suffix_end
606 && pos + len <= end
607 {
608 if pos > text_start {
609 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
610 }
611 log::trace!("IR: matched inline image at pos {}", pos);
612 emit_inline_image(
613 builder,
614 &text[pos..pos + len],
615 alt_text,
616 dest,
617 attributes,
618 config,
619 );
620 pos += len;
621 text_start = pos;
622 continue;
623 }
624 if config.extensions.reference_links
625 && let Some((len, alt_text, reference, is_shortcut)) =
626 try_parse_reference_image(&text[pos..], allow_shortcut)
627 && pos + len == dispo_suffix_end
628 && pos + len <= end
629 {
630 if pos > text_start {
631 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
632 }
633 log::trace!("IR: matched reference image at pos {}", pos);
634 emit_reference_image(builder, alt_text, &reference, is_shortcut, config);
635 pos += len;
636 text_start = pos;
637 continue;
638 }
639 } else {
640 if config.extensions.inline_links
641 && let Some((len, link_text, dest, attributes)) =
642 try_parse_inline_link(&text[pos..], is_commonmark, ctx)
643 && pos + len >= dispo_suffix_end
644 && pos + len <= end
645 {
646 if pos > text_start {
647 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
648 }
649 log::trace!("IR: matched inline link at pos {}", pos);
650 emit_inline_link(
651 builder,
652 &text[pos..pos + len],
653 link_text,
654 dest,
655 attributes,
656 config,
657 );
658 pos += len;
659 text_start = pos;
660 continue;
661 }
662 if config.extensions.reference_links
663 && let Some((len, link_text, reference, is_shortcut)) =
664 try_parse_reference_link(
665 &text[pos..],
666 allow_shortcut,
667 config.extensions.inline_links,
668 ctx,
669 )
670 && pos + len == dispo_suffix_end
671 && pos + len <= end
672 {
673 if pos > text_start {
674 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
675 }
676 log::trace!("IR: matched reference link at pos {}", pos);
677 emit_reference_link(builder, link_text, &reference, is_shortcut, config);
678 pos += len;
679 text_start = pos;
680 continue;
681 }
682 }
683 }
684 }
685
686 let byte = text.as_bytes()[pos];
687
688 if byte == b'\\' {
690 if config.extensions.tex_math_double_backslash {
692 if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
693 {
694 if pos > text_start {
695 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
696 }
697 log::trace!("Matched double backslash display math at pos {}", pos);
698 emit_double_backslash_display_math(builder, content);
699 pos += len;
700 text_start = pos;
701 continue;
702 }
703
704 if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
706 if pos > text_start {
707 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
708 }
709 log::trace!("Matched double backslash inline math at pos {}", pos);
710 emit_double_backslash_inline_math(builder, content);
711 pos += len;
712 text_start = pos;
713 continue;
714 }
715 }
716
717 if config.extensions.tex_math_single_backslash {
719 if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
720 {
721 if pos > text_start {
722 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
723 }
724 log::trace!("Matched single backslash display math at pos {}", pos);
725 emit_single_backslash_display_math(builder, content);
726 pos += len;
727 text_start = pos;
728 continue;
729 }
730
731 if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
733 if pos > text_start {
734 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
735 }
736 log::trace!("Matched single backslash inline math at pos {}", pos);
737 emit_single_backslash_inline_math(builder, content);
738 pos += len;
739 text_start = pos;
740 continue;
741 }
742 }
743
744 if config.extensions.raw_tex
746 && let Some((len, begin_marker, content, end_marker)) =
747 try_parse_math_environment(&text[pos..])
748 {
749 if pos > text_start {
750 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
751 }
752 log::trace!("Matched math environment at pos {}", pos);
753 emit_display_math_environment(builder, begin_marker, content, end_marker);
754 pos += len;
755 text_start = pos;
756 continue;
757 }
758
759 if config.extensions.bookdown_references
761 && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
762 {
763 if pos > text_start {
764 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
765 }
766 log::trace!("Matched bookdown reference at pos {}: {}", pos, label);
767 super::citations::emit_bookdown_crossref(builder, label);
768 pos += len;
769 text_start = pos;
770 continue;
771 }
772
773 if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
775 let escape_enabled = match escape_type {
776 EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
777 EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
778 EscapeType::Literal => {
779 const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!|~";
792 BASE_ESCAPABLE.contains(ch)
793 || config.extensions.all_symbols_escapable
794 || (config.dialect == crate::Dialect::CommonMark
795 && ch.is_ascii_punctuation())
796 }
797 };
798 if !escape_enabled {
799 pos = advance_char_boundary(text, pos, end);
802 continue;
803 }
804
805 if pos > text_start {
807 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
808 }
809
810 log::trace!("Matched escape at pos {}: \\{}", pos, ch);
811 emit_escape(builder, ch, escape_type);
812 pos += len;
813 text_start = pos;
814 continue;
815 }
816
817 if config.extensions.raw_tex
819 && let Some(len) = try_parse_latex_command(&text[pos..])
820 {
821 if pos > text_start {
822 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
823 }
824 log::trace!("Matched LaTeX command at pos {}", pos);
825 parse_latex_command(builder, &text[pos..], len);
826 pos += len;
827 text_start = pos;
828 continue;
829 }
830 }
831
832 if byte == b'{'
834 && pos + 1 < text.len()
835 && text.as_bytes()[pos + 1] == b'{'
836 && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
837 {
838 if pos > text_start {
839 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
840 }
841 log::trace!("Matched shortcode at pos {}: {}", pos, &name);
842 emit_shortcode(builder, &name, attrs);
843 pos += len;
844 text_start = pos;
845 continue;
846 }
847
848 if byte == b'`'
850 && let Some(m) = try_parse_inline_executable(
851 &text[pos..],
852 config.extensions.rmarkdown_inline_code,
853 config.extensions.quarto_inline_code,
854 )
855 {
856 if pos > text_start {
857 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
858 }
859 log::trace!("Matched inline executable code at pos {}", pos);
860 emit_inline_executable(builder, &m);
861 pos += m.total_len;
862 text_start = pos;
863 continue;
864 }
865
866 if byte == b'`' {
868 if let Some((len, content, backtick_count, attributes)) =
869 try_parse_code_span(&text[pos..])
870 {
871 if pos > text_start {
873 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
874 }
875
876 log::trace!(
877 "Matched code span at pos {}: {} backticks",
878 pos,
879 backtick_count
880 );
881
882 if let Some(ref attrs) = attributes
884 && config.extensions.raw_attribute
885 && let Some(format) = is_raw_inline(attrs)
886 {
887 use super::raw_inline::emit_raw_inline;
888 log::trace!("Matched raw inline span at pos {}: format={}", pos, format);
889 emit_raw_inline(builder, content, backtick_count, format);
890 } else if !config.extensions.inline_code_attributes && attributes.is_some() {
891 let code_span_len = backtick_count * 2 + content.len();
892 emit_code_span(builder, content, backtick_count, None);
893 pos += code_span_len;
894 text_start = pos;
895 continue;
896 } else {
897 emit_code_span(builder, content, backtick_count, attributes);
898 }
899
900 pos += len;
901 text_start = pos;
902 continue;
903 }
904
905 if config.dialect == Dialect::CommonMark {
914 let run_len = text[pos..].bytes().take_while(|&b| b == b'`').count();
915 pos += run_len;
916 continue;
917 }
918 }
919
920 if byte == b':'
922 && config.extensions.emoji
923 && is_emoji_boundary(text, pos)
924 && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
925 {
926 if pos > text_start {
927 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
928 }
929 log::trace!("Matched emoji at pos {}", pos);
930 emit_emoji(builder, &text[pos..pos + len]);
931 pos += len;
932 text_start = pos;
933 continue;
934 }
935
936 if byte == b'^'
941 && pos + 1 < text.len()
942 && text.as_bytes()[pos + 1] == b'['
943 && config.dialect == Dialect::CommonMark
944 && config.extensions.inline_footnotes
945 && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
946 {
947 if pos > text_start {
948 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
949 }
950 log::trace!("Matched inline footnote at pos {}", pos);
951 emit_inline_footnote(builder, content, config);
952 pos += len;
953 text_start = pos;
954 continue;
955 }
956
957 if byte == b'^'
959 && config.extensions.superscript
960 && let Some((len, content)) = try_parse_superscript(&text[pos..])
961 {
962 if pos > text_start {
963 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
964 }
965 log::trace!("Matched superscript at pos {}", pos);
966 emit_superscript(builder, content, config);
967 pos += len;
968 text_start = pos;
969 continue;
970 }
971
972 if byte == b'(' && config.extensions.bookdown_references {
974 if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
975 if pos > text_start {
976 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
977 }
978 log::trace!("Matched bookdown definition at pos {}: {}", pos, label);
979 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
980 pos += len;
981 text_start = pos;
982 continue;
983 }
984 if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
985 if pos > text_start {
986 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
987 }
988 log::trace!("Matched bookdown text reference at pos {}: {}", pos, label);
989 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
990 pos += len;
991 text_start = pos;
992 continue;
993 }
994 }
995
996 if byte == b'~'
1002 && config.extensions.strikeout
1003 && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1004 {
1005 if pos > text_start {
1006 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1007 }
1008 log::trace!("Matched strikeout at pos {}", pos);
1009 emit_strikeout(builder, content, config);
1010 pos += len;
1011 text_start = pos;
1012 continue;
1013 }
1014
1015 if byte == b'~'
1018 && config.extensions.subscript
1019 && let Some((len, content)) = try_parse_subscript(&text[pos..])
1020 {
1021 if pos > text_start {
1022 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1023 }
1024 log::trace!("Matched subscript at pos {}", pos);
1025 emit_subscript(builder, content, config);
1026 pos += len;
1027 text_start = pos;
1028 continue;
1029 }
1030
1031 if byte == b'='
1033 && config.extensions.mark
1034 && let Some((len, content)) = try_parse_mark(&text[pos..])
1035 {
1036 if pos > text_start {
1037 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1038 }
1039 log::trace!("Matched mark at pos {}", pos);
1040 emit_mark(builder, content, config);
1041 pos += len;
1042 text_start = pos;
1043 continue;
1044 }
1045
1046 if byte == b'$'
1048 && config.extensions.tex_math_gfm
1049 && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1050 {
1051 if pos > text_start {
1052 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1053 }
1054 log::trace!("Matched GFM inline math at pos {}", pos);
1055 emit_gfm_inline_math(builder, content);
1056 pos += len;
1057 text_start = pos;
1058 continue;
1059 }
1060
1061 if byte == b'$' && config.extensions.tex_math_dollars {
1063 if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1065 if pos > text_start {
1067 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1068 }
1069
1070 let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1071 log::trace!(
1072 "Matched display math at pos {}: {} dollars",
1073 pos,
1074 dollar_count
1075 );
1076
1077 let after_math = &text[pos + len..];
1083 let line_end = after_math.find('\n').unwrap_or(after_math.len());
1084 let line_segment = &after_math[..line_end];
1085 let attr_len = if config.extensions.quarto_crossrefs {
1086 use crate::parser::utils::attributes::try_parse_trailing_attributes;
1087 if let Some((_attr_block, _)) = try_parse_trailing_attributes(line_segment) {
1088 let trimmed_after = line_segment.trim_start();
1089 if let Some(open_brace_pos) = trimmed_after.find('{') {
1090 let ws_before_brace = line_segment.len() - trimmed_after.len();
1091 let attr_text_len = trimmed_after[open_brace_pos..]
1092 .find('}')
1093 .map(|close| close + 1)
1094 .unwrap_or(0);
1095 ws_before_brace + open_brace_pos + attr_text_len
1096 } else {
1097 0
1098 }
1099 } else {
1100 0
1101 }
1102 } else {
1103 0
1104 };
1105
1106 let total_len = len + attr_len;
1107 emit_display_math(builder, content, dollar_count);
1108
1109 if attr_len > 0 {
1111 use crate::parser::utils::attributes::{
1112 emit_attributes, try_parse_trailing_attributes,
1113 };
1114 let attr_text = &text[pos + len..pos + total_len];
1115 if let Some((attr_block, _text_before)) =
1116 try_parse_trailing_attributes(attr_text)
1117 {
1118 let trimmed_after = attr_text.trim_start();
1119 let ws_len = attr_text.len() - trimmed_after.len();
1120 if ws_len > 0 {
1121 builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1122 }
1123 emit_attributes(builder, &attr_block);
1124 }
1125 }
1126
1127 pos += total_len;
1128 text_start = pos;
1129 continue;
1130 }
1131
1132 if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1134 if pos > text_start {
1136 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1137 }
1138
1139 log::trace!("Matched inline math at pos {}", pos);
1140 emit_inline_math(builder, content);
1141 pos += len;
1142 text_start = pos;
1143 continue;
1144 }
1145
1146 if pos > text_start {
1149 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1150 }
1151 builder.token(SyntaxKind::TEXT.into(), "$");
1152 pos = advance_char_boundary(text, pos, end);
1153 text_start = pos;
1154 continue;
1155 }
1156
1157 if byte == b'<'
1159 && config.extensions.autolinks
1160 && let Some((len, url)) = try_parse_autolink(
1161 &text[pos..],
1162 config.dialect == crate::options::Dialect::CommonMark,
1163 )
1164 {
1165 if pos > text_start {
1166 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1167 }
1168 log::trace!("Matched autolink at pos {}", pos);
1169 emit_autolink(builder, &text[pos..pos + len], url);
1170 pos += len;
1171 text_start = pos;
1172 continue;
1173 }
1174
1175 if !nested_in_link
1176 && config.extensions.autolink_bare_uris
1177 && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1178 {
1179 if pos > text_start {
1180 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1181 }
1182 log::trace!("Matched bare URI at pos {}", pos);
1183 emit_bare_uri_link(builder, url, config);
1184 pos += len;
1185 text_start = pos;
1186 continue;
1187 }
1188
1189 if byte == b'<'
1195 && config.dialect == Dialect::CommonMark
1196 && config.extensions.native_spans
1197 && let Some((len, content, _attributes)) = try_parse_native_span(&text[pos..])
1198 {
1199 if pos > text_start {
1200 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1201 }
1202 log::trace!("Matched native span at pos {}", pos);
1203 emit_native_span(builder, &text[pos..pos + len], content, config);
1204 pos += len;
1205 text_start = pos;
1206 continue;
1207 }
1208
1209 if byte == b'<'
1213 && config.extensions.raw_html
1214 && let Some(len) = try_parse_inline_html(&text[pos..], config.dialect)
1215 {
1216 if pos > text_start {
1217 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1218 }
1219 log::trace!("Matched inline raw HTML at pos {}", pos);
1220 emit_inline_html(builder, &text[pos..pos + len]);
1221 pos += len;
1222 text_start = pos;
1223 continue;
1224 }
1225
1226 if byte == b'['
1234 && config.dialect == Dialect::CommonMark
1235 && config.extensions.footnotes
1236 && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1237 {
1238 if pos > text_start {
1239 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1240 }
1241 log::trace!("Matched footnote reference at pos {}", pos);
1242 emit_footnote_reference(builder, &id);
1243 pos += len;
1244 text_start = pos;
1245 continue;
1246 }
1247 if byte == b'['
1248 && config.dialect == Dialect::CommonMark
1249 && config.extensions.citations
1250 && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1251 {
1252 if pos > text_start {
1253 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1254 }
1255 log::trace!("Matched bracketed citation at pos {}", pos);
1256 emit_bracketed_citation(builder, content);
1257 pos += len;
1258 text_start = pos;
1259 continue;
1260 }
1261
1262 if config.dialect == Dialect::CommonMark
1268 && byte == b'['
1269 && config.extensions.bracketed_spans
1270 && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1271 {
1272 if pos > text_start {
1273 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1274 }
1275 log::trace!("Matched bracketed span at pos {}", pos);
1276 emit_bracketed_span(builder, &text_content, &attrs, config);
1277 pos += len;
1278 text_start = pos;
1279 continue;
1280 }
1281
1282 if config.dialect == Dialect::CommonMark
1288 && byte == b'@'
1289 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1290 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1291 {
1292 let is_crossref =
1293 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1294 if is_crossref || config.extensions.citations {
1295 if pos > text_start {
1296 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1297 }
1298 if is_crossref {
1299 log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1300 super::citations::emit_crossref(builder, key, has_suppress);
1301 } else {
1302 log::trace!("Matched bare citation at pos {}: {}", pos, &key);
1303 emit_bare_citation(builder, key, has_suppress);
1304 }
1305 pos += len;
1306 text_start = pos;
1307 continue;
1308 }
1309 }
1310
1311 if config.dialect == Dialect::CommonMark
1316 && byte == b'-'
1317 && pos + 1 < text.len()
1318 && text.as_bytes()[pos + 1] == b'@'
1319 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1320 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1321 {
1322 let is_crossref =
1323 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1324 if is_crossref || config.extensions.citations {
1325 if pos > text_start {
1326 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1327 }
1328 if is_crossref {
1329 log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1330 super::citations::emit_crossref(builder, key, has_suppress);
1331 } else {
1332 log::trace!("Matched suppress-author citation at pos {}: {}", pos, &key);
1333 emit_bare_citation(builder, key, has_suppress);
1334 }
1335 pos += len;
1336 text_start = pos;
1337 continue;
1338 }
1339 }
1340
1341 if byte == b'*' || byte == b'_' {
1346 match plan.lookup(pos) {
1347 Some(DelimChar::Open {
1348 len,
1349 partner,
1350 partner_len,
1351 kind,
1352 }) => {
1353 if pos > text_start {
1354 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1355 }
1356 let len = len as usize;
1357 let partner_len = partner_len as usize;
1358 let (wrapper_kind, marker_kind) = match kind {
1359 EmphasisKind::Strong => (SyntaxKind::STRONG, SyntaxKind::STRONG_MARKER),
1360 EmphasisKind::Emph => (SyntaxKind::EMPHASIS, SyntaxKind::EMPHASIS_MARKER),
1361 };
1362 builder.start_node(wrapper_kind.into());
1363 builder.token(marker_kind.into(), &text[pos..pos + len]);
1364 parse_inline_range_impl(
1365 text,
1366 pos + len,
1367 partner,
1368 config,
1369 builder,
1370 nested_in_link,
1371 plan,
1372 bracket_plan,
1373 construct_plan,
1374 suppress_inner_links,
1375 mask,
1376 );
1377 builder.token(marker_kind.into(), &text[partner..partner + partner_len]);
1378 builder.finish_node();
1379 pos = partner + partner_len;
1380 text_start = pos;
1381 continue;
1382 }
1383 Some(DelimChar::Close) => {
1384 pos += 1;
1391 continue;
1392 }
1393 Some(DelimChar::Literal) | None => {
1394 let bytes = text.as_bytes();
1400 let mut end_pos = pos + 1;
1401 while end_pos < end && bytes[end_pos] == byte {
1402 match plan.lookup(end_pos) {
1403 Some(DelimChar::Literal) | None => end_pos += 1,
1404 _ => break,
1405 }
1406 }
1407 pos = end_pos;
1408 continue;
1409 }
1410 }
1411 }
1412
1413 if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1415 let text_before = &text[text_start..pos];
1416
1417 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1419 if trailing_spaces >= 2 {
1420 let text_content = &text_before[..text_before.len() - trailing_spaces];
1422 if !text_content.is_empty() {
1423 builder.token(SyntaxKind::TEXT.into(), text_content);
1424 }
1425 let spaces = " ".repeat(trailing_spaces);
1426 builder.token(
1427 SyntaxKind::HARD_LINE_BREAK.into(),
1428 &format!("{}\r\n", spaces),
1429 );
1430 pos += 2;
1431 text_start = pos;
1432 continue;
1433 }
1434
1435 if config.extensions.hard_line_breaks {
1437 if !text_before.is_empty() {
1438 builder.token(SyntaxKind::TEXT.into(), text_before);
1439 }
1440 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1441 pos += 2;
1442 text_start = pos;
1443 continue;
1444 }
1445
1446 if !text_before.is_empty() {
1448 builder.token(SyntaxKind::TEXT.into(), text_before);
1449 }
1450 builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1451 pos += 2;
1452 text_start = pos;
1453 continue;
1454 }
1455
1456 if byte == b'\n' {
1457 let text_before = &text[text_start..pos];
1458
1459 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1461 if trailing_spaces >= 2 {
1462 let text_content = &text_before[..text_before.len() - trailing_spaces];
1464 if !text_content.is_empty() {
1465 builder.token(SyntaxKind::TEXT.into(), text_content);
1466 }
1467 let spaces = " ".repeat(trailing_spaces);
1468 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1469 pos += 1;
1470 text_start = pos;
1471 continue;
1472 }
1473
1474 if config.extensions.hard_line_breaks {
1476 if !text_before.is_empty() {
1477 builder.token(SyntaxKind::TEXT.into(), text_before);
1478 }
1479 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1480 pos += 1;
1481 text_start = pos;
1482 continue;
1483 }
1484
1485 if !text_before.is_empty() {
1487 builder.token(SyntaxKind::TEXT.into(), text_before);
1488 }
1489 builder.token(SyntaxKind::NEWLINE.into(), "\n");
1490 pos += 1;
1491 text_start = pos;
1492 continue;
1493 }
1494
1495 pos = advance_char_boundary(text, pos, end);
1497 }
1498
1499 if pos > text_start && text_start < end {
1501 log::trace!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1502 builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1503 }
1504
1505 log::trace!("parse_inline_range complete: start={}, end={}", start, end);
1506}
1507
/// Unit tests for the recursive inline parser.
///
/// Each test feeds a short snippet through `parse_inline_text_recursive`,
/// rebuilds a `SyntaxNode` tree from the resulting green tree, and checks the
/// node structure; most also check that the tree text equals the input.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};
    use rowan::GreenNode;

    /// Parses `text` as the sole content of a `wrapper` node and returns the
    /// root of the resulting syntax tree.
    fn parse_wrapped(wrapper: SyntaxKind, text: &str, config: &ParserOptions) -> SyntaxNode {
        let mut builder = GreenNodeBuilder::new();
        builder.start_node(wrapper.into());
        parse_inline_text_recursive(&mut builder, text, config);
        builder.finish_node();
        let green: GreenNode = builder.finish();
        SyntaxNode::new_root(green)
    }

    /// Returns `true` when `root` or any node below it has the given `kind`.
    fn has_kind(root: &SyntaxNode, kind: SyntaxKind) -> bool {
        root.descendants().any(|n| n.kind() == kind)
    }

    /// Returns `true` when some `outer`-kind node has an `inner`-kind node
    /// among its descendants, i.e. `inner` is really nested in `outer` rather
    /// than merely appearing later in the tree.
    fn has_nested(root: &SyntaxNode, outer: SyntaxKind, inner: SyntaxKind) -> bool {
        root.descendants()
            .filter(|n| n.kind() == outer)
            .any(|n| n.descendants().any(|d| d.kind() == inner))
    }

    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();

        // Parsed without a wrapper node: whatever the parser emits becomes
        // the root of the tree.
        let mut builder = GreenNodeBuilder::new();
        parse_inline_text_recursive(&mut builder, text, &config);
        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let node = parse_wrapped(SyntaxKind::PARAGRAPH, text, &config);

        assert_eq!(node.text().to_string(), text);
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = ParserOptions::default();
        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        assert_eq!(node.text().to_string(), text);

        // Check node kinds directly; searching the debug dump for "STRONG"
        // would also match the STRONG_MARKER tokens.
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );

        // Require real containment: an EMPHASIS that merely follows a STRONG
        // as a sibling must not satisfy this test.
        assert!(
            has_nested(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS),
            "EMPH should be inside STRONG, not before it. Current structure:\n{:#?}",
            node
        );
    }

    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = ParserOptions::default();
        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        assert_eq!(node.text().to_string(), text);

        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );

        // Require real containment, not just "STRONG appears after EMPHASIS".
        assert!(
            has_nested(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG),
            "STRONG should be inside EMPH. Current structure:\n{:#?}",
            node
        );
    }

    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        config.extensions.quarto_crossrefs = true;
        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        assert_eq!(node.text().to_string(), text);

        assert!(
            has_kind(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );
        assert!(
            has_kind(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // TEXT is emitted as a token, and `next_sibling()` only yields nodes,
        // so the element directly after DISPLAY_MATH has to be inspected via
        // `next_sibling_or_token()` for this check to be able to fail.
        let math_followed_by_text = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::DISPLAY_MATH)
            .filter_map(|math| math.next_sibling_or_token())
            .filter_map(|element| element.into_token())
            .any(|token| {
                token.kind() == SyntaxKind::TEXT && token.text().contains("{#eq-einstein}")
            });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }

    #[test]
    fn test_parse_inline_text_gfm_inline_link_destination_not_autolinked() {
        use crate::options::{Dialect, Extensions, Flavor};

        let config = ParserOptions {
            flavor: Flavor::Gfm,
            dialect: Dialect::for_flavor(Flavor::Gfm),
            extensions: Extensions::for_flavor(Flavor::Gfm),
            ..ParserOptions::default()
        };

        let root = parse_wrapped(
            SyntaxKind::PARAGRAPH,
            "Second Link [link_text](https://link.com)",
            &config,
        );

        let links: Vec<_> = root
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::LINK)
            .collect();
        assert_eq!(
            links.len(),
            1,
            "Expected exactly one LINK node for inline link, not nested bare URI autolink"
        );

        let mut link_text = None::<String>;
        let mut link_dest = None::<String>;
        for child in links[0].children() {
            match child.kind() {
                SyntaxKind::LINK_TEXT => link_text = Some(child.text().to_string()),
                SyntaxKind::LINK_DEST => link_dest = Some(child.text().to_string()),
                _ => {}
            }
        }

        assert_eq!(link_text.as_deref(), Some("link_text"));
        assert_eq!(link_dest.as_deref(), Some("https://link.com"));
    }

    #[test]
    fn test_autolink_bare_uri_utf8_boundary_safe() {
        // A lone multi-byte character with bare-URI autolinking enabled must
        // parse without panicking and round-trip unchanged.
        let text = "§";
        let mut config = ParserOptions::default();
        config.extensions.autolink_bare_uris = true;
        let node = parse_wrapped(SyntaxKind::DOCUMENT, text, &config);

        assert_eq!(node.text().to_string(), text);
    }

    #[test]
    fn test_parse_emphasis_unicode_content_no_panic() {
        let text = "*§*";
        let config = ParserOptions::default();
        let node = parse_wrapped(SyntaxKind::PARAGRAPH, text, &config);

        assert!(
            has_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert_eq!(node.text().to_string(), text);
    }
}
1795
/// Parses `**bold with *italic***` and checks that the tree is lossless and
/// holds exactly one STRONG node with an EMPHASIS node somewhere inside it.
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "**bold with *italic***";
    let options = ParserOptions::default();

    // Build the tree with the inline content wrapped in a PARAGRAPH node.
    let root = {
        let mut tree = GreenNodeBuilder::new();
        tree.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut tree, input, &options);
        tree.finish_node();
        let green: GreenNode = tree.finish();
        SyntaxNode::new_root(green)
    };

    assert_eq!(root.text().to_string(), input, "Should be lossless");

    let strong_count = root
        .descendants()
        .filter(|n| matches!(n.kind(), SyntaxKind::STRONG))
        .count();
    assert_eq!(strong_count, 1, "Should have exactly one STRONG node");

    // Exactly one STRONG exists at this point, so `any` inspects that node.
    let emphasis_within_strong = root
        .descendants()
        .filter(|n| matches!(n.kind(), SyntaxKind::STRONG))
        .any(|strong| {
            strong
                .descendants()
                .any(|inner| matches!(inner.kind(), SyntaxKind::EMPHASIS))
        });
    assert!(emphasis_within_strong, "STRONG should contain EMPHASIS node");
}
1832
/// Parses `*foo *` (a space directly before the closing `*`) with default
/// options and checks that an EMPHASIS node is produced and the text
/// round-trips.
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "*foo *";
    let options = ParserOptions::default();

    let root = {
        let mut tree = GreenNodeBuilder::new();
        tree.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut tree, input, &options);
        tree.finish_node();
        let green: GreenNode = tree.finish();
        SyntaxNode::new_root(green)
    };

    let emphasis_found = root
        .descendants()
        .any(|n| matches!(n.kind(), SyntaxKind::EMPHASIS));
    assert!(emphasis_found, "Should have EMPHASIS node");

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
}
1857
/// Parses `***foo** bar **baz***` and checks for a single EMPHASIS node whose
/// direct children include exactly two STRONG nodes, plus a lossless
/// round-trip of the text.
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***foo** bar **baz***";
    let options = ParserOptions::default();

    let root = {
        let mut tree = GreenNodeBuilder::new();
        tree.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_text_recursive(&mut tree, input, &options);
        tree.finish_node();
        let green: GreenNode = tree.finish();
        SyntaxNode::new_root(green)
    };

    let emphasis_count = root
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_count, 1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphasis_count
    );

    // Only direct children count here, not deeper descendants.
    let emphasis = root
        .descendants()
        .find(|n| n.kind() == SyntaxKind::EMPHASIS)
        .expect("exactly one EMPHASIS node was asserted above");
    let strong_children = emphasis
        .children()
        .filter(|n| n.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_children, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_children
    );

    assert_eq!(root.text().to_string(), input);
}
1906
/// Parses `***foo* bar *baz***` and checks for a single STRONG node whose
/// direct children include exactly two EMPHASIS nodes, plus a lossless
/// round-trip of the text.
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***foo* bar *baz***";
    let options = ParserOptions::default();

    let root = {
        let mut tree = GreenNodeBuilder::new();
        tree.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_text_recursive(&mut tree, input, &options);
        tree.finish_node();
        let green: GreenNode = tree.finish();
        SyntaxNode::new_root(green)
    };

    let strong_count = root
        .descendants()
        .filter(|n| n.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_count, 1,
        "Should have exactly one STRONG node, found: {}",
        strong_count
    );

    // Only direct children count here, not deeper descendants.
    let strong = root
        .descendants()
        .find(|n| n.kind() == SyntaxKind::STRONG)
        .expect("exactly one STRONG node was asserted above");
    let emphasis_children = strong
        .children()
        .filter(|n| n.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_children, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_children
    );

    assert_eq!(root.text().to_string(), input);
}
1955
/// Parses single-star emphasis whose content spans a line break and checks
/// that an EMPHASIS node is produced and the newline survives in the tree text.
#[test]
fn test_parse_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "*text on\nline two*";
    let options = ParserOptions::default();

    let root = {
        let mut tree = GreenNodeBuilder::new();
        tree.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut tree, input, &options);
        tree.finish_node();
        let green: GreenNode = tree.finish();
        SyntaxNode::new_root(green)
    };

    assert!(
        root.descendants()
            .any(|n| matches!(n.kind(), SyntaxKind::EMPHASIS)),
        "Should have EMPHASIS node"
    );

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
1984
/// Parses double-star strong emphasis whose content spans a line break and
/// checks that a STRONG node is produced and the newline survives in the tree
/// text.
#[test]
fn test_parse_strong_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "**strong on\nline two**";
    let options = ParserOptions::default();

    let root = {
        let mut tree = GreenNodeBuilder::new();
        tree.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut tree, input, &options);
        tree.finish_node();
        let green: GreenNode = tree.finish();
        SyntaxNode::new_root(green)
    };

    assert!(
        root.descendants()
            .any(|n| matches!(n.kind(), SyntaxKind::STRONG)),
        "Should have STRONG node"
    );

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2012
/// Parses triple-star emphasis whose content spans a line break and checks
/// that a STRONG node is produced and the newline survives in the tree text.
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***both on\nline two***";
    let options = ParserOptions::default();

    let root = {
        let mut tree = GreenNodeBuilder::new();
        tree.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut tree, input, &options);
        tree.finish_node();
        let green: GreenNode = tree.finish();
        SyntaxNode::new_root(green)
    };

    // Only the STRONG wrapper is asserted here.
    assert!(
        root.descendants()
            .any(|n| matches!(n.kind(), SyntaxKind::STRONG)),
        "Should have STRONG node"
    );

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}