1use crate::options::{Dialect, ParserOptions};
24use crate::syntax::SyntaxKind;
25use rowan::GreenNodeBuilder;
26
27use super::bookdown::{
29 try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
30};
31use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
32use super::citations::{
33 emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
34 try_parse_bracketed_citation,
35};
36use super::code_spans::{emit_code_span, try_parse_code_span};
37use super::emoji::{emit_emoji, try_parse_emoji};
38use super::escapes::{EscapeType, emit_escape, try_parse_escape};
39use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
40use super::inline_footnotes::{
41 emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
42 try_parse_inline_footnote,
43};
44use super::inline_html::{emit_inline_html, try_parse_inline_html};
45use super::latex::{parse_latex_command, try_parse_latex_command};
46use super::links::{
47 emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link, emit_reference_image,
48 emit_reference_link, try_parse_autolink, try_parse_bare_uri, try_parse_inline_image,
49 try_parse_inline_link, try_parse_reference_image, try_parse_reference_link,
50};
51use super::mark::{emit_mark, try_parse_mark};
52use super::math::{
53 emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
54 emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
55 emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
56 try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
57 try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
58 try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
59};
60use super::native_spans::{emit_native_span, try_parse_native_span};
61use super::raw_inline::is_raw_inline;
62use super::shortcodes::{emit_shortcode, try_parse_shortcode};
63use super::strikeout::{emit_strikeout, try_parse_strikeout};
64use super::subscript::{emit_subscript, try_parse_subscript};
65use super::superscript::{emit_superscript, try_parse_superscript};
66
67pub fn parse_inline_text_recursive(
84 builder: &mut GreenNodeBuilder,
85 text: &str,
86 config: &ParserOptions,
87) {
88 log::trace!(
89 "Recursive inline parsing: {:?} ({} bytes)",
90 &text[..text.len().min(40)],
91 text.len()
92 );
93
94 parse_inline_range(text, 0, text.len(), config, builder);
95
96 log::trace!("Recursive inline parsing complete");
97}
98
99pub fn parse_inline_text(
108 builder: &mut GreenNodeBuilder,
109 text: &str,
110 config: &ParserOptions,
111 _allow_reference_links: bool,
112) {
113 log::trace!(
114 "Parsing inline text (nested in link): {:?} ({} bytes)",
115 &text[..text.len().min(40)],
116 text.len()
117 );
118
119 parse_inline_range_impl(text, 0, text.len(), config, builder, false, true);
120}
121
122pub fn try_parse_emphasis(
147 text: &str,
148 pos: usize,
149 end: usize,
150 config: &ParserOptions,
151 builder: &mut GreenNodeBuilder,
152) -> Option<(usize, usize)> {
153 let bytes = text.as_bytes();
154
155 if pos >= bytes.len() {
156 return None;
157 }
158
159 let delim_char = bytes[pos] as char;
160 if delim_char != '*' && delim_char != '_' {
161 return None;
162 }
163
164 let mut count = 0;
166 while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
167 count += 1;
168 }
169
170 let after_pos = pos + count;
171
172 log::trace!(
173 "try_parse_emphasis: '{}' x {} at pos {}",
174 delim_char,
175 count,
176 pos
177 );
178
179 if after_pos < text.len()
181 && let Some(next_char) = text[after_pos..].chars().next()
182 && next_char.is_whitespace()
183 {
184 log::trace!("Delimiter followed by whitespace, treating as literal");
185 return None;
186 }
187
188 if delim_char == '_'
191 && pos > 0
192 && let Some(prev_char) = text[..pos].chars().last()
193 && prev_char.is_alphanumeric()
194 {
195 log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
196 return None;
197 }
198
199 let result = match count {
201 1 => try_parse_one(text, pos, delim_char, end, config, builder),
202 2 => try_parse_two(text, pos, delim_char, end, config, builder),
203 3 => try_parse_three(text, pos, delim_char, end, config, builder),
204 _ => {
205 log::trace!("{} delimiters (4+), treating as literal", count);
207 None
208 }
209 };
210
211 result.map(|consumed| (consumed, count))
214}
215
216fn try_parse_emphasis_nested(
225 text: &str,
226 pos: usize,
227 end: usize,
228 config: &ParserOptions,
229 builder: &mut GreenNodeBuilder,
230) -> Option<(usize, usize)> {
231 let bytes = text.as_bytes();
232
233 if pos >= bytes.len() {
234 return None;
235 }
236
237 let delim_char = bytes[pos] as char;
238 if delim_char != '*' && delim_char != '_' {
239 return None;
240 }
241
242 let mut count = 0;
244 while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
245 count += 1;
246 }
247
248 log::trace!(
249 "try_parse_emphasis_nested: '{}' x {} at pos {}",
250 delim_char,
251 count,
252 pos
253 );
254
255 if delim_char == '_'
258 && pos > 0
259 && let Some(prev_char) = text[..pos].chars().last()
260 && prev_char.is_alphanumeric()
261 {
262 log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
263 return None;
264 }
265
266 let result = match count {
272 1 => try_parse_one(text, pos, delim_char, end, config, builder),
273 2 => try_parse_two(text, pos, delim_char, end, config, builder),
274 3 => try_parse_three(text, pos, delim_char, end, config, builder),
275 _ => {
276 log::trace!("{} delimiters (4+), treating as literal", count);
278 None
279 }
280 };
281
282 result.map(|consumed| (consumed, count))
283}
284
285fn try_parse_three(
290 text: &str,
291 pos: usize,
292 delim_char: char,
293 end: usize,
294 config: &ParserOptions,
295 builder: &mut GreenNodeBuilder,
296) -> Option<usize> {
297 let content_start = pos + 3;
298 let one = delim_char.to_string();
299 let two = one.repeat(2);
300
301 log::trace!("try_parse_three: '{}' x 3 at pos {}", delim_char, pos);
302
303 let mut search_pos = content_start;
307
308 loop {
309 let closer_start = match find_first_potential_ender(text, search_pos, delim_char, end) {
311 Some(p) => p,
312 None => {
313 log::trace!("No potential ender found for ***");
314 return None;
315 }
316 };
317
318 log::trace!("Potential ender at pos {}", closer_start);
319
320 let bytes = text.as_bytes();
322 let mut closer_count = 0;
323 let mut check_pos = closer_start;
324 while check_pos < bytes.len() && bytes[check_pos] == delim_char as u8 {
325 closer_count += 1;
326 check_pos += 1;
327 }
328
329 log::trace!(
330 "Found {} x {} at pos {}",
331 delim_char,
332 closer_count,
333 closer_start
334 );
335
336 if closer_count >= 3 && is_valid_ender(text, closer_start, delim_char, 3) {
340 log::trace!("Matched *** closer, emitting Strong[Emph[content]]");
341
342 builder.start_node(SyntaxKind::STRONG.into());
343 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
344
345 builder.start_node(SyntaxKind::EMPHASIS.into());
346 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
347 parse_inline_range_nested(text, content_start, closer_start, config, builder);
348 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
349 builder.finish_node(); builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
352 builder.finish_node(); return Some(closer_start + 3 - pos);
355 }
356
357 if closer_count >= 2 && is_valid_ender(text, closer_start, delim_char, 2) {
359 log::trace!("Matched ** closer, wrapping as Strong and continuing with one");
360
361 let continue_pos = closer_start + 2;
362
363 if let Some(final_closer_pos) =
364 parse_until_closer_with_nested_two(text, continue_pos, delim_char, 1, end, config)
365 {
366 log::trace!(
367 "Found * closer at pos {}, emitting Emph[Strong[...], ...]",
368 final_closer_pos
369 );
370
371 builder.start_node(SyntaxKind::EMPHASIS.into());
372 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
373
374 builder.start_node(SyntaxKind::STRONG.into());
375 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
376 parse_inline_range_nested(text, content_start, closer_start, config, builder);
377 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
378 builder.finish_node(); parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
382
383 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
384 builder.finish_node(); return Some(final_closer_pos + 1 - pos);
387 }
388
389 log::trace!("No * closer found after **, emitting * + STRONG");
391 builder.token(SyntaxKind::TEXT.into(), &one);
392
393 builder.start_node(SyntaxKind::STRONG.into());
394 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
395 parse_inline_range_nested(text, content_start, closer_start, config, builder);
396 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
397 builder.finish_node(); return Some(closer_start + 2 - pos);
400 }
401
402 if closer_count >= 1 && is_valid_ender(text, closer_start, delim_char, 1) {
404 log::trace!("Matched * closer, wrapping as Emph and continuing with two");
405
406 let continue_pos = closer_start + 1;
407
408 if let Some(final_closer_pos) =
409 parse_until_closer_with_nested_one(text, continue_pos, delim_char, 2, end, config)
410 {
411 log::trace!(
412 "Found ** closer at pos {}, emitting Strong[Emph[...], ...]",
413 final_closer_pos
414 );
415
416 builder.start_node(SyntaxKind::STRONG.into());
417 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
418
419 builder.start_node(SyntaxKind::EMPHASIS.into());
420 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
421 parse_inline_range_nested(text, content_start, closer_start, config, builder);
422 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
423 builder.finish_node(); parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
426
427 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
428 builder.finish_node(); return Some(final_closer_pos + 2 - pos);
431 }
432
433 log::trace!("No ** closer found after *, emitting ** + EMPH");
435 builder.token(SyntaxKind::TEXT.into(), &two);
436
437 builder.start_node(SyntaxKind::EMPHASIS.into());
438 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
439 parse_inline_range_nested(text, content_start, closer_start, config, builder);
440 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
441 builder.finish_node(); return Some(closer_start + 1 - pos);
444 }
445
446 log::trace!(
448 "No valid ender at pos {}, continuing search from {}",
449 closer_start,
450 closer_start + closer_count
451 );
452 search_pos = closer_start + closer_count;
453 }
454}
455
/// Returns the byte offset of the first unescaped `delim_char` in
/// `text[start..end]` (with `end` clamped to the length of `text`).
///
/// A delimiter counts as escaped when it is directly preceded by an odd
/// number of backslashes; the backslash count may reach back before `start`.
/// Returns `None` when the range is empty or holds no unescaped delimiter.
fn find_first_potential_ender(
    text: &str,
    start: usize,
    delim_char: char,
    end: usize,
) -> Option<usize> {
    let bytes = text.as_bytes();
    let delim = delim_char as u8;
    let limit = end.min(bytes.len());

    (start..limit).find(|&idx| {
        if bytes[idx] != delim {
            return false;
        }
        // Count the backslashes immediately before the delimiter.
        let preceding_backslashes = bytes[..idx]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        preceding_backslashes % 2 == 0
    })
}
493
/// Reports whether a run of exactly `delim_count` copies of `delim_char`
/// starting at byte offset `pos` can close an emphasis span.
///
/// The check fails when the run would extend past the end of `text`, when any
/// of the `delim_count` bytes is not the delimiter, or when the run is
/// directly preceded or followed by another delimiter (i.e. it is part of a
/// longer run). For `_` it additionally fails when the previous character is
/// whitespace or the next character is alphanumeric.
fn is_valid_ender(text: &str, pos: usize, delim_char: char, delim_count: usize) -> bool {
    let bytes = text.as_bytes();
    let delim = delim_char as u8;
    let after_pos = pos + delim_count;

    // The whole run must lie inside the text and consist of delimiters only.
    if after_pos > bytes.len() {
        return false;
    }
    if bytes[pos..after_pos].iter().any(|&b| b != delim) {
        return false;
    }

    // The run must not be embedded in a longer delimiter run.
    let preceded_by_delim = bytes[..pos].last() == Some(&delim);
    let followed_by_delim = bytes.get(after_pos) == Some(&delim);
    if preceded_by_delim || followed_by_delim {
        return false;
    }

    if delim_char != '_' {
        return true;
    }

    // Underscore-only rules: no whitespace before, no alphanumeric after.
    let prev_is_whitespace = text[..pos]
        .chars()
        .next_back()
        .is_some_and(|c| c.is_whitespace());
    if prev_is_whitespace {
        return false;
    }

    let next_is_alphanumeric = text[after_pos..]
        .chars()
        .next()
        .is_some_and(|c| c.is_alphanumeric());
    !next_is_alphanumeric
}
542
543fn try_parse_two(
548 text: &str,
549 pos: usize,
550 delim_char: char,
551 end: usize,
552 config: &ParserOptions,
553 builder: &mut GreenNodeBuilder,
554) -> Option<usize> {
555 let content_start = pos + 2;
556
557 log::trace!("try_parse_two: '{}' x 2 at pos {}", delim_char, pos);
558
559 if let Some(closer_pos) =
561 parse_until_closer_with_nested_one(text, content_start, delim_char, 2, end, config)
562 {
563 log::trace!("Found ** closer at pos {}", closer_pos);
564
565 builder.start_node(SyntaxKind::STRONG.into());
567 builder.token(SyntaxKind::STRONG_MARKER.into(), &text[pos..pos + 2]);
568 parse_inline_range_nested(text, content_start, closer_pos, config, builder);
569 builder.token(
570 SyntaxKind::STRONG_MARKER.into(),
571 &text[closer_pos..closer_pos + 2],
572 );
573 builder.finish_node(); return Some(closer_pos + 2 - pos);
576 }
577
578 log::trace!("No closer found for **");
580 None
581}
582
583fn try_parse_one(
594 text: &str,
595 pos: usize,
596 delim_char: char,
597 end: usize,
598 config: &ParserOptions,
599 builder: &mut GreenNodeBuilder,
600) -> Option<usize> {
601 let content_start = pos + 1;
602
603 log::trace!("try_parse_one: '{}' x 1 at pos {}", delim_char, pos);
604
605 if let Some(closer_pos) =
607 parse_until_closer_with_nested_two(text, content_start, delim_char, 1, end, config)
608 {
609 log::trace!("Found * closer at pos {}", closer_pos);
610
611 builder.start_node(SyntaxKind::EMPHASIS.into());
613 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &text[pos..pos + 1]);
614 parse_inline_range_nested(text, content_start, closer_pos, config, builder);
615 builder.token(
616 SyntaxKind::EMPHASIS_MARKER.into(),
617 &text[closer_pos..closer_pos + 1],
618 );
619 builder.finish_node(); return Some(closer_pos + 1 - pos);
622 }
623
624 log::trace!("No closer found for *");
626 None
627}
628
629fn parse_until_closer_with_nested_two(
648 text: &str,
649 start: usize,
650 delim_char: char,
651 delim_count: usize,
652 end: usize,
653 config: &ParserOptions,
654) -> Option<usize> {
655 let bytes = text.as_bytes();
656 let mut pos = start;
657
658 while pos < end.min(text.len()) {
659 if bytes[pos] == b'`'
660 && let Some(m) = try_parse_inline_executable(
661 &text[pos..],
662 config.extensions.rmarkdown_inline_code,
663 config.extensions.quarto_inline_code,
664 )
665 {
666 log::trace!(
667 "Skipping inline executable span of {} bytes at pos {}",
668 m.total_len,
669 pos
670 );
671 pos += m.total_len;
672 continue;
673 }
674
675 if bytes[pos] == b'`'
677 && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
678 {
679 log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
680 pos += len;
681 continue;
682 }
683
684 if bytes[pos] == b'$'
686 && let Some((len, _)) = try_parse_inline_math(&text[pos..])
687 {
688 log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
689 pos += len;
690 continue;
691 }
692
693 if bytes[pos] == b'['
695 && let Some((len, _, _, _)) = try_parse_inline_link(
696 &text[pos..],
697 config.dialect == crate::options::Dialect::CommonMark,
698 )
699 {
700 log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
701 pos += len;
702 continue;
703 }
704
705 if delim_count == 1
709 && pos + 2 <= text.len()
710 && bytes[pos] == delim_char as u8
711 && bytes[pos + 1] == delim_char as u8
712 {
713 let first_is_escaped = {
715 let mut backslash_count = 0;
716 let mut check_pos = pos;
717 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
718 backslash_count += 1;
719 check_pos -= 1;
720 }
721 backslash_count % 2 == 1
722 };
723
724 if first_is_escaped {
725 log::trace!(
728 "First * at pos {} is escaped, skipping to check second *",
729 pos
730 );
731 pos = advance_char_boundary(text, pos, end);
732 continue;
733 }
734
735 let no_third_delim = pos + 2 >= bytes.len() || bytes[pos + 2] != delim_char as u8;
738
739 if no_third_delim {
740 log::trace!(
741 "try_parse_one: found ** at pos {}, attempting nested two",
742 pos
743 );
744
745 let mut temp_builder = GreenNodeBuilder::new();
748 if let Some(two_consumed) =
749 try_parse_two(text, pos, delim_char, end, config, &mut temp_builder)
750 {
751 log::trace!(
754 "Nested two succeeded, consumed {} bytes, continuing search",
755 two_consumed
756 );
757 pos += two_consumed;
758 continue;
759 }
760 log::trace!("Nested two failed at pos {}, entire one() should fail", pos);
766 return None;
767 }
768 }
769
770 if pos + delim_count <= text.len() {
772 let mut matches = true;
773 for i in 0..delim_count {
774 if bytes[pos + i] != delim_char as u8 {
775 matches = false;
776 break;
777 }
778 }
779
780 if matches {
781 let is_escaped = {
787 let mut backslash_count = 0;
788 let mut check_pos = pos;
789 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
790 backslash_count += 1;
791 check_pos -= 1;
792 }
793 backslash_count % 2 == 1 };
795
796 let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
800 let after_pos = pos + delim_count;
801 let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
802
803 if (at_run_start || at_run_end) && !is_escaped {
804 if delim_char == '_'
808 && pos > start
809 && let Some(prev_char) = text[..pos].chars().last()
810 && prev_char.is_whitespace()
811 {
812 log::trace!(
813 "Underscore closer preceded by whitespace at pos {}, not right-flanking",
814 pos
815 );
816 pos = advance_char_boundary(text, pos, end);
818 continue;
819 }
820
821 log::trace!(
822 "Found exact {} x {} closer at pos {}",
823 delim_char,
824 delim_count,
825 pos
826 );
827 return Some(pos);
828 }
829 }
830 }
831
832 pos = advance_char_boundary(text, pos, end);
834 }
835
836 None
837}
838
839fn parse_until_closer_with_nested_one(
859 text: &str,
860 start: usize,
861 delim_char: char,
862 delim_count: usize,
863 end: usize,
864 config: &ParserOptions,
865) -> Option<usize> {
866 let bytes = text.as_bytes();
867 let mut pos = start;
868
869 while pos < end.min(text.len()) {
870 if bytes[pos] == b'`'
871 && let Some(m) = try_parse_inline_executable(
872 &text[pos..],
873 config.extensions.rmarkdown_inline_code,
874 config.extensions.quarto_inline_code,
875 )
876 {
877 log::trace!(
878 "Skipping inline executable span of {} bytes at pos {}",
879 m.total_len,
880 pos
881 );
882 pos += m.total_len;
883 continue;
884 }
885
886 if bytes[pos] == b'`'
888 && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
889 {
890 log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
891 pos += len;
892 continue;
893 }
894
895 if bytes[pos] == b'$'
897 && let Some((len, _)) = try_parse_inline_math(&text[pos..])
898 {
899 log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
900 pos += len;
901 continue;
902 }
903
904 if bytes[pos] == b'['
906 && let Some((len, _, _, _)) = try_parse_inline_link(
907 &text[pos..],
908 config.dialect == crate::options::Dialect::CommonMark,
909 )
910 {
911 log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
912 pos += len;
913 continue;
914 }
915
916 if delim_count == 2 && pos < text.len() && bytes[pos] == delim_char as u8 {
923 let no_second_delim = pos + 1 >= bytes.len() || bytes[pos + 1] != delim_char as u8;
926
927 if no_second_delim {
928 let is_escaped = {
930 let mut backslash_count = 0;
931 let mut check_pos = pos;
932 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
933 backslash_count += 1;
934 check_pos -= 1;
935 }
936 backslash_count % 2 == 1
937 };
938
939 if is_escaped {
940 log::trace!("* at pos {} is escaped, skipping", pos);
942 pos = advance_char_boundary(text, pos, end);
943 continue;
944 }
945
946 let after_delim = pos + 1;
949 let followed_by_whitespace = after_delim < text.len()
950 && text[after_delim..]
951 .chars()
952 .next()
953 .is_some_and(|c| c.is_whitespace());
954
955 if followed_by_whitespace {
956 log::trace!(
958 "* at pos {} followed by whitespace, not an opener, skipping",
959 pos
960 );
961 pos = advance_char_boundary(text, pos, end);
962 continue;
963 }
964
965 log::trace!(
966 "try_parse_two: found * at pos {}, attempting nested one",
967 pos
968 );
969
970 let mut temp_builder = GreenNodeBuilder::new();
973 if let Some(one_consumed) =
974 try_parse_one(text, pos, delim_char, end, config, &mut temp_builder)
975 {
976 log::trace!(
979 "Nested one succeeded, consumed {} bytes, continuing search",
980 one_consumed
981 );
982 pos += one_consumed;
983 continue;
984 }
985
986 log::trace!(
992 "Nested one failed at pos {}, poisoning outer two (no closer found)",
993 pos
994 );
995 return None;
996 }
997 }
998
999 if pos + delim_count <= text.len() {
1001 let mut matches = true;
1002 for i in 0..delim_count {
1003 if bytes[pos + i] != delim_char as u8 {
1004 matches = false;
1005 break;
1006 }
1007 }
1008
1009 if matches {
1010 let is_escaped = {
1012 let mut backslash_count = 0;
1013 let mut check_pos = pos;
1014 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
1015 backslash_count += 1;
1016 check_pos -= 1;
1017 }
1018 backslash_count % 2 == 1 };
1020
1021 let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
1025 let after_pos = pos + delim_count;
1026 let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
1027
1028 if (at_run_start || at_run_end) && !is_escaped {
1029 if delim_char == '_'
1033 && pos > start
1034 && let Some(prev_char) = text[..pos].chars().last()
1035 && prev_char.is_whitespace()
1036 {
1037 log::trace!(
1038 "Underscore closer preceded by whitespace at pos {}, not right-flanking",
1039 pos
1040 );
1041 pos = advance_char_boundary(text, pos, end);
1043 continue;
1044 }
1045
1046 log::trace!(
1047 "Found exact {} x {} closer at pos {}",
1048 delim_char,
1049 delim_count,
1050 pos
1051 );
1052 return Some(pos);
1053 }
1054 }
1055 }
1056
1057 pos = advance_char_boundary(text, pos, end);
1059 }
1060
1061 None
1062}
1063
1064fn parse_inline_range(
1081 text: &str,
1082 start: usize,
1083 end: usize,
1084 config: &ParserOptions,
1085 builder: &mut GreenNodeBuilder,
1086) {
1087 parse_inline_range_impl(text, start, end, config, builder, false, false)
1088}
1089
1090fn parse_inline_range_nested(
1093 text: &str,
1094 start: usize,
1095 end: usize,
1096 config: &ParserOptions,
1097 builder: &mut GreenNodeBuilder,
1098) {
1099 parse_inline_range_impl(text, start, end, config, builder, true, false)
1100}
1101
/// Reports whether an emoji alias may start at byte offset `pos`.
///
/// The start of the text is always a boundary. Otherwise the byte just before
/// `pos` must be neither an ASCII letter or digit nor `_`; any other byte
/// (including bytes of non-ASCII characters) counts as a boundary.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    match pos.checked_sub(1) {
        // Nothing precedes the start of the text.
        None => true,
        Some(prev_idx) => {
            let prev = text.as_bytes()[prev_idx];
            !(prev.is_ascii_alphanumeric() || prev == b'_')
        }
    }
}
1111
/// Returns the position one character past `pos`, clamped to `end`.
///
/// `pos` is returned unchanged when it is already at or beyond `end` or the
/// end of `text`. Otherwise the UTF-8 length of the character starting at
/// `pos` is added, and the result is capped at `end`.
#[inline]
fn advance_char_boundary(text: &str, pos: usize, end: usize) -> usize {
    if pos < end && pos < text.len() {
        // Step over the whole character so the caller stays on a boundary.
        let step = match text[pos..].chars().next() {
            Some(ch) => ch.len_utf8(),
            None => 1,
        };
        end.min(pos + step)
    } else {
        pos
    }
}
1123
1124fn parse_inline_range_impl(
1125 text: &str,
1126 start: usize,
1127 end: usize,
1128 config: &ParserOptions,
1129 builder: &mut GreenNodeBuilder,
1130 nested_emphasis: bool,
1131 nested_in_link: bool,
1132) {
1133 log::trace!(
1134 "parse_inline_range: start={}, end={}, text={:?}",
1135 start,
1136 end,
1137 &text[start..end]
1138 );
1139 let mut pos = start;
1140 let mut text_start = start;
1141
1142 while pos < end {
1143 let byte = text.as_bytes()[pos];
1144
1145 if byte == b'\\' {
1147 if config.extensions.tex_math_double_backslash {
1149 if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
1150 {
1151 if pos > text_start {
1152 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1153 }
1154 log::trace!("Matched double backslash display math at pos {}", pos);
1155 emit_double_backslash_display_math(builder, content);
1156 pos += len;
1157 text_start = pos;
1158 continue;
1159 }
1160
1161 if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
1163 if pos > text_start {
1164 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1165 }
1166 log::trace!("Matched double backslash inline math at pos {}", pos);
1167 emit_double_backslash_inline_math(builder, content);
1168 pos += len;
1169 text_start = pos;
1170 continue;
1171 }
1172 }
1173
1174 if config.extensions.tex_math_single_backslash {
1176 if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
1177 {
1178 if pos > text_start {
1179 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1180 }
1181 log::trace!("Matched single backslash display math at pos {}", pos);
1182 emit_single_backslash_display_math(builder, content);
1183 pos += len;
1184 text_start = pos;
1185 continue;
1186 }
1187
1188 if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
1190 if pos > text_start {
1191 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1192 }
1193 log::trace!("Matched single backslash inline math at pos {}", pos);
1194 emit_single_backslash_inline_math(builder, content);
1195 pos += len;
1196 text_start = pos;
1197 continue;
1198 }
1199 }
1200
1201 if config.extensions.raw_tex
1203 && let Some((len, begin_marker, content, end_marker)) =
1204 try_parse_math_environment(&text[pos..])
1205 {
1206 if pos > text_start {
1207 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1208 }
1209 log::trace!("Matched math environment at pos {}", pos);
1210 emit_display_math_environment(builder, begin_marker, content, end_marker);
1211 pos += len;
1212 text_start = pos;
1213 continue;
1214 }
1215
1216 if config.extensions.bookdown_references
1218 && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
1219 {
1220 if pos > text_start {
1221 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1222 }
1223 log::trace!("Matched bookdown reference at pos {}: {}", pos, label);
1224 super::citations::emit_bookdown_crossref(builder, label);
1225 pos += len;
1226 text_start = pos;
1227 continue;
1228 }
1229
1230 if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
1232 let escape_enabled = match escape_type {
1233 EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
1234 EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
1235 EscapeType::Literal => {
1236 const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!|~";
1249 BASE_ESCAPABLE.contains(ch)
1250 || config.extensions.all_symbols_escapable
1251 || (config.dialect == crate::Dialect::CommonMark
1252 && ch.is_ascii_punctuation())
1253 }
1254 };
1255 if !escape_enabled {
1256 pos = advance_char_boundary(text, pos, end);
1259 continue;
1260 }
1261
1262 if pos > text_start {
1264 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1265 }
1266
1267 log::trace!("Matched escape at pos {}: \\{}", pos, ch);
1268 emit_escape(builder, ch, escape_type);
1269 pos += len;
1270 text_start = pos;
1271 continue;
1272 }
1273
1274 if config.extensions.raw_tex
1276 && let Some(len) = try_parse_latex_command(&text[pos..])
1277 {
1278 if pos > text_start {
1279 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1280 }
1281 log::trace!("Matched LaTeX command at pos {}", pos);
1282 parse_latex_command(builder, &text[pos..], len);
1283 pos += len;
1284 text_start = pos;
1285 continue;
1286 }
1287 }
1288
1289 if byte == b'{'
1291 && pos + 1 < text.len()
1292 && text.as_bytes()[pos + 1] == b'{'
1293 && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
1294 {
1295 if pos > text_start {
1296 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1297 }
1298 log::trace!("Matched shortcode at pos {}: {}", pos, &name);
1299 emit_shortcode(builder, &name, attrs);
1300 pos += len;
1301 text_start = pos;
1302 continue;
1303 }
1304
1305 if byte == b'`'
1307 && let Some(m) = try_parse_inline_executable(
1308 &text[pos..],
1309 config.extensions.rmarkdown_inline_code,
1310 config.extensions.quarto_inline_code,
1311 )
1312 {
1313 if pos > text_start {
1314 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1315 }
1316 log::trace!("Matched inline executable code at pos {}", pos);
1317 emit_inline_executable(builder, &m);
1318 pos += m.total_len;
1319 text_start = pos;
1320 continue;
1321 }
1322
1323 if byte == b'`' {
1325 if let Some((len, content, backtick_count, attributes)) =
1326 try_parse_code_span(&text[pos..])
1327 {
1328 if pos > text_start {
1330 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1331 }
1332
1333 log::trace!(
1334 "Matched code span at pos {}: {} backticks",
1335 pos,
1336 backtick_count
1337 );
1338
1339 if let Some(ref attrs) = attributes
1341 && config.extensions.raw_attribute
1342 && let Some(format) = is_raw_inline(attrs)
1343 {
1344 use super::raw_inline::emit_raw_inline;
1345 log::trace!("Matched raw inline span at pos {}: format={}", pos, format);
1346 emit_raw_inline(builder, content, backtick_count, format);
1347 } else if !config.extensions.inline_code_attributes && attributes.is_some() {
1348 let code_span_len = backtick_count * 2 + content.len();
1349 emit_code_span(builder, content, backtick_count, None);
1350 pos += code_span_len;
1351 text_start = pos;
1352 continue;
1353 } else {
1354 emit_code_span(builder, content, backtick_count, attributes);
1355 }
1356
1357 pos += len;
1358 text_start = pos;
1359 continue;
1360 }
1361
1362 if config.dialect == Dialect::CommonMark {
1371 let run_len = text[pos..].bytes().take_while(|&b| b == b'`').count();
1372 pos += run_len;
1373 continue;
1374 }
1375 }
1376
1377 if byte == b':'
1379 && config.extensions.emoji
1380 && is_emoji_boundary(text, pos)
1381 && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
1382 {
1383 if pos > text_start {
1384 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1385 }
1386 log::trace!("Matched emoji at pos {}", pos);
1387 emit_emoji(builder, &text[pos..pos + len]);
1388 pos += len;
1389 text_start = pos;
1390 continue;
1391 }
1392
1393 if byte == b'^'
1395 && pos + 1 < text.len()
1396 && text.as_bytes()[pos + 1] == b'['
1397 && config.extensions.inline_footnotes
1398 && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
1399 {
1400 if pos > text_start {
1401 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1402 }
1403 log::trace!("Matched inline footnote at pos {}", pos);
1404 emit_inline_footnote(builder, content, config);
1405 pos += len;
1406 text_start = pos;
1407 continue;
1408 }
1409
1410 if byte == b'^'
1412 && config.extensions.superscript
1413 && let Some((len, content)) = try_parse_superscript(&text[pos..])
1414 {
1415 if pos > text_start {
1416 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1417 }
1418 log::trace!("Matched superscript at pos {}", pos);
1419 emit_superscript(builder, content, config);
1420 pos += len;
1421 text_start = pos;
1422 continue;
1423 }
1424
1425 if byte == b'(' && config.extensions.bookdown_references {
1427 if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
1428 if pos > text_start {
1429 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1430 }
1431 log::trace!("Matched bookdown definition at pos {}: {}", pos, label);
1432 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1433 pos += len;
1434 text_start = pos;
1435 continue;
1436 }
1437 if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
1438 if pos > text_start {
1439 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1440 }
1441 log::trace!("Matched bookdown text reference at pos {}: {}", pos, label);
1442 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1443 pos += len;
1444 text_start = pos;
1445 continue;
1446 }
1447 }
1448
1449 if byte == b'~'
1451 && config.extensions.subscript
1452 && let Some((len, content)) = try_parse_subscript(&text[pos..])
1453 {
1454 if pos > text_start {
1455 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1456 }
1457 log::trace!("Matched subscript at pos {}", pos);
1458 emit_subscript(builder, content, config);
1459 pos += len;
1460 text_start = pos;
1461 continue;
1462 }
1463
1464 if byte == b'~'
1466 && config.extensions.strikeout
1467 && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1468 {
1469 if pos > text_start {
1470 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1471 }
1472 log::trace!("Matched strikeout at pos {}", pos);
1473 emit_strikeout(builder, content, config);
1474 pos += len;
1475 text_start = pos;
1476 continue;
1477 }
1478
1479 if byte == b'='
1481 && config.extensions.mark
1482 && let Some((len, content)) = try_parse_mark(&text[pos..])
1483 {
1484 if pos > text_start {
1485 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1486 }
1487 log::trace!("Matched mark at pos {}", pos);
1488 emit_mark(builder, content, config);
1489 pos += len;
1490 text_start = pos;
1491 continue;
1492 }
1493
1494 if byte == b'$'
1496 && config.extensions.tex_math_gfm
1497 && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1498 {
1499 if pos > text_start {
1500 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1501 }
1502 log::trace!("Matched GFM inline math at pos {}", pos);
1503 emit_gfm_inline_math(builder, content);
1504 pos += len;
1505 text_start = pos;
1506 continue;
1507 }
1508
1509 if byte == b'$' && config.extensions.tex_math_dollars {
1511 if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1513 if pos > text_start {
1515 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1516 }
1517
1518 let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1519 log::trace!(
1520 "Matched display math at pos {}: {} dollars",
1521 pos,
1522 dollar_count
1523 );
1524
1525 let after_math = &text[pos + len..];
1527 let attr_len = if config.extensions.quarto_crossrefs {
1528 use crate::parser::utils::attributes::try_parse_trailing_attributes;
1529 if let Some((_attr_block, _)) = try_parse_trailing_attributes(after_math) {
1530 let trimmed_after = after_math.trim_start();
1531 if let Some(open_brace_pos) = trimmed_after.find('{') {
1532 let ws_before_brace = after_math.len() - trimmed_after.len();
1533 let attr_text_len = trimmed_after[open_brace_pos..]
1534 .find('}')
1535 .map(|close| close + 1)
1536 .unwrap_or(0);
1537 ws_before_brace + open_brace_pos + attr_text_len
1538 } else {
1539 0
1540 }
1541 } else {
1542 0
1543 }
1544 } else {
1545 0
1546 };
1547
1548 let total_len = len + attr_len;
1549 emit_display_math(builder, content, dollar_count);
1550
1551 if attr_len > 0 {
1553 use crate::parser::utils::attributes::{
1554 emit_attributes, try_parse_trailing_attributes,
1555 };
1556 let attr_text = &text[pos + len..pos + total_len];
1557 if let Some((attr_block, _text_before)) =
1558 try_parse_trailing_attributes(attr_text)
1559 {
1560 let trimmed_after = attr_text.trim_start();
1561 let ws_len = attr_text.len() - trimmed_after.len();
1562 if ws_len > 0 {
1563 builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1564 }
1565 emit_attributes(builder, &attr_block);
1566 }
1567 }
1568
1569 pos += total_len;
1570 text_start = pos;
1571 continue;
1572 }
1573
1574 if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1576 if pos > text_start {
1578 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1579 }
1580
1581 log::trace!("Matched inline math at pos {}", pos);
1582 emit_inline_math(builder, content);
1583 pos += len;
1584 text_start = pos;
1585 continue;
1586 }
1587
1588 if pos > text_start {
1591 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1592 }
1593 builder.token(SyntaxKind::TEXT.into(), "$");
1594 pos = advance_char_boundary(text, pos, end);
1595 text_start = pos;
1596 continue;
1597 }
1598
1599 if byte == b'<'
1601 && config.extensions.autolinks
1602 && let Some((len, url)) = try_parse_autolink(
1603 &text[pos..],
1604 config.dialect == crate::options::Dialect::CommonMark,
1605 )
1606 {
1607 if pos > text_start {
1608 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1609 }
1610 log::trace!("Matched autolink at pos {}", pos);
1611 emit_autolink(builder, &text[pos..pos + len], url);
1612 pos += len;
1613 text_start = pos;
1614 continue;
1615 }
1616
1617 if !nested_in_link
1618 && config.extensions.autolink_bare_uris
1619 && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1620 {
1621 if pos > text_start {
1622 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1623 }
1624 log::trace!("Matched bare URI at pos {}", pos);
1625 emit_bare_uri_link(builder, url, config);
1626 pos += len;
1627 text_start = pos;
1628 continue;
1629 }
1630
1631 if byte == b'<'
1633 && config.extensions.native_spans
1634 && let Some((len, content, attributes)) = try_parse_native_span(&text[pos..])
1635 {
1636 if pos > text_start {
1637 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1638 }
1639 log::trace!("Matched native span at pos {}", pos);
1640 emit_native_span(builder, content, &attributes, config);
1641 pos += len;
1642 text_start = pos;
1643 continue;
1644 }
1645
1646 if byte == b'<'
1650 && config.extensions.raw_html
1651 && let Some(len) = try_parse_inline_html(&text[pos..])
1652 {
1653 if pos > text_start {
1654 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1655 }
1656 log::trace!("Matched inline raw HTML at pos {}", pos);
1657 emit_inline_html(builder, &text[pos..pos + len]);
1658 pos += len;
1659 text_start = pos;
1660 continue;
1661 }
1662
1663 if byte == b'!' && pos + 1 < text.len() && text.as_bytes()[pos + 1] == b'[' {
1665 if let Some((len, alt_text, dest, attributes)) = try_parse_inline_image(&text[pos..]) {
1667 if pos > text_start {
1668 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1669 }
1670 log::trace!("Matched inline image at pos {}", pos);
1671 emit_inline_image(
1672 builder,
1673 &text[pos..pos + len],
1674 alt_text,
1675 dest,
1676 attributes,
1677 config,
1678 );
1679 pos += len;
1680 text_start = pos;
1681 continue;
1682 }
1683
1684 if config.extensions.reference_links {
1686 let allow_shortcut = config.extensions.shortcut_reference_links;
1687 if let Some((len, alt_text, reference, is_implicit)) =
1688 try_parse_reference_image(&text[pos..], allow_shortcut)
1689 {
1690 if pos > text_start {
1691 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1692 }
1693 log::trace!("Matched reference image at pos {}", pos);
1694 emit_reference_image(builder, alt_text, &reference, is_implicit, config);
1695 pos += len;
1696 text_start = pos;
1697 continue;
1698 }
1699 }
1700 }
1701
1702 if byte == b'[' {
1704 if config.extensions.footnotes
1706 && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1707 {
1708 if pos > text_start {
1709 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1710 }
1711 log::trace!("Matched footnote reference at pos {}", pos);
1712 emit_footnote_reference(builder, &id);
1713 pos += len;
1714 text_start = pos;
1715 continue;
1716 }
1717
1718 if config.extensions.inline_links
1720 && let Some((len, link_text, dest, attributes)) = try_parse_inline_link(
1721 &text[pos..],
1722 config.dialect == crate::options::Dialect::CommonMark,
1723 )
1724 {
1725 if pos > text_start {
1726 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1727 }
1728 log::trace!("Matched inline link at pos {}", pos);
1729 emit_inline_link(
1730 builder,
1731 &text[pos..pos + len],
1732 link_text,
1733 dest,
1734 attributes,
1735 config,
1736 );
1737 pos += len;
1738 text_start = pos;
1739 continue;
1740 }
1741
1742 if config.extensions.reference_links {
1744 let allow_shortcut = config.extensions.shortcut_reference_links;
1745 if let Some((len, link_text, reference, is_implicit)) =
1746 try_parse_reference_link(&text[pos..], allow_shortcut)
1747 {
1748 if pos > text_start {
1749 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1750 }
1751 log::trace!("Matched reference link at pos {}", pos);
1752 emit_reference_link(builder, link_text, &reference, is_implicit, config);
1753 pos += len;
1754 text_start = pos;
1755 continue;
1756 }
1757 }
1758
1759 if config.extensions.citations
1761 && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1762 {
1763 if pos > text_start {
1764 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1765 }
1766 log::trace!("Matched bracketed citation at pos {}", pos);
1767 emit_bracketed_citation(builder, content);
1768 pos += len;
1769 text_start = pos;
1770 continue;
1771 }
1772 }
1773
1774 if byte == b'['
1777 && config.extensions.bracketed_spans
1778 && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1779 {
1780 if pos > text_start {
1781 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1782 }
1783 log::trace!("Matched bracketed span at pos {}", pos);
1784 emit_bracketed_span(builder, &text_content, &attrs, config);
1785 pos += len;
1786 text_start = pos;
1787 continue;
1788 }
1789
1790 if byte == b'@'
1792 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1793 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1794 {
1795 let is_crossref =
1796 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1797 if is_crossref || config.extensions.citations {
1798 if pos > text_start {
1799 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1800 }
1801 if is_crossref {
1802 log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1803 super::citations::emit_crossref(builder, key, has_suppress);
1804 } else {
1805 log::trace!("Matched bare citation at pos {}: {}", pos, &key);
1806 emit_bare_citation(builder, key, has_suppress);
1807 }
1808 pos += len;
1809 text_start = pos;
1810 continue;
1811 }
1812 }
1813
1814 if byte == b'-'
1816 && pos + 1 < text.len()
1817 && text.as_bytes()[pos + 1] == b'@'
1818 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1819 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1820 {
1821 let is_crossref =
1822 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1823 if is_crossref || config.extensions.citations {
1824 if pos > text_start {
1825 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1826 }
1827 if is_crossref {
1828 log::trace!("Matched Quarto crossref at pos {}: {}", pos, &key);
1829 super::citations::emit_crossref(builder, key, has_suppress);
1830 } else {
1831 log::trace!("Matched suppress-author citation at pos {}: {}", pos, &key);
1832 emit_bare_citation(builder, key, has_suppress);
1833 }
1834 pos += len;
1835 text_start = pos;
1836 continue;
1837 }
1838 }
1839
1840 if byte == b'*' || byte == b'_' {
1842 let bytes = text.as_bytes();
1844 let mut delim_count = 0;
1845 while pos + delim_count < bytes.len() && bytes[pos + delim_count] == byte {
1846 delim_count += 1;
1847 }
1848
1849 if pos > text_start {
1851 log::trace!(
1852 "Emitting TEXT before delimiter: {:?}",
1853 &text[text_start..pos]
1854 );
1855 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1856 text_start = pos; }
1858
1859 let emphasis_result = if nested_emphasis {
1862 try_parse_emphasis_nested(text, pos, end, config, builder)
1863 } else {
1864 try_parse_emphasis(text, pos, end, config, builder)
1865 };
1866
1867 if let Some((consumed, _)) = emphasis_result {
1868 log::trace!(
1870 "Parsed emphasis, consumed {} bytes from pos {}",
1871 consumed,
1872 pos
1873 );
1874 pos += consumed;
1875 text_start = pos;
1876 } else {
1877 log::trace!(
1880 "Failed to parse emphasis at pos {}, skipping {} delimiters as literal",
1881 pos,
1882 delim_count
1883 );
1884 pos += delim_count;
1885 }
1887 continue;
1888 }
1889
1890 if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1892 let text_before = &text[text_start..pos];
1893
1894 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1896 if trailing_spaces >= 2 {
1897 let text_content = &text_before[..text_before.len() - trailing_spaces];
1899 if !text_content.is_empty() {
1900 builder.token(SyntaxKind::TEXT.into(), text_content);
1901 }
1902 let spaces = " ".repeat(trailing_spaces);
1903 builder.token(
1904 SyntaxKind::HARD_LINE_BREAK.into(),
1905 &format!("{}\r\n", spaces),
1906 );
1907 pos += 2;
1908 text_start = pos;
1909 continue;
1910 }
1911
1912 if config.extensions.hard_line_breaks {
1914 if !text_before.is_empty() {
1915 builder.token(SyntaxKind::TEXT.into(), text_before);
1916 }
1917 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1918 pos += 2;
1919 text_start = pos;
1920 continue;
1921 }
1922
1923 if !text_before.is_empty() {
1925 builder.token(SyntaxKind::TEXT.into(), text_before);
1926 }
1927 builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1928 pos += 2;
1929 text_start = pos;
1930 continue;
1931 }
1932
1933 if byte == b'\n' {
1934 let text_before = &text[text_start..pos];
1935
1936 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1938 if trailing_spaces >= 2 {
1939 let text_content = &text_before[..text_before.len() - trailing_spaces];
1941 if !text_content.is_empty() {
1942 builder.token(SyntaxKind::TEXT.into(), text_content);
1943 }
1944 let spaces = " ".repeat(trailing_spaces);
1945 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1946 pos += 1;
1947 text_start = pos;
1948 continue;
1949 }
1950
1951 if config.extensions.hard_line_breaks {
1953 if !text_before.is_empty() {
1954 builder.token(SyntaxKind::TEXT.into(), text_before);
1955 }
1956 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1957 pos += 1;
1958 text_start = pos;
1959 continue;
1960 }
1961
1962 if !text_before.is_empty() {
1964 builder.token(SyntaxKind::TEXT.into(), text_before);
1965 }
1966 builder.token(SyntaxKind::NEWLINE.into(), "\n");
1967 pos += 1;
1968 text_start = pos;
1969 continue;
1970 }
1971
1972 pos = advance_char_boundary(text, pos, end);
1974 }
1975
1976 if pos > text_start && text_start < end {
1978 log::trace!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1979 builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1980 }
1981
1982 log::trace!("parse_inline_range complete: start={}, end={}", start, end);
1983}
1984
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};
    use rowan::GreenNode;

    /// Returns `true` when at least one node of kind `inner` has a strict
    /// ancestor of kind `outer` somewhere below `root`.
    ///
    /// Used by the nesting tests so that "X is inside Y" is verified through
    /// the parent chain rather than by traversal order (which would also
    /// accept siblings).
    fn is_nested_within(root: &SyntaxNode, inner: SyntaxKind, outer: SyntaxKind) -> bool {
        root.descendants()
            .filter(|n| n.kind() == inner)
            // `ancestors()` starts at the node itself; skip it to look at real parents only.
            .any(|n| n.ancestors().skip(1).any(|a| a.kind() == outer))
    }

    /// `*test*` parsed through the recursive entry point is lossless and
    /// produces an EMPHASIS node.
    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_text_recursive(&mut builder, text, &config);

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        // Losslessness: the tree must reproduce the input byte-for-byte.
        assert_eq!(node.text().to_string(), text);

        let has_emph = node.descendants().any(|n| n.kind() == SyntaxKind::EMPHASIS);
        assert!(has_emph, "Should have EMPHASIS node");
    }

    /// `*foo **bar** baz*` wrapped in a PARAGRAPH yields both EMPHASIS and
    /// STRONG nodes and stays lossless.
    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        let has_emph = node.descendants().any(|n| n.kind() == SyntaxKind::EMPHASIS);
        let has_strong = node.descendants().any(|n| n.kind() == SyntaxKind::STRONG);

        assert!(has_emph, "Should have EMPHASIS node");
        assert!(has_strong, "Should have STRONG node");
    }

    /// Calling `try_parse_emphasis` directly on `*test*` returns
    /// `Some((6, 1))` and emits an EMPHASIS node as the tree root.
    #[test]
    fn test_parse_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);

        assert_eq!(result, Some((6, 1)));

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.kind(), SyntaxKind::EMPHASIS);
        assert_eq!(node.text().to_string(), text);
    }

    /// `parse_inline_range` over `*foo **bar** baz*` yields both EMPHASIS and
    /// STRONG nodes and stays lossless.
    #[test]
    fn test_parse_nested_emphasis_strong() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_range(text, 0, text.len(), &config, &mut builder);

        let green = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        let has_emph = node.descendants().any(|n| n.kind() == SyntaxKind::EMPHASIS);
        let has_strong = node.descendants().any(|n| n.kind() == SyntaxKind::STRONG);

        assert!(has_emph, "Should have EMPHASIS node");
        assert!(has_strong, "Should have STRONG node");
    }

    /// `***foo* bar**` must produce an EMPHASIS node nested inside a STRONG
    /// node.
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        // Debug dump is kept for the failure message below.
        let structure = format!("{:#?}", node);

        assert!(structure.contains("STRONG"), "Should have STRONG node");
        assert!(structure.contains("EMPHASIS"), "Should have EMPHASIS node");

        // Check real containment via the parent chain, not traversal order.
        let found_emph_after_strong =
            is_nested_within(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG);

        assert!(
            found_emph_after_strong,
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// `***foo** bar*` must produce a STRONG node nested inside an EMPHASIS
    /// node.
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        // Debug dump is kept for the failure message below.
        let structure = format!("{:#?}", node);

        assert!(structure.contains("EMPHASIS"), "Should have EMPHASIS node");
        assert!(structure.contains("STRONG"), "Should have STRONG node");

        // Check real containment via the parent chain, not traversal order.
        let found_strong_after_emph =
            is_nested_within(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS);

        assert!(
            found_strong_after_emph,
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// With `quarto_crossrefs` enabled, `{#eq-einstein}` trailing a display
    /// math span is emitted as an ATTRIBUTE node rather than as plain TEXT.
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        config.extensions.quarto_crossrefs = true;

        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        let has_display_math = node
            .descendants()
            .any(|n| n.kind() == SyntaxKind::DISPLAY_MATH);
        assert!(has_display_math, "Should have DISPLAY_MATH node");

        let has_attributes = node
            .descendants()
            .any(|n| n.kind() == SyntaxKind::ATTRIBUTE);
        assert!(
            has_attributes,
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // TEXT is emitted as a *token*, so the element right after the math
        // node has to be inspected with `next_sibling_or_token()`;
        // `next_sibling()` only yields nodes and would never see a TEXT token.
        let math_followed_by_text = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::DISPLAY_MATH)
            .any(|math| {
                math.next_sibling_or_token()
                    .and_then(|element| element.into_token())
                    .is_some_and(|token| {
                        token.kind() == SyntaxKind::TEXT
                            && token.text().contains("{#eq-einstein}")
                    })
            });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }

    /// Under the GFM flavor, an inline link yields exactly one LINK node; the
    /// destination URL must not additionally be turned into a bare-URI link.
    #[test]
    fn test_parse_inline_text_gfm_inline_link_destination_not_autolinked() {
        use crate::options::{Dialect, Extensions, Flavor};

        let config = ParserOptions {
            flavor: Flavor::Gfm,
            dialect: Dialect::for_flavor(Flavor::Gfm),
            extensions: Extensions::for_flavor(Flavor::Gfm),
            ..ParserOptions::default()
        };

        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(
            &mut builder,
            "Second Link [link_text](https://link.com)",
            &config,
        );
        builder.finish_node();
        let green = builder.finish();
        let root = SyntaxNode::new_root(green);

        let links: Vec<_> = root
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::LINK)
            .collect();
        assert_eq!(
            links.len(),
            1,
            "Expected exactly one LINK node for inline link, not nested bare URI autolink"
        );

        let link = links[0].clone();
        let mut link_text = None::<String>;
        let mut link_dest = None::<String>;

        // Pick out the text and destination children of the single LINK node.
        for child in link.children() {
            match child.kind() {
                SyntaxKind::LINK_TEXT => link_text = Some(child.text().to_string()),
                SyntaxKind::LINK_DEST => link_dest = Some(child.text().to_string()),
                _ => {}
            }
        }

        assert_eq!(link_text.as_deref(), Some("link_text"));
        assert_eq!(link_dest.as_deref(), Some("https://link.com"));
    }

    /// A lone multi-byte character with `autolink_bare_uris` enabled must be
    /// parsed without panicking and reproduced losslessly.
    #[test]
    fn test_autolink_bare_uri_utf8_boundary_safe() {
        let text = "§";
        let mut config = ParserOptions::default();
        config.extensions.autolink_bare_uris = true;
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);
        assert_eq!(node.text().to_string(), text);
    }

    /// Emphasis around a multi-byte character (`*§*`) parses to a single
    /// EMPHASIS node covering the whole input.
    #[test]
    fn test_parse_emphasis_unicode_content_no_panic() {
        let text = "*§*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);
        assert_eq!(result, Some((text.len(), 1)));

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);
        assert_eq!(node.kind(), SyntaxKind::EMPHASIS);
        assert_eq!(node.text().to_string(), text);
    }
}
2325
/// `**bold with *italic***`: the `***` closer must end both the inner
/// emphasis and the outer strong, giving a STRONG root with a direct
/// EMPHASIS child.
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "**bold with *italic***";
    let options = ParserOptions::default();
    let mut green_builder = GreenNodeBuilder::new();

    parse_inline_range(input, 0, input.len(), &options, &mut green_builder);

    let tree: GreenNode = green_builder.finish();
    let root = SyntaxNode::new_root(tree);

    // The tree must round-trip the source text exactly.
    assert_eq!(root.text().to_string(), input, "Should be lossless");

    let root_kind = root.kind();
    assert_eq!(
        root_kind,
        SyntaxKind::STRONG,
        "Root should be STRONG, got: {:?}",
        root_kind
    );

    // Only direct children are inspected here, not arbitrary descendants.
    let emphasis_child = root
        .children()
        .find(|child| child.kind() == SyntaxKind::EMPHASIS);
    assert!(
        emphasis_child.is_some(),
        "STRONG should contain EMPHASIS node"
    );
}
2361
/// `*foo *` (space before the closing delimiter) is expected to be accepted
/// as emphasis: `try_parse_emphasis` returns `Some((6, 1))` and the root of
/// the emitted tree is an EMPHASIS node.
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "*foo *";
    let options = ParserOptions::default();
    let mut green_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut green_builder);

    // First tuple element is the consumed byte length; the second is
    // presumably the delimiter count - confirm against `try_parse_emphasis`.
    let expected = Some((6, 1));
    assert_eq!(
        outcome, expected,
        "Should parse as emphasis, result: {:?}",
        outcome
    );

    let tree: GreenNode = green_builder.finish();
    let root = SyntaxNode::new_root(tree);

    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    // Lossless round-trip of the source text.
    assert_eq!(root.text().to_string(), input);
}
2396
/// `***foo** bar **baz***` should produce exactly one EMPHASIS node whose
/// direct children include two STRONG nodes, while staying lossless.
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***foo** bar **baz***";
    let options = ParserOptions::default();
    let mut green_builder = GreenNodeBuilder::new();

    parse_inline_range(input, 0, input.len(), &options, &mut green_builder);

    let tree: GreenNode = green_builder.finish();
    let root = SyntaxNode::new_root(tree);

    let all_emphasis: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    let emphasis_count = all_emphasis.len();
    assert_eq!(
        emphasis_count, 1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphasis_count
    );

    // Safe to index: the assertion above guarantees exactly one element.
    let only_emphasis = &all_emphasis[0];
    let strong_child_count = only_emphasis
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_child_count, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_child_count
    );

    // Lossless round-trip of the source text.
    assert_eq!(root.text().to_string(), input);
}
2443
/// `***foo* bar *baz***` should produce exactly one STRONG node whose direct
/// children include two EMPHASIS nodes, while staying lossless.
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***foo* bar *baz***";
    let options = ParserOptions::default();
    let mut green_builder = GreenNodeBuilder::new();

    parse_inline_range(input, 0, input.len(), &options, &mut green_builder);

    let tree: GreenNode = green_builder.finish();
    let root = SyntaxNode::new_root(tree);

    let all_strong: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    let strong_count = all_strong.len();
    assert_eq!(
        strong_count, 1,
        "Should have exactly one STRONG node, found: {}",
        strong_count
    );

    // Safe to index: the assertion above guarantees exactly one element.
    let only_strong = &all_strong[0];
    let emphasis_child_count = only_strong
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_child_count, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_child_count
    );

    // Lossless round-trip of the source text.
    assert_eq!(root.text().to_string(), input);
}
2490
/// Single-delimiter emphasis may span a line break: `*text on\nline two*`
/// parses as one EMPHASIS node that keeps the newline.
#[test]
fn test_parse_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "*text on\nline two*";
    let options = ParserOptions::default();
    let mut green_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut green_builder);

    // The whole input must be consumed.
    let expected = Some((input.len(), 1));
    assert_eq!(outcome, expected, "Emphasis should parse multiline content");

    let tree: GreenNode = green_builder.finish();
    let root = SyntaxNode::new_root(tree);

    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
2526
/// Double-delimiter strong emphasis may span a line break:
/// `**strong on\nline two**` parses as one STRONG node that keeps the
/// newline.
#[test]
fn test_parse_strong_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "**strong on\nline two**";
    let options = ParserOptions::default();
    let mut green_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut green_builder);

    // The whole input must be consumed.
    let expected = Some((input.len(), 2));
    assert_eq!(
        outcome, expected,
        "Strong emphasis should parse multiline content"
    );

    let tree: GreenNode = green_builder.finish();
    let root = SyntaxNode::new_root(tree);

    assert_eq!(root.kind(), SyntaxKind::STRONG);

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2561
/// Triple-delimiter emphasis may span a line break:
/// `***both on\nline two***` is consumed in full, contains a STRONG node and
/// keeps the newline.
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***both on\nline two***";
    let options = ParserOptions::default();
    let mut green_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut green_builder);

    // The whole input must be consumed.
    let expected = Some((input.len(), 3));
    assert_eq!(
        outcome, expected,
        "Triple emphasis should parse multiline content"
    );

    let tree: GreenNode = green_builder.finish();
    let root = SyntaxNode::new_root(tree);

    // `descendants()` includes the root itself, so STRONG may be the root or nested.
    let first_strong = root
        .descendants()
        .find(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(first_strong.is_some(), "Should have STRONG node");

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}