1use crate::options::ParserOptions;
24use crate::syntax::SyntaxKind;
25use rowan::GreenNodeBuilder;
26
27use super::bookdown::{
29 try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
30};
31use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
32use super::citations::{
33 emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
34 try_parse_bracketed_citation,
35};
36use super::code_spans::{emit_code_span, try_parse_code_span};
37use super::emoji::{emit_emoji, try_parse_emoji};
38use super::escapes::{EscapeType, emit_escape, try_parse_escape};
39use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
40use super::inline_footnotes::{
41 emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
42 try_parse_inline_footnote,
43};
44use super::latex::{parse_latex_command, try_parse_latex_command};
45use super::links::{
46 emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link, emit_reference_image,
47 emit_reference_link, try_parse_autolink, try_parse_bare_uri, try_parse_inline_image,
48 try_parse_inline_link, try_parse_reference_image, try_parse_reference_link,
49};
50use super::math::{
51 emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
52 emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
53 emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
54 try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
55 try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
56 try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
57};
58use super::native_spans::{emit_native_span, try_parse_native_span};
59use super::raw_inline::is_raw_inline;
60use super::shortcodes::{emit_shortcode, try_parse_shortcode};
61use super::strikeout::{emit_strikeout, try_parse_strikeout};
62use super::subscript::{emit_subscript, try_parse_subscript};
63use super::superscript::{emit_superscript, try_parse_superscript};
64
65pub fn parse_inline_text_recursive(
82 builder: &mut GreenNodeBuilder,
83 text: &str,
84 config: &ParserOptions,
85) {
86 log::debug!(
87 "Recursive inline parsing: {:?} ({} bytes)",
88 &text[..text.len().min(40)],
89 text.len()
90 );
91
92 parse_inline_range(text, 0, text.len(), config, builder);
93
94 log::debug!("Recursive inline parsing complete");
95}
96
97pub fn parse_inline_text(
103 builder: &mut GreenNodeBuilder,
104 text: &str,
105 config: &ParserOptions,
106 _allow_reference_links: bool,
107) {
108 log::trace!(
109 "Parsing inline text (recursive): {:?} ({} bytes)",
110 &text[..text.len().min(40)],
111 text.len()
112 );
113
114 parse_inline_text_recursive(builder, text, config);
116}
117
118pub fn try_parse_emphasis(
143 text: &str,
144 pos: usize,
145 end: usize,
146 config: &ParserOptions,
147 builder: &mut GreenNodeBuilder,
148) -> Option<(usize, usize)> {
149 let bytes = text.as_bytes();
150
151 if pos >= bytes.len() {
152 return None;
153 }
154
155 let delim_char = bytes[pos] as char;
156 if delim_char != '*' && delim_char != '_' {
157 return None;
158 }
159
160 let mut count = 0;
162 while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
163 count += 1;
164 }
165
166 let after_pos = pos + count;
167
168 log::debug!(
169 "try_parse_emphasis: '{}' x {} at pos {}",
170 delim_char,
171 count,
172 pos
173 );
174
175 if after_pos < text.len()
177 && let Some(next_char) = text[after_pos..].chars().next()
178 && next_char.is_whitespace()
179 {
180 log::trace!("Delimiter followed by whitespace, treating as literal");
181 return None;
182 }
183
184 if delim_char == '_'
187 && pos > 0
188 && let Some(prev_char) = text[..pos].chars().last()
189 && prev_char.is_alphanumeric()
190 {
191 log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
192 return None;
193 }
194
195 let result = match count {
197 1 => try_parse_one(text, pos, delim_char, end, config, builder),
198 2 => try_parse_two(text, pos, delim_char, end, config, builder),
199 3 => try_parse_three(text, pos, delim_char, end, config, builder),
200 _ => {
201 log::trace!("{} delimiters (4+), treating as literal", count);
203 None
204 }
205 };
206
207 result.map(|consumed| (consumed, count))
210}
211
212fn try_parse_emphasis_nested(
221 text: &str,
222 pos: usize,
223 end: usize,
224 config: &ParserOptions,
225 builder: &mut GreenNodeBuilder,
226) -> Option<(usize, usize)> {
227 let bytes = text.as_bytes();
228
229 if pos >= bytes.len() {
230 return None;
231 }
232
233 let delim_char = bytes[pos] as char;
234 if delim_char != '*' && delim_char != '_' {
235 return None;
236 }
237
238 let mut count = 0;
240 while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
241 count += 1;
242 }
243
244 log::debug!(
245 "try_parse_emphasis_nested: '{}' x {} at pos {}",
246 delim_char,
247 count,
248 pos
249 );
250
251 if delim_char == '_'
254 && pos > 0
255 && let Some(prev_char) = text[..pos].chars().last()
256 && prev_char.is_alphanumeric()
257 {
258 log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
259 return None;
260 }
261
262 let result = match count {
268 1 => try_parse_one(text, pos, delim_char, end, config, builder),
269 2 => try_parse_two(text, pos, delim_char, end, config, builder),
270 3 => try_parse_three(text, pos, delim_char, end, config, builder),
271 _ => {
272 log::trace!("{} delimiters (4+), treating as literal", count);
274 None
275 }
276 };
277
278 result.map(|consumed| (consumed, count))
279}
280
281fn try_parse_three(
286 text: &str,
287 pos: usize,
288 delim_char: char,
289 end: usize,
290 config: &ParserOptions,
291 builder: &mut GreenNodeBuilder,
292) -> Option<usize> {
293 let content_start = pos + 3;
294 let one = delim_char.to_string();
295 let two = one.repeat(2);
296
297 log::debug!("try_parse_three: '{}' x 3 at pos {}", delim_char, pos);
298
299 let mut search_pos = content_start;
303
304 loop {
305 let closer_start = match find_first_potential_ender(text, search_pos, delim_char, end) {
307 Some(p) => p,
308 None => {
309 log::trace!("No potential ender found for ***");
310 return None;
311 }
312 };
313
314 log::debug!("Potential ender at pos {}", closer_start);
315
316 let bytes = text.as_bytes();
318 let mut closer_count = 0;
319 let mut check_pos = closer_start;
320 while check_pos < bytes.len() && bytes[check_pos] == delim_char as u8 {
321 closer_count += 1;
322 check_pos += 1;
323 }
324
325 log::debug!(
326 "Found {} x {} at pos {}",
327 delim_char,
328 closer_count,
329 closer_start
330 );
331
332 if closer_count >= 3 && is_valid_ender(text, closer_start, delim_char, 3) {
336 log::debug!("Matched *** closer, emitting Strong[Emph[content]]");
337
338 builder.start_node(SyntaxKind::STRONG.into());
339 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
340
341 builder.start_node(SyntaxKind::EMPHASIS.into());
342 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
343 parse_inline_range_nested(text, content_start, closer_start, config, builder);
344 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
345 builder.finish_node(); builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
348 builder.finish_node(); return Some(closer_start + 3 - pos);
351 }
352
353 if closer_count >= 2 && is_valid_ender(text, closer_start, delim_char, 2) {
355 log::debug!("Matched ** closer, wrapping as Strong and continuing with one");
356
357 let continue_pos = closer_start + 2;
358
359 if let Some(final_closer_pos) =
360 parse_until_closer_with_nested_two(text, continue_pos, delim_char, 1, end, config)
361 {
362 log::debug!(
363 "Found * closer at pos {}, emitting Emph[Strong[...], ...]",
364 final_closer_pos
365 );
366
367 builder.start_node(SyntaxKind::EMPHASIS.into());
368 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
369
370 builder.start_node(SyntaxKind::STRONG.into());
371 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
372 parse_inline_range_nested(text, content_start, closer_start, config, builder);
373 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
374 builder.finish_node(); parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
378
379 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
380 builder.finish_node(); return Some(final_closer_pos + 1 - pos);
383 }
384
385 log::debug!("No * closer found after **, emitting * + STRONG");
387 builder.token(SyntaxKind::TEXT.into(), &one);
388
389 builder.start_node(SyntaxKind::STRONG.into());
390 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
391 parse_inline_range_nested(text, content_start, closer_start, config, builder);
392 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
393 builder.finish_node(); return Some(closer_start + 2 - pos);
396 }
397
398 if closer_count >= 1 && is_valid_ender(text, closer_start, delim_char, 1) {
400 log::debug!("Matched * closer, wrapping as Emph and continuing with two");
401
402 let continue_pos = closer_start + 1;
403
404 if let Some(final_closer_pos) =
405 parse_until_closer_with_nested_one(text, continue_pos, delim_char, 2, end, config)
406 {
407 log::debug!(
408 "Found ** closer at pos {}, emitting Strong[Emph[...], ...]",
409 final_closer_pos
410 );
411
412 builder.start_node(SyntaxKind::STRONG.into());
413 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
414
415 builder.start_node(SyntaxKind::EMPHASIS.into());
416 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
417 parse_inline_range_nested(text, content_start, closer_start, config, builder);
418 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
419 builder.finish_node(); parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
422
423 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
424 builder.finish_node(); return Some(final_closer_pos + 2 - pos);
427 }
428
429 log::debug!("No ** closer found after *, emitting ** + EMPH");
431 builder.token(SyntaxKind::TEXT.into(), &two);
432
433 builder.start_node(SyntaxKind::EMPHASIS.into());
434 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
435 parse_inline_range_nested(text, content_start, closer_start, config, builder);
436 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
437 builder.finish_node(); return Some(closer_start + 1 - pos);
440 }
441
442 log::debug!(
444 "No valid ender at pos {}, continuing search from {}",
445 closer_start,
446 closer_start + closer_count
447 );
448 search_pos = closer_start + closer_count;
449 }
450}
451
/// Returns the byte offset of the first unescaped `delim_char` in
/// `text[start..min(end, text.len())]`, or `None` if there is none.
///
/// A delimiter counts as escaped when it is immediately preceded by an odd
/// number of consecutive backslashes (counted back to the start of `text`,
/// not just to `start`).
fn find_first_potential_ender(
    text: &str,
    start: usize,
    delim_char: char,
    end: usize,
) -> Option<usize> {
    let bytes = text.as_bytes();
    let target = delim_char as u8;
    let limit = end.min(bytes.len());

    (start..limit).find(|&idx| {
        if bytes[idx] != target {
            return false;
        }
        let preceding_backslashes = bytes[..idx]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        // An even count means the backslashes escape each other, not the delimiter.
        preceding_backslashes % 2 == 0
    })
}
489
/// Checks whether exactly `delim_count` copies of `delim_char` starting at
/// byte offset `pos` can close an emphasis span.
///
/// Requirements:
/// * the run fits inside `text` and every byte in it equals the delimiter;
/// * the byte before `pos` and the byte after the run are not the delimiter
///   (i.e. the run is exactly `delim_count` long);
/// * for `_` only: the preceding character is not whitespace and the
///   following character is not alphanumeric.
fn is_valid_ender(text: &str, pos: usize, delim_char: char, delim_count: usize) -> bool {
    let bytes = text.as_bytes();
    let delim = delim_char as u8;
    let run_end = pos + delim_count;

    // The run must be in range and consist solely of delimiter bytes.
    match bytes.get(pos..run_end) {
        Some(run) if run.iter().all(|&b| b == delim) => {}
        _ => return false,
    }

    // Reject when the run is part of a longer delimiter run on either side.
    if pos > 0 && bytes[pos - 1] == delim {
        return false;
    }
    if bytes.get(run_end) == Some(&delim) {
        return false;
    }

    // Only underscores carry the extra flanking restrictions.
    if delim_char != '_' {
        return true;
    }

    let after_whitespace = text[..pos]
        .chars()
        .next_back()
        .is_some_and(|c| c.is_whitespace());
    if after_whitespace {
        return false;
    }

    let before_alphanumeric = text[run_end..]
        .chars()
        .next()
        .is_some_and(|c| c.is_alphanumeric());
    !before_alphanumeric
}
538
539fn try_parse_two(
544 text: &str,
545 pos: usize,
546 delim_char: char,
547 end: usize,
548 config: &ParserOptions,
549 builder: &mut GreenNodeBuilder,
550) -> Option<usize> {
551 let content_start = pos + 2;
552
553 log::debug!("try_parse_two: '{}' x 2 at pos {}", delim_char, pos);
554
555 if let Some(closer_pos) =
557 parse_until_closer_with_nested_one(text, content_start, delim_char, 2, end, config)
558 {
559 log::debug!("Found ** closer at pos {}", closer_pos);
560
561 builder.start_node(SyntaxKind::STRONG.into());
563 builder.token(SyntaxKind::STRONG_MARKER.into(), &text[pos..pos + 2]);
564 parse_inline_range_nested(text, content_start, closer_pos, config, builder);
565 builder.token(
566 SyntaxKind::STRONG_MARKER.into(),
567 &text[closer_pos..closer_pos + 2],
568 );
569 builder.finish_node(); return Some(closer_pos + 2 - pos);
572 }
573
574 log::trace!("No closer found for **");
576 None
577}
578
579fn try_parse_one(
590 text: &str,
591 pos: usize,
592 delim_char: char,
593 end: usize,
594 config: &ParserOptions,
595 builder: &mut GreenNodeBuilder,
596) -> Option<usize> {
597 let content_start = pos + 1;
598
599 log::debug!("try_parse_one: '{}' x 1 at pos {}", delim_char, pos);
600
601 if let Some(closer_pos) =
603 parse_until_closer_with_nested_two(text, content_start, delim_char, 1, end, config)
604 {
605 log::debug!("Found * closer at pos {}", closer_pos);
606
607 builder.start_node(SyntaxKind::EMPHASIS.into());
609 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &text[pos..pos + 1]);
610 parse_inline_range_nested(text, content_start, closer_pos, config, builder);
611 builder.token(
612 SyntaxKind::EMPHASIS_MARKER.into(),
613 &text[closer_pos..closer_pos + 1],
614 );
615 builder.finish_node(); return Some(closer_pos + 1 - pos);
618 }
619
620 log::trace!("No closer found for *");
622 None
623}
624
625fn parse_until_closer_with_nested_two(
644 text: &str,
645 start: usize,
646 delim_char: char,
647 delim_count: usize,
648 end: usize,
649 config: &ParserOptions,
650) -> Option<usize> {
651 let bytes = text.as_bytes();
652 let mut pos = start;
653
654 while pos < end.min(text.len()) {
655 if bytes[pos] == b'`'
656 && let Some(m) = try_parse_inline_executable(
657 &text[pos..],
658 config.extensions.rmarkdown_inline_code,
659 config.extensions.quarto_inline_code,
660 )
661 {
662 log::trace!(
663 "Skipping inline executable span of {} bytes at pos {}",
664 m.total_len,
665 pos
666 );
667 pos += m.total_len;
668 continue;
669 }
670
671 if bytes[pos] == b'`'
673 && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
674 {
675 log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
676 pos += len;
677 continue;
678 }
679
680 if bytes[pos] == b'$'
682 && let Some((len, _)) = try_parse_inline_math(&text[pos..])
683 {
684 log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
685 pos += len;
686 continue;
687 }
688
689 if bytes[pos] == b'['
691 && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..])
692 {
693 log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
694 pos += len;
695 continue;
696 }
697
698 if delim_count == 1
702 && pos + 2 <= text.len()
703 && bytes[pos] == delim_char as u8
704 && bytes[pos + 1] == delim_char as u8
705 {
706 let first_is_escaped = {
708 let mut backslash_count = 0;
709 let mut check_pos = pos;
710 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
711 backslash_count += 1;
712 check_pos -= 1;
713 }
714 backslash_count % 2 == 1
715 };
716
717 if first_is_escaped {
718 log::trace!(
721 "First * at pos {} is escaped, skipping to check second *",
722 pos
723 );
724 pos += 1;
725 continue;
726 }
727
728 let no_third_delim = pos + 2 >= bytes.len() || bytes[pos + 2] != delim_char as u8;
731
732 if no_third_delim {
733 log::trace!(
734 "try_parse_one: found ** at pos {}, attempting nested two",
735 pos
736 );
737
738 let mut temp_builder = GreenNodeBuilder::new();
741 if let Some(two_consumed) =
742 try_parse_two(text, pos, delim_char, end, config, &mut temp_builder)
743 {
744 log::debug!(
747 "Nested two succeeded, consumed {} bytes, continuing search",
748 two_consumed
749 );
750 pos += two_consumed;
751 continue;
752 }
753 log::trace!("Nested two failed at pos {}, entire one() should fail", pos);
759 return None;
760 }
761 }
762
763 if pos + delim_count <= text.len() {
765 let mut matches = true;
766 for i in 0..delim_count {
767 if bytes[pos + i] != delim_char as u8 {
768 matches = false;
769 break;
770 }
771 }
772
773 if matches {
774 let is_escaped = {
780 let mut backslash_count = 0;
781 let mut check_pos = pos;
782 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
783 backslash_count += 1;
784 check_pos -= 1;
785 }
786 backslash_count % 2 == 1 };
788
789 let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
793 let after_pos = pos + delim_count;
794 let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
795
796 if (at_run_start || at_run_end) && !is_escaped {
797 if delim_char == '_'
801 && pos > start
802 && let Some(prev_char) = text[..pos].chars().last()
803 && prev_char.is_whitespace()
804 {
805 log::trace!(
806 "Underscore closer preceded by whitespace at pos {}, not right-flanking",
807 pos
808 );
809 pos += 1;
811 continue;
812 }
813
814 log::trace!(
815 "Found exact {} x {} closer at pos {}",
816 delim_char,
817 delim_count,
818 pos
819 );
820 return Some(pos);
821 }
822 }
823 }
824
825 pos += 1;
828 }
829
830 None
831}
832
833fn parse_until_closer_with_nested_one(
853 text: &str,
854 start: usize,
855 delim_char: char,
856 delim_count: usize,
857 end: usize,
858 config: &ParserOptions,
859) -> Option<usize> {
860 let bytes = text.as_bytes();
861 let mut pos = start;
862
863 while pos < end.min(text.len()) {
864 if bytes[pos] == b'`'
865 && let Some(m) = try_parse_inline_executable(
866 &text[pos..],
867 config.extensions.rmarkdown_inline_code,
868 config.extensions.quarto_inline_code,
869 )
870 {
871 log::trace!(
872 "Skipping inline executable span of {} bytes at pos {}",
873 m.total_len,
874 pos
875 );
876 pos += m.total_len;
877 continue;
878 }
879
880 if bytes[pos] == b'`'
882 && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
883 {
884 log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
885 pos += len;
886 continue;
887 }
888
889 if bytes[pos] == b'$'
891 && let Some((len, _)) = try_parse_inline_math(&text[pos..])
892 {
893 log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
894 pos += len;
895 continue;
896 }
897
898 if bytes[pos] == b'['
900 && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..])
901 {
902 log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
903 pos += len;
904 continue;
905 }
906
907 if delim_count == 2 && pos < text.len() && bytes[pos] == delim_char as u8 {
914 let no_second_delim = pos + 1 >= bytes.len() || bytes[pos + 1] != delim_char as u8;
917
918 if no_second_delim {
919 let is_escaped = {
921 let mut backslash_count = 0;
922 let mut check_pos = pos;
923 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
924 backslash_count += 1;
925 check_pos -= 1;
926 }
927 backslash_count % 2 == 1
928 };
929
930 if is_escaped {
931 log::trace!("* at pos {} is escaped, skipping", pos);
933 pos += 1;
934 continue;
935 }
936
937 let after_delim = pos + 1;
940 let followed_by_whitespace = after_delim < text.len()
941 && text[after_delim..]
942 .chars()
943 .next()
944 .is_some_and(|c| c.is_whitespace());
945
946 if followed_by_whitespace {
947 log::trace!(
949 "* at pos {} followed by whitespace, not an opener, skipping",
950 pos
951 );
952 pos += 1;
953 continue;
954 }
955
956 log::trace!(
957 "try_parse_two: found * at pos {}, attempting nested one",
958 pos
959 );
960
961 let mut temp_builder = GreenNodeBuilder::new();
964 if let Some(one_consumed) =
965 try_parse_one(text, pos, delim_char, end, config, &mut temp_builder)
966 {
967 log::debug!(
970 "Nested one succeeded, consumed {} bytes, continuing search",
971 one_consumed
972 );
973 pos += one_consumed;
974 continue;
975 }
976
977 log::debug!(
983 "Nested one failed at pos {}, poisoning outer two (no closer found)",
984 pos
985 );
986 return None;
987 }
988 }
989
990 if pos + delim_count <= text.len() {
992 let mut matches = true;
993 for i in 0..delim_count {
994 if bytes[pos + i] != delim_char as u8 {
995 matches = false;
996 break;
997 }
998 }
999
1000 if matches {
1001 let is_escaped = {
1003 let mut backslash_count = 0;
1004 let mut check_pos = pos;
1005 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
1006 backslash_count += 1;
1007 check_pos -= 1;
1008 }
1009 backslash_count % 2 == 1 };
1011
1012 let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
1016 let after_pos = pos + delim_count;
1017 let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
1018
1019 if (at_run_start || at_run_end) && !is_escaped {
1020 if delim_char == '_'
1024 && pos > start
1025 && let Some(prev_char) = text[..pos].chars().last()
1026 && prev_char.is_whitespace()
1027 {
1028 log::trace!(
1029 "Underscore closer preceded by whitespace at pos {}, not right-flanking",
1030 pos
1031 );
1032 pos += 1;
1034 continue;
1035 }
1036
1037 log::trace!(
1038 "Found exact {} x {} closer at pos {}",
1039 delim_char,
1040 delim_count,
1041 pos
1042 );
1043 return Some(pos);
1044 }
1045 }
1046 }
1047
1048 pos += 1;
1051 }
1052
1053 None
1054}
1055
1056fn parse_inline_range(
1073 text: &str,
1074 start: usize,
1075 end: usize,
1076 config: &ParserOptions,
1077 builder: &mut GreenNodeBuilder,
1078) {
1079 parse_inline_range_impl(text, start, end, config, builder, false)
1080}
1081
1082fn parse_inline_range_nested(
1085 text: &str,
1086 start: usize,
1087 end: usize,
1088 config: &ParserOptions,
1089 builder: &mut GreenNodeBuilder,
1090) {
1091 parse_inline_range_impl(text, start, end, config, builder, true)
1092}
1093
/// Reports whether an emoji alias may start at byte offset `pos`.
///
/// Offset 0 is always a boundary. Otherwise the byte just before `pos` must
/// be neither an ASCII letter/digit nor an underscore.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    match pos.checked_sub(1) {
        None => true,
        Some(prev_idx) => {
            let prev = text.as_bytes()[prev_idx];
            !(prev.is_ascii_alphanumeric() || prev == b'_')
        }
    }
}
1103
1104fn parse_inline_range_impl(
1105 text: &str,
1106 start: usize,
1107 end: usize,
1108 config: &ParserOptions,
1109 builder: &mut GreenNodeBuilder,
1110 nested_emphasis: bool,
1111) {
1112 log::debug!(
1113 "parse_inline_range: start={}, end={}, text={:?}",
1114 start,
1115 end,
1116 &text[start..end]
1117 );
1118 let mut pos = start;
1119 let mut text_start = start;
1120
1121 while pos < end {
1122 let byte = text.as_bytes()[pos];
1123
1124 if byte == b'\\' {
1126 if config.extensions.tex_math_double_backslash {
1128 if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
1129 {
1130 if pos > text_start {
1131 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1132 }
1133 log::debug!("Matched double backslash display math at pos {}", pos);
1134 emit_double_backslash_display_math(builder, content);
1135 pos += len;
1136 text_start = pos;
1137 continue;
1138 }
1139
1140 if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
1142 if pos > text_start {
1143 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1144 }
1145 log::debug!("Matched double backslash inline math at pos {}", pos);
1146 emit_double_backslash_inline_math(builder, content);
1147 pos += len;
1148 text_start = pos;
1149 continue;
1150 }
1151 }
1152
1153 if config.extensions.tex_math_single_backslash {
1155 if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
1156 {
1157 if pos > text_start {
1158 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1159 }
1160 log::debug!("Matched single backslash display math at pos {}", pos);
1161 emit_single_backslash_display_math(builder, content);
1162 pos += len;
1163 text_start = pos;
1164 continue;
1165 }
1166
1167 if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
1169 if pos > text_start {
1170 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1171 }
1172 log::debug!("Matched single backslash inline math at pos {}", pos);
1173 emit_single_backslash_inline_math(builder, content);
1174 pos += len;
1175 text_start = pos;
1176 continue;
1177 }
1178 }
1179
1180 if config.extensions.raw_tex
1182 && let Some((len, begin_marker, content, end_marker)) =
1183 try_parse_math_environment(&text[pos..])
1184 {
1185 if pos > text_start {
1186 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1187 }
1188 log::debug!("Matched math environment at pos {}", pos);
1189 emit_display_math_environment(builder, begin_marker, content, end_marker);
1190 pos += len;
1191 text_start = pos;
1192 continue;
1193 }
1194
1195 if config.extensions.bookdown_references
1197 && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
1198 {
1199 if pos > text_start {
1200 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1201 }
1202 log::debug!("Matched bookdown reference at pos {}: {}", pos, label);
1203 super::citations::emit_bookdown_crossref(builder, label);
1204 pos += len;
1205 text_start = pos;
1206 continue;
1207 }
1208
1209 if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
1211 let escape_enabled = match escape_type {
1212 EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
1213 EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
1214 EscapeType::Literal => {
1215 const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!";
1216 BASE_ESCAPABLE.contains(ch) || config.extensions.all_symbols_escapable
1217 }
1218 };
1219 if !escape_enabled {
1220 pos += 1;
1223 continue;
1224 }
1225
1226 if pos > text_start {
1228 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1229 }
1230
1231 log::debug!("Matched escape at pos {}: \\{}", pos, ch);
1232 emit_escape(builder, ch, escape_type);
1233 pos += len;
1234 text_start = pos;
1235 continue;
1236 }
1237
1238 if config.extensions.raw_tex
1240 && let Some(len) = try_parse_latex_command(&text[pos..])
1241 {
1242 if pos > text_start {
1243 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1244 }
1245 log::debug!("Matched LaTeX command at pos {}", pos);
1246 parse_latex_command(builder, &text[pos..], len);
1247 pos += len;
1248 text_start = pos;
1249 continue;
1250 }
1251 }
1252
1253 if byte == b'{'
1255 && pos + 1 < text.len()
1256 && text.as_bytes()[pos + 1] == b'{'
1257 && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
1258 {
1259 if pos > text_start {
1260 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1261 }
1262 log::debug!("Matched shortcode at pos {}: {}", pos, &name);
1263 emit_shortcode(builder, &name, attrs);
1264 pos += len;
1265 text_start = pos;
1266 continue;
1267 }
1268
1269 if byte == b'`'
1271 && let Some(m) = try_parse_inline_executable(
1272 &text[pos..],
1273 config.extensions.rmarkdown_inline_code,
1274 config.extensions.quarto_inline_code,
1275 )
1276 {
1277 if pos > text_start {
1278 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1279 }
1280 log::debug!("Matched inline executable code at pos {}", pos);
1281 emit_inline_executable(builder, &m);
1282 pos += m.total_len;
1283 text_start = pos;
1284 continue;
1285 }
1286
1287 if byte == b'`'
1289 && let Some((len, content, backtick_count, attributes)) =
1290 try_parse_code_span(&text[pos..])
1291 {
1292 if pos > text_start {
1294 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1295 }
1296
1297 log::debug!(
1298 "Matched code span at pos {}: {} backticks",
1299 pos,
1300 backtick_count
1301 );
1302
1303 if let Some(ref attrs) = attributes
1305 && config.extensions.raw_attribute
1306 && let Some(format) = is_raw_inline(attrs)
1307 {
1308 use super::raw_inline::emit_raw_inline;
1309 log::debug!("Matched raw inline span at pos {}: format={}", pos, format);
1310 emit_raw_inline(builder, content, backtick_count, format);
1311 } else if !config.extensions.inline_code_attributes && attributes.is_some() {
1312 let code_span_len = backtick_count * 2 + content.len();
1313 emit_code_span(builder, content, backtick_count, None);
1314 pos += code_span_len;
1315 text_start = pos;
1316 continue;
1317 } else {
1318 emit_code_span(builder, content, backtick_count, attributes);
1319 }
1320
1321 pos += len;
1322 text_start = pos;
1323 continue;
1324 }
1325
1326 if byte == b':'
1328 && config.extensions.emoji
1329 && is_emoji_boundary(text, pos)
1330 && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
1331 {
1332 if pos > text_start {
1333 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1334 }
1335 log::debug!("Matched emoji at pos {}", pos);
1336 emit_emoji(builder, &text[pos..pos + len]);
1337 pos += len;
1338 text_start = pos;
1339 continue;
1340 }
1341
1342 if byte == b'^'
1344 && pos + 1 < text.len()
1345 && text.as_bytes()[pos + 1] == b'['
1346 && config.extensions.inline_footnotes
1347 && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
1348 {
1349 if pos > text_start {
1350 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1351 }
1352 log::debug!("Matched inline footnote at pos {}", pos);
1353 emit_inline_footnote(builder, content, config);
1354 pos += len;
1355 text_start = pos;
1356 continue;
1357 }
1358
1359 if byte == b'^'
1361 && config.extensions.superscript
1362 && let Some((len, content)) = try_parse_superscript(&text[pos..])
1363 {
1364 if pos > text_start {
1365 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1366 }
1367 log::debug!("Matched superscript at pos {}", pos);
1368 emit_superscript(builder, content, config);
1369 pos += len;
1370 text_start = pos;
1371 continue;
1372 }
1373
1374 if byte == b'(' && config.extensions.bookdown_references {
1376 if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
1377 if pos > text_start {
1378 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1379 }
1380 log::debug!("Matched bookdown definition at pos {}: {}", pos, label);
1381 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1382 pos += len;
1383 text_start = pos;
1384 continue;
1385 }
1386 if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
1387 if pos > text_start {
1388 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1389 }
1390 log::debug!("Matched bookdown text reference at pos {}: {}", pos, label);
1391 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1392 pos += len;
1393 text_start = pos;
1394 continue;
1395 }
1396 }
1397
1398 if byte == b'~'
1400 && config.extensions.subscript
1401 && let Some((len, content)) = try_parse_subscript(&text[pos..])
1402 {
1403 if pos > text_start {
1404 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1405 }
1406 log::debug!("Matched subscript at pos {}", pos);
1407 emit_subscript(builder, content, config);
1408 pos += len;
1409 text_start = pos;
1410 continue;
1411 }
1412
1413 if byte == b'~'
1415 && config.extensions.strikeout
1416 && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1417 {
1418 if pos > text_start {
1419 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1420 }
1421 log::debug!("Matched strikeout at pos {}", pos);
1422 emit_strikeout(builder, content, config);
1423 pos += len;
1424 text_start = pos;
1425 continue;
1426 }
1427
1428 if byte == b'$'
1430 && config.extensions.tex_math_gfm
1431 && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1432 {
1433 if pos > text_start {
1434 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1435 }
1436 log::debug!("Matched GFM inline math at pos {}", pos);
1437 emit_gfm_inline_math(builder, content);
1438 pos += len;
1439 text_start = pos;
1440 continue;
1441 }
1442
1443 if byte == b'$' && config.extensions.tex_math_dollars {
1445 if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1447 if pos > text_start {
1449 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1450 }
1451
1452 let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1453 log::debug!(
1454 "Matched display math at pos {}: {} dollars",
1455 pos,
1456 dollar_count
1457 );
1458
1459 let after_math = &text[pos + len..];
1461 let attr_len = if config.extensions.quarto_crossrefs {
1462 use crate::parser::utils::attributes::try_parse_trailing_attributes;
1463 if let Some((_attr_block, _)) = try_parse_trailing_attributes(after_math) {
1464 let trimmed_after = after_math.trim_start();
1465 if let Some(open_brace_pos) = trimmed_after.find('{') {
1466 let ws_before_brace = after_math.len() - trimmed_after.len();
1467 let attr_text_len = trimmed_after[open_brace_pos..]
1468 .find('}')
1469 .map(|close| close + 1)
1470 .unwrap_or(0);
1471 ws_before_brace + open_brace_pos + attr_text_len
1472 } else {
1473 0
1474 }
1475 } else {
1476 0
1477 }
1478 } else {
1479 0
1480 };
1481
1482 let total_len = len + attr_len;
1483 emit_display_math(builder, content, dollar_count);
1484
1485 if attr_len > 0 {
1487 use crate::parser::utils::attributes::{
1488 emit_attributes, try_parse_trailing_attributes,
1489 };
1490 let attr_text = &text[pos + len..pos + total_len];
1491 if let Some((attr_block, _text_before)) =
1492 try_parse_trailing_attributes(attr_text)
1493 {
1494 let trimmed_after = attr_text.trim_start();
1495 let ws_len = attr_text.len() - trimmed_after.len();
1496 if ws_len > 0 {
1497 builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1498 }
1499 emit_attributes(builder, &attr_block);
1500 }
1501 }
1502
1503 pos += total_len;
1504 text_start = pos;
1505 continue;
1506 }
1507
1508 if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1510 if pos > text_start {
1512 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1513 }
1514
1515 log::debug!("Matched inline math at pos {}", pos);
1516 emit_inline_math(builder, content);
1517 pos += len;
1518 text_start = pos;
1519 continue;
1520 }
1521
1522 if pos > text_start {
1525 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1526 }
1527 builder.token(SyntaxKind::TEXT.into(), "$");
1528 pos += 1;
1529 text_start = pos;
1530 continue;
1531 }
1532
1533 if byte == b'<'
1535 && config.extensions.autolinks
1536 && let Some((len, url)) = try_parse_autolink(&text[pos..])
1537 {
1538 if pos > text_start {
1539 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1540 }
1541 log::debug!("Matched autolink at pos {}", pos);
1542 emit_autolink(builder, &text[pos..pos + len], url);
1543 pos += len;
1544 text_start = pos;
1545 continue;
1546 }
1547
1548 if config.extensions.autolink_bare_uris
1549 && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1550 {
1551 if pos > text_start {
1552 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1553 }
1554 log::debug!("Matched bare URI at pos {}", pos);
1555 emit_bare_uri_link(builder, url, config);
1556 pos += len;
1557 text_start = pos;
1558 continue;
1559 }
1560
1561 if byte == b'<'
1563 && config.extensions.native_spans
1564 && let Some((len, content, attributes)) = try_parse_native_span(&text[pos..])
1565 {
1566 if pos > text_start {
1567 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1568 }
1569 log::debug!("Matched native span at pos {}", pos);
1570 emit_native_span(builder, content, &attributes, config);
1571 pos += len;
1572 text_start = pos;
1573 continue;
1574 }
1575
1576 if byte == b'!' && pos + 1 < text.len() && text.as_bytes()[pos + 1] == b'[' {
1578 if let Some((len, alt_text, dest, attributes)) = try_parse_inline_image(&text[pos..]) {
1580 if pos > text_start {
1581 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1582 }
1583 log::debug!("Matched inline image at pos {}", pos);
1584 emit_inline_image(
1585 builder,
1586 &text[pos..pos + len],
1587 alt_text,
1588 dest,
1589 attributes,
1590 config,
1591 );
1592 pos += len;
1593 text_start = pos;
1594 continue;
1595 }
1596
1597 if config.extensions.reference_links {
1599 let allow_shortcut = config.extensions.shortcut_reference_links;
1600 if let Some((len, alt_text, reference, is_implicit)) =
1601 try_parse_reference_image(&text[pos..], allow_shortcut)
1602 {
1603 if pos > text_start {
1604 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1605 }
1606 log::debug!("Matched reference image at pos {}", pos);
1607 emit_reference_image(builder, alt_text, &reference, is_implicit, config);
1608 pos += len;
1609 text_start = pos;
1610 continue;
1611 }
1612 }
1613 }
1614
1615 if byte == b'[' {
1617 if config.extensions.footnotes
1619 && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1620 {
1621 if pos > text_start {
1622 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1623 }
1624 log::debug!("Matched footnote reference at pos {}", pos);
1625 emit_footnote_reference(builder, &id);
1626 pos += len;
1627 text_start = pos;
1628 continue;
1629 }
1630
1631 if config.extensions.inline_links
1633 && let Some((len, link_text, dest, attributes)) =
1634 try_parse_inline_link(&text[pos..])
1635 {
1636 if pos > text_start {
1637 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1638 }
1639 log::debug!("Matched inline link at pos {}", pos);
1640 emit_inline_link(
1641 builder,
1642 &text[pos..pos + len],
1643 link_text,
1644 dest,
1645 attributes,
1646 config,
1647 );
1648 pos += len;
1649 text_start = pos;
1650 continue;
1651 }
1652
1653 if config.extensions.reference_links {
1655 let allow_shortcut = config.extensions.shortcut_reference_links;
1656 if let Some((len, link_text, reference, is_implicit)) =
1657 try_parse_reference_link(&text[pos..], allow_shortcut)
1658 {
1659 if pos > text_start {
1660 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1661 }
1662 log::debug!("Matched reference link at pos {}", pos);
1663 emit_reference_link(builder, link_text, &reference, is_implicit, config);
1664 pos += len;
1665 text_start = pos;
1666 continue;
1667 }
1668 }
1669
1670 if config.extensions.citations
1672 && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1673 {
1674 if pos > text_start {
1675 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1676 }
1677 log::debug!("Matched bracketed citation at pos {}", pos);
1678 emit_bracketed_citation(builder, content);
1679 pos += len;
1680 text_start = pos;
1681 continue;
1682 }
1683 }
1684
1685 if byte == b'['
1688 && config.extensions.bracketed_spans
1689 && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1690 {
1691 if pos > text_start {
1692 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1693 }
1694 log::debug!("Matched bracketed span at pos {}", pos);
1695 emit_bracketed_span(builder, &text_content, &attrs, config);
1696 pos += len;
1697 text_start = pos;
1698 continue;
1699 }
1700
1701 if byte == b'@'
1703 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1704 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1705 {
1706 let is_crossref =
1707 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1708 if is_crossref || config.extensions.citations {
1709 if pos > text_start {
1710 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1711 }
1712 if is_crossref {
1713 log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
1714 super::citations::emit_crossref(builder, key, has_suppress);
1715 } else {
1716 log::debug!("Matched bare citation at pos {}: {}", pos, &key);
1717 emit_bare_citation(builder, key, has_suppress);
1718 }
1719 pos += len;
1720 text_start = pos;
1721 continue;
1722 }
1723 }
1724
1725 if byte == b'-'
1727 && pos + 1 < text.len()
1728 && text.as_bytes()[pos + 1] == b'@'
1729 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1730 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1731 {
1732 let is_crossref =
1733 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1734 if is_crossref || config.extensions.citations {
1735 if pos > text_start {
1736 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1737 }
1738 if is_crossref {
1739 log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
1740 super::citations::emit_crossref(builder, key, has_suppress);
1741 } else {
1742 log::debug!("Matched suppress-author citation at pos {}: {}", pos, &key);
1743 emit_bare_citation(builder, key, has_suppress);
1744 }
1745 pos += len;
1746 text_start = pos;
1747 continue;
1748 }
1749 }
1750
1751 if byte == b'*' || byte == b'_' {
1753 let bytes = text.as_bytes();
1755 let mut delim_count = 0;
1756 while pos + delim_count < bytes.len() && bytes[pos + delim_count] == byte {
1757 delim_count += 1;
1758 }
1759
1760 if pos > text_start {
1762 log::debug!(
1763 "Emitting TEXT before delimiter: {:?}",
1764 &text[text_start..pos]
1765 );
1766 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1767 text_start = pos; }
1769
1770 let emphasis_result = if nested_emphasis {
1773 try_parse_emphasis_nested(text, pos, end, config, builder)
1774 } else {
1775 try_parse_emphasis(text, pos, end, config, builder)
1776 };
1777
1778 if let Some((consumed, _)) = emphasis_result {
1779 log::debug!(
1781 "Parsed emphasis, consumed {} bytes from pos {}",
1782 consumed,
1783 pos
1784 );
1785 pos += consumed;
1786 text_start = pos;
1787 } else {
1788 log::debug!(
1791 "Failed to parse emphasis at pos {}, skipping {} delimiters as literal",
1792 pos,
1793 delim_count
1794 );
1795 pos += delim_count;
1796 }
1798 continue;
1799 }
1800
1801 if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1803 let text_before = &text[text_start..pos];
1804
1805 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1807 if trailing_spaces >= 2 {
1808 let text_content = &text_before[..text_before.len() - trailing_spaces];
1810 if !text_content.is_empty() {
1811 builder.token(SyntaxKind::TEXT.into(), text_content);
1812 }
1813 let spaces = " ".repeat(trailing_spaces);
1814 builder.token(
1815 SyntaxKind::HARD_LINE_BREAK.into(),
1816 &format!("{}\r\n", spaces),
1817 );
1818 pos += 2;
1819 text_start = pos;
1820 continue;
1821 }
1822
1823 if config.extensions.hard_line_breaks {
1825 if !text_before.is_empty() {
1826 builder.token(SyntaxKind::TEXT.into(), text_before);
1827 }
1828 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1829 pos += 2;
1830 text_start = pos;
1831 continue;
1832 }
1833
1834 if !text_before.is_empty() {
1836 builder.token(SyntaxKind::TEXT.into(), text_before);
1837 }
1838 builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1839 pos += 2;
1840 text_start = pos;
1841 continue;
1842 }
1843
1844 if byte == b'\n' {
1845 let text_before = &text[text_start..pos];
1846
1847 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1849 if trailing_spaces >= 2 {
1850 let text_content = &text_before[..text_before.len() - trailing_spaces];
1852 if !text_content.is_empty() {
1853 builder.token(SyntaxKind::TEXT.into(), text_content);
1854 }
1855 let spaces = " ".repeat(trailing_spaces);
1856 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1857 pos += 1;
1858 text_start = pos;
1859 continue;
1860 }
1861
1862 if config.extensions.hard_line_breaks {
1864 if !text_before.is_empty() {
1865 builder.token(SyntaxKind::TEXT.into(), text_before);
1866 }
1867 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1868 pos += 1;
1869 text_start = pos;
1870 continue;
1871 }
1872
1873 if !text_before.is_empty() {
1875 builder.token(SyntaxKind::TEXT.into(), text_before);
1876 }
1877 builder.token(SyntaxKind::NEWLINE.into(), "\n");
1878 pos += 1;
1879 text_start = pos;
1880 continue;
1881 }
1882
1883 pos += 1;
1885 }
1886
1887 if pos > text_start && text_start < end {
1889 log::debug!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1890 builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1891 }
1892
1893 log::debug!("parse_inline_range complete: start={}, end={}", start, end);
1894}
1895
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};
    use rowan::GreenNode;

    /// Reports whether the tree rooted at `root` (root included) contains a node of `kind`.
    fn contains_kind(root: &SyntaxNode, kind: SyntaxKind) -> bool {
        root.descendants().any(|n| n.kind() == kind)
    }

    /// Reports whether some node of kind `inner` has a node of kind `outer` among its ancestors.
    ///
    /// This is a real containment check. Merely observing that an `outer` node is visited
    /// before an `inner` node during traversal would also succeed for two sibling nodes.
    fn is_nested_in(root: &SyntaxNode, inner: SyntaxKind, outer: SyntaxKind) -> bool {
        root.descendants()
            .filter(|n| n.kind() == inner)
            .any(|n| n.ancestors().any(|a| a.kind() == outer))
    }

    /// `*test*` fed through the recursive entry point yields a lossless tree with EMPHASIS.
    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_text_recursive(&mut builder, text, &config);

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        // Losslessness: the tree text must reproduce the input exactly.
        assert_eq!(node.text().to_string(), text);

        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    /// Strong nested inside emphasis produces both node kinds and stays lossless.
    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        // Wrap the inline content in a PARAGRAPH so the builder has a single root.
        builder.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// Calling `try_parse_emphasis` directly on `*test*` reports `Some((6, 1))` and builds
    /// an EMPHASIS root.
    #[test]
    fn test_parse_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);

        // First element is the byte length consumed; the second is presumably the
        // delimiter run length (1 for a single `*`) - confirm against try_parse_emphasis.
        assert_eq!(result, Some((6, 1)));

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.kind(), SyntaxKind::EMPHASIS);
        assert_eq!(node.text().to_string(), text);
    }

    /// `parse_inline_range` over the whole input handles strong nested in emphasis.
    #[test]
    fn test_parse_nested_emphasis_strong() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_range(text, 0, text.len(), &config, &mut builder);

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// `***foo* bar**`: the emphasis must sit inside the strong node.
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        // Debug dump kept only for the failure message below.
        let structure = format!("{:#?}", node);

        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );

        // Verify actual containment (EMPHASIS has a STRONG ancestor), not just ordering.
        assert!(
            is_nested_in(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG),
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// `***foo** bar*`: the strong node must sit inside the emphasis.
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        // Debug dump kept only for the failure message below.
        let structure = format!("{:#?}", node);

        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );

        // Verify actual containment (STRONG has an EMPHASIS ancestor), not just ordering.
        assert!(
            is_nested_in(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS),
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// With `quarto_crossrefs` enabled, a `{#id}` block trailing display math becomes an
    /// ATTRIBUTE node rather than plain TEXT.
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        config.extensions.quarto_crossrefs = true;

        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        assert!(
            contains_kind(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // TEXT is emitted through `builder.token`, so it is a token, not a node.
        // `next_sibling()` only yields nodes and could never observe it; look at the
        // element that immediately follows the math node instead.
        let math_followed_by_text = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::DISPLAY_MATH)
            .filter_map(|n| n.next_sibling_or_token())
            .filter_map(|element| element.into_token())
            .any(|token| {
                token.kind() == SyntaxKind::TEXT && token.text().contains("{#eq-einstein}")
            });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }
}
2157
/// A `**` opener with a nested `*` opener, both closed by one `***` run, must yield a
/// STRONG root that directly contains an EMPHASIS child.
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "**bold with *italic***";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();
    parse_inline_range(input, 0, input.len(), &options, &mut tree_builder);

    let green_root: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green_root);

    // The tree must reproduce the input byte-for-byte.
    let rendered = root.text().to_string();
    assert_eq!(rendered, input, "Should be lossless");

    let root_kind = root.kind();
    assert_eq!(
        root_kind,
        SyntaxKind::STRONG,
        "Root should be STRONG, got: {:?}",
        root_kind
    );

    // Only direct children are inspected here, not arbitrary descendants.
    let emphasis_child = root
        .children()
        .find(|child| child.kind() == SyntaxKind::EMPHASIS);
    assert!(
        emphasis_child.is_some(),
        "STRONG should contain EMPHASIS node"
    );
}
2193
/// `*foo *` (a space right before the closing `*`) is expected to parse as emphasis
/// spanning all six bytes.
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "*foo *";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut tree_builder);
    let expected = Some((6, 1));
    assert_eq!(
        outcome, expected,
        "Should parse as emphasis, result: {:?}",
        outcome
    );

    let green_root: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green_root);
    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    // Losslessness check.
    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
}
2228
/// `***foo** bar **baz***` should produce a single EMPHASIS node whose direct children
/// include exactly two STRONG nodes.
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***foo** bar **baz***";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();
    parse_inline_range(input, 0, input.len(), &options, &mut tree_builder);

    let green_root: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green_root);

    // Gather every EMPHASIS node in the tree (root included).
    let mut emphasis_found: Vec<SyntaxNode> = Vec::new();
    for candidate in root.descendants() {
        if candidate.kind() == SyntaxKind::EMPHASIS {
            emphasis_found.push(candidate);
        }
    }
    assert_eq!(
        emphasis_found.len(),
        1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphasis_found.len()
    );

    // Count STRONG nodes among the direct children of that EMPHASIS node.
    let strong_children = emphasis_found[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_children, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_children
    );

    // Losslessness check.
    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
}
2275
/// `***foo* bar *baz***` should produce a single STRONG node whose direct children
/// include exactly two EMPHASIS nodes.
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***foo* bar *baz***";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();
    parse_inline_range(input, 0, input.len(), &options, &mut tree_builder);

    let green_root: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green_root);

    // Gather every STRONG node in the tree (root included).
    let mut strong_found: Vec<SyntaxNode> = Vec::new();
    for candidate in root.descendants() {
        if candidate.kind() == SyntaxKind::STRONG {
            strong_found.push(candidate);
        }
    }
    assert_eq!(
        strong_found.len(),
        1,
        "Should have exactly one STRONG node, found: {}",
        strong_found.len()
    );

    // Count EMPHASIS nodes among the direct children of that STRONG node.
    let emphasis_children = strong_found[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_children, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_children
    );

    // Losslessness check.
    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
}
2322
/// Single-`*` emphasis whose content spans a line break must still parse as one
/// EMPHASIS node covering the whole input.
#[test]
fn test_parse_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "*text on\nline two*";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut tree_builder);
    let expected = Some((input.len(), 1));
    assert_eq!(outcome, expected, "Emphasis should parse multiline content");

    let green_root: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green_root);
    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    // The newline must survive the round trip through the tree.
    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
2358
/// `**` strong emphasis whose content spans a line break must still parse as one
/// STRONG node covering the whole input.
#[test]
fn test_parse_strong_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "**strong on\nline two**";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut tree_builder);
    let expected = Some((input.len(), 2));
    assert_eq!(
        outcome, expected,
        "Strong emphasis should parse multiline content"
    );

    let green_root: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green_root);
    assert_eq!(root.kind(), SyntaxKind::STRONG);

    // The newline must survive the round trip through the tree.
    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2393
/// `***` emphasis whose content spans a line break must consume the whole input and
/// produce a tree containing a STRONG node.
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;
    use rowan::GreenNode;

    let input = "***both on\nline two***";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut tree_builder);
    let expected = Some((input.len(), 3));
    assert_eq!(
        outcome, expected,
        "Triple emphasis should parse multiline content"
    );

    let green_root: GreenNode = tree_builder.finish();
    let root = SyntaxNode::new_root(green_root);

    // Only the presence of STRONG is checked; the root kind is left unconstrained.
    let strong_node = root
        .descendants()
        .find(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong_node.is_some(), "Should have STRONG node");

    // The newline must survive the round trip through the tree.
    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}