1use crate::options::ParserOptions;
24use crate::syntax::SyntaxKind;
25use rowan::GreenNodeBuilder;
26
27use super::bookdown::{
29 try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
30};
31use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
32use super::citations::{
33 emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
34 try_parse_bracketed_citation,
35};
36use super::code_spans::{emit_code_span, try_parse_code_span};
37use super::emoji::{emit_emoji, try_parse_emoji};
38use super::escapes::{EscapeType, emit_escape, try_parse_escape};
39use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
40use super::inline_footnotes::{
41 emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
42 try_parse_inline_footnote,
43};
44use super::latex::{parse_latex_command, try_parse_latex_command};
45use super::links::{
46 emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link, emit_reference_image,
47 emit_reference_link, try_parse_autolink, try_parse_bare_uri, try_parse_inline_image,
48 try_parse_inline_link, try_parse_reference_image, try_parse_reference_link,
49};
50use super::mark::{emit_mark, try_parse_mark};
51use super::math::{
52 emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
53 emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
54 emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
55 try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
56 try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
57 try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
58};
59use super::native_spans::{emit_native_span, try_parse_native_span};
60use super::raw_inline::is_raw_inline;
61use super::shortcodes::{emit_shortcode, try_parse_shortcode};
62use super::strikeout::{emit_strikeout, try_parse_strikeout};
63use super::subscript::{emit_subscript, try_parse_subscript};
64use super::superscript::{emit_superscript, try_parse_superscript};
65
66pub fn parse_inline_text_recursive(
83 builder: &mut GreenNodeBuilder,
84 text: &str,
85 config: &ParserOptions,
86) {
87 log::debug!(
88 "Recursive inline parsing: {:?} ({} bytes)",
89 &text[..text.len().min(40)],
90 text.len()
91 );
92
93 parse_inline_range(text, 0, text.len(), config, builder);
94
95 log::debug!("Recursive inline parsing complete");
96}
97
98pub fn parse_inline_text(
104 builder: &mut GreenNodeBuilder,
105 text: &str,
106 config: &ParserOptions,
107 _allow_reference_links: bool,
108) {
109 log::trace!(
110 "Parsing inline text (recursive): {:?} ({} bytes)",
111 &text[..text.len().min(40)],
112 text.len()
113 );
114
115 parse_inline_text_recursive(builder, text, config);
117}
118
119pub fn try_parse_emphasis(
144 text: &str,
145 pos: usize,
146 end: usize,
147 config: &ParserOptions,
148 builder: &mut GreenNodeBuilder,
149) -> Option<(usize, usize)> {
150 let bytes = text.as_bytes();
151
152 if pos >= bytes.len() {
153 return None;
154 }
155
156 let delim_char = bytes[pos] as char;
157 if delim_char != '*' && delim_char != '_' {
158 return None;
159 }
160
161 let mut count = 0;
163 while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
164 count += 1;
165 }
166
167 let after_pos = pos + count;
168
169 log::debug!(
170 "try_parse_emphasis: '{}' x {} at pos {}",
171 delim_char,
172 count,
173 pos
174 );
175
176 if after_pos < text.len()
178 && let Some(next_char) = text[after_pos..].chars().next()
179 && next_char.is_whitespace()
180 {
181 log::trace!("Delimiter followed by whitespace, treating as literal");
182 return None;
183 }
184
185 if delim_char == '_'
188 && pos > 0
189 && let Some(prev_char) = text[..pos].chars().last()
190 && prev_char.is_alphanumeric()
191 {
192 log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
193 return None;
194 }
195
196 let result = match count {
198 1 => try_parse_one(text, pos, delim_char, end, config, builder),
199 2 => try_parse_two(text, pos, delim_char, end, config, builder),
200 3 => try_parse_three(text, pos, delim_char, end, config, builder),
201 _ => {
202 log::trace!("{} delimiters (4+), treating as literal", count);
204 None
205 }
206 };
207
208 result.map(|consumed| (consumed, count))
211}
212
213fn try_parse_emphasis_nested(
222 text: &str,
223 pos: usize,
224 end: usize,
225 config: &ParserOptions,
226 builder: &mut GreenNodeBuilder,
227) -> Option<(usize, usize)> {
228 let bytes = text.as_bytes();
229
230 if pos >= bytes.len() {
231 return None;
232 }
233
234 let delim_char = bytes[pos] as char;
235 if delim_char != '*' && delim_char != '_' {
236 return None;
237 }
238
239 let mut count = 0;
241 while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
242 count += 1;
243 }
244
245 log::debug!(
246 "try_parse_emphasis_nested: '{}' x {} at pos {}",
247 delim_char,
248 count,
249 pos
250 );
251
252 if delim_char == '_'
255 && pos > 0
256 && let Some(prev_char) = text[..pos].chars().last()
257 && prev_char.is_alphanumeric()
258 {
259 log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
260 return None;
261 }
262
263 let result = match count {
269 1 => try_parse_one(text, pos, delim_char, end, config, builder),
270 2 => try_parse_two(text, pos, delim_char, end, config, builder),
271 3 => try_parse_three(text, pos, delim_char, end, config, builder),
272 _ => {
273 log::trace!("{} delimiters (4+), treating as literal", count);
275 None
276 }
277 };
278
279 result.map(|consumed| (consumed, count))
280}
281
282fn try_parse_three(
287 text: &str,
288 pos: usize,
289 delim_char: char,
290 end: usize,
291 config: &ParserOptions,
292 builder: &mut GreenNodeBuilder,
293) -> Option<usize> {
294 let content_start = pos + 3;
295 let one = delim_char.to_string();
296 let two = one.repeat(2);
297
298 log::debug!("try_parse_three: '{}' x 3 at pos {}", delim_char, pos);
299
300 let mut search_pos = content_start;
304
305 loop {
306 let closer_start = match find_first_potential_ender(text, search_pos, delim_char, end) {
308 Some(p) => p,
309 None => {
310 log::trace!("No potential ender found for ***");
311 return None;
312 }
313 };
314
315 log::debug!("Potential ender at pos {}", closer_start);
316
317 let bytes = text.as_bytes();
319 let mut closer_count = 0;
320 let mut check_pos = closer_start;
321 while check_pos < bytes.len() && bytes[check_pos] == delim_char as u8 {
322 closer_count += 1;
323 check_pos += 1;
324 }
325
326 log::debug!(
327 "Found {} x {} at pos {}",
328 delim_char,
329 closer_count,
330 closer_start
331 );
332
333 if closer_count >= 3 && is_valid_ender(text, closer_start, delim_char, 3) {
337 log::debug!("Matched *** closer, emitting Strong[Emph[content]]");
338
339 builder.start_node(SyntaxKind::STRONG.into());
340 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
341
342 builder.start_node(SyntaxKind::EMPHASIS.into());
343 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
344 parse_inline_range_nested(text, content_start, closer_start, config, builder);
345 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
346 builder.finish_node(); builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
349 builder.finish_node(); return Some(closer_start + 3 - pos);
352 }
353
354 if closer_count >= 2 && is_valid_ender(text, closer_start, delim_char, 2) {
356 log::debug!("Matched ** closer, wrapping as Strong and continuing with one");
357
358 let continue_pos = closer_start + 2;
359
360 if let Some(final_closer_pos) =
361 parse_until_closer_with_nested_two(text, continue_pos, delim_char, 1, end, config)
362 {
363 log::debug!(
364 "Found * closer at pos {}, emitting Emph[Strong[...], ...]",
365 final_closer_pos
366 );
367
368 builder.start_node(SyntaxKind::EMPHASIS.into());
369 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
370
371 builder.start_node(SyntaxKind::STRONG.into());
372 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
373 parse_inline_range_nested(text, content_start, closer_start, config, builder);
374 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
375 builder.finish_node(); parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
379
380 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
381 builder.finish_node(); return Some(final_closer_pos + 1 - pos);
384 }
385
386 log::debug!("No * closer found after **, emitting * + STRONG");
388 builder.token(SyntaxKind::TEXT.into(), &one);
389
390 builder.start_node(SyntaxKind::STRONG.into());
391 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
392 parse_inline_range_nested(text, content_start, closer_start, config, builder);
393 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
394 builder.finish_node(); return Some(closer_start + 2 - pos);
397 }
398
399 if closer_count >= 1 && is_valid_ender(text, closer_start, delim_char, 1) {
401 log::debug!("Matched * closer, wrapping as Emph and continuing with two");
402
403 let continue_pos = closer_start + 1;
404
405 if let Some(final_closer_pos) =
406 parse_until_closer_with_nested_one(text, continue_pos, delim_char, 2, end, config)
407 {
408 log::debug!(
409 "Found ** closer at pos {}, emitting Strong[Emph[...], ...]",
410 final_closer_pos
411 );
412
413 builder.start_node(SyntaxKind::STRONG.into());
414 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
415
416 builder.start_node(SyntaxKind::EMPHASIS.into());
417 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
418 parse_inline_range_nested(text, content_start, closer_start, config, builder);
419 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
420 builder.finish_node(); parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
423
424 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
425 builder.finish_node(); return Some(final_closer_pos + 2 - pos);
428 }
429
430 log::debug!("No ** closer found after *, emitting ** + EMPH");
432 builder.token(SyntaxKind::TEXT.into(), &two);
433
434 builder.start_node(SyntaxKind::EMPHASIS.into());
435 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
436 parse_inline_range_nested(text, content_start, closer_start, config, builder);
437 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
438 builder.finish_node(); return Some(closer_start + 1 - pos);
441 }
442
443 log::debug!(
445 "No valid ender at pos {}, continuing search from {}",
446 closer_start,
447 closer_start + closer_count
448 );
449 search_pos = closer_start + closer_count;
450 }
451}
452
/// Finds the first unescaped occurrence of `delim_char` in `text` at or after byte offset
/// `start` and before `end` (clamped to the length of `text`).
///
/// A delimiter counts as escaped when it is directly preceded by an odd number of
/// backslashes. Returns the byte offset of the delimiter, or `None` if there is none.
fn find_first_potential_ender(
    text: &str,
    start: usize,
    delim_char: char,
    end: usize,
) -> Option<usize> {
    let raw = text.as_bytes();
    let limit = end.min(raw.len());
    let delim = delim_char as u8;

    (start..limit).find(|&idx| {
        if raw[idx] != delim {
            return false;
        }
        // Count the unbroken run of backslashes right before the delimiter.
        let backslashes = raw[..idx]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        backslashes % 2 == 0
    })
}
490
/// Checks whether exactly `delim_count` copies of `delim_char` at byte offset `pos` can close
/// emphasis.
///
/// The run must fit inside `text`, consist only of the delimiter, and not be adjacent to a
/// further delimiter on either side. For `_` the run additionally must not be preceded by
/// whitespace nor followed by an alphanumeric character.
fn is_valid_ender(text: &str, pos: usize, delim_char: char, delim_count: usize) -> bool {
    let raw = text.as_bytes();
    let delim = delim_char as u8;
    let run_end = pos + delim_count;

    // `get` fails when the run would extend past the end of the text.
    let Some(candidate) = raw.get(pos..run_end) else {
        return false;
    };
    if candidate.iter().any(|&b| b != delim) {
        return false;
    }

    // Part of a longer run: a delimiter touches the candidate on the left or on the right.
    let delimiter_before = pos > 0 && raw[pos - 1] == delim;
    if delimiter_before {
        return false;
    }
    let delimiter_after = raw.get(run_end) == Some(&delim);
    if delimiter_after {
        return false;
    }

    if delim_char != '_' {
        return true;
    }

    let after_whitespace = pos > 0
        && text[..pos]
            .chars()
            .next_back()
            .is_some_and(char::is_whitespace);
    if after_whitespace {
        return false;
    }

    let before_alphanumeric = run_end < text.len()
        && text[run_end..]
            .chars()
            .next()
            .is_some_and(char::is_alphanumeric);
    !before_alphanumeric
}
539
540fn try_parse_two(
545 text: &str,
546 pos: usize,
547 delim_char: char,
548 end: usize,
549 config: &ParserOptions,
550 builder: &mut GreenNodeBuilder,
551) -> Option<usize> {
552 let content_start = pos + 2;
553
554 log::debug!("try_parse_two: '{}' x 2 at pos {}", delim_char, pos);
555
556 if let Some(closer_pos) =
558 parse_until_closer_with_nested_one(text, content_start, delim_char, 2, end, config)
559 {
560 log::debug!("Found ** closer at pos {}", closer_pos);
561
562 builder.start_node(SyntaxKind::STRONG.into());
564 builder.token(SyntaxKind::STRONG_MARKER.into(), &text[pos..pos + 2]);
565 parse_inline_range_nested(text, content_start, closer_pos, config, builder);
566 builder.token(
567 SyntaxKind::STRONG_MARKER.into(),
568 &text[closer_pos..closer_pos + 2],
569 );
570 builder.finish_node(); return Some(closer_pos + 2 - pos);
573 }
574
575 log::trace!("No closer found for **");
577 None
578}
579
580fn try_parse_one(
591 text: &str,
592 pos: usize,
593 delim_char: char,
594 end: usize,
595 config: &ParserOptions,
596 builder: &mut GreenNodeBuilder,
597) -> Option<usize> {
598 let content_start = pos + 1;
599
600 log::debug!("try_parse_one: '{}' x 1 at pos {}", delim_char, pos);
601
602 if let Some(closer_pos) =
604 parse_until_closer_with_nested_two(text, content_start, delim_char, 1, end, config)
605 {
606 log::debug!("Found * closer at pos {}", closer_pos);
607
608 builder.start_node(SyntaxKind::EMPHASIS.into());
610 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &text[pos..pos + 1]);
611 parse_inline_range_nested(text, content_start, closer_pos, config, builder);
612 builder.token(
613 SyntaxKind::EMPHASIS_MARKER.into(),
614 &text[closer_pos..closer_pos + 1],
615 );
616 builder.finish_node(); return Some(closer_pos + 1 - pos);
619 }
620
621 log::trace!("No closer found for *");
623 None
624}
625
626fn parse_until_closer_with_nested_two(
645 text: &str,
646 start: usize,
647 delim_char: char,
648 delim_count: usize,
649 end: usize,
650 config: &ParserOptions,
651) -> Option<usize> {
652 let bytes = text.as_bytes();
653 let mut pos = start;
654
655 while pos < end.min(text.len()) {
656 if bytes[pos] == b'`'
657 && let Some(m) = try_parse_inline_executable(
658 &text[pos..],
659 config.extensions.rmarkdown_inline_code,
660 config.extensions.quarto_inline_code,
661 )
662 {
663 log::trace!(
664 "Skipping inline executable span of {} bytes at pos {}",
665 m.total_len,
666 pos
667 );
668 pos += m.total_len;
669 continue;
670 }
671
672 if bytes[pos] == b'`'
674 && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
675 {
676 log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
677 pos += len;
678 continue;
679 }
680
681 if bytes[pos] == b'$'
683 && let Some((len, _)) = try_parse_inline_math(&text[pos..])
684 {
685 log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
686 pos += len;
687 continue;
688 }
689
690 if bytes[pos] == b'['
692 && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..])
693 {
694 log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
695 pos += len;
696 continue;
697 }
698
699 if delim_count == 1
703 && pos + 2 <= text.len()
704 && bytes[pos] == delim_char as u8
705 && bytes[pos + 1] == delim_char as u8
706 {
707 let first_is_escaped = {
709 let mut backslash_count = 0;
710 let mut check_pos = pos;
711 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
712 backslash_count += 1;
713 check_pos -= 1;
714 }
715 backslash_count % 2 == 1
716 };
717
718 if first_is_escaped {
719 log::trace!(
722 "First * at pos {} is escaped, skipping to check second *",
723 pos
724 );
725 pos += 1;
726 continue;
727 }
728
729 let no_third_delim = pos + 2 >= bytes.len() || bytes[pos + 2] != delim_char as u8;
732
733 if no_third_delim {
734 log::trace!(
735 "try_parse_one: found ** at pos {}, attempting nested two",
736 pos
737 );
738
739 let mut temp_builder = GreenNodeBuilder::new();
742 if let Some(two_consumed) =
743 try_parse_two(text, pos, delim_char, end, config, &mut temp_builder)
744 {
745 log::debug!(
748 "Nested two succeeded, consumed {} bytes, continuing search",
749 two_consumed
750 );
751 pos += two_consumed;
752 continue;
753 }
754 log::trace!("Nested two failed at pos {}, entire one() should fail", pos);
760 return None;
761 }
762 }
763
764 if pos + delim_count <= text.len() {
766 let mut matches = true;
767 for i in 0..delim_count {
768 if bytes[pos + i] != delim_char as u8 {
769 matches = false;
770 break;
771 }
772 }
773
774 if matches {
775 let is_escaped = {
781 let mut backslash_count = 0;
782 let mut check_pos = pos;
783 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
784 backslash_count += 1;
785 check_pos -= 1;
786 }
787 backslash_count % 2 == 1 };
789
790 let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
794 let after_pos = pos + delim_count;
795 let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
796
797 if (at_run_start || at_run_end) && !is_escaped {
798 if delim_char == '_'
802 && pos > start
803 && let Some(prev_char) = text[..pos].chars().last()
804 && prev_char.is_whitespace()
805 {
806 log::trace!(
807 "Underscore closer preceded by whitespace at pos {}, not right-flanking",
808 pos
809 );
810 pos += 1;
812 continue;
813 }
814
815 log::trace!(
816 "Found exact {} x {} closer at pos {}",
817 delim_char,
818 delim_count,
819 pos
820 );
821 return Some(pos);
822 }
823 }
824 }
825
826 pos += 1;
829 }
830
831 None
832}
833
834fn parse_until_closer_with_nested_one(
854 text: &str,
855 start: usize,
856 delim_char: char,
857 delim_count: usize,
858 end: usize,
859 config: &ParserOptions,
860) -> Option<usize> {
861 let bytes = text.as_bytes();
862 let mut pos = start;
863
864 while pos < end.min(text.len()) {
865 if bytes[pos] == b'`'
866 && let Some(m) = try_parse_inline_executable(
867 &text[pos..],
868 config.extensions.rmarkdown_inline_code,
869 config.extensions.quarto_inline_code,
870 )
871 {
872 log::trace!(
873 "Skipping inline executable span of {} bytes at pos {}",
874 m.total_len,
875 pos
876 );
877 pos += m.total_len;
878 continue;
879 }
880
881 if bytes[pos] == b'`'
883 && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
884 {
885 log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
886 pos += len;
887 continue;
888 }
889
890 if bytes[pos] == b'$'
892 && let Some((len, _)) = try_parse_inline_math(&text[pos..])
893 {
894 log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
895 pos += len;
896 continue;
897 }
898
899 if bytes[pos] == b'['
901 && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..])
902 {
903 log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
904 pos += len;
905 continue;
906 }
907
908 if delim_count == 2 && pos < text.len() && bytes[pos] == delim_char as u8 {
915 let no_second_delim = pos + 1 >= bytes.len() || bytes[pos + 1] != delim_char as u8;
918
919 if no_second_delim {
920 let is_escaped = {
922 let mut backslash_count = 0;
923 let mut check_pos = pos;
924 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
925 backslash_count += 1;
926 check_pos -= 1;
927 }
928 backslash_count % 2 == 1
929 };
930
931 if is_escaped {
932 log::trace!("* at pos {} is escaped, skipping", pos);
934 pos += 1;
935 continue;
936 }
937
938 let after_delim = pos + 1;
941 let followed_by_whitespace = after_delim < text.len()
942 && text[after_delim..]
943 .chars()
944 .next()
945 .is_some_and(|c| c.is_whitespace());
946
947 if followed_by_whitespace {
948 log::trace!(
950 "* at pos {} followed by whitespace, not an opener, skipping",
951 pos
952 );
953 pos += 1;
954 continue;
955 }
956
957 log::trace!(
958 "try_parse_two: found * at pos {}, attempting nested one",
959 pos
960 );
961
962 let mut temp_builder = GreenNodeBuilder::new();
965 if let Some(one_consumed) =
966 try_parse_one(text, pos, delim_char, end, config, &mut temp_builder)
967 {
968 log::debug!(
971 "Nested one succeeded, consumed {} bytes, continuing search",
972 one_consumed
973 );
974 pos += one_consumed;
975 continue;
976 }
977
978 log::debug!(
984 "Nested one failed at pos {}, poisoning outer two (no closer found)",
985 pos
986 );
987 return None;
988 }
989 }
990
991 if pos + delim_count <= text.len() {
993 let mut matches = true;
994 for i in 0..delim_count {
995 if bytes[pos + i] != delim_char as u8 {
996 matches = false;
997 break;
998 }
999 }
1000
1001 if matches {
1002 let is_escaped = {
1004 let mut backslash_count = 0;
1005 let mut check_pos = pos;
1006 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
1007 backslash_count += 1;
1008 check_pos -= 1;
1009 }
1010 backslash_count % 2 == 1 };
1012
1013 let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
1017 let after_pos = pos + delim_count;
1018 let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
1019
1020 if (at_run_start || at_run_end) && !is_escaped {
1021 if delim_char == '_'
1025 && pos > start
1026 && let Some(prev_char) = text[..pos].chars().last()
1027 && prev_char.is_whitespace()
1028 {
1029 log::trace!(
1030 "Underscore closer preceded by whitespace at pos {}, not right-flanking",
1031 pos
1032 );
1033 pos += 1;
1035 continue;
1036 }
1037
1038 log::trace!(
1039 "Found exact {} x {} closer at pos {}",
1040 delim_char,
1041 delim_count,
1042 pos
1043 );
1044 return Some(pos);
1045 }
1046 }
1047 }
1048
1049 pos += 1;
1052 }
1053
1054 None
1055}
1056
1057fn parse_inline_range(
1074 text: &str,
1075 start: usize,
1076 end: usize,
1077 config: &ParserOptions,
1078 builder: &mut GreenNodeBuilder,
1079) {
1080 parse_inline_range_impl(text, start, end, config, builder, false)
1081}
1082
1083fn parse_inline_range_nested(
1086 text: &str,
1087 start: usize,
1088 end: usize,
1089 config: &ParserOptions,
1090 builder: &mut GreenNodeBuilder,
1091) {
1092 parse_inline_range_impl(text, start, end, config, builder, true)
1093}
1094
/// Reports whether an emoji alias may start at byte offset `pos`.
///
/// Returns `true` at the start of the text, or when the byte right before `pos` is neither an
/// ASCII letter/digit nor `_`. Only that single byte is inspected.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    let Some(before) = pos.checked_sub(1) else {
        return true;
    };
    let prev = text.as_bytes()[before];
    !(prev.is_ascii_alphanumeric() || prev == b'_')
}
1104
1105fn parse_inline_range_impl(
1106 text: &str,
1107 start: usize,
1108 end: usize,
1109 config: &ParserOptions,
1110 builder: &mut GreenNodeBuilder,
1111 nested_emphasis: bool,
1112) {
1113 log::debug!(
1114 "parse_inline_range: start={}, end={}, text={:?}",
1115 start,
1116 end,
1117 &text[start..end]
1118 );
1119 let mut pos = start;
1120 let mut text_start = start;
1121
1122 while pos < end {
1123 let byte = text.as_bytes()[pos];
1124
1125 if byte == b'\\' {
1127 if config.extensions.tex_math_double_backslash {
1129 if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
1130 {
1131 if pos > text_start {
1132 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1133 }
1134 log::debug!("Matched double backslash display math at pos {}", pos);
1135 emit_double_backslash_display_math(builder, content);
1136 pos += len;
1137 text_start = pos;
1138 continue;
1139 }
1140
1141 if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
1143 if pos > text_start {
1144 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1145 }
1146 log::debug!("Matched double backslash inline math at pos {}", pos);
1147 emit_double_backslash_inline_math(builder, content);
1148 pos += len;
1149 text_start = pos;
1150 continue;
1151 }
1152 }
1153
1154 if config.extensions.tex_math_single_backslash {
1156 if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
1157 {
1158 if pos > text_start {
1159 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1160 }
1161 log::debug!("Matched single backslash display math at pos {}", pos);
1162 emit_single_backslash_display_math(builder, content);
1163 pos += len;
1164 text_start = pos;
1165 continue;
1166 }
1167
1168 if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
1170 if pos > text_start {
1171 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1172 }
1173 log::debug!("Matched single backslash inline math at pos {}", pos);
1174 emit_single_backslash_inline_math(builder, content);
1175 pos += len;
1176 text_start = pos;
1177 continue;
1178 }
1179 }
1180
1181 if config.extensions.raw_tex
1183 && let Some((len, begin_marker, content, end_marker)) =
1184 try_parse_math_environment(&text[pos..])
1185 {
1186 if pos > text_start {
1187 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1188 }
1189 log::debug!("Matched math environment at pos {}", pos);
1190 emit_display_math_environment(builder, begin_marker, content, end_marker);
1191 pos += len;
1192 text_start = pos;
1193 continue;
1194 }
1195
1196 if config.extensions.bookdown_references
1198 && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
1199 {
1200 if pos > text_start {
1201 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1202 }
1203 log::debug!("Matched bookdown reference at pos {}: {}", pos, label);
1204 super::citations::emit_bookdown_crossref(builder, label);
1205 pos += len;
1206 text_start = pos;
1207 continue;
1208 }
1209
1210 if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
1212 let escape_enabled = match escape_type {
1213 EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
1214 EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
1215 EscapeType::Literal => {
1216 const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!";
1217 BASE_ESCAPABLE.contains(ch) || config.extensions.all_symbols_escapable
1218 }
1219 };
1220 if !escape_enabled {
1221 pos += 1;
1224 continue;
1225 }
1226
1227 if pos > text_start {
1229 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1230 }
1231
1232 log::debug!("Matched escape at pos {}: \\{}", pos, ch);
1233 emit_escape(builder, ch, escape_type);
1234 pos += len;
1235 text_start = pos;
1236 continue;
1237 }
1238
1239 if config.extensions.raw_tex
1241 && let Some(len) = try_parse_latex_command(&text[pos..])
1242 {
1243 if pos > text_start {
1244 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1245 }
1246 log::debug!("Matched LaTeX command at pos {}", pos);
1247 parse_latex_command(builder, &text[pos..], len);
1248 pos += len;
1249 text_start = pos;
1250 continue;
1251 }
1252 }
1253
1254 if byte == b'{'
1256 && pos + 1 < text.len()
1257 && text.as_bytes()[pos + 1] == b'{'
1258 && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
1259 {
1260 if pos > text_start {
1261 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1262 }
1263 log::debug!("Matched shortcode at pos {}: {}", pos, &name);
1264 emit_shortcode(builder, &name, attrs);
1265 pos += len;
1266 text_start = pos;
1267 continue;
1268 }
1269
1270 if byte == b'`'
1272 && let Some(m) = try_parse_inline_executable(
1273 &text[pos..],
1274 config.extensions.rmarkdown_inline_code,
1275 config.extensions.quarto_inline_code,
1276 )
1277 {
1278 if pos > text_start {
1279 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1280 }
1281 log::debug!("Matched inline executable code at pos {}", pos);
1282 emit_inline_executable(builder, &m);
1283 pos += m.total_len;
1284 text_start = pos;
1285 continue;
1286 }
1287
1288 if byte == b'`'
1290 && let Some((len, content, backtick_count, attributes)) =
1291 try_parse_code_span(&text[pos..])
1292 {
1293 if pos > text_start {
1295 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1296 }
1297
1298 log::debug!(
1299 "Matched code span at pos {}: {} backticks",
1300 pos,
1301 backtick_count
1302 );
1303
1304 if let Some(ref attrs) = attributes
1306 && config.extensions.raw_attribute
1307 && let Some(format) = is_raw_inline(attrs)
1308 {
1309 use super::raw_inline::emit_raw_inline;
1310 log::debug!("Matched raw inline span at pos {}: format={}", pos, format);
1311 emit_raw_inline(builder, content, backtick_count, format);
1312 } else if !config.extensions.inline_code_attributes && attributes.is_some() {
1313 let code_span_len = backtick_count * 2 + content.len();
1314 emit_code_span(builder, content, backtick_count, None);
1315 pos += code_span_len;
1316 text_start = pos;
1317 continue;
1318 } else {
1319 emit_code_span(builder, content, backtick_count, attributes);
1320 }
1321
1322 pos += len;
1323 text_start = pos;
1324 continue;
1325 }
1326
1327 if byte == b':'
1329 && config.extensions.emoji
1330 && is_emoji_boundary(text, pos)
1331 && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
1332 {
1333 if pos > text_start {
1334 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1335 }
1336 log::debug!("Matched emoji at pos {}", pos);
1337 emit_emoji(builder, &text[pos..pos + len]);
1338 pos += len;
1339 text_start = pos;
1340 continue;
1341 }
1342
1343 if byte == b'^'
1345 && pos + 1 < text.len()
1346 && text.as_bytes()[pos + 1] == b'['
1347 && config.extensions.inline_footnotes
1348 && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
1349 {
1350 if pos > text_start {
1351 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1352 }
1353 log::debug!("Matched inline footnote at pos {}", pos);
1354 emit_inline_footnote(builder, content, config);
1355 pos += len;
1356 text_start = pos;
1357 continue;
1358 }
1359
1360 if byte == b'^'
1362 && config.extensions.superscript
1363 && let Some((len, content)) = try_parse_superscript(&text[pos..])
1364 {
1365 if pos > text_start {
1366 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1367 }
1368 log::debug!("Matched superscript at pos {}", pos);
1369 emit_superscript(builder, content, config);
1370 pos += len;
1371 text_start = pos;
1372 continue;
1373 }
1374
1375 if byte == b'(' && config.extensions.bookdown_references {
1377 if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
1378 if pos > text_start {
1379 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1380 }
1381 log::debug!("Matched bookdown definition at pos {}: {}", pos, label);
1382 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1383 pos += len;
1384 text_start = pos;
1385 continue;
1386 }
1387 if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
1388 if pos > text_start {
1389 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1390 }
1391 log::debug!("Matched bookdown text reference at pos {}: {}", pos, label);
1392 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1393 pos += len;
1394 text_start = pos;
1395 continue;
1396 }
1397 }
1398
1399 if byte == b'~'
1401 && config.extensions.subscript
1402 && let Some((len, content)) = try_parse_subscript(&text[pos..])
1403 {
1404 if pos > text_start {
1405 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1406 }
1407 log::debug!("Matched subscript at pos {}", pos);
1408 emit_subscript(builder, content, config);
1409 pos += len;
1410 text_start = pos;
1411 continue;
1412 }
1413
1414 if byte == b'~'
1416 && config.extensions.strikeout
1417 && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1418 {
1419 if pos > text_start {
1420 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1421 }
1422 log::debug!("Matched strikeout at pos {}", pos);
1423 emit_strikeout(builder, content, config);
1424 pos += len;
1425 text_start = pos;
1426 continue;
1427 }
1428
1429 if byte == b'='
1431 && config.extensions.mark
1432 && let Some((len, content)) = try_parse_mark(&text[pos..])
1433 {
1434 if pos > text_start {
1435 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1436 }
1437 log::debug!("Matched mark at pos {}", pos);
1438 emit_mark(builder, content, config);
1439 pos += len;
1440 text_start = pos;
1441 continue;
1442 }
1443
1444 if byte == b'$'
1446 && config.extensions.tex_math_gfm
1447 && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1448 {
1449 if pos > text_start {
1450 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1451 }
1452 log::debug!("Matched GFM inline math at pos {}", pos);
1453 emit_gfm_inline_math(builder, content);
1454 pos += len;
1455 text_start = pos;
1456 continue;
1457 }
1458
1459 if byte == b'$' && config.extensions.tex_math_dollars {
1461 if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1463 if pos > text_start {
1465 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1466 }
1467
1468 let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1469 log::debug!(
1470 "Matched display math at pos {}: {} dollars",
1471 pos,
1472 dollar_count
1473 );
1474
1475 let after_math = &text[pos + len..];
1477 let attr_len = if config.extensions.quarto_crossrefs {
1478 use crate::parser::utils::attributes::try_parse_trailing_attributes;
1479 if let Some((_attr_block, _)) = try_parse_trailing_attributes(after_math) {
1480 let trimmed_after = after_math.trim_start();
1481 if let Some(open_brace_pos) = trimmed_after.find('{') {
1482 let ws_before_brace = after_math.len() - trimmed_after.len();
1483 let attr_text_len = trimmed_after[open_brace_pos..]
1484 .find('}')
1485 .map(|close| close + 1)
1486 .unwrap_or(0);
1487 ws_before_brace + open_brace_pos + attr_text_len
1488 } else {
1489 0
1490 }
1491 } else {
1492 0
1493 }
1494 } else {
1495 0
1496 };
1497
1498 let total_len = len + attr_len;
1499 emit_display_math(builder, content, dollar_count);
1500
1501 if attr_len > 0 {
1503 use crate::parser::utils::attributes::{
1504 emit_attributes, try_parse_trailing_attributes,
1505 };
1506 let attr_text = &text[pos + len..pos + total_len];
1507 if let Some((attr_block, _text_before)) =
1508 try_parse_trailing_attributes(attr_text)
1509 {
1510 let trimmed_after = attr_text.trim_start();
1511 let ws_len = attr_text.len() - trimmed_after.len();
1512 if ws_len > 0 {
1513 builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1514 }
1515 emit_attributes(builder, &attr_block);
1516 }
1517 }
1518
1519 pos += total_len;
1520 text_start = pos;
1521 continue;
1522 }
1523
1524 if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1526 if pos > text_start {
1528 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1529 }
1530
1531 log::debug!("Matched inline math at pos {}", pos);
1532 emit_inline_math(builder, content);
1533 pos += len;
1534 text_start = pos;
1535 continue;
1536 }
1537
1538 if pos > text_start {
1541 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1542 }
1543 builder.token(SyntaxKind::TEXT.into(), "$");
1544 pos += 1;
1545 text_start = pos;
1546 continue;
1547 }
1548
1549 if byte == b'<'
1551 && config.extensions.autolinks
1552 && let Some((len, url)) = try_parse_autolink(&text[pos..])
1553 {
1554 if pos > text_start {
1555 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1556 }
1557 log::debug!("Matched autolink at pos {}", pos);
1558 emit_autolink(builder, &text[pos..pos + len], url);
1559 pos += len;
1560 text_start = pos;
1561 continue;
1562 }
1563
1564 if config.extensions.autolink_bare_uris
1565 && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1566 {
1567 if pos > text_start {
1568 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1569 }
1570 log::debug!("Matched bare URI at pos {}", pos);
1571 emit_bare_uri_link(builder, url, config);
1572 pos += len;
1573 text_start = pos;
1574 continue;
1575 }
1576
1577 if byte == b'<'
1579 && config.extensions.native_spans
1580 && let Some((len, content, attributes)) = try_parse_native_span(&text[pos..])
1581 {
1582 if pos > text_start {
1583 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1584 }
1585 log::debug!("Matched native span at pos {}", pos);
1586 emit_native_span(builder, content, &attributes, config);
1587 pos += len;
1588 text_start = pos;
1589 continue;
1590 }
1591
1592 if byte == b'!' && pos + 1 < text.len() && text.as_bytes()[pos + 1] == b'[' {
1594 if let Some((len, alt_text, dest, attributes)) = try_parse_inline_image(&text[pos..]) {
1596 if pos > text_start {
1597 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1598 }
1599 log::debug!("Matched inline image at pos {}", pos);
1600 emit_inline_image(
1601 builder,
1602 &text[pos..pos + len],
1603 alt_text,
1604 dest,
1605 attributes,
1606 config,
1607 );
1608 pos += len;
1609 text_start = pos;
1610 continue;
1611 }
1612
1613 if config.extensions.reference_links {
1615 let allow_shortcut = config.extensions.shortcut_reference_links;
1616 if let Some((len, alt_text, reference, is_implicit)) =
1617 try_parse_reference_image(&text[pos..], allow_shortcut)
1618 {
1619 if pos > text_start {
1620 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1621 }
1622 log::debug!("Matched reference image at pos {}", pos);
1623 emit_reference_image(builder, alt_text, &reference, is_implicit, config);
1624 pos += len;
1625 text_start = pos;
1626 continue;
1627 }
1628 }
1629 }
1630
1631 if byte == b'[' {
1633 if config.extensions.footnotes
1635 && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1636 {
1637 if pos > text_start {
1638 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1639 }
1640 log::debug!("Matched footnote reference at pos {}", pos);
1641 emit_footnote_reference(builder, &id);
1642 pos += len;
1643 text_start = pos;
1644 continue;
1645 }
1646
1647 if config.extensions.inline_links
1649 && let Some((len, link_text, dest, attributes)) =
1650 try_parse_inline_link(&text[pos..])
1651 {
1652 if pos > text_start {
1653 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1654 }
1655 log::debug!("Matched inline link at pos {}", pos);
1656 emit_inline_link(
1657 builder,
1658 &text[pos..pos + len],
1659 link_text,
1660 dest,
1661 attributes,
1662 config,
1663 );
1664 pos += len;
1665 text_start = pos;
1666 continue;
1667 }
1668
1669 if config.extensions.reference_links {
1671 let allow_shortcut = config.extensions.shortcut_reference_links;
1672 if let Some((len, link_text, reference, is_implicit)) =
1673 try_parse_reference_link(&text[pos..], allow_shortcut)
1674 {
1675 if pos > text_start {
1676 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1677 }
1678 log::debug!("Matched reference link at pos {}", pos);
1679 emit_reference_link(builder, link_text, &reference, is_implicit, config);
1680 pos += len;
1681 text_start = pos;
1682 continue;
1683 }
1684 }
1685
1686 if config.extensions.citations
1688 && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1689 {
1690 if pos > text_start {
1691 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1692 }
1693 log::debug!("Matched bracketed citation at pos {}", pos);
1694 emit_bracketed_citation(builder, content);
1695 pos += len;
1696 text_start = pos;
1697 continue;
1698 }
1699 }
1700
1701 if byte == b'['
1704 && config.extensions.bracketed_spans
1705 && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1706 {
1707 if pos > text_start {
1708 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1709 }
1710 log::debug!("Matched bracketed span at pos {}", pos);
1711 emit_bracketed_span(builder, &text_content, &attrs, config);
1712 pos += len;
1713 text_start = pos;
1714 continue;
1715 }
1716
1717 if byte == b'@'
1719 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1720 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1721 {
1722 let is_crossref =
1723 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1724 if is_crossref || config.extensions.citations {
1725 if pos > text_start {
1726 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1727 }
1728 if is_crossref {
1729 log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
1730 super::citations::emit_crossref(builder, key, has_suppress);
1731 } else {
1732 log::debug!("Matched bare citation at pos {}: {}", pos, &key);
1733 emit_bare_citation(builder, key, has_suppress);
1734 }
1735 pos += len;
1736 text_start = pos;
1737 continue;
1738 }
1739 }
1740
1741 if byte == b'-'
1743 && pos + 1 < text.len()
1744 && text.as_bytes()[pos + 1] == b'@'
1745 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1746 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1747 {
1748 let is_crossref =
1749 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1750 if is_crossref || config.extensions.citations {
1751 if pos > text_start {
1752 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1753 }
1754 if is_crossref {
1755 log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
1756 super::citations::emit_crossref(builder, key, has_suppress);
1757 } else {
1758 log::debug!("Matched suppress-author citation at pos {}: {}", pos, &key);
1759 emit_bare_citation(builder, key, has_suppress);
1760 }
1761 pos += len;
1762 text_start = pos;
1763 continue;
1764 }
1765 }
1766
1767 if byte == b'*' || byte == b'_' {
1769 let bytes = text.as_bytes();
1771 let mut delim_count = 0;
1772 while pos + delim_count < bytes.len() && bytes[pos + delim_count] == byte {
1773 delim_count += 1;
1774 }
1775
1776 if pos > text_start {
1778 log::debug!(
1779 "Emitting TEXT before delimiter: {:?}",
1780 &text[text_start..pos]
1781 );
1782 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1783 text_start = pos; }
1785
1786 let emphasis_result = if nested_emphasis {
1789 try_parse_emphasis_nested(text, pos, end, config, builder)
1790 } else {
1791 try_parse_emphasis(text, pos, end, config, builder)
1792 };
1793
1794 if let Some((consumed, _)) = emphasis_result {
1795 log::debug!(
1797 "Parsed emphasis, consumed {} bytes from pos {}",
1798 consumed,
1799 pos
1800 );
1801 pos += consumed;
1802 text_start = pos;
1803 } else {
1804 log::debug!(
1807 "Failed to parse emphasis at pos {}, skipping {} delimiters as literal",
1808 pos,
1809 delim_count
1810 );
1811 pos += delim_count;
1812 }
1814 continue;
1815 }
1816
1817 if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1819 let text_before = &text[text_start..pos];
1820
1821 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1823 if trailing_spaces >= 2 {
1824 let text_content = &text_before[..text_before.len() - trailing_spaces];
1826 if !text_content.is_empty() {
1827 builder.token(SyntaxKind::TEXT.into(), text_content);
1828 }
1829 let spaces = " ".repeat(trailing_spaces);
1830 builder.token(
1831 SyntaxKind::HARD_LINE_BREAK.into(),
1832 &format!("{}\r\n", spaces),
1833 );
1834 pos += 2;
1835 text_start = pos;
1836 continue;
1837 }
1838
1839 if config.extensions.hard_line_breaks {
1841 if !text_before.is_empty() {
1842 builder.token(SyntaxKind::TEXT.into(), text_before);
1843 }
1844 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1845 pos += 2;
1846 text_start = pos;
1847 continue;
1848 }
1849
1850 if !text_before.is_empty() {
1852 builder.token(SyntaxKind::TEXT.into(), text_before);
1853 }
1854 builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1855 pos += 2;
1856 text_start = pos;
1857 continue;
1858 }
1859
1860 if byte == b'\n' {
1861 let text_before = &text[text_start..pos];
1862
1863 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1865 if trailing_spaces >= 2 {
1866 let text_content = &text_before[..text_before.len() - trailing_spaces];
1868 if !text_content.is_empty() {
1869 builder.token(SyntaxKind::TEXT.into(), text_content);
1870 }
1871 let spaces = " ".repeat(trailing_spaces);
1872 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1873 pos += 1;
1874 text_start = pos;
1875 continue;
1876 }
1877
1878 if config.extensions.hard_line_breaks {
1880 if !text_before.is_empty() {
1881 builder.token(SyntaxKind::TEXT.into(), text_before);
1882 }
1883 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1884 pos += 1;
1885 text_start = pos;
1886 continue;
1887 }
1888
1889 if !text_before.is_empty() {
1891 builder.token(SyntaxKind::TEXT.into(), text_before);
1892 }
1893 builder.token(SyntaxKind::NEWLINE.into(), "\n");
1894 pos += 1;
1895 text_start = pos;
1896 continue;
1897 }
1898
1899 pos += 1;
1901 }
1902
1903 if pos > text_start && text_start < end {
1905 log::debug!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1906 builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1907 }
1908
1909 log::debug!("parse_inline_range complete: start={}, end={}", start, end);
1910}
1911
#[cfg(test)]
mod tests {
    // Unit tests for the inline parser entry points: emphasis / strong
    // nesting and display math followed by trailing attributes.
    //
    // Every test in this module also asserts losslessness, i.e. that the text
    // of the resulting syntax tree is identical to the input.

    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};
    use rowan::GreenNode;

    /// Returns `true` if `root` itself or any node below it has kind `kind`.
    fn has_node(root: &SyntaxNode, kind: SyntaxKind) -> bool {
        root.descendants().any(|n| n.kind() == kind)
    }

    /// Returns `true` if some node of kind `inner` has a strict ancestor of
    /// kind `outer`, i.e. `inner` is really contained in `outer` rather than
    /// merely appearing after it in document order.
    fn is_nested_in(root: &SyntaxNode, inner: SyntaxKind, outer: SyntaxKind) -> bool {
        root.descendants()
            .filter(|n| n.kind() == inner)
            // `ancestors()` starts at the node itself, so skip it.
            .any(|n| n.ancestors().skip(1).any(|a| a.kind() == outer))
    }

    /// `*test*` parsed through the recursive entry point produces EMPHASIS.
    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_text_recursive(&mut builder, text, &config);

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        // Lossless: the tree must reproduce the input exactly.
        assert_eq!(node.text().to_string(), text);

        assert!(
            has_node(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    /// `*foo **bar** baz*` produces both an EMPHASIS and a STRONG node.
    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        // Wrap the inline content in a PARAGRAPH so the builder has one root.
        builder.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        assert!(
            has_node(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_node(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// `try_parse_emphasis` on `*test*` reports `(6, 1)` and leaves an
    /// EMPHASIS node as the root of the built tree.
    #[test]
    fn test_parse_simple_emphasis() {
        let text = "*test*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);

        assert_eq!(result, Some((6, 1)));

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.kind(), SyntaxKind::EMPHASIS);

        assert_eq!(node.text().to_string(), text);
    }

    /// `parse_inline_range` over `*foo **bar** baz*` produces both an
    /// EMPHASIS and a STRONG node.
    #[test]
    fn test_parse_nested_emphasis_strong() {
        let text = "*foo **bar** baz*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_range(text, 0, text.len(), &config, &mut builder);

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        assert!(
            has_node(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_node(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// `***foo* bar**`: the EMPHASIS node must be contained in the STRONG node.
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        // Debug dump of the tree, used only in the failure message below.
        let structure = format!("{:#?}", node);

        // Check node kinds rather than substrings of the debug dump, which
        // would also match any other kind whose name contains the word.
        assert!(
            has_node(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
        assert!(
            has_node(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );

        // Verify real containment (ancestor relation), not just that EMPHASIS
        // shows up after STRONG in preorder.
        assert!(
            is_nested_in(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG),
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// `***foo** bar*`: the STRONG node must be contained in the EMPHASIS node.
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = ParserOptions::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        // Debug dump of the tree, used only in the failure message below.
        let structure = format!("{:#?}", node);

        assert!(
            has_node(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            has_node(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );

        // Verify real containment (ancestor relation), not just ordering.
        assert!(
            is_nested_in(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS),
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// Display math followed by `{#eq-einstein}` yields a DISPLAY_MATH node
    /// and an ATTRIBUTE node, and the attribute text is not left behind as a
    /// plain TEXT element right after the math.
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = ParserOptions::default();
        // The parser only looks for trailing attributes after display math
        // when Quarto cross-references are enabled.
        config.extensions.quarto_crossrefs = true;

        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let green: GreenNode = builder.finish();
        let node = SyntaxNode::new_root(green);

        assert_eq!(node.text().to_string(), text);

        assert!(
            has_node(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );

        assert!(
            has_node(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // `next_sibling()` only yields nodes, so it can never observe a TEXT
        // token; inspect the next sibling *element* (node or token) instead.
        let math_followed_by_text = node
            .descendants()
            .filter(|n| n.kind() == SyntaxKind::DISPLAY_MATH)
            .filter_map(|n| n.next_sibling_or_token())
            .any(|element| {
                element.kind() == SyntaxKind::TEXT
                    && match &element {
                        rowan::NodeOrToken::Node(n) => {
                            n.text().to_string().contains("{#eq-einstein}")
                        }
                        rowan::NodeOrToken::Token(t) => t.text().contains("{#eq-einstein}"),
                    }
            });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }
}
2173
/// `**bold with *italic***`: the tree root is STRONG and it has an EMPHASIS
/// node among its direct children; the tree text equals the input.
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;

    let input = "**bold with *italic***";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();
    parse_inline_range(input, 0, input.len(), &options, &mut tree_builder);

    let root = SyntaxNode::new_root(tree_builder.finish());

    assert_eq!(root.text().to_string(), input, "Should be lossless");

    // No wrapper node was started, so the parsed construct is the root.
    let root_kind = root.kind();
    assert_eq!(
        root_kind,
        SyntaxKind::STRONG,
        "Root should be STRONG, got: {:?}",
        root_kind
    );

    let emphasis_child = root
        .children()
        .find(|child| child.kind() == SyntaxKind::EMPHASIS);
    assert!(
        emphasis_child.is_some(),
        "STRONG should contain EMPHASIS node"
    );
}
2209
/// `*foo *` (space before the closing delimiter) is expected to parse as
/// emphasis, reporting `(6, 1)` and producing an EMPHASIS root.
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;

    let input = "*foo *";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut tree_builder);
    assert_eq!(
        outcome,
        Some((6, 1)),
        "Should parse as emphasis, result: {:?}",
        outcome
    );

    let root = SyntaxNode::new_root(tree_builder.finish());
    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    // Lossless: the tree text must equal the input.
    assert_eq!(root.text().to_string(), input);
}
2244
/// `***foo** bar **baz***`: exactly one EMPHASIS node, with exactly two STRONG
/// nodes as its direct children; the tree text equals the input.
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;

    let input = "***foo** bar **baz***";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();
    parse_inline_range(input, 0, input.len(), &options, &mut tree_builder);

    let root = SyntaxNode::new_root(tree_builder.finish());

    let emphasis_nodes: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    let emphasis_total = emphasis_nodes.len();
    assert_eq!(
        emphasis_total, 1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphasis_total
    );

    // Only direct children count here, not deeper descendants.
    let strong_children = emphasis_nodes[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_children, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_children
    );

    assert_eq!(root.text().to_string(), input);
}
2291
/// `***foo* bar *baz***`: exactly one STRONG node, with exactly two EMPHASIS
/// nodes as its direct children; the tree text equals the input.
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;

    let input = "***foo* bar *baz***";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();
    parse_inline_range(input, 0, input.len(), &options, &mut tree_builder);

    let root = SyntaxNode::new_root(tree_builder.finish());

    let strong_nodes: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    let strong_total = strong_nodes.len();
    assert_eq!(
        strong_total, 1,
        "Should have exactly one STRONG node, found: {}",
        strong_total
    );

    // Only direct children count here, not deeper descendants.
    let emphasis_children = strong_nodes[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_children, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_children
    );

    assert_eq!(root.text().to_string(), input);
}
2338
/// Single-delimiter emphasis whose content spans a newline is consumed in
/// full and yields an EMPHASIS root that still contains the newline.
#[test]
fn test_parse_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;

    let input = "*text on\nline two*";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut tree_builder);
    assert_eq!(
        outcome,
        Some((input.len(), 1)),
        "Emphasis should parse multiline content"
    );

    let root = SyntaxNode::new_root(tree_builder.finish());
    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
2374
/// Double-delimiter strong emphasis whose content spans a newline is consumed
/// in full and yields a STRONG root that still contains the newline.
#[test]
fn test_parse_strong_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;

    let input = "**strong on\nline two**";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut tree_builder);
    assert_eq!(
        outcome,
        Some((input.len(), 2)),
        "Strong emphasis should parse multiline content"
    );

    let root = SyntaxNode::new_root(tree_builder.finish());
    assert_eq!(root.kind(), SyntaxKind::STRONG);

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2409
/// Triple-delimiter emphasis whose content spans a newline is consumed in
/// full, produces a STRONG node somewhere in the tree, and keeps the newline.
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::options::ParserOptions;
    use crate::syntax::SyntaxNode;

    let input = "***both on\nline two***";
    let options = ParserOptions::default();
    let mut tree_builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &options, &mut tree_builder);
    assert_eq!(
        outcome,
        Some((input.len(), 3)),
        "Triple emphasis should parse multiline content"
    );

    let root = SyntaxNode::new_root(tree_builder.finish());

    let strong_node = root
        .descendants()
        .find(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong_node.is_some(), "Should have STRONG node");

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}