1use crate::config::Config;
24use crate::syntax::SyntaxKind;
25use rowan::GreenNodeBuilder;
26
27use super::bookdown::{
29 try_parse_bookdown_definition, try_parse_bookdown_reference, try_parse_bookdown_text_reference,
30};
31use super::bracketed_spans::{emit_bracketed_span, try_parse_bracketed_span};
32use super::citations::{
33 emit_bare_citation, emit_bracketed_citation, try_parse_bare_citation,
34 try_parse_bracketed_citation,
35};
36use super::code_spans::{emit_code_span, try_parse_code_span};
37use super::emoji::{emit_emoji, try_parse_emoji};
38use super::escapes::{EscapeType, emit_escape, try_parse_escape};
39use super::inline_executable::{emit_inline_executable, try_parse_inline_executable};
40use super::inline_footnotes::{
41 emit_footnote_reference, emit_inline_footnote, try_parse_footnote_reference,
42 try_parse_inline_footnote,
43};
44use super::latex::{parse_latex_command, try_parse_latex_command};
45use super::links::{
46 emit_autolink, emit_bare_uri_link, emit_inline_image, emit_inline_link, emit_reference_image,
47 emit_reference_link, try_parse_autolink, try_parse_bare_uri, try_parse_inline_image,
48 try_parse_inline_link, try_parse_reference_image, try_parse_reference_link,
49};
50use super::math::{
51 emit_display_math, emit_display_math_environment, emit_double_backslash_display_math,
52 emit_double_backslash_inline_math, emit_gfm_inline_math, emit_inline_math,
53 emit_single_backslash_display_math, emit_single_backslash_inline_math, try_parse_display_math,
54 try_parse_double_backslash_display_math, try_parse_double_backslash_inline_math,
55 try_parse_gfm_inline_math, try_parse_inline_math, try_parse_math_environment,
56 try_parse_single_backslash_display_math, try_parse_single_backslash_inline_math,
57};
58use super::native_spans::{emit_native_span, try_parse_native_span};
59use super::raw_inline::is_raw_inline;
60use super::shortcodes::{emit_shortcode, try_parse_shortcode};
61use super::strikeout::{emit_strikeout, try_parse_strikeout};
62use super::subscript::{emit_subscript, try_parse_subscript};
63use super::superscript::{emit_superscript, try_parse_superscript};
64
65pub fn parse_inline_text_recursive(builder: &mut GreenNodeBuilder, text: &str, config: &Config) {
82 log::debug!(
83 "Recursive inline parsing: {:?} ({} bytes)",
84 &text[..text.len().min(40)],
85 text.len()
86 );
87
88 parse_inline_range(text, 0, text.len(), config, builder);
89
90 log::debug!("Recursive inline parsing complete");
91}
92
93pub fn parse_inline_text(
99 builder: &mut GreenNodeBuilder,
100 text: &str,
101 config: &Config,
102 _allow_reference_links: bool,
103) {
104 log::trace!(
105 "Parsing inline text (recursive): {:?} ({} bytes)",
106 &text[..text.len().min(40)],
107 text.len()
108 );
109
110 parse_inline_text_recursive(builder, text, config);
112}
113
114pub fn try_parse_emphasis(
139 text: &str,
140 pos: usize,
141 end: usize,
142 config: &Config,
143 builder: &mut GreenNodeBuilder,
144) -> Option<(usize, usize)> {
145 let bytes = text.as_bytes();
146
147 if pos >= bytes.len() {
148 return None;
149 }
150
151 let delim_char = bytes[pos] as char;
152 if delim_char != '*' && delim_char != '_' {
153 return None;
154 }
155
156 let mut count = 0;
158 while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
159 count += 1;
160 }
161
162 let after_pos = pos + count;
163
164 log::debug!(
165 "try_parse_emphasis: '{}' x {} at pos {}",
166 delim_char,
167 count,
168 pos
169 );
170
171 if after_pos < text.len()
173 && let Some(next_char) = text[after_pos..].chars().next()
174 && next_char.is_whitespace()
175 {
176 log::trace!("Delimiter followed by whitespace, treating as literal");
177 return None;
178 }
179
180 if delim_char == '_'
183 && pos > 0
184 && let Some(prev_char) = text[..pos].chars().last()
185 && prev_char.is_alphanumeric()
186 {
187 log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
188 return None;
189 }
190
191 let result = match count {
193 1 => try_parse_one(text, pos, delim_char, end, config, builder),
194 2 => try_parse_two(text, pos, delim_char, end, config, builder),
195 3 => try_parse_three(text, pos, delim_char, end, config, builder),
196 _ => {
197 log::trace!("{} delimiters (4+), treating as literal", count);
199 None
200 }
201 };
202
203 result.map(|consumed| (consumed, count))
206}
207
208fn try_parse_emphasis_nested(
217 text: &str,
218 pos: usize,
219 end: usize,
220 config: &Config,
221 builder: &mut GreenNodeBuilder,
222) -> Option<(usize, usize)> {
223 let bytes = text.as_bytes();
224
225 if pos >= bytes.len() {
226 return None;
227 }
228
229 let delim_char = bytes[pos] as char;
230 if delim_char != '*' && delim_char != '_' {
231 return None;
232 }
233
234 let mut count = 0;
236 while pos + count < bytes.len() && bytes[pos + count] == bytes[pos] {
237 count += 1;
238 }
239
240 log::debug!(
241 "try_parse_emphasis_nested: '{}' x {} at pos {}",
242 delim_char,
243 count,
244 pos
245 );
246
247 if delim_char == '_'
250 && pos > 0
251 && let Some(prev_char) = text[..pos].chars().last()
252 && prev_char.is_alphanumeric()
253 {
254 log::trace!("Underscore preceded by alphanumeric, can't open (intraword)");
255 return None;
256 }
257
258 let result = match count {
264 1 => try_parse_one(text, pos, delim_char, end, config, builder),
265 2 => try_parse_two(text, pos, delim_char, end, config, builder),
266 3 => try_parse_three(text, pos, delim_char, end, config, builder),
267 _ => {
268 log::trace!("{} delimiters (4+), treating as literal", count);
270 None
271 }
272 };
273
274 result.map(|consumed| (consumed, count))
275}
276
277fn try_parse_three(
282 text: &str,
283 pos: usize,
284 delim_char: char,
285 end: usize,
286 config: &Config,
287 builder: &mut GreenNodeBuilder,
288) -> Option<usize> {
289 let content_start = pos + 3;
290 let one = delim_char.to_string();
291 let two = one.repeat(2);
292
293 log::debug!("try_parse_three: '{}' x 3 at pos {}", delim_char, pos);
294
295 let mut search_pos = content_start;
299
300 loop {
301 let closer_start = match find_first_potential_ender(text, search_pos, delim_char, end) {
303 Some(p) => p,
304 None => {
305 log::trace!("No potential ender found for ***");
306 return None;
307 }
308 };
309
310 log::debug!("Potential ender at pos {}", closer_start);
311
312 let bytes = text.as_bytes();
314 let mut closer_count = 0;
315 let mut check_pos = closer_start;
316 while check_pos < bytes.len() && bytes[check_pos] == delim_char as u8 {
317 closer_count += 1;
318 check_pos += 1;
319 }
320
321 log::debug!(
322 "Found {} x {} at pos {}",
323 delim_char,
324 closer_count,
325 closer_start
326 );
327
328 if closer_count >= 3 && is_valid_ender(text, closer_start, delim_char, 3) {
332 log::debug!("Matched *** closer, emitting Strong[Emph[content]]");
333
334 builder.start_node(SyntaxKind::STRONG.into());
335 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
336
337 builder.start_node(SyntaxKind::EMPHASIS.into());
338 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
339 parse_inline_range_nested(text, content_start, closer_start, config, builder);
340 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
341 builder.finish_node(); builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
344 builder.finish_node(); return Some(closer_start + 3 - pos);
347 }
348
349 if closer_count >= 2 && is_valid_ender(text, closer_start, delim_char, 2) {
351 log::debug!("Matched ** closer, wrapping as Strong and continuing with one");
352
353 let continue_pos = closer_start + 2;
354
355 if let Some(final_closer_pos) =
356 parse_until_closer_with_nested_two(text, continue_pos, delim_char, 1, end, config)
357 {
358 log::debug!(
359 "Found * closer at pos {}, emitting Emph[Strong[...], ...]",
360 final_closer_pos
361 );
362
363 builder.start_node(SyntaxKind::EMPHASIS.into());
364 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
365
366 builder.start_node(SyntaxKind::STRONG.into());
367 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
368 parse_inline_range_nested(text, content_start, closer_start, config, builder);
369 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
370 builder.finish_node(); parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
374
375 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
376 builder.finish_node(); return Some(final_closer_pos + 1 - pos);
379 }
380
381 log::debug!("No * closer found after **, emitting * + STRONG");
383 builder.token(SyntaxKind::TEXT.into(), &one);
384
385 builder.start_node(SyntaxKind::STRONG.into());
386 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
387 parse_inline_range_nested(text, content_start, closer_start, config, builder);
388 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
389 builder.finish_node(); return Some(closer_start + 2 - pos);
392 }
393
394 if closer_count >= 1 && is_valid_ender(text, closer_start, delim_char, 1) {
396 log::debug!("Matched * closer, wrapping as Emph and continuing with two");
397
398 let continue_pos = closer_start + 1;
399
400 if let Some(final_closer_pos) =
401 parse_until_closer_with_nested_one(text, continue_pos, delim_char, 2, end, config)
402 {
403 log::debug!(
404 "Found ** closer at pos {}, emitting Strong[Emph[...], ...]",
405 final_closer_pos
406 );
407
408 builder.start_node(SyntaxKind::STRONG.into());
409 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
410
411 builder.start_node(SyntaxKind::EMPHASIS.into());
412 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
413 parse_inline_range_nested(text, content_start, closer_start, config, builder);
414 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
415 builder.finish_node(); parse_inline_range_nested(text, continue_pos, final_closer_pos, config, builder);
418
419 builder.token(SyntaxKind::STRONG_MARKER.into(), &two);
420 builder.finish_node(); return Some(final_closer_pos + 2 - pos);
423 }
424
425 log::debug!("No ** closer found after *, emitting ** + EMPH");
427 builder.token(SyntaxKind::TEXT.into(), &two);
428
429 builder.start_node(SyntaxKind::EMPHASIS.into());
430 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
431 parse_inline_range_nested(text, content_start, closer_start, config, builder);
432 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &one);
433 builder.finish_node(); return Some(closer_start + 1 - pos);
436 }
437
438 log::debug!(
440 "No valid ender at pos {}, continuing search from {}",
441 closer_start,
442 closer_start + closer_count
443 );
444 search_pos = closer_start + closer_count;
445 }
446}
447
/// Returns the byte offset of the first unescaped `delim_char` in
/// `text[start..min(end, text.len())]`, or `None` if there is none.
///
/// A delimiter counts as escaped when it is immediately preceded by an odd
/// number of backslashes. The backslash count looks backwards from the
/// delimiter and is not limited by `start`.
fn find_first_potential_ender(
    text: &str,
    start: usize,
    delim_char: char,
    end: usize,
) -> Option<usize> {
    let bytes = text.as_bytes();
    let delim = delim_char as u8;
    let limit = end.min(bytes.len());

    (start..limit).find(|&idx| {
        if bytes[idx] != delim {
            return false;
        }
        let preceding_backslashes = bytes[..idx]
            .iter()
            .rev()
            .take_while(|&&b| b == b'\\')
            .count();
        // An even number of backslashes means they escape each other.
        preceding_backslashes % 2 == 0
    })
}
485
/// Reports whether exactly `delim_count` copies of `delim_char` at byte
/// offset `pos` can close an emphasis construct.
///
/// All of the following must hold:
/// * `text` has `delim_count` delimiter bytes starting at `pos`;
/// * the byte before `pos` and the byte after the run are not the delimiter
///   (the run is exactly `delim_count` long);
/// * for `_` only: the run is not preceded by whitespace and not followed by
///   an alphanumeric character.
fn is_valid_ender(text: &str, pos: usize, delim_char: char, delim_count: usize) -> bool {
    let bytes = text.as_bytes();
    let delim = delim_char as u8;
    let run_end = pos + delim_count;

    // The candidate run must fit in the text and consist solely of delimiters.
    let Some(run) = bytes.get(pos..run_end) else {
        return false;
    };
    if run.iter().any(|&b| b != delim) {
        return false;
    }

    // Reject when the run is part of a longer delimiter run on either side.
    let preceded_by_delim = pos > 0 && bytes[pos - 1] == delim;
    let followed_by_delim = bytes.get(run_end) == Some(&delim);
    if preceded_by_delim || followed_by_delim {
        return false;
    }

    if delim_char != '_' {
        return true;
    }

    // Extra flanking rules that apply to underscores only.
    let after_whitespace = text[..pos]
        .chars()
        .next_back()
        .is_some_and(char::is_whitespace);
    if after_whitespace {
        return false;
    }

    let before_alphanumeric = text[run_end..]
        .chars()
        .next()
        .is_some_and(char::is_alphanumeric);
    !before_alphanumeric
}
534
535fn try_parse_two(
540 text: &str,
541 pos: usize,
542 delim_char: char,
543 end: usize,
544 config: &Config,
545 builder: &mut GreenNodeBuilder,
546) -> Option<usize> {
547 let content_start = pos + 2;
548
549 log::debug!("try_parse_two: '{}' x 2 at pos {}", delim_char, pos);
550
551 if let Some(closer_pos) =
553 parse_until_closer_with_nested_one(text, content_start, delim_char, 2, end, config)
554 {
555 log::debug!("Found ** closer at pos {}", closer_pos);
556
557 builder.start_node(SyntaxKind::STRONG.into());
559 builder.token(SyntaxKind::STRONG_MARKER.into(), &text[pos..pos + 2]);
560 parse_inline_range_nested(text, content_start, closer_pos, config, builder);
561 builder.token(
562 SyntaxKind::STRONG_MARKER.into(),
563 &text[closer_pos..closer_pos + 2],
564 );
565 builder.finish_node(); return Some(closer_pos + 2 - pos);
568 }
569
570 log::trace!("No closer found for **");
572 None
573}
574
575fn try_parse_one(
586 text: &str,
587 pos: usize,
588 delim_char: char,
589 end: usize,
590 config: &Config,
591 builder: &mut GreenNodeBuilder,
592) -> Option<usize> {
593 let content_start = pos + 1;
594
595 log::debug!("try_parse_one: '{}' x 1 at pos {}", delim_char, pos);
596
597 if let Some(closer_pos) =
599 parse_until_closer_with_nested_two(text, content_start, delim_char, 1, end, config)
600 {
601 log::debug!("Found * closer at pos {}", closer_pos);
602
603 builder.start_node(SyntaxKind::EMPHASIS.into());
605 builder.token(SyntaxKind::EMPHASIS_MARKER.into(), &text[pos..pos + 1]);
606 parse_inline_range_nested(text, content_start, closer_pos, config, builder);
607 builder.token(
608 SyntaxKind::EMPHASIS_MARKER.into(),
609 &text[closer_pos..closer_pos + 1],
610 );
611 builder.finish_node(); return Some(closer_pos + 1 - pos);
614 }
615
616 log::trace!("No closer found for *");
618 None
619}
620
621fn parse_until_closer_with_nested_two(
640 text: &str,
641 start: usize,
642 delim_char: char,
643 delim_count: usize,
644 end: usize,
645 config: &Config,
646) -> Option<usize> {
647 let bytes = text.as_bytes();
648 let mut pos = start;
649
650 while pos < end.min(text.len()) {
651 if bytes[pos] == b'`'
652 && let Some(m) = try_parse_inline_executable(
653 &text[pos..],
654 config.extensions.rmarkdown_inline_code,
655 config.extensions.quarto_inline_code,
656 )
657 {
658 log::trace!(
659 "Skipping inline executable span of {} bytes at pos {}",
660 m.total_len,
661 pos
662 );
663 pos += m.total_len;
664 continue;
665 }
666
667 if bytes[pos] == b'`'
669 && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
670 {
671 log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
672 pos += len;
673 continue;
674 }
675
676 if bytes[pos] == b'$'
678 && let Some((len, _)) = try_parse_inline_math(&text[pos..])
679 {
680 log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
681 pos += len;
682 continue;
683 }
684
685 if bytes[pos] == b'['
687 && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..])
688 {
689 log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
690 pos += len;
691 continue;
692 }
693
694 if delim_count == 1
698 && pos + 2 <= text.len()
699 && bytes[pos] == delim_char as u8
700 && bytes[pos + 1] == delim_char as u8
701 {
702 let first_is_escaped = {
704 let mut backslash_count = 0;
705 let mut check_pos = pos;
706 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
707 backslash_count += 1;
708 check_pos -= 1;
709 }
710 backslash_count % 2 == 1
711 };
712
713 if first_is_escaped {
714 log::trace!(
717 "First * at pos {} is escaped, skipping to check second *",
718 pos
719 );
720 pos += 1;
721 continue;
722 }
723
724 let no_third_delim = pos + 2 >= bytes.len() || bytes[pos + 2] != delim_char as u8;
727
728 if no_third_delim {
729 log::trace!(
730 "try_parse_one: found ** at pos {}, attempting nested two",
731 pos
732 );
733
734 let mut temp_builder = GreenNodeBuilder::new();
737 if let Some(two_consumed) =
738 try_parse_two(text, pos, delim_char, end, config, &mut temp_builder)
739 {
740 log::debug!(
743 "Nested two succeeded, consumed {} bytes, continuing search",
744 two_consumed
745 );
746 pos += two_consumed;
747 continue;
748 }
749 log::trace!("Nested two failed at pos {}, entire one() should fail", pos);
755 return None;
756 }
757 }
758
759 if pos + delim_count <= text.len() {
761 let mut matches = true;
762 for i in 0..delim_count {
763 if bytes[pos + i] != delim_char as u8 {
764 matches = false;
765 break;
766 }
767 }
768
769 if matches {
770 let is_escaped = {
776 let mut backslash_count = 0;
777 let mut check_pos = pos;
778 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
779 backslash_count += 1;
780 check_pos -= 1;
781 }
782 backslash_count % 2 == 1 };
784
785 let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
789 let after_pos = pos + delim_count;
790 let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
791
792 if (at_run_start || at_run_end) && !is_escaped {
793 if delim_char == '_'
797 && pos > start
798 && let Some(prev_char) = text[..pos].chars().last()
799 && prev_char.is_whitespace()
800 {
801 log::trace!(
802 "Underscore closer preceded by whitespace at pos {}, not right-flanking",
803 pos
804 );
805 pos += 1;
807 continue;
808 }
809
810 log::trace!(
811 "Found exact {} x {} closer at pos {}",
812 delim_char,
813 delim_count,
814 pos
815 );
816 return Some(pos);
817 }
818 }
819 }
820
821 pos += 1;
824 }
825
826 None
827}
828
829fn parse_until_closer_with_nested_one(
849 text: &str,
850 start: usize,
851 delim_char: char,
852 delim_count: usize,
853 end: usize,
854 config: &Config,
855) -> Option<usize> {
856 let bytes = text.as_bytes();
857 let mut pos = start;
858
859 while pos < end.min(text.len()) {
860 if bytes[pos] == b'`'
861 && let Some(m) = try_parse_inline_executable(
862 &text[pos..],
863 config.extensions.rmarkdown_inline_code,
864 config.extensions.quarto_inline_code,
865 )
866 {
867 log::trace!(
868 "Skipping inline executable span of {} bytes at pos {}",
869 m.total_len,
870 pos
871 );
872 pos += m.total_len;
873 continue;
874 }
875
876 if bytes[pos] == b'`'
878 && let Some((len, _, _, _)) = try_parse_code_span(&text[pos..])
879 {
880 log::trace!("Skipping code span of {} bytes at pos {}", len, pos);
881 pos += len;
882 continue;
883 }
884
885 if bytes[pos] == b'$'
887 && let Some((len, _)) = try_parse_inline_math(&text[pos..])
888 {
889 log::trace!("Skipping inline math of {} bytes at pos {}", len, pos);
890 pos += len;
891 continue;
892 }
893
894 if bytes[pos] == b'['
896 && let Some((len, _, _, _)) = try_parse_inline_link(&text[pos..])
897 {
898 log::trace!("Skipping inline link of {} bytes at pos {}", len, pos);
899 pos += len;
900 continue;
901 }
902
903 if delim_count == 2 && pos < text.len() && bytes[pos] == delim_char as u8 {
910 let no_second_delim = pos + 1 >= bytes.len() || bytes[pos + 1] != delim_char as u8;
913
914 if no_second_delim {
915 let is_escaped = {
917 let mut backslash_count = 0;
918 let mut check_pos = pos;
919 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
920 backslash_count += 1;
921 check_pos -= 1;
922 }
923 backslash_count % 2 == 1
924 };
925
926 if is_escaped {
927 log::trace!("* at pos {} is escaped, skipping", pos);
929 pos += 1;
930 continue;
931 }
932
933 let after_delim = pos + 1;
936 let followed_by_whitespace = after_delim < text.len()
937 && text[after_delim..]
938 .chars()
939 .next()
940 .is_some_and(|c| c.is_whitespace());
941
942 if followed_by_whitespace {
943 log::trace!(
945 "* at pos {} followed by whitespace, not an opener, skipping",
946 pos
947 );
948 pos += 1;
949 continue;
950 }
951
952 log::trace!(
953 "try_parse_two: found * at pos {}, attempting nested one",
954 pos
955 );
956
957 let mut temp_builder = GreenNodeBuilder::new();
960 if let Some(one_consumed) =
961 try_parse_one(text, pos, delim_char, end, config, &mut temp_builder)
962 {
963 log::debug!(
966 "Nested one succeeded, consumed {} bytes, continuing search",
967 one_consumed
968 );
969 pos += one_consumed;
970 continue;
971 }
972
973 log::debug!(
979 "Nested one failed at pos {}, poisoning outer two (no closer found)",
980 pos
981 );
982 return None;
983 }
984 }
985
986 if pos + delim_count <= text.len() {
988 let mut matches = true;
989 for i in 0..delim_count {
990 if bytes[pos + i] != delim_char as u8 {
991 matches = false;
992 break;
993 }
994 }
995
996 if matches {
997 let is_escaped = {
999 let mut backslash_count = 0;
1000 let mut check_pos = pos;
1001 while check_pos > 0 && bytes[check_pos - 1] == b'\\' {
1002 backslash_count += 1;
1003 check_pos -= 1;
1004 }
1005 backslash_count % 2 == 1 };
1007
1008 let at_run_start = pos == 0 || bytes[pos - 1] != delim_char as u8;
1012 let after_pos = pos + delim_count;
1013 let at_run_end = after_pos >= bytes.len() || bytes[after_pos] != delim_char as u8;
1014
1015 if (at_run_start || at_run_end) && !is_escaped {
1016 if delim_char == '_'
1020 && pos > start
1021 && let Some(prev_char) = text[..pos].chars().last()
1022 && prev_char.is_whitespace()
1023 {
1024 log::trace!(
1025 "Underscore closer preceded by whitespace at pos {}, not right-flanking",
1026 pos
1027 );
1028 pos += 1;
1030 continue;
1031 }
1032
1033 log::trace!(
1034 "Found exact {} x {} closer at pos {}",
1035 delim_char,
1036 delim_count,
1037 pos
1038 );
1039 return Some(pos);
1040 }
1041 }
1042 }
1043
1044 pos += 1;
1047 }
1048
1049 None
1050}
1051
1052fn parse_inline_range(
1069 text: &str,
1070 start: usize,
1071 end: usize,
1072 config: &Config,
1073 builder: &mut GreenNodeBuilder,
1074) {
1075 parse_inline_range_impl(text, start, end, config, builder, false)
1076}
1077
1078fn parse_inline_range_nested(
1081 text: &str,
1082 start: usize,
1083 end: usize,
1084 config: &Config,
1085 builder: &mut GreenNodeBuilder,
1086) {
1087 parse_inline_range_impl(text, start, end, config, builder, true)
1088}
1089
/// Reports whether byte offset `pos` is an acceptable place for an emoji
/// alias to begin.
///
/// Returns `false` only when the byte immediately before `pos` is an ASCII
/// letter, an ASCII digit, or `_`. The start of the text (`pos == 0`) always
/// counts as a boundary.
fn is_emoji_boundary(text: &str, pos: usize) -> bool {
    match pos.checked_sub(1) {
        None => true,
        Some(prev_idx) => {
            let prev = text.as_bytes()[prev_idx];
            !(prev.is_ascii_alphanumeric() || prev == b'_')
        }
    }
}
1099
1100fn parse_inline_range_impl(
1101 text: &str,
1102 start: usize,
1103 end: usize,
1104 config: &Config,
1105 builder: &mut GreenNodeBuilder,
1106 nested_emphasis: bool,
1107) {
1108 log::debug!(
1109 "parse_inline_range: start={}, end={}, text={:?}",
1110 start,
1111 end,
1112 &text[start..end]
1113 );
1114 let mut pos = start;
1115 let mut text_start = start;
1116
1117 while pos < end {
1118 let byte = text.as_bytes()[pos];
1119
1120 if byte == b'\\' {
1122 if config.extensions.tex_math_double_backslash {
1124 if let Some((len, content)) = try_parse_double_backslash_display_math(&text[pos..])
1125 {
1126 if pos > text_start {
1127 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1128 }
1129 log::debug!("Matched double backslash display math at pos {}", pos);
1130 emit_double_backslash_display_math(builder, content);
1131 pos += len;
1132 text_start = pos;
1133 continue;
1134 }
1135
1136 if let Some((len, content)) = try_parse_double_backslash_inline_math(&text[pos..]) {
1138 if pos > text_start {
1139 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1140 }
1141 log::debug!("Matched double backslash inline math at pos {}", pos);
1142 emit_double_backslash_inline_math(builder, content);
1143 pos += len;
1144 text_start = pos;
1145 continue;
1146 }
1147 }
1148
1149 if config.extensions.tex_math_single_backslash {
1151 if let Some((len, content)) = try_parse_single_backslash_display_math(&text[pos..])
1152 {
1153 if pos > text_start {
1154 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1155 }
1156 log::debug!("Matched single backslash display math at pos {}", pos);
1157 emit_single_backslash_display_math(builder, content);
1158 pos += len;
1159 text_start = pos;
1160 continue;
1161 }
1162
1163 if let Some((len, content)) = try_parse_single_backslash_inline_math(&text[pos..]) {
1165 if pos > text_start {
1166 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1167 }
1168 log::debug!("Matched single backslash inline math at pos {}", pos);
1169 emit_single_backslash_inline_math(builder, content);
1170 pos += len;
1171 text_start = pos;
1172 continue;
1173 }
1174 }
1175
1176 if config.extensions.raw_tex
1178 && let Some((len, begin_marker, content, end_marker)) =
1179 try_parse_math_environment(&text[pos..])
1180 {
1181 if pos > text_start {
1182 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1183 }
1184 log::debug!("Matched math environment at pos {}", pos);
1185 emit_display_math_environment(builder, begin_marker, content, end_marker);
1186 pos += len;
1187 text_start = pos;
1188 continue;
1189 }
1190
1191 if config.extensions.bookdown_references
1193 && let Some((len, label)) = try_parse_bookdown_reference(&text[pos..])
1194 {
1195 if pos > text_start {
1196 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1197 }
1198 log::debug!("Matched bookdown reference at pos {}: {}", pos, label);
1199 super::citations::emit_bookdown_crossref(builder, label);
1200 pos += len;
1201 text_start = pos;
1202 continue;
1203 }
1204
1205 if let Some((len, ch, escape_type)) = try_parse_escape(&text[pos..]) {
1207 let escape_enabled = match escape_type {
1208 EscapeType::HardLineBreak => config.extensions.escaped_line_breaks,
1209 EscapeType::NonbreakingSpace => config.extensions.all_symbols_escapable,
1210 EscapeType::Literal => {
1211 const BASE_ESCAPABLE: &str = "\\`*_{}[]()>#+-.!";
1212 BASE_ESCAPABLE.contains(ch) || config.extensions.all_symbols_escapable
1213 }
1214 };
1215 if !escape_enabled {
1216 pos += 1;
1219 continue;
1220 }
1221
1222 if pos > text_start {
1224 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1225 }
1226
1227 log::debug!("Matched escape at pos {}: \\{}", pos, ch);
1228 emit_escape(builder, ch, escape_type);
1229 pos += len;
1230 text_start = pos;
1231 continue;
1232 }
1233
1234 if config.extensions.raw_tex
1236 && let Some(len) = try_parse_latex_command(&text[pos..])
1237 {
1238 if pos > text_start {
1239 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1240 }
1241 log::debug!("Matched LaTeX command at pos {}", pos);
1242 parse_latex_command(builder, &text[pos..], len);
1243 pos += len;
1244 text_start = pos;
1245 continue;
1246 }
1247 }
1248
1249 if byte == b'{'
1251 && pos + 1 < text.len()
1252 && text.as_bytes()[pos + 1] == b'{'
1253 && let Some((len, name, attrs)) = try_parse_shortcode(&text[pos..])
1254 {
1255 if pos > text_start {
1256 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1257 }
1258 log::debug!("Matched shortcode at pos {}: {}", pos, &name);
1259 emit_shortcode(builder, &name, attrs);
1260 pos += len;
1261 text_start = pos;
1262 continue;
1263 }
1264
1265 if byte == b'`'
1267 && let Some(m) = try_parse_inline_executable(
1268 &text[pos..],
1269 config.extensions.rmarkdown_inline_code,
1270 config.extensions.quarto_inline_code,
1271 )
1272 {
1273 if pos > text_start {
1274 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1275 }
1276 log::debug!("Matched inline executable code at pos {}", pos);
1277 emit_inline_executable(builder, &m);
1278 pos += m.total_len;
1279 text_start = pos;
1280 continue;
1281 }
1282
1283 if byte == b'`'
1285 && let Some((len, content, backtick_count, attributes)) =
1286 try_parse_code_span(&text[pos..])
1287 {
1288 if pos > text_start {
1290 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1291 }
1292
1293 log::debug!(
1294 "Matched code span at pos {}: {} backticks",
1295 pos,
1296 backtick_count
1297 );
1298
1299 if let Some(ref attrs) = attributes
1301 && config.extensions.raw_attribute
1302 && let Some(format) = is_raw_inline(attrs)
1303 {
1304 use super::raw_inline::emit_raw_inline;
1305 log::debug!("Matched raw inline span at pos {}: format={}", pos, format);
1306 emit_raw_inline(builder, content, backtick_count, format);
1307 } else if !config.extensions.inline_code_attributes && attributes.is_some() {
1308 let code_span_len = backtick_count * 2 + content.len();
1309 emit_code_span(builder, content, backtick_count, None);
1310 pos += code_span_len;
1311 text_start = pos;
1312 continue;
1313 } else {
1314 emit_code_span(builder, content, backtick_count, attributes);
1315 }
1316
1317 pos += len;
1318 text_start = pos;
1319 continue;
1320 }
1321
1322 if byte == b':'
1324 && config.extensions.emoji
1325 && is_emoji_boundary(text, pos)
1326 && let Some((len, _alias)) = try_parse_emoji(&text[pos..])
1327 {
1328 if pos > text_start {
1329 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1330 }
1331 log::debug!("Matched emoji at pos {}", pos);
1332 emit_emoji(builder, &text[pos..pos + len]);
1333 pos += len;
1334 text_start = pos;
1335 continue;
1336 }
1337
1338 if byte == b'^'
1340 && pos + 1 < text.len()
1341 && text.as_bytes()[pos + 1] == b'['
1342 && config.extensions.inline_footnotes
1343 && let Some((len, content)) = try_parse_inline_footnote(&text[pos..])
1344 {
1345 if pos > text_start {
1346 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1347 }
1348 log::debug!("Matched inline footnote at pos {}", pos);
1349 emit_inline_footnote(builder, content, config);
1350 pos += len;
1351 text_start = pos;
1352 continue;
1353 }
1354
1355 if byte == b'^'
1357 && config.extensions.superscript
1358 && let Some((len, content)) = try_parse_superscript(&text[pos..])
1359 {
1360 if pos > text_start {
1361 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1362 }
1363 log::debug!("Matched superscript at pos {}", pos);
1364 emit_superscript(builder, content, config);
1365 pos += len;
1366 text_start = pos;
1367 continue;
1368 }
1369
1370 if byte == b'(' && config.extensions.bookdown_references {
1372 if let Some((len, label)) = try_parse_bookdown_definition(&text[pos..]) {
1373 if pos > text_start {
1374 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1375 }
1376 log::debug!("Matched bookdown definition at pos {}: {}", pos, label);
1377 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1378 pos += len;
1379 text_start = pos;
1380 continue;
1381 }
1382 if let Some((len, label)) = try_parse_bookdown_text_reference(&text[pos..]) {
1383 if pos > text_start {
1384 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1385 }
1386 log::debug!("Matched bookdown text reference at pos {}: {}", pos, label);
1387 builder.token(SyntaxKind::TEXT.into(), &text[pos..pos + len]);
1388 pos += len;
1389 text_start = pos;
1390 continue;
1391 }
1392 }
1393
1394 if byte == b'~'
1396 && config.extensions.subscript
1397 && let Some((len, content)) = try_parse_subscript(&text[pos..])
1398 {
1399 if pos > text_start {
1400 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1401 }
1402 log::debug!("Matched subscript at pos {}", pos);
1403 emit_subscript(builder, content, config);
1404 pos += len;
1405 text_start = pos;
1406 continue;
1407 }
1408
1409 if byte == b'~'
1411 && config.extensions.strikeout
1412 && let Some((len, content)) = try_parse_strikeout(&text[pos..])
1413 {
1414 if pos > text_start {
1415 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1416 }
1417 log::debug!("Matched strikeout at pos {}", pos);
1418 emit_strikeout(builder, content, config);
1419 pos += len;
1420 text_start = pos;
1421 continue;
1422 }
1423
1424 if byte == b'$'
1426 && config.extensions.tex_math_gfm
1427 && let Some((len, content)) = try_parse_gfm_inline_math(&text[pos..])
1428 {
1429 if pos > text_start {
1430 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1431 }
1432 log::debug!("Matched GFM inline math at pos {}", pos);
1433 emit_gfm_inline_math(builder, content);
1434 pos += len;
1435 text_start = pos;
1436 continue;
1437 }
1438
1439 if byte == b'$' && config.extensions.tex_math_dollars {
1441 if let Some((len, content)) = try_parse_display_math(&text[pos..]) {
1443 if pos > text_start {
1445 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1446 }
1447
1448 let dollar_count = text[pos..].chars().take_while(|&c| c == '$').count();
1449 log::debug!(
1450 "Matched display math at pos {}: {} dollars",
1451 pos,
1452 dollar_count
1453 );
1454
1455 let after_math = &text[pos + len..];
1457 let attr_len = if config.extensions.quarto_crossrefs {
1458 use crate::parser::utils::attributes::try_parse_trailing_attributes;
1459 if let Some((_attr_block, _)) = try_parse_trailing_attributes(after_math) {
1460 let trimmed_after = after_math.trim_start();
1461 if let Some(open_brace_pos) = trimmed_after.find('{') {
1462 let ws_before_brace = after_math.len() - trimmed_after.len();
1463 let attr_text_len = trimmed_after[open_brace_pos..]
1464 .find('}')
1465 .map(|close| close + 1)
1466 .unwrap_or(0);
1467 ws_before_brace + open_brace_pos + attr_text_len
1468 } else {
1469 0
1470 }
1471 } else {
1472 0
1473 }
1474 } else {
1475 0
1476 };
1477
1478 let total_len = len + attr_len;
1479 emit_display_math(builder, content, dollar_count);
1480
1481 if attr_len > 0 {
1483 use crate::parser::utils::attributes::{
1484 emit_attributes, try_parse_trailing_attributes,
1485 };
1486 let attr_text = &text[pos + len..pos + total_len];
1487 if let Some((attr_block, _text_before)) =
1488 try_parse_trailing_attributes(attr_text)
1489 {
1490 let trimmed_after = attr_text.trim_start();
1491 let ws_len = attr_text.len() - trimmed_after.len();
1492 if ws_len > 0 {
1493 builder.token(SyntaxKind::WHITESPACE.into(), &attr_text[..ws_len]);
1494 }
1495 emit_attributes(builder, &attr_block);
1496 }
1497 }
1498
1499 pos += total_len;
1500 text_start = pos;
1501 continue;
1502 }
1503
1504 if let Some((len, content)) = try_parse_inline_math(&text[pos..]) {
1506 if pos > text_start {
1508 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1509 }
1510
1511 log::debug!("Matched inline math at pos {}", pos);
1512 emit_inline_math(builder, content);
1513 pos += len;
1514 text_start = pos;
1515 continue;
1516 }
1517
1518 if pos > text_start {
1521 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1522 }
1523 builder.token(SyntaxKind::TEXT.into(), "$");
1524 pos += 1;
1525 text_start = pos;
1526 continue;
1527 }
1528
1529 if byte == b'<'
1531 && config.extensions.autolinks
1532 && let Some((len, url)) = try_parse_autolink(&text[pos..])
1533 {
1534 if pos > text_start {
1535 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1536 }
1537 log::debug!("Matched autolink at pos {}", pos);
1538 emit_autolink(builder, &text[pos..pos + len], url);
1539 pos += len;
1540 text_start = pos;
1541 continue;
1542 }
1543
1544 if config.extensions.autolink_bare_uris
1545 && let Some((len, url)) = try_parse_bare_uri(&text[pos..])
1546 {
1547 if pos > text_start {
1548 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1549 }
1550 log::debug!("Matched bare URI at pos {}", pos);
1551 emit_bare_uri_link(builder, url, config);
1552 pos += len;
1553 text_start = pos;
1554 continue;
1555 }
1556
1557 if byte == b'<'
1559 && config.extensions.native_spans
1560 && let Some((len, content, attributes)) = try_parse_native_span(&text[pos..])
1561 {
1562 if pos > text_start {
1563 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1564 }
1565 log::debug!("Matched native span at pos {}", pos);
1566 emit_native_span(builder, content, &attributes, config);
1567 pos += len;
1568 text_start = pos;
1569 continue;
1570 }
1571
1572 if byte == b'!' && pos + 1 < text.len() && text.as_bytes()[pos + 1] == b'[' {
1574 if let Some((len, alt_text, dest, attributes)) = try_parse_inline_image(&text[pos..]) {
1576 if pos > text_start {
1577 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1578 }
1579 log::debug!("Matched inline image at pos {}", pos);
1580 emit_inline_image(
1581 builder,
1582 &text[pos..pos + len],
1583 alt_text,
1584 dest,
1585 attributes,
1586 config,
1587 );
1588 pos += len;
1589 text_start = pos;
1590 continue;
1591 }
1592
1593 if config.extensions.reference_links {
1595 let allow_shortcut = config.extensions.shortcut_reference_links;
1596 if let Some((len, alt_text, reference, is_implicit)) =
1597 try_parse_reference_image(&text[pos..], allow_shortcut)
1598 {
1599 if pos > text_start {
1600 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1601 }
1602 log::debug!("Matched reference image at pos {}", pos);
1603 emit_reference_image(builder, alt_text, &reference, is_implicit, config);
1604 pos += len;
1605 text_start = pos;
1606 continue;
1607 }
1608 }
1609 }
1610
1611 if byte == b'[' {
1613 if config.extensions.footnotes
1615 && let Some((len, id)) = try_parse_footnote_reference(&text[pos..])
1616 {
1617 if pos > text_start {
1618 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1619 }
1620 log::debug!("Matched footnote reference at pos {}", pos);
1621 emit_footnote_reference(builder, &id);
1622 pos += len;
1623 text_start = pos;
1624 continue;
1625 }
1626
1627 if config.extensions.inline_links
1629 && let Some((len, link_text, dest, attributes)) =
1630 try_parse_inline_link(&text[pos..])
1631 {
1632 if pos > text_start {
1633 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1634 }
1635 log::debug!("Matched inline link at pos {}", pos);
1636 emit_inline_link(
1637 builder,
1638 &text[pos..pos + len],
1639 link_text,
1640 dest,
1641 attributes,
1642 config,
1643 );
1644 pos += len;
1645 text_start = pos;
1646 continue;
1647 }
1648
1649 if config.extensions.reference_links {
1651 let allow_shortcut = config.extensions.shortcut_reference_links;
1652 if let Some((len, link_text, reference, is_implicit)) =
1653 try_parse_reference_link(&text[pos..], allow_shortcut)
1654 {
1655 if pos > text_start {
1656 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1657 }
1658 log::debug!("Matched reference link at pos {}", pos);
1659 emit_reference_link(builder, link_text, &reference, is_implicit, config);
1660 pos += len;
1661 text_start = pos;
1662 continue;
1663 }
1664 }
1665
1666 if config.extensions.citations
1668 && let Some((len, content)) = try_parse_bracketed_citation(&text[pos..])
1669 {
1670 if pos > text_start {
1671 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1672 }
1673 log::debug!("Matched bracketed citation at pos {}", pos);
1674 emit_bracketed_citation(builder, content);
1675 pos += len;
1676 text_start = pos;
1677 continue;
1678 }
1679 }
1680
1681 if byte == b'['
1684 && config.extensions.bracketed_spans
1685 && let Some((len, text_content, attrs)) = try_parse_bracketed_span(&text[pos..])
1686 {
1687 if pos > text_start {
1688 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1689 }
1690 log::debug!("Matched bracketed span at pos {}", pos);
1691 emit_bracketed_span(builder, &text_content, &attrs, config);
1692 pos += len;
1693 text_start = pos;
1694 continue;
1695 }
1696
1697 if byte == b'@'
1699 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1700 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1701 {
1702 let is_crossref =
1703 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1704 if is_crossref || config.extensions.citations {
1705 if pos > text_start {
1706 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1707 }
1708 if is_crossref {
1709 log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
1710 super::citations::emit_crossref(builder, key, has_suppress);
1711 } else {
1712 log::debug!("Matched bare citation at pos {}: {}", pos, &key);
1713 emit_bare_citation(builder, key, has_suppress);
1714 }
1715 pos += len;
1716 text_start = pos;
1717 continue;
1718 }
1719 }
1720
1721 if byte == b'-'
1723 && pos + 1 < text.len()
1724 && text.as_bytes()[pos + 1] == b'@'
1725 && (config.extensions.citations || config.extensions.quarto_crossrefs)
1726 && let Some((len, key, has_suppress)) = try_parse_bare_citation(&text[pos..])
1727 {
1728 let is_crossref =
1729 config.extensions.quarto_crossrefs && super::citations::is_quarto_crossref_key(key);
1730 if is_crossref || config.extensions.citations {
1731 if pos > text_start {
1732 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1733 }
1734 if is_crossref {
1735 log::debug!("Matched Quarto crossref at pos {}: {}", pos, &key);
1736 super::citations::emit_crossref(builder, key, has_suppress);
1737 } else {
1738 log::debug!("Matched suppress-author citation at pos {}: {}", pos, &key);
1739 emit_bare_citation(builder, key, has_suppress);
1740 }
1741 pos += len;
1742 text_start = pos;
1743 continue;
1744 }
1745 }
1746
1747 if byte == b'*' || byte == b'_' {
1749 let bytes = text.as_bytes();
1751 let mut delim_count = 0;
1752 while pos + delim_count < bytes.len() && bytes[pos + delim_count] == byte {
1753 delim_count += 1;
1754 }
1755
1756 if pos > text_start {
1758 log::debug!(
1759 "Emitting TEXT before delimiter: {:?}",
1760 &text[text_start..pos]
1761 );
1762 builder.token(SyntaxKind::TEXT.into(), &text[text_start..pos]);
1763 text_start = pos; }
1765
1766 let emphasis_result = if nested_emphasis {
1769 try_parse_emphasis_nested(text, pos, end, config, builder)
1770 } else {
1771 try_parse_emphasis(text, pos, end, config, builder)
1772 };
1773
1774 if let Some((consumed, _)) = emphasis_result {
1775 log::debug!(
1777 "Parsed emphasis, consumed {} bytes from pos {}",
1778 consumed,
1779 pos
1780 );
1781 pos += consumed;
1782 text_start = pos;
1783 } else {
1784 log::debug!(
1787 "Failed to parse emphasis at pos {}, skipping {} delimiters as literal",
1788 pos,
1789 delim_count
1790 );
1791 pos += delim_count;
1792 }
1794 continue;
1795 }
1796
1797 if byte == b'\r' && pos + 1 < end && text.as_bytes()[pos + 1] == b'\n' {
1799 let text_before = &text[text_start..pos];
1800
1801 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1803 if trailing_spaces >= 2 {
1804 let text_content = &text_before[..text_before.len() - trailing_spaces];
1806 if !text_content.is_empty() {
1807 builder.token(SyntaxKind::TEXT.into(), text_content);
1808 }
1809 let spaces = " ".repeat(trailing_spaces);
1810 builder.token(
1811 SyntaxKind::HARD_LINE_BREAK.into(),
1812 &format!("{}\r\n", spaces),
1813 );
1814 pos += 2;
1815 text_start = pos;
1816 continue;
1817 }
1818
1819 if config.extensions.hard_line_breaks {
1821 if !text_before.is_empty() {
1822 builder.token(SyntaxKind::TEXT.into(), text_before);
1823 }
1824 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\r\n");
1825 pos += 2;
1826 text_start = pos;
1827 continue;
1828 }
1829
1830 if !text_before.is_empty() {
1832 builder.token(SyntaxKind::TEXT.into(), text_before);
1833 }
1834 builder.token(SyntaxKind::NEWLINE.into(), "\r\n");
1835 pos += 2;
1836 text_start = pos;
1837 continue;
1838 }
1839
1840 if byte == b'\n' {
1841 let text_before = &text[text_start..pos];
1842
1843 let trailing_spaces = text_before.chars().rev().take_while(|&c| c == ' ').count();
1845 if trailing_spaces >= 2 {
1846 let text_content = &text_before[..text_before.len() - trailing_spaces];
1848 if !text_content.is_empty() {
1849 builder.token(SyntaxKind::TEXT.into(), text_content);
1850 }
1851 let spaces = " ".repeat(trailing_spaces);
1852 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), &format!("{}\n", spaces));
1853 pos += 1;
1854 text_start = pos;
1855 continue;
1856 }
1857
1858 if config.extensions.hard_line_breaks {
1860 if !text_before.is_empty() {
1861 builder.token(SyntaxKind::TEXT.into(), text_before);
1862 }
1863 builder.token(SyntaxKind::HARD_LINE_BREAK.into(), "\n");
1864 pos += 1;
1865 text_start = pos;
1866 continue;
1867 }
1868
1869 if !text_before.is_empty() {
1871 builder.token(SyntaxKind::TEXT.into(), text_before);
1872 }
1873 builder.token(SyntaxKind::NEWLINE.into(), "\n");
1874 pos += 1;
1875 text_start = pos;
1876 continue;
1877 }
1878
1879 pos += 1;
1881 }
1882
1883 if pos > text_start && text_start < end {
1885 log::debug!("Emitting remaining TEXT: {:?}", &text[text_start..end]);
1886 builder.token(SyntaxKind::TEXT.into(), &text[text_start..end]);
1887 }
1888
1889 log::debug!("parse_inline_range complete: start={}, end={}", start, end);
1890}
1891
#[cfg(test)]
mod tests {
    use super::*;
    use crate::syntax::{SyntaxKind, SyntaxNode};
    use rowan::GreenNode;

    /// Finishes `builder` and wraps the resulting green tree in a root `SyntaxNode`.
    fn finish_tree(builder: GreenNodeBuilder<'_>) -> SyntaxNode {
        let green: GreenNode = builder.finish();
        SyntaxNode::new_root(green)
    }

    /// Returns `true` when `root` itself or any node below it has kind `kind`.
    fn contains_kind(root: &SyntaxNode, kind: SyntaxKind) -> bool {
        root.descendants().any(|n| n.kind() == kind)
    }

    /// Returns `true` when some node of kind `outer` has a node of kind `inner`
    /// somewhere in its own subtree (true containment, not mere document order).
    fn has_nested(root: &SyntaxNode, outer: SyntaxKind, inner: SyntaxKind) -> bool {
        root.descendants()
            .filter(|n| n.kind() == outer)
            .any(|n| n.descendants().any(|d| d.kind() == inner))
    }

    /// `*test*` parsed through the recursive entry point yields a lossless tree
    /// that contains an EMPHASIS node.
    #[test]
    fn test_recursive_simple_emphasis() {
        let text = "*test*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_text_recursive(&mut builder, text, &config);

        let node = finish_tree(builder);

        // Losslessness: the tree must reproduce the input exactly.
        assert_eq!(node.text().to_string(), text);
        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
    }

    /// `*foo **bar** baz*` inside a PARAGRAPH yields both EMPHASIS and STRONG.
    #[test]
    fn test_recursive_nested() {
        let text = "*foo **bar** baz*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        // A wrapping node gives the builder a single root regardless of what
        // the inline parser emits.
        builder.start_node(SyntaxKind::PARAGRAPH.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let node = finish_tree(builder);

        assert_eq!(node.text().to_string(), text);
        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// Calling `try_parse_emphasis` directly on `*test*` reports `(6, 1)` and
    /// leaves an EMPHASIS node as the tree root.
    #[test]
    fn test_parse_simple_emphasis() {
        let text = "*test*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        let result = try_parse_emphasis(text, 0, text.len(), &config, &mut builder);

        // First element is the number of bytes consumed; the second is
        // presumably the delimiter run length (1 for `*`) - confirm against
        // `try_parse_emphasis`.
        assert_eq!(result, Some((6, 1)));

        let node = finish_tree(builder);

        assert_eq!(node.kind(), SyntaxKind::EMPHASIS);
        assert_eq!(node.text().to_string(), text);
    }

    /// `parse_inline_range` over `*foo **bar** baz*` yields EMPHASIS and STRONG
    /// and stays lossless.
    #[test]
    fn test_parse_nested_emphasis_strong() {
        let text = "*foo **bar** baz*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        parse_inline_range(text, 0, text.len(), &config, &mut builder);

        let node = finish_tree(builder);

        assert_eq!(node.text().to_string(), text);
        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
    }

    /// `***foo* bar**`: the EMPHASIS around `foo` must sit inside the STRONG.
    #[test]
    fn test_triple_emphasis_star_then_double_star() {
        let text = "***foo* bar**";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let node = finish_tree(builder);

        assert_eq!(node.text().to_string(), text);

        // Debug dump of the tree, used only to make a failure readable.
        let structure = format!("{:#?}", node);

        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );

        // Check real containment: an EMPHASIS that merely follows a STRONG in
        // pre-order (e.g. as a later sibling) must not satisfy this test.
        assert!(
            has_nested(&node, SyntaxKind::STRONG, SyntaxKind::EMPHASIS),
            "EMPH should be inside STRONG, not before it. Current structure:\n{}",
            structure
        );
    }

    /// `***foo** bar*`: the STRONG around `foo` must sit inside the EMPHASIS.
    #[test]
    fn test_triple_emphasis_double_star_then_star() {
        let text = "***foo** bar*";
        let config = Config::default();
        let mut builder = GreenNodeBuilder::new();

        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_range(text, 0, text.len(), &config, &mut builder);
        builder.finish_node();

        let node = finish_tree(builder);

        assert_eq!(node.text().to_string(), text);

        // Debug dump of the tree, used only to make a failure readable.
        let structure = format!("{:#?}", node);

        assert!(
            contains_kind(&node, SyntaxKind::EMPHASIS),
            "Should have EMPHASIS node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::STRONG),
            "Should have STRONG node"
        );

        // Containment, not document order (see the sibling test above).
        assert!(
            has_nested(&node, SyntaxKind::EMPHASIS, SyntaxKind::STRONG),
            "STRONG should be inside EMPH. Current structure:\n{}",
            structure
        );
    }

    /// With `quarto_crossrefs` enabled, `{#eq-einstein}` after display math is
    /// emitted as an ATTRIBUTE node rather than left behind as plain TEXT.
    #[test]
    fn test_display_math_with_attributes() {
        let text = "$$ E = mc^2 $$ {#eq-einstein}";
        let mut config = Config::default();
        config.extensions.quarto_crossrefs = true;

        let mut builder = GreenNodeBuilder::new();
        builder.start_node(SyntaxKind::DOCUMENT.into());
        parse_inline_text_recursive(&mut builder, text, &config);
        builder.finish_node();

        let node = finish_tree(builder);

        assert_eq!(node.text().to_string(), text);

        assert!(
            contains_kind(&node, SyntaxKind::DISPLAY_MATH),
            "Should have DISPLAY_MATH node"
        );
        assert!(
            contains_kind(&node, SyntaxKind::ATTRIBUTE),
            "Should have ATTRIBUTE node for {{#eq-einstein}}"
        );

        // TEXT is emitted as a token, so the element following the math node
        // has to be inspected with `next_sibling_or_token`; `next_sibling`
        // only yields nodes and would make this check vacuous.
        let math_followed_by_text = node.descendants().any(|n| {
            n.kind() == SyntaxKind::DISPLAY_MATH
                && n.next_sibling_or_token()
                    .and_then(|element| element.into_token())
                    .is_some_and(|token| {
                        token.kind() == SyntaxKind::TEXT
                            && token.text().contains("{#eq-einstein}")
                    })
        });
        assert!(
            !math_followed_by_text,
            "Attributes should not be parsed as TEXT"
        );
    }
}
2153
/// `**bold with *italic***`: the `***` closer ends both spans, so the tree
/// root is expected to be STRONG with an EMPHASIS node as a direct child.
#[test]
fn test_two_with_nested_one_and_triple_closer() {
    use crate::{config::Config, syntax::SyntaxNode};

    let input = "**bold with *italic***";
    let mut builder = GreenNodeBuilder::new();

    parse_inline_range(input, 0, input.len(), &Config::default(), &mut builder);

    let root = SyntaxNode::new_root(builder.finish());

    // The tree must reproduce the input byte-for-byte.
    assert_eq!(root.text().to_string(), input, "Should be lossless");

    let root_kind = root.kind();
    assert_eq!(
        root_kind,
        SyntaxKind::STRONG,
        "Root should be STRONG, got: {:?}",
        root_kind
    );

    // Only direct children are considered here, not arbitrary descendants.
    let emphasis_child = root
        .children()
        .find(|child| child.kind() == SyntaxKind::EMPHASIS);
    assert!(
        emphasis_child.is_some(),
        "STRONG should contain EMPHASIS node"
    );
}
2189
/// `*foo *`: a space directly before the closing `*` is expected not to stop
/// the span from being parsed as emphasis.
#[test]
fn test_emphasis_with_trailing_space_before_closer() {
    use crate::{config::Config, syntax::SyntaxNode};

    let input = "*foo *";
    let mut builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &Config::default(), &mut builder);

    // All six bytes consumed; the second element is 1 for the single-`*` form.
    assert_eq!(
        outcome,
        Some((6, 1)),
        "Should parse as emphasis, result: {:?}",
        outcome
    );

    let root = SyntaxNode::new_root(builder.finish());

    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);
    // Lossless round trip, including the space before the closer.
    assert_eq!(root.text().to_string(), input);
}
2224
/// `***foo** bar **baz***`: expects exactly one EMPHASIS node whose direct
/// children include two STRONG nodes, with the input text preserved.
#[test]
fn test_triple_emphasis_all_strong_nested() {
    use crate::{config::Config, syntax::SyntaxNode};

    let input = "***foo** bar **baz***";
    let mut builder = GreenNodeBuilder::new();

    parse_inline_range(input, 0, input.len(), &Config::default(), &mut builder);

    let root = SyntaxNode::new_root(builder.finish());

    // Every EMPHASIS node anywhere in the tree (the root included).
    let emphasis_nodes: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::EMPHASIS)
        .collect();
    let emphasis_total = emphasis_nodes.len();
    assert_eq!(
        emphasis_total, 1,
        "Should have exactly one EMPHASIS node, found: {}",
        emphasis_total
    );

    // STRONG nodes that are direct children of that single EMPHASIS node.
    let strong_total = emphasis_nodes[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::STRONG)
        .count();
    assert_eq!(
        strong_total, 2,
        "EMPHASIS should contain two STRONG nodes, found: {}",
        strong_total
    );

    assert_eq!(root.text().to_string(), input);
}
2271
/// `***foo* bar *baz***`: expects exactly one STRONG node whose direct
/// children include two EMPHASIS nodes, with the input text preserved.
#[test]
fn test_triple_emphasis_all_emph_nested() {
    use crate::{config::Config, syntax::SyntaxNode};

    let input = "***foo* bar *baz***";
    let mut builder = GreenNodeBuilder::new();

    parse_inline_range(input, 0, input.len(), &Config::default(), &mut builder);

    let root = SyntaxNode::new_root(builder.finish());

    // Every STRONG node anywhere in the tree (the root included).
    let strong_nodes: Vec<SyntaxNode> = root
        .descendants()
        .filter(|candidate| candidate.kind() == SyntaxKind::STRONG)
        .collect();
    let strong_total = strong_nodes.len();
    assert_eq!(
        strong_total, 1,
        "Should have exactly one STRONG node, found: {}",
        strong_total
    );

    // EMPHASIS nodes that are direct children of that single STRONG node.
    let emphasis_total = strong_nodes[0]
        .children()
        .filter(|child| child.kind() == SyntaxKind::EMPHASIS)
        .count();
    assert_eq!(
        emphasis_total, 2,
        "STRONG should contain two EMPHASIS nodes, found: {}",
        emphasis_total
    );

    assert_eq!(root.text().to_string(), input);
}
2318
/// Emphasis whose content spans a line break (`*text on\nline two*`) is
/// expected to parse as one EMPHASIS node that keeps the newline.
#[test]
fn test_parse_emphasis_multiline() {
    use crate::{config::Config, syntax::SyntaxNode};

    let input = "*text on\nline two*";
    let mut builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &Config::default(), &mut builder);

    // The whole input is consumed by the single-`*` form.
    assert_eq!(
        outcome,
        Some((input.len(), 1)),
        "Emphasis should parse multiline content"
    );

    let root = SyntaxNode::new_root(builder.finish());

    assert_eq!(root.kind(), SyntaxKind::EMPHASIS);

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in emphasis content"
    );
}
2354
/// Strong emphasis whose content spans a line break
/// (`**strong on\nline two**`) is expected to parse as one STRONG node that
/// keeps the newline.
#[test]
fn test_parse_strong_multiline() {
    use crate::{config::Config, syntax::SyntaxNode};

    let input = "**strong on\nline two**";
    let mut builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &Config::default(), &mut builder);

    // The whole input is consumed by the `**` form.
    assert_eq!(
        outcome,
        Some((input.len(), 2)),
        "Strong emphasis should parse multiline content"
    );

    let root = SyntaxNode::new_root(builder.finish());

    assert_eq!(root.kind(), SyntaxKind::STRONG);

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in strong content"
    );
}
2389
/// Triple-delimiter emphasis across a line break
/// (`***both on\nline two***`) is expected to parse in full, produce a STRONG
/// node somewhere in the tree, and keep the newline.
#[test]
fn test_parse_triple_emphasis_multiline() {
    use crate::{config::Config, syntax::SyntaxNode};

    let input = "***both on\nline two***";
    let mut builder = GreenNodeBuilder::new();

    let outcome = try_parse_emphasis(input, 0, input.len(), &Config::default(), &mut builder);

    // The whole input is consumed by the `***` form.
    assert_eq!(
        outcome,
        Some((input.len(), 3)),
        "Triple emphasis should parse multiline content"
    );

    let root = SyntaxNode::new_root(builder.finish());

    // Only the presence of STRONG is checked; the root kind is not pinned.
    let strong_present = root
        .descendants()
        .any(|candidate| candidate.kind() == SyntaxKind::STRONG);
    assert!(strong_present, "Should have STRONG node");

    let rendered = root.text().to_string();
    assert_eq!(rendered, input);
    assert!(
        rendered.contains('\n'),
        "Should preserve newline in triple emphasis content"
    );
}