1use crate::ast::node::TextSpan;
47use crate::ast::value::PropertyValue;
48
/// Font-weight literal written to a span's `font_weight` when the bold mark is active.
const BOLD_WEIGHT: &str = "700";

/// Colour literal written to a span's `highlight` when the highlight mark is active.
const HIGHLIGHT_DEFAULT: &str = "#fff59d";
59
/// The set of inline marks active for a piece of text.
///
/// Each flag corresponds to one styling field that [`MarkSet::span`] fills in
/// on the produced `TextSpan`; the default value has every mark switched off.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct MarkSet {
    /// Emit `font_weight` set to the bold weight literal.
    bold: bool,
    /// Emit `italic: Some(true)`.
    italic: bool,
    /// Emit `underline: Some(true)`.
    underline: bool,
    /// Emit `strikethrough: Some(true)`.
    strikethrough: bool,
    /// Emit `highlight` set to the default highlight colour literal.
    highlight: bool,
    /// Emit `code: Some(true)`.
    code: bool,
}
71
72impl MarkSet {
73 fn span(&self, text: String, link: Option<String>) -> TextSpan {
76 TextSpan {
77 text,
78 fill: None,
79 font_weight: if self.bold {
80 Some(PropertyValue::Literal(BOLD_WEIGHT.to_owned()))
81 } else {
82 None
83 },
84 italic: if self.italic { Some(true) } else { None },
85 underline: if self.underline { Some(true) } else { None },
86 strikethrough: if self.strikethrough { Some(true) } else { None },
87 vertical_align: None,
88 footnote_ref: None,
89 data_ref: None,
90 data_format: None,
91 highlight: if self.highlight {
92 Some(PropertyValue::Literal(HIGHLIGHT_DEFAULT.to_owned()))
93 } else {
94 None
95 },
96 code: if self.code { Some(true) } else { None },
97 link,
98 }
99 }
100}
101
/// The kind of formatting a delimiter marker toggles.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Delim {
    /// `**` or `__`.
    Bold,
    /// A single `*` or `_`.
    Italic,
    /// `~~`.
    Strikethrough,
    /// `++`.
    Underline,
    /// `==`.
    Highlight,
}
112
/// One lexical unit produced by `lex` and consumed by `build_spans`.
#[derive(Debug, Clone)]
enum Token {
    /// Literal text, with backslash escapes already resolved by the lexer.
    Text(String),
    /// Raw contents of a backtick code span (the backticks themselves are not included).
    Code(String),
    /// Spans produced by parsing a link label, each already carrying the link URL.
    Link(Vec<TextSpan>),
    /// A formatting delimiter such as `**` or `_`.
    Marker {
        /// Which formatting this delimiter toggles.
        delim: Delim,
        /// The exact source characters, used when the marker falls back to literal text.
        literal: String,
        /// Whether the marker may open a span (see `flanking`).
        can_open: bool,
        /// Whether the marker may close a span (see `flanking`).
        can_close: bool,
        /// Pairing state; starts as `Unresolved` and is updated by `resolve_markers`.
        role: MarkerRole,
    },
}
138
/// Pairing state of a [`Token::Marker`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MarkerRole {
    /// Not paired; the lexer emits every marker in this state, and
    /// `resolve_markers` turns markers still in it into literal text.
    Unresolved,
    /// Paired as the opening delimiter of a span.
    Open,
    /// Paired as the closing delimiter of a span.
    Close,
}
150
151pub fn parse_inline_markdown(input: &str) -> Vec<TextSpan> {
156 let chars: Vec<char> = input.chars().collect();
157 let link: Option<String> = None;
158 let mut out: Vec<TextSpan> = Vec::new();
159 parse_run(&chars, link, &mut out);
160 out
161}
162
163fn parse_run(chars: &[char], link: Option<String>, out: &mut Vec<TextSpan>) {
170 let mut tokens = lex(chars);
171 resolve_markers(&mut tokens);
172 build_spans(&tokens, link, out);
173}
174
175fn lex(chars: &[char]) -> Vec<Token> {
180 let mut tokens: Vec<Token> = Vec::new();
181 let mut buf = String::new();
182 let mut i: usize = 0;
183
184 while i < chars.len() {
185 let Some(&c) = chars.get(i) else { break };
186
187 if c == '\\' {
189 match chars.get(i + 1) {
190 Some(&next) if is_escapable(next) => {
191 buf.push(next);
192 i += 2;
193 continue;
194 }
195 _ => {
196 buf.push('\\');
197 i += 1;
198 continue;
199 }
200 }
201 }
202
203 if c == '`' {
205 if let Some(end) = find_code_close(chars, i + 1) {
206 flush_text(&mut buf, &mut tokens);
207 let raw: String = chars.get(i + 1..end).unwrap_or(&[]).iter().collect();
208 tokens.push(Token::Code(raw));
209 i = end + 1;
210 continue;
211 }
212 buf.push('`');
213 i += 1;
214 continue;
215 }
216
217 if c == '[' {
219 if let Some((label, url, next)) = try_parse_link(chars, i) {
220 flush_text(&mut buf, &mut tokens);
221 let label_chars: Vec<char> = label.chars().collect();
222 let mut label_spans: Vec<TextSpan> = Vec::new();
223 parse_run(&label_chars, Some(url), &mut label_spans);
224 tokens.push(Token::Link(label_spans));
225 i = next;
226 continue;
227 }
228 buf.push('[');
229 i += 1;
230 continue;
231 }
232
233 if let Some((delim, lit)) = match_two_char(chars, i) {
235 flush_text(&mut buf, &mut tokens);
236 let (can_open, can_close) = flanking(chars, i, 2);
237 tokens.push(Token::Marker {
238 delim,
239 literal: lit,
240 can_open,
241 can_close,
242 role: MarkerRole::Unresolved,
243 });
244 i += 2;
245 continue;
246 }
247
248 if c == '*' || c == '_' {
250 flush_text(&mut buf, &mut tokens);
251 let (can_open, can_close) = flanking(chars, i, 1);
252 tokens.push(Token::Marker {
253 delim: Delim::Italic,
254 literal: c.to_string(),
255 can_open,
256 can_close,
257 role: MarkerRole::Unresolved,
258 });
259 i += 1;
260 continue;
261 }
262
263 buf.push(c);
265 i += 1;
266 }
267 flush_text(&mut buf, &mut tokens);
268 tokens
269}
270
271fn flush_text(buf: &mut String, tokens: &mut Vec<Token>) {
273 if !buf.is_empty() {
274 tokens.push(Token::Text(std::mem::take(buf)));
275 }
276}
277
/// Decides whether the delimiter run starting at `chars[i]` and spanning
/// `width` characters may open and/or close a formatting span.
///
/// Returns `(can_open, can_close)`:
/// - `can_open` requires the run to be immediately followed by a
///   non-whitespace character;
/// - `can_close` requires the run to be immediately preceded by a
///   non-whitespace character.
///
/// The start and end of `chars` count as whitespace.
///
/// Underscore runs are additionally barred from acting inside a word: a `_`
/// run cannot open when the preceding character is alphanumeric and cannot
/// close when the following character is alphanumeric. Without this,
/// identifiers such as `snake_case_name` would lose their underscores and
/// italicise `case`. Other delimiter characters keep the plain rule.
fn flanking(chars: &[char], i: usize, width: usize) -> (bool, bool) {
    let before = i.checked_sub(1).and_then(|prev| chars.get(prev)).copied();
    let after = chars.get(i + width).copied();

    let followed_by_nonspace = matches!(after, Some(ch) if !ch.is_whitespace());
    let preceded_by_nonspace = matches!(before, Some(ch) if !ch.is_whitespace());

    if chars.get(i) != Some(&'_') {
        return (followed_by_nonspace, preceded_by_nonspace);
    }

    // Intraword restriction for underscores only.
    let preceded_by_word = matches!(before, Some(ch) if ch.is_alphanumeric());
    let followed_by_word = matches!(after, Some(ch) if ch.is_alphanumeric());
    (
        followed_by_nonspace && !preceded_by_word,
        preceded_by_nonspace && !followed_by_word,
    )
}
293
294fn resolve_markers(tokens: &mut [Token]) {
302 let mut open_stack: Vec<usize> = Vec::new();
305
306 for idx in 0..tokens.len() {
307 let (delim, can_open, can_close) = match tokens.get(idx) {
308 Some(Token::Marker {
309 delim,
310 can_open,
311 can_close,
312 ..
313 }) => (*delim, *can_open, *can_close),
314 _ => continue,
315 };
316
317 if can_close
319 && let Some(stack_pos) = open_stack.iter().rposition(
320 |&oi| matches!(tokens.get(oi), Some(Token::Marker { delim: d, .. }) if *d == delim),
321 )
322 && let Some(&open_idx) = open_stack.get(stack_pos)
323 {
324 open_stack.truncate(stack_pos);
328 set_role(tokens, open_idx, MarkerRole::Open);
329 set_role(tokens, idx, MarkerRole::Close);
330 continue;
331 }
332
333 if can_open {
335 open_stack.push(idx);
336 }
337 }
340
341 for idx in 0..tokens.len() {
343 if let Some(Token::Marker {
344 literal,
345 role: MarkerRole::Unresolved,
346 ..
347 }) = tokens.get(idx)
348 {
349 let lit = literal.clone();
350 if let Some(slot) = tokens.get_mut(idx) {
351 *slot = Token::Text(lit);
352 }
353 }
354 }
355}
356
357fn set_role(tokens: &mut [Token], idx: usize, new_role: MarkerRole) {
360 if let Some(Token::Marker { role, .. }) = tokens.get_mut(idx) {
361 *role = new_role;
362 }
363}
364
365fn build_spans(tokens: &[Token], link: Option<String>, out: &mut Vec<TextSpan>) {
369 let mut sink = SpanSink::new(link);
370 let mut stack: Vec<Delim> = Vec::new();
371
372 for tok in tokens {
373 match tok {
374 Token::Text(t) => {
375 for ch in t.chars() {
376 sink.push_char(&stack, ch);
377 }
378 }
379 Token::Code(raw) => {
380 let mut marks = sink.marks_from_stack(&stack);
381 marks.code = true;
382 sink.push_span(marks.span(raw.clone(), sink.link.clone()));
383 }
384 Token::Link(spans) => {
385 for s in spans {
386 sink.push_span(s.clone());
387 }
388 }
389 Token::Marker { delim, role, .. } => match role {
390 MarkerRole::Open => stack.push(*delim),
394 MarkerRole::Close => {
395 stack.pop();
396 }
397 MarkerRole::Unresolved => {}
400 },
401 }
402 }
403
404 sink.finish(out);
405}
406
/// Accumulates output spans, coalescing adjacent text that shares styling.
struct SpanSink {
    /// Link target attached to every span created from pending text.
    link: Option<String>,
    /// Spans completed so far.
    spans: Vec<TextSpan>,
    /// Marks of the text run currently being accumulated.
    pending_marks: MarkSet,
    /// Characters of the text run currently being accumulated.
    pending_text: String,
    /// Whether a text run is currently being accumulated.
    have_pending: bool,
}
417
418impl SpanSink {
419 fn new(link: Option<String>) -> Self {
420 SpanSink {
421 link,
422 spans: Vec::new(),
423 pending_marks: MarkSet::default(),
424 pending_text: String::new(),
425 have_pending: false,
426 }
427 }
428
429 fn marks_from_stack(&self, stack: &[Delim]) -> MarkSet {
431 let mut m = MarkSet::default();
432 for delim in stack {
433 match delim {
434 Delim::Bold => m.bold = true,
435 Delim::Italic => m.italic = true,
436 Delim::Strikethrough => m.strikethrough = true,
437 Delim::Underline => m.underline = true,
438 Delim::Highlight => m.highlight = true,
439 }
440 }
441 m
442 }
443
444 fn push_char(&mut self, stack: &[Delim], c: char) {
446 let marks = self.marks_from_stack(stack);
447 if self.have_pending && marks == self.pending_marks {
448 self.pending_text.push(c);
449 } else {
450 self.flush_pending();
451 self.pending_marks = marks;
452 self.pending_text.push(c);
453 self.have_pending = true;
454 }
455 }
456
457 fn push_span(&mut self, span: TextSpan) {
461 if span.text.is_empty() {
462 return;
463 }
464 self.flush_pending();
465 if let Some(last) = self.spans.last_mut()
466 && spans_mergeable(last, &span)
467 {
468 last.text.push_str(&span.text);
469 return;
470 }
471 self.spans.push(span);
472 }
473
474 fn flush_pending(&mut self) {
476 if !self.have_pending {
477 return;
478 }
479 let text = std::mem::take(&mut self.pending_text);
480 let marks = std::mem::take(&mut self.pending_marks);
481 self.have_pending = false;
482 if text.is_empty() {
483 return;
484 }
485 let span = marks.span(text, self.link.clone());
486 if let Some(last) = self.spans.last_mut()
487 && spans_mergeable(last, &span)
488 {
489 last.text.push_str(&span.text);
490 return;
491 }
492 self.spans.push(span);
493 }
494
495 fn finish(mut self, out: &mut Vec<TextSpan>) {
497 self.flush_pending();
498 out.append(&mut self.spans);
499 }
500}
501
502fn spans_mergeable(a: &TextSpan, b: &TextSpan) -> bool {
504 a.fill == b.fill
505 && a.font_weight == b.font_weight
506 && a.italic == b.italic
507 && a.underline == b.underline
508 && a.strikethrough == b.strikethrough
509 && a.vertical_align == b.vertical_align
510 && a.footnote_ref == b.footnote_ref
511 && a.data_ref == b.data_ref
512 && a.data_format == b.data_format
513 && a.highlight == b.highlight
514 && a.code == b.code
515 && a.link == b.link
516}
517
/// Returns `true` when a backslash before `c` should produce `c` literally.
fn is_escapable(c: char) -> bool {
    const ESCAPABLE: [char; 9] = ['*', '_', '~', '=', '+', '`', '[', ']', '\\'];
    ESCAPABLE.contains(&c)
}
522
523fn match_two_char(chars: &[char], i: usize) -> Option<(Delim, String)> {
526 let a = *chars.get(i)?;
527 let b = *chars.get(i + 1)?;
528 let delim = match (a, b) {
529 ('*', '*') | ('_', '_') => Delim::Bold,
530 ('~', '~') => Delim::Strikethrough,
531 ('+', '+') => Delim::Underline,
532 ('=', '=') => Delim::Highlight,
533 _ => return None,
534 };
535 Some((delim, format!("{a}{b}")))
536}
537
/// Returns the index of the first backtick at or after `start`, if any.
///
/// A `start` at or beyond the end of `chars` yields `None`.
fn find_code_close(chars: &[char], start: usize) -> Option<usize> {
    let tail = chars.get(start..)?;
    tail.iter()
        .position(|&ch| ch == '`')
        .map(|offset| start + offset)
}
551
/// Tries to read a `[label](url)` link whose `[` sits at `chars[open]`.
///
/// On success returns `(label, url, next)` where `next` is the index just
/// past the closing `)`. The label keeps its backslash escapes untouched (a
/// backslash always carries the following character with it, so `\]` does
/// not end the label). The URL is everything up to the first `)`, verbatim.
///
/// Returns `None` when `chars[open]` is not `[`, the label is never closed,
/// the `]` is not immediately followed by `(`, or the URL is never closed.
fn try_parse_link(chars: &[char], open: usize) -> Option<(String, String, usize)> {
    if chars.get(open).copied() != Some('[') {
        return None;
    }

    // Label: scan to the first unescaped `]`; running off the end means no link.
    let mut label = String::new();
    let mut cursor = open + 1;
    let label_end = loop {
        match *chars.get(cursor)? {
            ']' => break cursor,
            '\\' => {
                label.push('\\');
                cursor += 1;
                if let Some(&escaped) = chars.get(cursor) {
                    label.push(escaped);
                    cursor += 1;
                }
            }
            other => {
                label.push(other);
                cursor += 1;
            }
        }
    };

    if chars.get(label_end + 1).copied() != Some('(') {
        return None;
    }

    // URL: everything up to the first `)`.
    let url_start = label_end + 2;
    let rest = chars.get(url_start..)?;
    let url_len = rest.iter().position(|&ch| ch == ')')?;
    let url: String = rest.iter().take(url_len).collect();

    Some((label, url, url_start + url_len + 1))
}
620
#[cfg(test)]
mod tests {
    use super::*;

    /// Concatenates the text of all spans, ignoring styling.
    fn texts(spans: &[TextSpan]) -> String {
        spans.iter().map(|s| s.text.as_str()).collect()
    }

    /// The `font_weight` value expected on bold spans.
    fn bold() -> Option<PropertyValue> {
        Some(PropertyValue::Literal(BOLD_WEIGHT.to_owned()))
    }
    /// The `highlight` value expected on highlighted spans.
    fn hl() -> Option<PropertyValue> {
        Some(PropertyValue::Literal(HIGHLIGHT_DEFAULT.to_owned()))
    }

    // Empty input produces no spans at all.
    #[test]
    fn empty_input_yields_no_spans() {
        assert!(parse_inline_markdown("").is_empty());
    }

    // Unmarked text comes back as one unstyled span.
    #[test]
    fn plain_text_single_span() {
        let s = parse_inline_markdown("hello world");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "hello world");
        assert_eq!(s[0].font_weight, None);
        assert_eq!(s[0].italic, None);
    }

    // Both `**` and `__` produce bold.
    #[test]
    fn bold_star_and_underscore() {
        for src in ["**bold**", "__bold__"] {
            let s = parse_inline_markdown(src);
            assert_eq!(s.len(), 1, "src={src}");
            assert_eq!(s[0].text, "bold");
            assert_eq!(s[0].font_weight, bold());
        }
    }

    // Both `*` and `_` produce italic without bold.
    #[test]
    fn italic_star_and_underscore() {
        for src in ["*it*", "_it_"] {
            let s = parse_inline_markdown(src);
            assert_eq!(s.len(), 1, "src={src}");
            assert_eq!(s[0].text, "it");
            assert_eq!(s[0].italic, Some(true));
            assert_eq!(s[0].font_weight, None);
        }
    }

    // `~~` produces strikethrough.
    #[test]
    fn strikethrough() {
        let s = parse_inline_markdown("~~gone~~");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "gone");
        assert_eq!(s[0].strikethrough, Some(true));
    }

    // `++` produces underline.
    #[test]
    fn underline() {
        let s = parse_inline_markdown("++under++");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "under");
        assert_eq!(s[0].underline, Some(true));
    }

    // `==` produces a highlight with the default colour.
    #[test]
    fn highlight_uses_default_color() {
        let s = parse_inline_markdown("==mark==");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "mark");
        assert_eq!(s[0].highlight, hl());
    }

    // Backticks produce a code span without the backticks.
    #[test]
    fn code_span_basic() {
        let s = parse_inline_markdown("`fn main()`");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "fn main()");
        assert_eq!(s[0].code, Some(true));
    }

    // Delimiters and backslashes inside a code span are kept verbatim.
    #[test]
    fn code_span_is_verbatim_no_inner_parsing() {
        let s = parse_inline_markdown("`**not bold** \\n _x_`");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "**not bold** \\n _x_");
        assert_eq!(s[0].code, Some(true));
        assert_eq!(s[0].font_weight, None);
        assert_eq!(s[0].italic, None);
    }

    // Fully nested marks collapse into a single span carrying both.
    #[test]
    fn nested_bold_italic_single_span() {
        let s = parse_inline_markdown("**_bold italic_**");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "bold italic");
        assert_eq!(s[0].font_weight, bold());
        assert_eq!(s[0].italic, Some(true));
    }

    // Highlight wrapping bold yields one span with both marks.
    #[test]
    fn nested_highlight_bold() {
        let s = parse_inline_markdown("==**important**==");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "important");
        assert_eq!(s[0].highlight, hl());
        assert_eq!(s[0].font_weight, bold());
    }

    // Marks that cover only part of the text split it at each styling change.
    #[test]
    fn partial_nesting_splits_spans() {
        let s = parse_inline_markdown("a **b _c_ d** e");
        assert_eq!(texts(&s), "a b c d e");
        let joined: Vec<(&str, bool, bool)> = s
            .iter()
            .map(|x| {
                (
                    x.text.as_str(),
                    x.font_weight.is_some(),
                    x.italic == Some(true),
                )
            })
            .collect();
        assert_eq!(
            joined,
            vec![
                ("a ", false, false),
                ("b ", true, false),
                ("c", true, true),
                (" d", true, false),
                (" e", false, false),
            ]
        );
    }

    // Every escapable character comes out literally and applies no styling.
    #[test]
    fn escapes_emit_literals() {
        let s = parse_inline_markdown(r##"\*not italic\* \_ \~ \= \+ \` \[ \] \\"##);
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, r##"*not italic* _ ~ = + ` [ ] \"##);
        assert_eq!(s[0].italic, None);
        assert_eq!(s[0].font_weight, None);
    }

    // A backslash before a non-escapable character is kept.
    #[test]
    fn backslash_before_normal_char_is_literal() {
        let s = parse_inline_markdown(r##"a\b"##);
        assert_eq!(texts(&s), r##"a\b"##);
    }

    // A simple link yields one span carrying the URL.
    #[test]
    fn link_plain_label() {
        let s = parse_inline_markdown("[Zenith](https://example.com)");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "Zenith");
        assert_eq!(s[0].link.as_deref(), Some("https://example.com"));
    }

    // Marks inside a link label are parsed and every resulting span keeps the URL.
    #[test]
    fn link_label_with_inner_marks() {
        let s = parse_inline_markdown("[**bold** link](u)");
        assert_eq!(texts(&s), "bold link");
        for sp in &s {
            assert_eq!(sp.link.as_deref(), Some("u"));
        }
        assert_eq!(s[0].text, "bold");
        assert_eq!(s[0].font_weight, bold());
        assert_eq!(s[1].text, " link");
        assert_eq!(s[1].font_weight, None);
    }

    // Delimiters inside the URL are not interpreted.
    #[test]
    fn link_url_is_verbatim() {
        let s = parse_inline_markdown("[x](http://a/**b**)");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].link.as_deref(), Some("http://a/**b**"));
    }

    // Brackets with no following parenthesis stay literal.
    #[test]
    fn bracket_without_link_is_literal() {
        let s = parse_inline_markdown("[just text]");
        assert_eq!(texts(&s), "[just text]");
        assert!(s.iter().all(|sp| sp.link.is_none()));
    }

    // A space between `]` and `(` prevents link recognition.
    #[test]
    fn bracket_with_label_but_no_paren_is_literal() {
        let s = parse_inline_markdown("[label] (noturl)");
        assert_eq!(texts(&s), "[label] (noturl)");
        assert!(s.iter().all(|sp| sp.link.is_none()));
    }

    // An opener with no closer is emitted as literal text.
    #[test]
    fn dangling_bold_is_literal() {
        let s = parse_inline_markdown("**oops");
        assert_eq!(texts(&s), "**oops");
        assert!(s.iter().all(|sp| sp.font_weight.is_none()));
    }

    // A star surrounded by spaces is literal.
    #[test]
    fn lone_star_is_literal() {
        let s = parse_inline_markdown("a * b");
        assert_eq!(texts(&s), "a * b");
        assert!(s.iter().all(|sp| sp.italic.is_none()));
    }

    // A closer with no opener is literal.
    #[test]
    fn unmatched_closing_underscore_is_literal() {
        let s = parse_inline_markdown("end_");
        assert_eq!(texts(&s), "end_");
        assert!(s.iter().all(|sp| sp.italic.is_none()));
    }

    // A two-character delimiter surrounded by spaces is literal and stays in place.
    #[test]
    fn whitespace_flanked_double_delim_is_literal_in_place() {
        let s = parse_inline_markdown("a ** b");
        assert_eq!(texts(&s), "a ** b");
        assert!(s.iter().all(|sp| sp.font_weight.is_none()));
    }

    // An unresolved opener is emitted exactly where it appeared in the source.
    #[test]
    fn dangling_opener_emits_literal_in_original_position() {
        let s = parse_inline_markdown("x *unclosed");
        assert_eq!(texts(&s), "x *unclosed");
        assert!(s.iter().all(|sp| sp.italic.is_none()));
        let joined = texts(&s);
        let star = joined.find('*').expect("literal star present");
        assert!(joined[star + 1..].starts_with("unclosed"));
    }

    // A delimiter followed by whitespace cannot open.
    #[test]
    fn opener_needs_following_nonspace() {
        let s = parse_inline_markdown("* a*");
        assert_eq!(texts(&s), "* a*");
        assert!(s.iter().all(|sp| sp.italic.is_none()));
    }

    // A delimiter preceded by whitespace cannot close.
    #[test]
    fn closer_needs_preceding_nonspace() {
        let s = parse_inline_markdown("*a *");
        assert_eq!(texts(&s), "*a *");
        assert!(s.iter().all(|sp| sp.italic.is_none()));
    }

    // Nesting the same delimiter keeps the mark on all enclosed text.
    #[test]
    fn same_delim_nested_pairs_keep_marks() {
        let s = parse_inline_markdown("**a **b** c**");
        assert_eq!(texts(&s), "a b c");
        assert!(s.iter().all(|sp| sp.font_weight == bold()));
    }

    // Only the delimiter characters are consumed; all other text survives.
    #[test]
    fn no_character_loss_consumes_only_delimiters() {
        let s = parse_inline_markdown("**a** _b_ ~~c~~ ++d++ ==e==");
        assert_eq!(texts(&s), "a b c d e");
    }

    // An escape consumes only the backslash.
    #[test]
    fn no_character_loss_with_escapes() {
        let s = parse_inline_markdown(r##"x \* y"##);
        assert_eq!(texts(&s), "x * y");
    }

    // Parsing the same input twice gives equal results.
    #[test]
    fn determinism_parse_twice_equal() {
        let src = "a **b _c_** ~~d~~ `e` [f](g) ==h== \\* ++i++";
        let a = parse_inline_markdown(src);
        let b = parse_inline_markdown(src);
        assert_eq!(a, b);
    }

    // All five delimiter kinds can be nested around a single character.
    #[test]
    fn combined_all_marks() {
        let s = parse_inline_markdown("==++~~**_x_**~~++==");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "x");
        assert_eq!(s[0].highlight, hl());
        assert_eq!(s[0].underline, Some(true));
        assert_eq!(s[0].strikethrough, Some(true));
        assert_eq!(s[0].font_weight, bold());
        assert_eq!(s[0].italic, Some(true));
    }

    // A code span in the middle of text splits it into three spans.
    #[test]
    fn code_inside_text_run() {
        let s = parse_inline_markdown("use `cargo build` now");
        assert_eq!(texts(&s), "use cargo build now");
        assert_eq!(s[0].text, "use ");
        assert_eq!(s[0].code, None);
        assert_eq!(s[1].text, "cargo build");
        assert_eq!(s[1].code, Some(true));
        assert_eq!(s[2].text, " now");
    }

    // A backtick with no closing partner is literal.
    #[test]
    fn unclosed_code_is_literal_backtick() {
        let s = parse_inline_markdown("a `b c");
        assert_eq!(texts(&s), "a `b c");
        assert!(s.iter().all(|sp| sp.code.is_none()));
    }

    // Two adjacent spans with identical styling are merged into one.
    #[test]
    fn adjacent_same_marks_coalesce() {
        let s = parse_inline_markdown("**a****b**");
        assert_eq!(s.len(), 1);
        assert_eq!(s[0].text, "ab");
        assert_eq!(s[0].font_weight, bold());
    }
}