/// Styling attributes carried by a `<span ...>` opening tag.
///
/// Each field stores the attribute value exactly as it was written between
/// the quotes in the markup; nothing is validated or converted here. `None`
/// means the attribute was not present (or was not parsed) on the tag.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SpanAttributes {
    /// Value of the `font_family` attribute.
    pub font_family: Option<String>,
    /// Value of the `size` attribute.
    pub size: Option<String>,
    /// Value of the `foreground` attribute (the `color` attribute is an alias
    /// that fills the same field).
    pub foreground: Option<String>,
    /// Value of the `background` attribute.
    pub background: Option<String>,
    /// Value of the `weight` attribute.
    pub weight: Option<String>,
    /// Value of the `style` attribute.
    pub style: Option<String>,
}
48
/// One node of parsed inline markup.
///
/// Every variant except [`TextSpan::Plain`] is a container whose children are
/// themselves spans, so nested markup forms a tree.
#[derive(Debug, Clone, PartialEq)]
pub enum TextSpan {
    /// A run of literal text with no markup of its own.
    Plain(String),
    /// Content of a `<b>` / `<bold>` tag.
    Bold(Vec<TextSpan>),
    /// Content of an `<i>` / `<italic>` tag.
    Italic(Vec<TextSpan>),
    /// Content of a `<highlight>` tag.
    Highlight(Vec<TextSpan>),
    /// Content of a `<comment>` tag.
    Comment(Vec<TextSpan>),
    /// Content of a `<span ...>` tag together with the attributes parsed from
    /// its opening tag.
    Span(SpanAttributes, Vec<TextSpan>),
}
69
70impl TextSpan {
71 #[must_use]
75 pub fn plain_text(&self) -> String {
76 match self {
77 TextSpan::Plain(s) => s.clone(),
78 TextSpan::Bold(children)
79 | TextSpan::Italic(children)
80 | TextSpan::Highlight(children)
81 | TextSpan::Comment(children)
82 | TextSpan::Span(_, children) => children.iter().map(TextSpan::plain_text).collect(),
83 }
84 }
85}
86
87#[must_use]
92pub fn has_inline_markup(text: &str) -> bool {
93 let mut remaining = text;
94 while let Some(pos) = remaining.find('<') {
95 let after = &remaining[pos + 1..];
96 if tag_name_at_start(after).is_some() {
97 return true;
98 }
99 if span_tag_at_start(after).is_some() {
101 return true;
102 }
103 if let Some(rest) = after.strip_prefix('/') {
105 if tag_name_at_start(rest).is_some() {
106 return true;
107 }
108 if rest.len() >= 5 && rest[..5].eq_ignore_ascii_case("span>") {
110 return true;
111 }
112 }
113 remaining = &remaining[pos + 1..];
114 }
115 false
116}
117
118#[must_use]
146pub fn parse_inline_markup(text: &str) -> Vec<TextSpan> {
147 if !has_inline_markup(text) {
148 if text.is_empty() {
149 return Vec::new();
150 }
151 return vec![TextSpan::Plain(text.to_string())];
152 }
153
154 let mut parser = InlineMarkupParser::new(text);
155 let spans = parser.parse_spans(&[]);
156
157 if spans.is_empty() && !text.is_empty() {
159 return vec![TextSpan::Plain(text.to_string())];
160 }
161
162 normalize_spans(spans)
163}
164
165#[must_use]
170pub fn spans_to_plain_text(spans: &[TextSpan]) -> String {
171 spans.iter().map(TextSpan::plain_text).collect()
172}
173
/// Kind of markup tag recognised by the parser.
///
/// Used both for opening tags and for the list of tags that are currently
/// open (the "expected closers") while parsing nested content.
#[derive(Debug, Clone, PartialEq, Eq)]
enum TagType {
    /// `<b>` / `<bold>`.
    Bold,
    /// `<i>` / `<italic>`.
    Italic,
    /// `<highlight>`.
    Highlight,
    /// `<comment>`.
    Comment,
    /// `<span ...>` with the attributes parsed from the opening tag. A closing
    /// `</span>` is represented with default (empty) attributes.
    Span(SpanAttributes),
}
187
188fn tag_name_at_start(s: &str) -> Option<(TagType, usize)> {
196 let tags: &[(&str, TagType)] = &[
198 ("highlight>", TagType::Highlight),
199 ("comment>", TagType::Comment),
200 ("italic>", TagType::Italic),
201 ("bold>", TagType::Bold),
202 ("b>", TagType::Bold),
203 ("i>", TagType::Italic),
204 ];
205
206 for (name, tag_type) in tags {
207 if s.len() >= name.len() {
208 let candidate = &s[..name.len()];
210 if candidate.eq_ignore_ascii_case(name) {
211 return Some((tag_type.clone(), name.len()));
212 }
213 }
214 }
215
216 None
217}
218
219fn span_tag_at_start(s: &str) -> Option<(SpanAttributes, usize)> {
224 if s.len() < 4 {
226 return None;
227 }
228 if !s[..4].eq_ignore_ascii_case("span") {
229 return None;
230 }
231
232 let after_name = &s[4..];
233
234 if after_name.starts_with('>') {
236 return Some((SpanAttributes::default(), 5)); }
238
239 if !after_name.starts_with(|c: char| c.is_ascii_whitespace()) {
241 return None;
242 }
243
244 let closing = s.find('>')?;
246
247 let attr_str = &s[4..closing].trim();
249 let attrs = parse_span_attributes(attr_str);
250
251 Some((attrs, closing + 1))
252}
253
254fn parse_span_attributes(s: &str) -> SpanAttributes {
256 let mut attrs = SpanAttributes::default();
257 let mut remaining = s.trim();
258
259 while !remaining.is_empty() {
260 remaining = remaining.trim_start();
262 if remaining.is_empty() {
263 break;
264 }
265
266 let eq_pos = match remaining.find('=') {
268 Some(pos) => pos,
269 None => break,
270 };
271
272 let key = remaining[..eq_pos].trim();
273 let after_eq = remaining[eq_pos + 1..].trim_start();
274
275 let (quote_char, after_quote) = if let Some(rest) = after_eq.strip_prefix('"') {
277 ('"', rest)
278 } else if let Some(rest) = after_eq.strip_prefix('\'') {
279 ('\'', rest)
280 } else {
281 break;
283 };
284
285 let end_quote = match after_quote.find(quote_char) {
287 Some(pos) => pos,
288 None => break,
289 };
290
291 let value = &after_quote[..end_quote];
292
293 let key_lower = key.to_ascii_lowercase();
295 match key_lower.as_str() {
296 "font_family" => attrs.font_family = Some(value.to_string()),
297 "size" => attrs.size = Some(value.to_string()),
298 "foreground" | "color" => attrs.foreground = Some(value.to_string()),
299 "background" => attrs.background = Some(value.to_string()),
300 "weight" => attrs.weight = Some(value.to_string()),
301 "style" => attrs.style = Some(value.to_string()),
302 _ => {} }
304
305 remaining = &after_quote[end_quote + 1..];
306 }
307
308 attrs
309}
310
311fn closing_tag_at_start(s: &str) -> Option<(TagType, usize)> {
315 if s.len() >= 5 && s[..5].eq_ignore_ascii_case("span>") {
317 return Some((TagType::Span(SpanAttributes::default()), 5));
318 }
319 tag_name_at_start(s)
320}
321
322fn tag_type_to_span(tag_type: TagType, children: Vec<TextSpan>) -> TextSpan {
328 match tag_type {
329 TagType::Bold => TextSpan::Bold(children),
330 TagType::Italic => TextSpan::Italic(children),
331 TagType::Highlight => TextSpan::Highlight(children),
332 TagType::Comment => TextSpan::Comment(children),
333 TagType::Span(attrs) => TextSpan::Span(attrs, children),
334 }
335}
336
337fn closers_contain(closers: &[TagType], tag: &TagType) -> bool {
342 closers.iter().any(|c| match (c, tag) {
343 (TagType::Span(_), TagType::Span(_)) => true,
344 (a, b) => a == b,
345 })
346}
347
/// Maximum number of tags that may be open at the same time.
///
/// `InlineMarkupParser::parse_spans` recurses once per open tag; when it is
/// already this many tags deep, further opening tags are not parsed and their
/// characters stay in the output as literal text, which bounds the recursion
/// depth.
const MAX_NESTING_DEPTH: usize = 32;
353
/// Cursor-based recursive-descent parser over a markup string.
struct InlineMarkupParser<'a> {
    /// The complete text being parsed.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
}
361
362impl<'a> InlineMarkupParser<'a> {
363 fn new(input: &'a str) -> Self {
365 Self { input, pos: 0 }
366 }
367
368 fn remaining(&self) -> &'a str {
370 &self.input[self.pos..]
371 }
372
373 fn parse_spans(&mut self, expected_closers: &[TagType]) -> Vec<TextSpan> {
380 let mut spans: Vec<TextSpan> = Vec::new();
381 let mut plain_start = self.pos;
382
383 while self.pos < self.input.len() {
384 let remaining = self.remaining();
385
386 if remaining.starts_with('<') {
387 let after_lt = &self.input[self.pos + 1..];
388
389 if after_lt.starts_with('/') {
391 let after_slash = &self.input[self.pos + 2..];
392 if let Some((tag_type, name_len)) = closing_tag_at_start(after_slash) {
393 if closers_contain(expected_closers, &tag_type) {
396 if plain_start < self.pos {
398 spans.push(TextSpan::Plain(
399 self.input[plain_start..self.pos].to_string(),
400 ));
401 }
402 self.pos += 2 + name_len;
404 return spans;
405 }
406 self.pos += 1;
408 continue;
409 }
410 self.pos += 1;
412 continue;
413 }
414
415 if expected_closers.len() >= MAX_NESTING_DEPTH {
417 self.pos += 1;
419 continue;
420 }
421
422 if let Some((attrs, tag_len)) = span_tag_at_start(after_lt) {
425 if plain_start < self.pos {
427 spans.push(TextSpan::Plain(
428 self.input[plain_start..self.pos].to_string(),
429 ));
430 }
431
432 self.pos += 1 + tag_len;
434
435 let mut closers = expected_closers.to_vec();
436 closers.push(TagType::Span(attrs.clone()));
437 let children = self.parse_spans(&closers);
438 spans.push(TextSpan::Span(attrs, children));
439
440 plain_start = self.pos;
441 continue;
442 }
443
444 if let Some((tag_type, name_len)) = tag_name_at_start(after_lt) {
446 if plain_start < self.pos {
448 spans.push(TextSpan::Plain(
449 self.input[plain_start..self.pos].to_string(),
450 ));
451 }
452
453 self.pos += 1 + name_len;
455
456 let mut closers = expected_closers.to_vec();
460 closers.push(tag_type.clone());
461 let children = self.parse_spans(&closers);
462 let span = tag_type_to_span(tag_type, children);
463 spans.push(span);
464
465 plain_start = self.pos;
466 continue;
467 }
468
469 self.pos += 1;
471 continue;
472 }
473
474 let ch_len = remaining.chars().next().map_or(1, |c| c.len_utf8());
475 self.pos += ch_len;
476 }
477
478 if plain_start < self.pos {
480 spans.push(TextSpan::Plain(
481 self.input[plain_start..self.pos].to_string(),
482 ));
483 }
484
485 spans
486 }
487}
488
489fn normalize_spans(spans: Vec<TextSpan>) -> Vec<TextSpan> {
491 let mut result: Vec<TextSpan> = Vec::new();
492
493 for span in spans {
494 match span {
495 TextSpan::Plain(text) => {
496 if let Some(TextSpan::Plain(prev)) = result.last_mut() {
497 prev.push_str(&text);
498 } else {
499 result.push(TextSpan::Plain(text));
500 }
501 }
502 TextSpan::Bold(children) => {
503 result.push(TextSpan::Bold(normalize_spans(children)));
504 }
505 TextSpan::Italic(children) => {
506 result.push(TextSpan::Italic(normalize_spans(children)));
507 }
508 TextSpan::Highlight(children) => {
509 result.push(TextSpan::Highlight(normalize_spans(children)));
510 }
511 TextSpan::Comment(children) => {
512 result.push(TextSpan::Comment(normalize_spans(children)));
513 }
514 TextSpan::Span(attrs, children) => {
515 result.push(TextSpan::Span(attrs, normalize_spans(children)));
516 }
517 }
518 }
519
520 result
521}
522
// Unit tests for markup detection, parsing and plain-text extraction.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- has_inline_markup: detection of simple tags ----

    #[test]
    fn no_markup_plain_text() {
        assert!(!has_inline_markup("Hello world"));
    }

    // A bare '<' that does not start a known tag is not markup.
    #[test]
    fn no_markup_with_angle_bracket() {
        assert!(!has_inline_markup("x < y"));
    }

    #[test]
    fn has_bold_tag() {
        assert!(has_inline_markup("<b>bold</b>"));
    }

    #[test]
    fn has_italic_tag() {
        assert!(has_inline_markup("<i>italic</i>"));
    }

    #[test]
    fn has_long_bold_tag() {
        assert!(has_inline_markup("<bold>text</bold>"));
    }

    #[test]
    fn has_long_italic_tag() {
        assert!(has_inline_markup("<italic>text</italic>"));
    }

    #[test]
    fn has_highlight_tag() {
        assert!(has_inline_markup("<highlight>text</highlight>"));
    }

    #[test]
    fn has_comment_tag() {
        assert!(has_inline_markup("<comment>text</comment>"));
    }

    // ---- parse_inline_markup: input without markup ----

    #[test]
    fn parse_plain_text() {
        let spans = parse_inline_markup("Hello world");
        assert_eq!(spans, vec![TextSpan::Plain("Hello world".to_string())]);
    }

    #[test]
    fn parse_empty_text() {
        let spans = parse_inline_markup("");
        assert_eq!(spans, Vec::<TextSpan>::new());
    }

    // ---- parse_inline_markup: one tag of each kind ----

    #[test]
    fn parse_bold_short() {
        let spans = parse_inline_markup("<b>bold</b>");
        assert_eq!(
            spans,
            vec![TextSpan::Bold(vec![TextSpan::Plain("bold".to_string())])]
        );
    }

    #[test]
    fn parse_bold_long() {
        let spans = parse_inline_markup("<bold>bold</bold>");
        assert_eq!(
            spans,
            vec![TextSpan::Bold(vec![TextSpan::Plain("bold".to_string())])]
        );
    }

    #[test]
    fn parse_italic_short() {
        let spans = parse_inline_markup("<i>italic</i>");
        assert_eq!(
            spans,
            vec![TextSpan::Italic(vec![TextSpan::Plain(
                "italic".to_string()
            )])]
        );
    }

    #[test]
    fn parse_italic_long() {
        let spans = parse_inline_markup("<italic>italic</italic>");
        assert_eq!(
            spans,
            vec![TextSpan::Italic(vec![TextSpan::Plain(
                "italic".to_string()
            )])]
        );
    }

    #[test]
    fn parse_highlight() {
        let spans = parse_inline_markup("<highlight>highlighted</highlight>");
        assert_eq!(
            spans,
            vec![TextSpan::Highlight(vec![TextSpan::Plain(
                "highlighted".to_string()
            )])]
        );
    }

    #[test]
    fn parse_comment() {
        let spans = parse_inline_markup("<comment>commented</comment>");
        assert_eq!(
            spans,
            vec![TextSpan::Comment(vec![TextSpan::Plain(
                "commented".to_string()
            )])]
        );
    }

    // ---- parse_inline_markup: tags mixed with surrounding text ----

    #[test]
    fn parse_text_before_and_after_tag() {
        let spans = parse_inline_markup("Hello <b>world</b> foo");
        assert_eq!(
            spans,
            vec![
                TextSpan::Plain("Hello ".to_string()),
                TextSpan::Bold(vec![TextSpan::Plain("world".to_string())]),
                TextSpan::Plain(" foo".to_string()),
            ]
        );
    }

    #[test]
    fn parse_multiple_tags() {
        let spans = parse_inline_markup("<b>bold</b> and <i>italic</i>");
        assert_eq!(
            spans,
            vec![
                TextSpan::Bold(vec![TextSpan::Plain("bold".to_string())]),
                TextSpan::Plain(" and ".to_string()),
                TextSpan::Italic(vec![TextSpan::Plain("italic".to_string())]),
            ]
        );
    }

    // ---- parse_inline_markup: nesting ----

    #[test]
    fn parse_nested_bold_italic() {
        let spans = parse_inline_markup("<b><i>both</i></b>");
        assert_eq!(
            spans,
            vec![TextSpan::Bold(vec![TextSpan::Italic(vec![
                TextSpan::Plain("both".to_string())
            ])])]
        );
    }

    #[test]
    fn parse_nested_with_surrounding_text() {
        let spans = parse_inline_markup("<b>bold <i>and italic</i> text</b>");
        assert_eq!(
            spans,
            vec![TextSpan::Bold(vec![
                TextSpan::Plain("bold ".to_string()),
                TextSpan::Italic(vec![TextSpan::Plain("and italic".to_string())]),
                TextSpan::Plain(" text".to_string()),
            ])]
        );
    }

    // ---- parse_inline_markup: tag names are ASCII-case-insensitive ----

    #[test]
    fn parse_case_insensitive_tags() {
        let spans = parse_inline_markup("<B>bold</B>");
        assert_eq!(
            spans,
            vec![TextSpan::Bold(vec![TextSpan::Plain("bold".to_string())])]
        );
    }

    #[test]
    fn parse_mixed_case_tags() {
        let spans = parse_inline_markup("<Bold>text</Bold>");
        assert_eq!(
            spans,
            vec![TextSpan::Bold(vec![TextSpan::Plain("text".to_string())])]
        );
    }

    // ---- parse_inline_markup: malformed and edge-case input ----

    #[test]
    fn unclosed_tag_wraps_remaining_text() {
        let spans = parse_inline_markup("<b>unclosed");
        // With no closing tag, the bold span extends to the end of the input.
        assert_eq!(
            spans,
            vec![TextSpan::Bold(vec![TextSpan::Plain(
                "unclosed".to_string()
            )])]
        );
    }

    #[test]
    fn depth_limit_prevents_stack_overflow() {
        let open_tags: String = "<b>".repeat(MAX_NESTING_DEPTH + 1);
        // One more level than the parser will recurse into.
        let close_tags: String = "</b>".repeat(MAX_NESTING_DEPTH + 1);
        let input = format!("{}text{}", open_tags, close_tags);
        let spans = parse_inline_markup(&input);
        // Only checks that parsing completes and yields some output.
        assert!(!spans.is_empty());
    }

    #[test]
    fn unrecognized_tag_treated_as_plain() {
        let spans = parse_inline_markup("<unknown>text</unknown>");
        assert_eq!(
            spans,
            vec![TextSpan::Plain("<unknown>text</unknown>".to_string())]
        );
    }

    #[test]
    fn lone_angle_bracket_is_plain() {
        let spans = parse_inline_markup("x < y");
        assert_eq!(spans, vec![TextSpan::Plain("x < y".to_string())]);
    }

    #[test]
    fn stray_closing_tag_is_plain() {
        let spans = parse_inline_markup("text </b> more");
        assert_eq!(spans, vec![TextSpan::Plain("text </b> more".to_string())]);
    }

    // ---- has_inline_markup: span tags ----

    #[test]
    fn has_span_tag_no_attrs() {
        assert!(has_inline_markup("<span>text</span>"));
    }

    #[test]
    fn has_span_tag_with_attrs() {
        assert!(has_inline_markup(r#"<span foreground="red">text</span>"#));
    }

    // A closing span tag on its own is still reported as markup.
    #[test]
    fn has_span_closing_tag_only() {
        assert!(has_inline_markup("text </span> more"));
    }

    // ---- parse_inline_markup: span tags and their attributes ----

    #[test]
    fn parse_span_no_attrs() {
        let spans = parse_inline_markup("<span>styled</span>");
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes::default(),
                vec![TextSpan::Plain("styled".to_string())]
            )]
        );
    }

    #[test]
    fn parse_span_single_attr() {
        let spans = parse_inline_markup(r#"<span foreground="red">text</span>"#);
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes {
                    foreground: Some("red".to_string()),
                    ..Default::default()
                },
                vec![TextSpan::Plain("text".to_string())]
            )]
        );
    }

    #[test]
    fn parse_span_multiple_attrs() {
        let spans = parse_inline_markup(
            r#"<span font_family="Serif" size="12" foreground="blue" background="yellow" weight="bold" style="italic">text</span>"#,
        );
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes {
                    font_family: Some("Serif".to_string()),
                    size: Some("12".to_string()),
                    foreground: Some("blue".to_string()),
                    background: Some("yellow".to_string()),
                    weight: Some("bold".to_string()),
                    style: Some("italic".to_string()),
                },
                vec![TextSpan::Plain("text".to_string())]
            )]
        );
    }

    #[test]
    fn parse_span_single_quoted_attrs() {
        let spans = parse_inline_markup("<span foreground='green'>text</span>");
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes {
                    foreground: Some("green".to_string()),
                    ..Default::default()
                },
                vec![TextSpan::Plain("text".to_string())]
            )]
        );
    }

    // `color` is accepted as an alias for `foreground`.
    #[test]
    fn parse_span_color_alias() {
        let spans = parse_inline_markup(r#"<span color="red">text</span>"#);
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes {
                    foreground: Some("red".to_string()),
                    ..Default::default()
                },
                vec![TextSpan::Plain("text".to_string())]
            )]
        );
    }

    // Both the tag name and the attribute key ignore ASCII case.
    #[test]
    fn parse_span_case_insensitive() {
        let spans = parse_inline_markup(r#"<SPAN Foreground="red">text</SPAN>"#);
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes {
                    foreground: Some("red".to_string()),
                    ..Default::default()
                },
                vec![TextSpan::Plain("text".to_string())]
            )]
        );
    }

    #[test]
    fn parse_span_nested_inside_bold() {
        let spans = parse_inline_markup(r#"<b><span foreground="red">text</span></b>"#);
        assert_eq!(
            spans,
            vec![TextSpan::Bold(vec![TextSpan::Span(
                SpanAttributes {
                    foreground: Some("red".to_string()),
                    ..Default::default()
                },
                vec![TextSpan::Plain("text".to_string())]
            )])]
        );
    }

    #[test]
    fn parse_bold_nested_inside_span() {
        let spans = parse_inline_markup(r#"<span foreground="red"><b>text</b></span>"#);
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes {
                    foreground: Some("red".to_string()),
                    ..Default::default()
                },
                vec![TextSpan::Bold(vec![TextSpan::Plain("text".to_string())])]
            )]
        );
    }

    #[test]
    fn parse_span_with_surrounding_text() {
        let spans = parse_inline_markup(r#"Hello <span foreground="red">world</span> foo"#);
        assert_eq!(
            spans,
            vec![
                TextSpan::Plain("Hello ".to_string()),
                TextSpan::Span(
                    SpanAttributes {
                        foreground: Some("red".to_string()),
                        ..Default::default()
                    },
                    vec![TextSpan::Plain("world".to_string())]
                ),
                TextSpan::Plain(" foo".to_string()),
            ]
        );
    }

    #[test]
    fn parse_span_unclosed_wraps_remaining() {
        let spans = parse_inline_markup(r#"<span foreground="red">unclosed"#);
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes {
                    foreground: Some("red".to_string()),
                    ..Default::default()
                },
                vec![TextSpan::Plain("unclosed".to_string())]
            )]
        );
    }

    // Unknown attribute keys are skipped without affecting the known ones.
    #[test]
    fn parse_span_unknown_attrs_ignored() {
        let spans = parse_inline_markup(r#"<span unknown="val" foreground="red">text</span>"#);
        assert_eq!(
            spans,
            vec![TextSpan::Span(
                SpanAttributes {
                    foreground: Some("red".to_string()),
                    ..Default::default()
                },
                vec![TextSpan::Plain("text".to_string())]
            )]
        );
    }

    // ---- spans_to_plain_text ----

    #[test]
    fn plain_text_extraction_simple() {
        let spans = vec![TextSpan::Plain("hello".to_string())];
        assert_eq!(spans_to_plain_text(&spans), "hello");
    }

    #[test]
    fn plain_text_extraction_with_markup() {
        let spans = vec![
            TextSpan::Plain("Hello ".to_string()),
            TextSpan::Bold(vec![TextSpan::Plain("world".to_string())]),
        ];
        assert_eq!(spans_to_plain_text(&spans), "Hello world");
    }

    #[test]
    fn plain_text_extraction_nested() {
        let spans = vec![TextSpan::Bold(vec![TextSpan::Italic(vec![
            TextSpan::Plain("nested".to_string()),
        ])])];
        assert_eq!(spans_to_plain_text(&spans), "nested");
    }

    #[test]
    fn plain_text_extraction_span() {
        let spans = vec![TextSpan::Span(
            SpanAttributes {
                foreground: Some("red".to_string()),
                ..Default::default()
            },
            vec![TextSpan::Plain("colored".to_string())],
        )];
        assert_eq!(spans_to_plain_text(&spans), "colored");
    }
}