Skip to main content

chordsketch_core/
inline_markup.rs

1//! Inline markup parser for ChordPro lyrics text.
2//!
3//! ChordPro lyrics can contain inline markup tags for formatting:
4//!
5//! - `<bold>text</bold>` or `<b>text</b>` — bold text
6//! - `<italic>text</italic>` or `<i>text</i>` — italic text
7//! - `<highlight>text</highlight>` — highlighted text
8//! - `<comment>text</comment>` — comment-styled text
9//! - `<span font_family="..." size="..." foreground="...">text</span>` — styled text
10//!
11//! Tags may be nested: `<b><i>text</i></b>`.
12//!
13//! Unclosed tags wrap all remaining text (per ChordPro spec). Unrecognized or
14//! malformed tags are treated as plain text (graceful degradation).
15//!
16//! # Examples
17//!
18//! ```
19//! use chordsketch_core::inline_markup::{TextSpan, parse_inline_markup};
20//!
21//! let spans = parse_inline_markup("<b>Hello</b> world");
22//! assert_eq!(spans, vec![
23//!     TextSpan::Bold(vec![TextSpan::Plain("Hello".to_string())]),
24//!     TextSpan::Plain(" world".to_string()),
25//! ]);
26//! ```
27
/// Style attributes carried by a `<span>` inline markup tag.
///
/// Every field is optional: a field is `Some` only when the matching
/// attribute was present in the source markup and `None` otherwise. Values
/// are kept as the raw attribute strings.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct SpanAttributes {
    /// Name of the font family, e.g. `"Serif"` or `"Monospace"`.
    pub font_family: Option<String>,
    /// Size of the font, e.g. `"12"` or `"120%"`.
    pub size: Option<String>,
    /// Text (foreground) color, e.g. `"red"` or `"#FF0000"`.
    pub foreground: Option<String>,
    /// Color painted behind the text, e.g. `"yellow"` or `"#FFFF00"`.
    pub background: Option<String>,
    /// Weight of the font, e.g. `"bold"` or `"normal"`.
    pub weight: Option<String>,
    /// Style of the font, e.g. `"italic"` or `"normal"`.
    pub style: Option<String>,
}
48
49/// A segment of text that may contain inline markup formatting.
50///
51/// `TextSpan` represents a tree structure where each node is either plain text
52/// or a formatting tag wrapping child spans. This allows arbitrary nesting of
53/// inline markup.
54#[derive(Debug, Clone, PartialEq)]
55pub enum TextSpan {
56    /// Plain text with no formatting.
57    Plain(String),
58    /// Bold text (`<bold>` / `<b>`).
59    Bold(Vec<TextSpan>),
60    /// Italic text (`<italic>` / `<i>`).
61    Italic(Vec<TextSpan>),
62    /// Highlighted text (`<highlight>`).
63    Highlight(Vec<TextSpan>),
64    /// Comment-styled text (`<comment>`).
65    Comment(Vec<TextSpan>),
66    /// Styled text with attributes (`<span ...>`).
67    Span(SpanAttributes, Vec<TextSpan>),
68}
69
70impl TextSpan {
71    /// Extracts the plain text content of this span, stripping all formatting.
72    ///
73    /// This recursively collects all `Plain` text within nested spans.
74    #[must_use]
75    pub fn plain_text(&self) -> String {
76        match self {
77            TextSpan::Plain(s) => s.clone(),
78            TextSpan::Bold(children)
79            | TextSpan::Italic(children)
80            | TextSpan::Highlight(children)
81            | TextSpan::Comment(children)
82            | TextSpan::Span(_, children) => children.iter().map(TextSpan::plain_text).collect(),
83        }
84    }
85}
86
87/// Returns `true` if the input text contains any inline markup tags.
88///
89/// This performs a quick scan for `<` followed by a known tag name and `>`.
90/// It is used to decide whether full markup parsing is needed.
91#[must_use]
92pub fn has_inline_markup(text: &str) -> bool {
93    let mut remaining = text;
94    while let Some(pos) = remaining.find('<') {
95        let after = &remaining[pos + 1..];
96        if tag_name_at_start(after).is_some() {
97            return true;
98        }
99        // Check for <span with attributes
100        if span_tag_at_start(after).is_some() {
101            return true;
102        }
103        // Also check for closing tags: </tagname>
104        if let Some(rest) = after.strip_prefix('/') {
105            if tag_name_at_start(rest).is_some() {
106                return true;
107            }
108            // Check for </span>
109            if rest.len() >= 5 && rest[..5].eq_ignore_ascii_case("span>") {
110                return true;
111            }
112        }
113        remaining = &remaining[pos + 1..];
114    }
115    false
116}
117
118/// Parses inline markup tags from text and returns a list of [`TextSpan`]s.
119///
120/// When the text contains no markup, a single `TextSpan::Plain` is returned.
121/// Unclosed tags wrap all remaining text (per ChordPro spec). Unrecognized
122/// tags are treated as plain text.
123///
124/// # Examples
125///
126/// ```
127/// use chordsketch_core::inline_markup::{TextSpan, parse_inline_markup};
128///
129/// // No markup
130/// let spans = parse_inline_markup("plain text");
131/// assert_eq!(spans, vec![TextSpan::Plain("plain text".to_string())]);
132///
133/// // Bold markup
134/// let spans = parse_inline_markup("<b>bold</b>");
135/// assert_eq!(spans, vec![TextSpan::Bold(vec![TextSpan::Plain("bold".to_string())])]);
136///
137/// // Nested markup
138/// let spans = parse_inline_markup("<b><i>both</i></b>");
139/// assert_eq!(spans, vec![
140///     TextSpan::Bold(vec![
141///         TextSpan::Italic(vec![TextSpan::Plain("both".to_string())])
142///     ])
143/// ]);
144/// ```
145#[must_use]
146pub fn parse_inline_markup(text: &str) -> Vec<TextSpan> {
147    if !has_inline_markup(text) {
148        if text.is_empty() {
149            return Vec::new();
150        }
151        return vec![TextSpan::Plain(text.to_string())];
152    }
153
154    let mut parser = InlineMarkupParser::new(text);
155    let spans = parser.parse_spans(&[]);
156
157    // If parsing resulted in no spans but there was text, return plain text
158    if spans.is_empty() && !text.is_empty() {
159        return vec![TextSpan::Plain(text.to_string())];
160    }
161
162    normalize_spans(spans)
163}
164
165/// Extracts the plain text content from a list of spans, stripping all markup.
166///
167/// This is used to populate the backward-compatible `text` field in
168/// `LyricsSegment` when markup is present.
169#[must_use]
170pub fn spans_to_plain_text(spans: &[TextSpan]) -> String {
171    spans.iter().map(TextSpan::plain_text).collect()
172}
173
174// ---------------------------------------------------------------------------
175// Tag types
176// ---------------------------------------------------------------------------
177
178/// The recognized inline markup tag types.
179#[derive(Debug, Clone, PartialEq, Eq)]
180enum TagType {
181    Bold,
182    Italic,
183    Highlight,
184    Comment,
185    Span(SpanAttributes),
186}
187
188/// Attempts to match a known tag name at the start of the string.
189///
190/// Returns the tag type and the length consumed (including the closing `>`).
191/// The input should start after the `<` (or `</`).
192///
193/// Note: This does NOT match `<span ...>` tags — those are handled by
194/// [`span_tag_at_start`] because they require attribute parsing.
195fn tag_name_at_start(s: &str) -> Option<(TagType, usize)> {
196    // Try each tag name (longest first to avoid prefix conflicts)
197    let tags: &[(&str, TagType)] = &[
198        ("highlight>", TagType::Highlight),
199        ("comment>", TagType::Comment),
200        ("italic>", TagType::Italic),
201        ("bold>", TagType::Bold),
202        ("b>", TagType::Bold),
203        ("i>", TagType::Italic),
204    ];
205
206    for (name, tag_type) in tags {
207        if s.len() >= name.len() {
208            // Case-insensitive comparison
209            let candidate = &s[..name.len()];
210            if candidate.eq_ignore_ascii_case(name) {
211                return Some((tag_type.clone(), name.len()));
212            }
213        }
214    }
215
216    None
217}
218
219/// Attempts to match `<span ...>` or `<span>` at the start of the string.
220///
221/// The input should start after the `<`. Returns `SpanAttributes` and the
222/// length consumed (including the closing `>`).
223fn span_tag_at_start(s: &str) -> Option<(SpanAttributes, usize)> {
224    // Must start with "span" (case-insensitive)
225    if s.len() < 4 {
226        return None;
227    }
228    if !s[..4].eq_ignore_ascii_case("span") {
229        return None;
230    }
231
232    let after_name = &s[4..];
233
234    // <span> with no attributes
235    if after_name.starts_with('>') {
236        return Some((SpanAttributes::default(), 5)); // "span>"
237    }
238
239    // Must be followed by whitespace for attributes
240    if !after_name.starts_with(|c: char| c.is_ascii_whitespace()) {
241        return None;
242    }
243
244    // Find the closing '>'
245    let closing = s.find('>')?;
246
247    // Parse attributes from the region between "span " and ">"
248    let attr_str = &s[4..closing].trim();
249    let attrs = parse_span_attributes(attr_str);
250
251    Some((attrs, closing + 1))
252}
253
254/// Parses key="value" or key='value' attribute pairs from a span tag.
255fn parse_span_attributes(s: &str) -> SpanAttributes {
256    let mut attrs = SpanAttributes::default();
257    let mut remaining = s.trim();
258
259    while !remaining.is_empty() {
260        // Skip whitespace
261        remaining = remaining.trim_start();
262        if remaining.is_empty() {
263            break;
264        }
265
266        // Find '='
267        let eq_pos = match remaining.find('=') {
268            Some(pos) => pos,
269            None => break,
270        };
271
272        let key = remaining[..eq_pos].trim();
273        let after_eq = remaining[eq_pos + 1..].trim_start();
274
275        // Value must be quoted
276        let (quote_char, after_quote) = if let Some(rest) = after_eq.strip_prefix('"') {
277            ('"', rest)
278        } else if let Some(rest) = after_eq.strip_prefix('\'') {
279            ('\'', rest)
280        } else {
281            // No quote — skip to next whitespace or end
282            break;
283        };
284
285        // Find closing quote
286        let end_quote = match after_quote.find(quote_char) {
287            Some(pos) => pos,
288            None => break,
289        };
290
291        let value = &after_quote[..end_quote];
292
293        // Set the attribute (case-insensitive key matching)
294        let key_lower = key.to_ascii_lowercase();
295        match key_lower.as_str() {
296            "font_family" => attrs.font_family = Some(value.to_string()),
297            "size" => attrs.size = Some(value.to_string()),
298            "foreground" | "color" => attrs.foreground = Some(value.to_string()),
299            "background" => attrs.background = Some(value.to_string()),
300            "weight" => attrs.weight = Some(value.to_string()),
301            "style" => attrs.style = Some(value.to_string()),
302            _ => {} // Ignore unknown attributes
303        }
304
305        remaining = &after_quote[end_quote + 1..];
306    }
307
308    attrs
309}
310
311/// Attempts to match a closing tag at the start of the string.
312///
313/// The input should start after the `</`. Matches both simple tags and `</span>`.
314fn closing_tag_at_start(s: &str) -> Option<(TagType, usize)> {
315    // Check for </span> first
316    if s.len() >= 5 && s[..5].eq_ignore_ascii_case("span>") {
317        return Some((TagType::Span(SpanAttributes::default()), 5));
318    }
319    tag_name_at_start(s)
320}
321
322// ---------------------------------------------------------------------------
323// Internal parser
324// ---------------------------------------------------------------------------
325
326/// Converts a `TagType` and its children into a `TextSpan`.
327fn tag_type_to_span(tag_type: TagType, children: Vec<TextSpan>) -> TextSpan {
328    match tag_type {
329        TagType::Bold => TextSpan::Bold(children),
330        TagType::Italic => TextSpan::Italic(children),
331        TagType::Highlight => TextSpan::Highlight(children),
332        TagType::Comment => TextSpan::Comment(children),
333        TagType::Span(attrs) => TextSpan::Span(attrs, children),
334    }
335}
336
337/// Checks whether a closing tag matches any of the expected closers.
338///
339/// For `Span` tags, only the tag type matters — attributes are not compared
340/// because `</span>` has no attributes.
341fn closers_contain(closers: &[TagType], tag: &TagType) -> bool {
342    closers.iter().any(|c| match (c, tag) {
343        (TagType::Span(_), TagType::Span(_)) => true,
344        (a, b) => a == b,
345    })
346}
347
/// Upper bound on how many inline markup tags may be open at once.
///
/// Once this many tags are open, further opening tags are kept as plain
/// text instead of being parsed, which bounds the parser's recursion and so
/// guards against stack overflow on adversarial input.
const MAX_NESTING_DEPTH: usize = 32;
353
/// Cursor over the text being parsed for inline markup.
struct InlineMarkupParser<'a> {
    /// Full text handed to the parser.
    input: &'a str,
    /// Byte offset into `input` of the next unread character.
    pos: usize,
}
361
362impl<'a> InlineMarkupParser<'a> {
363    /// Creates a new parser for the given input.
364    fn new(input: &'a str) -> Self {
365        Self { input, pos: 0 }
366    }
367
368    /// Returns the remaining unparsed input.
369    fn remaining(&self) -> &'a str {
370        &self.input[self.pos..]
371    }
372
373    /// Parses spans until end of input or a closing tag matching one of the
374    /// `expected_closers` is found. Returns the parsed spans.
375    ///
376    /// `expected_closers` is the stack of tag types we're inside. When we find
377    /// a closing tag that matches one of them, we stop and let the caller
378    /// handle it.
379    fn parse_spans(&mut self, expected_closers: &[TagType]) -> Vec<TextSpan> {
380        let mut spans: Vec<TextSpan> = Vec::new();
381        let mut plain_start = self.pos;
382
383        while self.pos < self.input.len() {
384            let remaining = self.remaining();
385
386            if remaining.starts_with('<') {
387                let after_lt = &self.input[self.pos + 1..];
388
389                // Check for closing tag
390                if after_lt.starts_with('/') {
391                    let after_slash = &self.input[self.pos + 2..];
392                    if let Some((tag_type, name_len)) = closing_tag_at_start(after_slash) {
393                        // If this closing tag matches one of our expected closers,
394                        // flush plain text and return
395                        if closers_contain(expected_closers, &tag_type) {
396                            // Flush accumulated plain text
397                            if plain_start < self.pos {
398                                spans.push(TextSpan::Plain(
399                                    self.input[plain_start..self.pos].to_string(),
400                                ));
401                            }
402                            // Consume the closing tag: </ + name_len
403                            self.pos += 2 + name_len;
404                            return spans;
405                        }
406                        // Not our closer — treat as plain text
407                        self.pos += 1;
408                        continue;
409                    }
410                    // Not a recognized closing tag
411                    self.pos += 1;
412                    continue;
413                }
414
415                // Enforce depth limit to prevent stack overflow on adversarial input
416                if expected_closers.len() >= MAX_NESTING_DEPTH {
417                    // Treat `<` as plain text — don't recurse deeper
418                    self.pos += 1;
419                    continue;
420                }
421
422                // Check for <span ...> opening tag (before simple tags, since
423                // "span" is not in the simple-tag list)
424                if let Some((attrs, tag_len)) = span_tag_at_start(after_lt) {
425                    // Flush accumulated plain text before this tag
426                    if plain_start < self.pos {
427                        spans.push(TextSpan::Plain(
428                            self.input[plain_start..self.pos].to_string(),
429                        ));
430                    }
431
432                    // Consume the opening tag: < + tag_len
433                    self.pos += 1 + tag_len;
434
435                    let mut closers = expected_closers.to_vec();
436                    closers.push(TagType::Span(attrs.clone()));
437                    let children = self.parse_spans(&closers);
438                    spans.push(TextSpan::Span(attrs, children));
439
440                    plain_start = self.pos;
441                    continue;
442                }
443
444                // Check for simple opening tag
445                if let Some((tag_type, name_len)) = tag_name_at_start(after_lt) {
446                    // Flush accumulated plain text before this tag
447                    if plain_start < self.pos {
448                        spans.push(TextSpan::Plain(
449                            self.input[plain_start..self.pos].to_string(),
450                        ));
451                    }
452
453                    // Consume the opening tag: < + name_len
454                    self.pos += 1 + name_len;
455
456                    // Parse children, expecting a closing tag for this type.
457                    // Per ChordPro spec, unclosed tags apply to all remaining text,
458                    // so we always wrap whatever children were collected.
459                    let mut closers = expected_closers.to_vec();
460                    closers.push(tag_type.clone());
461                    let children = self.parse_spans(&closers);
462                    let span = tag_type_to_span(tag_type, children);
463                    spans.push(span);
464
465                    plain_start = self.pos;
466                    continue;
467                }
468
469                // Not a recognized tag — treat `<` as plain text
470                self.pos += 1;
471                continue;
472            }
473
474            let ch_len = remaining.chars().next().map_or(1, |c| c.len_utf8());
475            self.pos += ch_len;
476        }
477
478        // Flush remaining plain text
479        if plain_start < self.pos {
480            spans.push(TextSpan::Plain(
481                self.input[plain_start..self.pos].to_string(),
482            ));
483        }
484
485        spans
486    }
487}
488
489/// Merges adjacent `Plain` spans into a single `Plain` span.
490fn normalize_spans(spans: Vec<TextSpan>) -> Vec<TextSpan> {
491    let mut result: Vec<TextSpan> = Vec::new();
492
493    for span in spans {
494        match span {
495            TextSpan::Plain(text) => {
496                if let Some(TextSpan::Plain(prev)) = result.last_mut() {
497                    prev.push_str(&text);
498                } else {
499                    result.push(TextSpan::Plain(text));
500                }
501            }
502            TextSpan::Bold(children) => {
503                result.push(TextSpan::Bold(normalize_spans(children)));
504            }
505            TextSpan::Italic(children) => {
506                result.push(TextSpan::Italic(normalize_spans(children)));
507            }
508            TextSpan::Highlight(children) => {
509                result.push(TextSpan::Highlight(normalize_spans(children)));
510            }
511            TextSpan::Comment(children) => {
512                result.push(TextSpan::Comment(normalize_spans(children)));
513            }
514            TextSpan::Span(attrs, children) => {
515                result.push(TextSpan::Span(attrs, normalize_spans(children)));
516            }
517        }
518    }
519
520    result
521}
522
523// ---------------------------------------------------------------------------
524// Tests
525// ---------------------------------------------------------------------------
526
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `TextSpan::Plain` holding `text`.
    fn plain(text: &str) -> TextSpan {
        TextSpan::Plain(text.to_string())
    }

    /// Shorthand for `SpanAttributes` with only `foreground` set to `color`.
    fn fg(color: &str) -> SpanAttributes {
        SpanAttributes {
            foreground: Some(color.to_string()),
            ..Default::default()
        }
    }

    // -- has_inline_markup --------------------------------------------------

    #[test]
    fn no_markup_plain_text() {
        assert!(!has_inline_markup("Hello world"));
    }

    #[test]
    fn no_markup_with_angle_bracket() {
        assert!(!has_inline_markup("x < y"));
    }

    #[test]
    fn has_bold_tag() {
        assert!(has_inline_markup("<b>bold</b>"));
    }

    #[test]
    fn has_italic_tag() {
        assert!(has_inline_markup("<i>italic</i>"));
    }

    #[test]
    fn has_long_bold_tag() {
        assert!(has_inline_markup("<bold>text</bold>"));
    }

    #[test]
    fn has_long_italic_tag() {
        assert!(has_inline_markup("<italic>text</italic>"));
    }

    #[test]
    fn has_highlight_tag() {
        assert!(has_inline_markup("<highlight>text</highlight>"));
    }

    #[test]
    fn has_comment_tag() {
        assert!(has_inline_markup("<comment>text</comment>"));
    }

    // -- parse_inline_markup: plain text ------------------------------------

    #[test]
    fn parse_plain_text() {
        let expected = vec![plain("Hello world")];
        assert_eq!(parse_inline_markup("Hello world"), expected);
    }

    #[test]
    fn parse_empty_text() {
        let expected = Vec::<TextSpan>::new();
        assert_eq!(parse_inline_markup(""), expected);
    }

    // -- parse_inline_markup: simple tags -----------------------------------

    #[test]
    fn parse_bold_short() {
        let expected = vec![TextSpan::Bold(vec![plain("bold")])];
        assert_eq!(parse_inline_markup("<b>bold</b>"), expected);
    }

    #[test]
    fn parse_bold_long() {
        let expected = vec![TextSpan::Bold(vec![plain("bold")])];
        assert_eq!(parse_inline_markup("<bold>bold</bold>"), expected);
    }

    #[test]
    fn parse_italic_short() {
        let expected = vec![TextSpan::Italic(vec![plain("italic")])];
        assert_eq!(parse_inline_markup("<i>italic</i>"), expected);
    }

    #[test]
    fn parse_italic_long() {
        let expected = vec![TextSpan::Italic(vec![plain("italic")])];
        assert_eq!(parse_inline_markup("<italic>italic</italic>"), expected);
    }

    #[test]
    fn parse_highlight() {
        let expected = vec![TextSpan::Highlight(vec![plain("highlighted")])];
        assert_eq!(
            parse_inline_markup("<highlight>highlighted</highlight>"),
            expected
        );
    }

    #[test]
    fn parse_comment() {
        let expected = vec![TextSpan::Comment(vec![plain("commented")])];
        assert_eq!(parse_inline_markup("<comment>commented</comment>"), expected);
    }

    // -- parse_inline_markup: mixed content ---------------------------------

    #[test]
    fn parse_text_before_and_after_tag() {
        let expected = vec![
            plain("Hello "),
            TextSpan::Bold(vec![plain("world")]),
            plain(" foo"),
        ];
        assert_eq!(parse_inline_markup("Hello <b>world</b> foo"), expected);
    }

    #[test]
    fn parse_multiple_tags() {
        let expected = vec![
            TextSpan::Bold(vec![plain("bold")]),
            plain(" and "),
            TextSpan::Italic(vec![plain("italic")]),
        ];
        assert_eq!(
            parse_inline_markup("<b>bold</b> and <i>italic</i>"),
            expected
        );
    }

    // -- parse_inline_markup: nested tags -----------------------------------

    #[test]
    fn parse_nested_bold_italic() {
        let expected = vec![TextSpan::Bold(vec![TextSpan::Italic(vec![plain("both")])])];
        assert_eq!(parse_inline_markup("<b><i>both</i></b>"), expected);
    }

    #[test]
    fn parse_nested_with_surrounding_text() {
        let expected = vec![TextSpan::Bold(vec![
            plain("bold "),
            TextSpan::Italic(vec![plain("and italic")]),
            plain(" text"),
        ])];
        assert_eq!(
            parse_inline_markup("<b>bold <i>and italic</i> text</b>"),
            expected
        );
    }

    // -- parse_inline_markup: case insensitive ------------------------------

    #[test]
    fn parse_case_insensitive_tags() {
        let expected = vec![TextSpan::Bold(vec![plain("bold")])];
        assert_eq!(parse_inline_markup("<B>bold</B>"), expected);
    }

    #[test]
    fn parse_mixed_case_tags() {
        let expected = vec![TextSpan::Bold(vec![plain("text")])];
        assert_eq!(parse_inline_markup("<Bold>text</Bold>"), expected);
    }

    // -- parse_inline_markup: graceful degradation --------------------------

    #[test]
    fn unclosed_tag_wraps_remaining_text() {
        // Per ChordPro spec, unclosed tags apply to all remaining text.
        let expected = vec![TextSpan::Bold(vec![plain("unclosed")])];
        assert_eq!(parse_inline_markup("<b>unclosed"), expected);
    }

    #[test]
    fn depth_limit_prevents_stack_overflow() {
        // Deeply nested tags beyond MAX_NESTING_DEPTH are treated as plain text.
        let levels = MAX_NESTING_DEPTH + 1;
        let input = format!("{}text{}", "<b>".repeat(levels), "</b>".repeat(levels));
        // Must not panic/overflow; just verify it returns something reasonable.
        let spans = parse_inline_markup(&input);
        assert!(!spans.is_empty());
    }

    #[test]
    fn unrecognized_tag_treated_as_plain() {
        let expected = vec![plain("<unknown>text</unknown>")];
        assert_eq!(parse_inline_markup("<unknown>text</unknown>"), expected);
    }

    #[test]
    fn lone_angle_bracket_is_plain() {
        let expected = vec![plain("x < y")];
        assert_eq!(parse_inline_markup("x < y"), expected);
    }

    #[test]
    fn stray_closing_tag_is_plain() {
        let expected = vec![plain("text </b> more")];
        assert_eq!(parse_inline_markup("text </b> more"), expected);
    }

    // -- has_inline_markup: span tags -----------------------------------------

    #[test]
    fn has_span_tag_no_attrs() {
        assert!(has_inline_markup("<span>text</span>"));
    }

    #[test]
    fn has_span_tag_with_attrs() {
        assert!(has_inline_markup(r#"<span foreground="red">text</span>"#));
    }

    #[test]
    fn has_span_closing_tag_only() {
        assert!(has_inline_markup("text </span> more"));
    }

    // -- parse_inline_markup: span tags ---------------------------------------

    #[test]
    fn parse_span_no_attrs() {
        let expected = vec![TextSpan::Span(
            SpanAttributes::default(),
            vec![plain("styled")],
        )];
        assert_eq!(parse_inline_markup("<span>styled</span>"), expected);
    }

    #[test]
    fn parse_span_single_attr() {
        let expected = vec![TextSpan::Span(fg("red"), vec![plain("text")])];
        assert_eq!(
            parse_inline_markup(r#"<span foreground="red">text</span>"#),
            expected
        );
    }

    #[test]
    fn parse_span_multiple_attrs() {
        let all_attrs = SpanAttributes {
            font_family: Some("Serif".to_string()),
            size: Some("12".to_string()),
            foreground: Some("blue".to_string()),
            background: Some("yellow".to_string()),
            weight: Some("bold".to_string()),
            style: Some("italic".to_string()),
        };
        let expected = vec![TextSpan::Span(all_attrs, vec![plain("text")])];
        assert_eq!(
            parse_inline_markup(
                r#"<span font_family="Serif" size="12" foreground="blue" background="yellow" weight="bold" style="italic">text</span>"#,
            ),
            expected
        );
    }

    #[test]
    fn parse_span_single_quoted_attrs() {
        let expected = vec![TextSpan::Span(fg("green"), vec![plain("text")])];
        assert_eq!(
            parse_inline_markup("<span foreground='green'>text</span>"),
            expected
        );
    }

    #[test]
    fn parse_span_color_alias() {
        let expected = vec![TextSpan::Span(fg("red"), vec![plain("text")])];
        assert_eq!(
            parse_inline_markup(r#"<span color="red">text</span>"#),
            expected
        );
    }

    #[test]
    fn parse_span_case_insensitive() {
        let expected = vec![TextSpan::Span(fg("red"), vec![plain("text")])];
        assert_eq!(
            parse_inline_markup(r#"<SPAN Foreground="red">text</SPAN>"#),
            expected
        );
    }

    #[test]
    fn parse_span_nested_inside_bold() {
        let expected = vec![TextSpan::Bold(vec![TextSpan::Span(
            fg("red"),
            vec![plain("text")],
        )])];
        assert_eq!(
            parse_inline_markup(r#"<b><span foreground="red">text</span></b>"#),
            expected
        );
    }

    #[test]
    fn parse_bold_nested_inside_span() {
        let expected = vec![TextSpan::Span(
            fg("red"),
            vec![TextSpan::Bold(vec![plain("text")])],
        )];
        assert_eq!(
            parse_inline_markup(r#"<span foreground="red"><b>text</b></span>"#),
            expected
        );
    }

    #[test]
    fn parse_span_with_surrounding_text() {
        let expected = vec![
            plain("Hello "),
            TextSpan::Span(fg("red"), vec![plain("world")]),
            plain(" foo"),
        ];
        assert_eq!(
            parse_inline_markup(r#"Hello <span foreground="red">world</span> foo"#),
            expected
        );
    }

    #[test]
    fn parse_span_unclosed_wraps_remaining() {
        let expected = vec![TextSpan::Span(fg("red"), vec![plain("unclosed")])];
        assert_eq!(
            parse_inline_markup(r#"<span foreground="red">unclosed"#),
            expected
        );
    }

    #[test]
    fn parse_span_unknown_attrs_ignored() {
        let expected = vec![TextSpan::Span(fg("red"), vec![plain("text")])];
        assert_eq!(
            parse_inline_markup(r#"<span unknown="val" foreground="red">text</span>"#),
            expected
        );
    }

    // -- spans_to_plain_text ------------------------------------------------

    #[test]
    fn plain_text_extraction_simple() {
        let spans = vec![plain("hello")];
        assert_eq!(spans_to_plain_text(&spans), "hello");
    }

    #[test]
    fn plain_text_extraction_with_markup() {
        let spans = vec![plain("Hello "), TextSpan::Bold(vec![plain("world")])];
        assert_eq!(spans_to_plain_text(&spans), "Hello world");
    }

    #[test]
    fn plain_text_extraction_nested() {
        let spans = vec![TextSpan::Bold(vec![TextSpan::Italic(vec![plain("nested")])])];
        assert_eq!(spans_to_plain_text(&spans), "nested");
    }

    #[test]
    fn plain_text_extraction_span() {
        let spans = vec![TextSpan::Span(fg("red"), vec![plain("colored")])];
        assert_eq!(spans_to_plain_text(&spans), "colored");
    }
}