Skip to main content

zenith_core/markdown/
inline.rs

1//! Pure, deterministic inline-markdown → [`TextSpan`] parser.
2//!
3//! This converts an INLINE markdown string (emphasis within a single text
4//! block) into a `Vec<TextSpan>`, setting the per-span marks Zenith already
5//! supports. It is **inline only** — there is no block-level structure here
6//! (no headings, lists, tables, or paragraph splitting). It is intended to be
7//! invoked when a text node opts into `data-format="markdown"`; this module is
8//! just the parser.
9//!
10//! # Supported syntax
11//!
12//! | Markdown                  | Span mark set                                   |
13//! |---------------------------|-------------------------------------------------|
14//! | `**bold**` / `__bold__`   | `font_weight = Literal("700")` (resolves to 700)|
15//! | `*italic*` / `_italic_`   | `italic = Some(true)`                           |
16//! | `~~strike~~`              | `strikethrough = Some(true)`                    |
17//! | `++underline++`           | `underline = Some(true)`                        |
18//! | `==highlight==`           | `highlight = Literal("#fff59d")` (marker yellow)|
19//! | `` `code` ``              | `code = Some(true)` (RAW: no inner parsing)     |
20//! | `[label](url)`            | span(s) with `link = Some(url)`; label parsed   |
21//!
22//! # Rules
23//!
24//! - Plain text between marks becomes plain spans (no marks set).
25//! - Backslash escapes (`\*`, `\_`, `\~`, `\=`, `\+`, `` \` ``, `\[`, `\]`,
26//!   `\\`) emit the literal character, not a delimiter.
27//! - Marks may nest when they cleanly close in LIFO order, e.g.
28//!   `**_bold italic_**` → one span with bold weight + italic.
29//! - A code span (`` ` ``) is a RAW context: no other marks and no escapes are
30//!   parsed inside it; its text is verbatim.
31//! - In `[label](url)`, the `label` is parsed for inline marks (all carrying
32//!   the link); the `url` is taken verbatim. A `[` with no matching `](...)`
33//!   is literal text.
34//! - Flanking rule: a delimiter run only OPENS emphasis when immediately
35//!   followed by a non-whitespace char, and only CLOSES when immediately
36//!   preceded by a non-whitespace char (start/end of input count as
37//!   whitespace). So `a * b` and `a ** b` are literal `*` / `**`.
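//! - Unmatched / dangling delimiters degrade to literal text AT THEIR ORIGINAL
//!   POSITION. The function is infallible: malformed markdown never errors and
//!   never reorders input — concatenating the span texts reproduces the input
//!   minus exactly the characters consumed as syntax: MATCHED delimiters, escape
//!   backslashes, the backticks of a closed code span, and the `[`, `]` and
//!   `(url)` of a well-formed link (the url is carried on the span's `link`
//!   instead). An empty code span or empty-label link therefore yields no span.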
43//! - Adjacent runs with identical mark sets are coalesced into one span.
44//! - Fully deterministic: same input → same `Vec<TextSpan>`.
45
46use crate::ast::node::TextSpan;
47use crate::ast::value::PropertyValue;
48
/// Weight written onto spans produced by `**bold**` / `__bold__`.
///
/// Kept as a bare numeric string so it can be wrapped in a
/// `PropertyValue::Literal` and resolved to `700` without a token lookup by
/// the scene-side `resolve_font_weight` (see `zenith-scene`
/// `compile/text/shape.rs`).
const BOLD_WEIGHT: &str = "700";

/// Color written onto spans produced by `==highlight==`.
///
/// Markdown highlight syntax carries no color of its own, so a conventional
/// marker yellow is used. It is a raw sRGB hex literal, the form the
/// scene-side `resolve_property_color` resolver parses via `parse_color` /
/// `parse_srgb_hex` (see `zenith-scene` `compile/paint.rs`).
const HIGHLIGHT_DEFAULT: &str = "#fff59d";
59
/// Snapshot of which inline marks are in force at one point of the scan.
///
/// A plain bag of flags: `Default` is "no marks", and equality between two
/// snapshots is what decides whether neighbouring text can share a span.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct MarkSet {
    /// Inside `**…**` / `__…__`.
    bold: bool,
    /// Inside `*…*` / `_…_`.
    italic: bool,
    /// Inside `++…++`.
    underline: bool,
    /// Inside `~~…~~`.
    strikethrough: bool,
    /// Inside `==…==`.
    highlight: bool,
    /// The text is the body of a backtick code span.
    code: bool,
}
71
72impl MarkSet {
73    /// Build a [`TextSpan`] carrying `text` styled by this mark set plus an
74    /// optional `link`.
75    fn span(&self, text: String, link: Option<String>) -> TextSpan {
76        TextSpan {
77            text,
78            fill: None,
79            font_weight: if self.bold {
80                Some(PropertyValue::Literal(BOLD_WEIGHT.to_owned()))
81            } else {
82                None
83            },
84            italic: if self.italic { Some(true) } else { None },
85            underline: if self.underline { Some(true) } else { None },
86            strikethrough: if self.strikethrough { Some(true) } else { None },
87            vertical_align: None,
88            footnote_ref: None,
89            data_ref: None,
90            data_format: None,
91            highlight: if self.highlight {
92                Some(PropertyValue::Literal(HIGHLIGHT_DEFAULT.to_owned()))
93            } else {
94                None
95            },
96            code: if self.code { Some(true) } else { None },
97            link,
98        }
99    }
100}
101
/// The kind of mark a delimiter run stands for.
///
/// Entries of this type form the open-delimiter stack, so that a closing run
/// can be paired with (and pop) the opener of the same kind in LIFO order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Delim {
    /// `**` or `__`
    Bold,
    /// `*` or `_`
    Italic,
    /// `~~`
    Strikethrough,
    /// `++`
    Underline,
    /// `==`
    Highlight,
}
112
113/// A token produced by the first (lexing) pass over the input.
114///
115/// Delimiter markers are emitted as-is and only RESOLVED into mark
116/// open/close (or demoted to literal text) in the second pass, so an unmatched
117/// delimiter can be re-surfaced as literal text AT ITS ORIGINAL POSITION.
118#[derive(Debug, Clone)]
119enum Token {
120    /// A literal text fragment (escapes already decoded to their literal char).
121    Text(String),
122    /// A code span's verbatim contents (no inner parsing).
123    Code(String),
124    /// A resolved link: the label spans, each already carrying the link url.
125    Link(Vec<TextSpan>),
126    /// A delimiter marker run. `literal` is the exact source glyphs (`"**"`,
127    /// `"_"`, …) so it can be demoted to literal text if it never pairs. `can_open`
128    /// / `can_close` are the flanking flags computed at lex time. `role` is set by
129    /// [`resolve_markers`]: an unpaired marker becomes [`Token::Text`] instead.
130    Marker {
131        delim: Delim,
132        literal: String,
133        can_open: bool,
134        can_close: bool,
135        role: MarkerRole,
136    },
137}
138
/// What [`resolve_markers`] decided about a [`Token::Marker`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MarkerRole {
    /// No decision yet; this is the value every marker is lexed with. A
    /// marker still in this state once [`resolve_markers`] finishes is turned
    /// into literal text.
    Unresolved,
    /// Paired as an opener: the build pass pushes the marker's `delim` onto
    /// its mark stack.
    Open,
    /// Paired as a closer: the build pass pops the mark stack.
    Close,
}
150
151/// Parse an inline-markdown string into styled [`TextSpan`]s.
152///
153/// Infallible: malformed markdown degrades to literal text (it never errors and
154/// never drops input). See the module docs for the supported syntax and rules.
155pub fn parse_inline_markdown(input: &str) -> Vec<TextSpan> {
156    let chars: Vec<char> = input.chars().collect();
157    let link: Option<String> = None;
158    let mut out: Vec<TextSpan> = Vec::new();
159    parse_run(&chars, link, &mut out);
160    out
161}
162
163/// Parse a character slice as a styled run, appending coalesced spans to `out`.
164/// All spans produced carry `link` (the active hyperlink, if any).
165///
166/// Three passes: lex into [`Token`]s, resolve which delimiter markers pair up
167/// (the rest are demoted to literal text in place), then build spans by walking
168/// the resolved tokens with a live mark stack.
169fn parse_run(chars: &[char], link: Option<String>, out: &mut Vec<TextSpan>) {
170    let mut tokens = lex(chars);
171    resolve_markers(&mut tokens);
172    build_spans(&tokens, link, out);
173}
174
175/// First pass: turn the character slice into a flat list of [`Token`]s. Escapes
176/// are decoded here, code spans and links are fully consumed, and delimiter runs
177/// become [`Token::Marker`]s carrying their flanking flags. Nothing is matched or
178/// demoted yet.
179fn lex(chars: &[char]) -> Vec<Token> {
180    let mut tokens: Vec<Token> = Vec::new();
181    let mut buf = String::new();
182    let mut i: usize = 0;
183
184    while i < chars.len() {
185        let Some(&c) = chars.get(i) else { break };
186
187        // --- Backslash escape: the next char is literal. ---
188        if c == '\\' {
189            match chars.get(i + 1) {
190                Some(&next) if is_escapable(next) => {
191                    buf.push(next);
192                    i += 2;
193                    continue;
194                }
195                _ => {
196                    buf.push('\\');
197                    i += 1;
198                    continue;
199                }
200            }
201        }
202
203        // --- Code span: raw, verbatim, no inner parsing. ---
204        if c == '`' {
205            if let Some(end) = find_code_close(chars, i + 1) {
206                flush_text(&mut buf, &mut tokens);
207                let raw: String = chars.get(i + 1..end).unwrap_or(&[]).iter().collect();
208                tokens.push(Token::Code(raw));
209                i = end + 1;
210                continue;
211            }
212            buf.push('`');
213            i += 1;
214            continue;
215        }
216
217        // --- Link: [label](url) ---
218        if c == '[' {
219            if let Some((label, url, next)) = try_parse_link(chars, i) {
220                flush_text(&mut buf, &mut tokens);
221                let label_chars: Vec<char> = label.chars().collect();
222                let mut label_spans: Vec<TextSpan> = Vec::new();
223                parse_run(&label_chars, Some(url), &mut label_spans);
224                tokens.push(Token::Link(label_spans));
225                i = next;
226                continue;
227            }
228            buf.push('[');
229            i += 1;
230            continue;
231        }
232
233        // --- Two-character delimiters: ** __ ~~ ++ == ---
234        if let Some((delim, lit)) = match_two_char(chars, i) {
235            flush_text(&mut buf, &mut tokens);
236            let (can_open, can_close) = flanking(chars, i, 2);
237            tokens.push(Token::Marker {
238                delim,
239                literal: lit,
240                can_open,
241                can_close,
242                role: MarkerRole::Unresolved,
243            });
244            i += 2;
245            continue;
246        }
247
248        // --- One-character emphasis: * or _ ---
249        if c == '*' || c == '_' {
250            flush_text(&mut buf, &mut tokens);
251            let (can_open, can_close) = flanking(chars, i, 1);
252            tokens.push(Token::Marker {
253                delim: Delim::Italic,
254                literal: c.to_string(),
255                can_open,
256                can_close,
257                role: MarkerRole::Unresolved,
258            });
259            i += 1;
260            continue;
261        }
262
263        // --- Ordinary character. ---
264        buf.push(c);
265        i += 1;
266    }
267    flush_text(&mut buf, &mut tokens);
268    tokens
269}
270
271/// Flush the pending literal-text buffer into a [`Token::Text`] (if non-empty).
272fn flush_text(buf: &mut String, tokens: &mut Vec<Token>) {
273    if !buf.is_empty() {
274        tokens.push(Token::Text(std::mem::take(buf)));
275    }
276}
277
/// Flanking flags, as `(can_open, can_close)`, for the delimiter run that
/// occupies `width` chars starting at index `i`.
///
/// - `can_open`: the char right after the run exists and is not whitespace.
/// - `can_close`: the char right before the run exists and is not whitespace.
///
/// The start and end of the input therefore behave like whitespace.
fn flanking(chars: &[char], i: usize, width: usize) -> (bool, bool) {
    let is_solid = |slot: Option<&char>| slot.is_some_and(|ch| !ch.is_whitespace());

    // `checked_sub` yields `None` at the very start of the input.
    let prev = i.checked_sub(1).and_then(|p| chars.get(p));
    let next = chars.get(i + width);

    (is_solid(next), is_solid(prev))
}
293
294/// Second pass: decide which delimiter markers pair into open/close and which are
295/// demoted to literal text. A marker that never pairs is rewritten to a
296/// [`Token::Text`] of its own literal glyphs, IN PLACE — so no character moves or
297/// vanishes. Matched pairs are left as `Marker`s for the build pass to act on.
298///
299/// Matching is greedy + LIFO: scanning left to right, a marker that `can_close`
300/// is matched against the nearest still-open same-delim marker that `can_open`.
301fn resolve_markers(tokens: &mut [Token]) {
302    // Indices of open candidate markers, as a single stack preserving source
303    // order so closing honors strict LIFO nesting across all delimiter kinds.
304    let mut open_stack: Vec<usize> = Vec::new();
305
306    for idx in 0..tokens.len() {
307        let (delim, can_open, can_close) = match tokens.get(idx) {
308            Some(Token::Marker {
309                delim,
310                can_open,
311                can_close,
312                ..
313            }) => (*delim, *can_open, *can_close),
314            _ => continue,
315        };
316
317        // Try to CLOSE against the nearest matching open on the stack (LIFO).
318        if can_close
319            && let Some(stack_pos) = open_stack.iter().rposition(
320                |&oi| matches!(tokens.get(oi), Some(Token::Marker { delim: d, .. }) if *d == delim),
321            )
322            && let Some(&open_idx) = open_stack.get(stack_pos)
323        {
324            // Strict LIFO: any opens sitting ABOVE the matched one are now
325            // crossed/unreachable. Drop them from the candidate set so they
326            // fall through to literal-text demotion (no character is lost).
327            open_stack.truncate(stack_pos);
328            set_role(tokens, open_idx, MarkerRole::Open);
329            set_role(tokens, idx, MarkerRole::Close);
330            continue;
331        }
332
333        // Otherwise, if it can open, push as a candidate.
334        if can_open {
335            open_stack.push(idx);
336        }
337        // A marker that can neither close (here) nor open stays `Unresolved` and
338        // is demoted to literal text below.
339    }
340
341    // Demote every still-unresolved marker to literal text IN PLACE.
342    for idx in 0..tokens.len() {
343        if let Some(Token::Marker {
344            literal,
345            role: MarkerRole::Unresolved,
346            ..
347        }) = tokens.get(idx)
348        {
349            let lit = literal.clone();
350            if let Some(slot) = tokens.get_mut(idx) {
351                *slot = Token::Text(lit);
352            }
353        }
354    }
355}
356
357/// Set the resolved [`MarkerRole`] of the marker token at `idx` (no-op if the
358/// token at `idx` is not a marker).
359fn set_role(tokens: &mut [Token], idx: usize, new_role: MarkerRole) {
360    if let Some(Token::Marker { role, .. }) = tokens.get_mut(idx) {
361        *role = new_role;
362    }
363}
364
365/// Third pass: walk the resolved tokens with a live mark stack, emitting spans.
366/// A matched `Marker` toggles its mark (push on first sight = open, pop on second
367/// = close); `Text` / `Code` / `Link` tokens emit styled content.
368fn build_spans(tokens: &[Token], link: Option<String>, out: &mut Vec<TextSpan>) {
369    let mut sink = SpanSink::new(link);
370    let mut stack: Vec<Delim> = Vec::new();
371
372    for tok in tokens {
373        match tok {
374            Token::Text(t) => {
375                for ch in t.chars() {
376                    sink.push_char(&stack, ch);
377                }
378            }
379            Token::Code(raw) => {
380                let mut marks = sink.marks_from_stack(&stack);
381                marks.code = true;
382                sink.push_span(marks.span(raw.clone(), sink.link.clone()));
383            }
384            Token::Link(spans) => {
385                for s in spans {
386                    sink.push_span(s.clone());
387                }
388            }
389            Token::Marker { delim, role, .. } => match role {
390                // `resolve_markers` paired these as clean LIFO open/close, so an
391                // `Open` always pushes and a `Close` always pops its partner
392                // (the topmost entry, which is the matching delim).
393                MarkerRole::Open => stack.push(*delim),
394                MarkerRole::Close => {
395                    stack.pop();
396                }
397                // Unresolved markers were rewritten to `Text` already; this arm
398                // is unreachable in practice but kept exhaustive (no `_`).
399                MarkerRole::Unresolved => {}
400            },
401        }
402    }
403
404    sink.finish(out);
405}
406
407/// Accumulates characters into spans, coalescing adjacent runs that share an
408/// identical mark set and link.
409struct SpanSink {
410    link: Option<String>,
411    spans: Vec<TextSpan>,
412    /// The mark set the current pending buffer is being styled with.
413    pending_marks: MarkSet,
414    pending_text: String,
415    have_pending: bool,
416}
417
418impl SpanSink {
419    fn new(link: Option<String>) -> Self {
420        SpanSink {
421            link,
422            spans: Vec::new(),
423            pending_marks: MarkSet::default(),
424            pending_text: String::new(),
425            have_pending: false,
426        }
427    }
428
429    /// Derive the active mark set from the open delimiter stack.
430    fn marks_from_stack(&self, stack: &[Delim]) -> MarkSet {
431        let mut m = MarkSet::default();
432        for delim in stack {
433            match delim {
434                Delim::Bold => m.bold = true,
435                Delim::Italic => m.italic = true,
436                Delim::Strikethrough => m.strikethrough = true,
437                Delim::Underline => m.underline = true,
438                Delim::Highlight => m.highlight = true,
439            }
440        }
441        m
442    }
443
444    /// Push one character, styled by the marks currently active on `stack`.
445    fn push_char(&mut self, stack: &[Delim], c: char) {
446        let marks = self.marks_from_stack(stack);
447        if self.have_pending && marks == self.pending_marks {
448            self.pending_text.push(c);
449        } else {
450            self.flush_pending();
451            self.pending_marks = marks;
452            self.pending_text.push(c);
453            self.have_pending = true;
454        }
455    }
456
457    /// Push a fully-formed span (used for code spans and link sub-spans), first
458    /// flushing any pending buffered text. Empty-text spans are dropped (an
459    /// empty `` `` `` code span or `[](u)` link has no glyphs to render).
460    fn push_span(&mut self, span: TextSpan) {
461        if span.text.is_empty() {
462            return;
463        }
464        self.flush_pending();
465        if let Some(last) = self.spans.last_mut()
466            && spans_mergeable(last, &span)
467        {
468            last.text.push_str(&span.text);
469            return;
470        }
471        self.spans.push(span);
472    }
473
474    /// Flush the pending buffered text into a span (coalescing if possible).
475    fn flush_pending(&mut self) {
476        if !self.have_pending {
477            return;
478        }
479        let text = std::mem::take(&mut self.pending_text);
480        let marks = std::mem::take(&mut self.pending_marks);
481        self.have_pending = false;
482        if text.is_empty() {
483            return;
484        }
485        let span = marks.span(text, self.link.clone());
486        if let Some(last) = self.spans.last_mut()
487            && spans_mergeable(last, &span)
488        {
489            last.text.push_str(&span.text);
490            return;
491        }
492        self.spans.push(span);
493    }
494
495    /// Finalize: flush pending text and append spans to `out`.
496    fn finish(mut self, out: &mut Vec<TextSpan>) {
497        self.flush_pending();
498        out.append(&mut self.spans);
499    }
500}
501
502/// Two spans may be merged when every styling field and the link are identical.
503fn spans_mergeable(a: &TextSpan, b: &TextSpan) -> bool {
504    a.fill == b.fill
505        && a.font_weight == b.font_weight
506        && a.italic == b.italic
507        && a.underline == b.underline
508        && a.strikethrough == b.strikethrough
509        && a.vertical_align == b.vertical_align
510        && a.footnote_ref == b.footnote_ref
511        && a.data_ref == b.data_ref
512        && a.data_format == b.data_format
513        && a.highlight == b.highlight
514        && a.code == b.code
515        && a.link == b.link
516}
517
/// Tells whether a backslash in front of `c` forms an escape, i.e. whether
/// `\c` should be emitted as the literal character `c`.
fn is_escapable(c: char) -> bool {
    const ESCAPABLE: [char; 9] = ['*', '_', '~', '=', '+', '`', '[', ']', '\\'];
    ESCAPABLE.contains(&c)
}
522
523/// Match a two-character delimiter starting at `i`. Returns the delimiter and
524/// its literal text, or `None`.
525fn match_two_char(chars: &[char], i: usize) -> Option<(Delim, String)> {
526    let a = *chars.get(i)?;
527    let b = *chars.get(i + 1)?;
528    let delim = match (a, b) {
529        ('*', '*') | ('_', '_') => Delim::Bold,
530        ('~', '~') => Delim::Strikethrough,
531        ('+', '+') => Delim::Underline,
532        ('=', '=') => Delim::Highlight,
533        _ => return None,
534    };
535    Some((delim, format!("{a}{b}")))
536}
537
/// Locate the backtick that closes a code span whose opening backtick sits
/// just before `start`.
///
/// Returns the index (into `chars`) of the first `` ` `` at or after `start`,
/// or `None` when there is none, including when `start` is past the end.
fn find_code_close(chars: &[char], start: usize) -> Option<usize> {
    chars
        .get(start..)?
        .iter()
        .position(|&ch| ch == '`')
        .map(|offset| start + offset)
}
551
/// Try to read a `[label](url)` link whose `[` is at index `open`.
///
/// On success returns `(label, url, next_index)`, with `next_index` pointing
/// just past the closing `)`. Returns `None` when `open` is not a `[`, the
/// label is never closed, the `]` is not immediately followed by `(`, or the
/// url is never closed.
///
/// Inside the label a backslash protects the following char, so `\]` does not
/// end the label; the escape sequence is copied into the label unchanged so
/// the later label parse can decode it. The url is copied verbatim and ends at
/// the first `)`; nested parentheses are not supported.
fn try_parse_link(chars: &[char], open: usize) -> Option<(String, String, usize)> {
    if chars.get(open).copied() != Some('[') {
        return None;
    }

    // Label: everything up to the first unescaped ']'.
    let mut label = String::new();
    let mut pos = open + 1;
    let label_end = loop {
        // Running off the end means the label never closed.
        let ch = *chars.get(pos)?;
        match ch {
            ']' => break pos,
            '\\' => {
                label.push('\\');
                pos += 1;
                if let Some(&escaped) = chars.get(pos) {
                    label.push(escaped);
                    pos += 1;
                }
            }
            other => {
                label.push(other);
                pos += 1;
            }
        }
    };

    // The '(' must directly follow the ']'.
    if chars.get(label_end + 1) != Some(&'(') {
        return None;
    }

    // Url: verbatim up to the first ')'.
    let url_start = label_end + 2;
    let url_len = chars.get(url_start..)?.iter().position(|&ch| ch == ')')?;
    let url_end = url_start + url_len;
    let url: String = chars.get(url_start..url_end)?.iter().collect();

    Some((label, url, url_end + 1))
}
620
621#[cfg(test)]
622mod tests {
623    use super::*;
624
625    fn texts(spans: &[TextSpan]) -> String {
626        spans.iter().map(|s| s.text.as_str()).collect()
627    }
628
629    fn bold() -> Option<PropertyValue> {
630        Some(PropertyValue::Literal(BOLD_WEIGHT.to_owned()))
631    }
632    fn hl() -> Option<PropertyValue> {
633        Some(PropertyValue::Literal(HIGHLIGHT_DEFAULT.to_owned()))
634    }
635
636    #[test]
637    fn empty_input_yields_no_spans() {
638        assert!(parse_inline_markdown("").is_empty());
639    }
640
641    #[test]
642    fn plain_text_single_span() {
643        let s = parse_inline_markdown("hello world");
644        assert_eq!(s.len(), 1);
645        assert_eq!(s[0].text, "hello world");
646        assert_eq!(s[0].font_weight, None);
647        assert_eq!(s[0].italic, None);
648    }
649
650    #[test]
651    fn bold_star_and_underscore() {
652        for src in ["**bold**", "__bold__"] {
653            let s = parse_inline_markdown(src);
654            assert_eq!(s.len(), 1, "src={src}");
655            assert_eq!(s[0].text, "bold");
656            assert_eq!(s[0].font_weight, bold());
657        }
658    }
659
660    #[test]
661    fn italic_star_and_underscore() {
662        for src in ["*it*", "_it_"] {
663            let s = parse_inline_markdown(src);
664            assert_eq!(s.len(), 1, "src={src}");
665            assert_eq!(s[0].text, "it");
666            assert_eq!(s[0].italic, Some(true));
667            assert_eq!(s[0].font_weight, None);
668        }
669    }
670
671    #[test]
672    fn strikethrough() {
673        let s = parse_inline_markdown("~~gone~~");
674        assert_eq!(s.len(), 1);
675        assert_eq!(s[0].text, "gone");
676        assert_eq!(s[0].strikethrough, Some(true));
677    }
678
679    #[test]
680    fn underline() {
681        let s = parse_inline_markdown("++under++");
682        assert_eq!(s.len(), 1);
683        assert_eq!(s[0].text, "under");
684        assert_eq!(s[0].underline, Some(true));
685    }
686
687    #[test]
688    fn highlight_uses_default_color() {
689        let s = parse_inline_markdown("==mark==");
690        assert_eq!(s.len(), 1);
691        assert_eq!(s[0].text, "mark");
692        assert_eq!(s[0].highlight, hl());
693    }
694
695    #[test]
696    fn code_span_basic() {
697        let s = parse_inline_markdown("`fn main()`");
698        assert_eq!(s.len(), 1);
699        assert_eq!(s[0].text, "fn main()");
700        assert_eq!(s[0].code, Some(true));
701    }
702
703    #[test]
704    fn code_span_is_verbatim_no_inner_parsing() {
705        let s = parse_inline_markdown("`**not bold** \\n _x_`");
706        assert_eq!(s.len(), 1);
707        // Backticks content is raw: delimiters and backslash are literal.
708        assert_eq!(s[0].text, "**not bold** \\n _x_");
709        assert_eq!(s[0].code, Some(true));
710        assert_eq!(s[0].font_weight, None);
711        assert_eq!(s[0].italic, None);
712    }
713
714    #[test]
715    fn nested_bold_italic_single_span() {
716        // **_bold italic_** → one span with bold + italic.
717        let s = parse_inline_markdown("**_bold italic_**");
718        assert_eq!(s.len(), 1);
719        assert_eq!(s[0].text, "bold italic");
720        assert_eq!(s[0].font_weight, bold());
721        assert_eq!(s[0].italic, Some(true));
722    }
723
724    #[test]
725    fn nested_highlight_bold() {
726        // ==**important**== → highlight + bold.
727        let s = parse_inline_markdown("==**important**==");
728        assert_eq!(s.len(), 1);
729        assert_eq!(s[0].text, "important");
730        assert_eq!(s[0].highlight, hl());
731        assert_eq!(s[0].font_weight, bold());
732    }
733
734    #[test]
735    fn partial_nesting_splits_spans() {
736        // a **b _c_ d** e
737        let s = parse_inline_markdown("a **b _c_ d** e");
738        assert_eq!(texts(&s), "a b c d e");
739        // "a " plain, "b " bold, "c" bold+italic, " d" bold, " e" plain.
740        let joined: Vec<(&str, bool, bool)> = s
741            .iter()
742            .map(|x| {
743                (
744                    x.text.as_str(),
745                    x.font_weight.is_some(),
746                    x.italic == Some(true),
747                )
748            })
749            .collect();
750        assert_eq!(
751            joined,
752            vec![
753                ("a ", false, false),
754                ("b ", true, false),
755                ("c", true, true),
756                (" d", true, false),
757                (" e", false, false),
758            ]
759        );
760    }
761
762    #[test]
763    fn escapes_emit_literals() {
764        let s = parse_inline_markdown(r##"\*not italic\* \_ \~ \= \+ \` \[ \] \\"##);
765        assert_eq!(s.len(), 1);
766        assert_eq!(s[0].text, r##"*not italic* _ ~ = + ` [ ] \"##);
767        assert_eq!(s[0].italic, None);
768        assert_eq!(s[0].font_weight, None);
769    }
770
771    #[test]
772    fn backslash_before_normal_char_is_literal() {
773        let s = parse_inline_markdown(r##"a\b"##);
774        assert_eq!(texts(&s), r##"a\b"##);
775    }
776
777    #[test]
778    fn link_plain_label() {
779        let s = parse_inline_markdown("[Zenith](https://example.com)");
780        assert_eq!(s.len(), 1);
781        assert_eq!(s[0].text, "Zenith");
782        assert_eq!(s[0].link.as_deref(), Some("https://example.com"));
783    }
784
785    #[test]
786    fn link_label_with_inner_marks() {
787        let s = parse_inline_markdown("[**bold** link](u)");
788        assert_eq!(texts(&s), "bold link");
789        for sp in &s {
790            assert_eq!(sp.link.as_deref(), Some("u"));
791        }
792        assert_eq!(s[0].text, "bold");
793        assert_eq!(s[0].font_weight, bold());
794        assert_eq!(s[1].text, " link");
795        assert_eq!(s[1].font_weight, None);
796    }
797
798    #[test]
799    fn link_url_is_verbatim() {
800        // Markdown inside the url is NOT parsed.
801        let s = parse_inline_markdown("[x](http://a/**b**)");
802        assert_eq!(s.len(), 1);
803        assert_eq!(s[0].link.as_deref(), Some("http://a/**b**"));
804    }
805
806    #[test]
807    fn bracket_without_link_is_literal() {
808        let s = parse_inline_markdown("[just text]");
809        assert_eq!(texts(&s), "[just text]");
810        assert!(s.iter().all(|sp| sp.link.is_none()));
811    }
812
813    #[test]
814    fn bracket_with_label_but_no_paren_is_literal() {
815        let s = parse_inline_markdown("[label] (noturl)");
816        assert_eq!(texts(&s), "[label] (noturl)");
817        assert!(s.iter().all(|sp| sp.link.is_none()));
818    }
819
820    #[test]
821    fn dangling_bold_is_literal() {
822        let s = parse_inline_markdown("**oops");
823        assert_eq!(texts(&s), "**oops");
824        assert!(s.iter().all(|sp| sp.font_weight.is_none()));
825    }
826
827    #[test]
828    fn lone_star_is_literal() {
829        let s = parse_inline_markdown("a * b");
830        assert_eq!(texts(&s), "a * b");
831        assert!(s.iter().all(|sp| sp.italic.is_none()));
832    }
833
834    #[test]
835    fn unmatched_closing_underscore_is_literal() {
836        let s = parse_inline_markdown("end_");
837        assert_eq!(texts(&s), "end_");
838        assert!(s.iter().all(|sp| sp.italic.is_none()));
839    }
840
841    #[test]
842    fn whitespace_flanked_double_delim_is_literal_in_place() {
843        // `a ** b` — the `**` is surrounded by spaces so it can neither open nor
844        // close; it must appear as literal text AT ITS ORIGINAL POSITION.
845        let s = parse_inline_markdown("a ** b");
846        assert_eq!(texts(&s), "a ** b");
847        assert!(s.iter().all(|sp| sp.font_weight.is_none()));
848    }
849
850    #[test]
851    fn dangling_opener_emits_literal_in_original_position() {
852        // The unmatched `*` sits BETWEEN "x " and " y" and must stay there, not
853        // move to the end. (Regression: earlier design re-emitted at the tail.)
854        let s = parse_inline_markdown("x *unclosed");
855        assert_eq!(texts(&s), "x *unclosed");
856        assert!(s.iter().all(|sp| sp.italic.is_none()));
857        // And the literal `*` is immediately followed by "unclosed".
858        let joined = texts(&s);
859        let star = joined.find('*').expect("literal star present");
860        assert!(joined[star + 1..].starts_with("unclosed"));
861    }
862
863    #[test]
864    fn opener_needs_following_nonspace() {
865        // `* a*` — first `*` is followed by a space → cannot open → literal.
866        let s = parse_inline_markdown("* a*");
867        assert_eq!(texts(&s), "* a*");
868        assert!(s.iter().all(|sp| sp.italic.is_none()));
869    }
870
871    #[test]
872    fn closer_needs_preceding_nonspace() {
873        // `*a *` — closing `*` is preceded by a space → cannot close → both
874        // delimiters fall through to literal text.
875        let s = parse_inline_markdown("*a *");
876        assert_eq!(texts(&s), "*a *");
877        assert!(s.iter().all(|sp| sp.italic.is_none()));
878    }
879
880    #[test]
881    fn same_delim_nested_pairs_keep_marks() {
882        // `**a **b** c**` — both bold pairs resolve; every word stays bold and no
883        // character is lost.
884        let s = parse_inline_markdown("**a **b** c**");
885        assert_eq!(texts(&s), "a b c");
886        assert!(s.iter().all(|sp| sp.font_weight == bold()));
887    }
888
889    #[test]
890    fn no_character_loss_consumes_only_delimiters() {
891        // Concatenated span text equals input minus the markdown delimiters.
892        let s = parse_inline_markdown("**a** _b_ ~~c~~ ++d++ ==e==");
893        assert_eq!(texts(&s), "a b c d e");
894    }
895
896    #[test]
897    fn no_character_loss_with_escapes() {
898        // Escapes consume the backslash; everything else preserved.
899        let s = parse_inline_markdown(r##"x \* y"##);
900        assert_eq!(texts(&s), "x * y");
901    }
902
903    #[test]
904    fn determinism_parse_twice_equal() {
905        let src = "a **b _c_** ~~d~~ `e` [f](g) ==h== \\* ++i++";
906        let a = parse_inline_markdown(src);
907        let b = parse_inline_markdown(src);
908        assert_eq!(a, b);
909    }
910
911    #[test]
912    fn combined_all_marks() {
913        let s = parse_inline_markdown("==++~~**_x_**~~++==");
914        assert_eq!(s.len(), 1);
915        assert_eq!(s[0].text, "x");
916        assert_eq!(s[0].highlight, hl());
917        assert_eq!(s[0].underline, Some(true));
918        assert_eq!(s[0].strikethrough, Some(true));
919        assert_eq!(s[0].font_weight, bold());
920        assert_eq!(s[0].italic, Some(true));
921    }
922
923    #[test]
924    fn code_inside_text_run() {
925        let s = parse_inline_markdown("use `cargo build` now");
926        assert_eq!(texts(&s), "use cargo build now");
927        assert_eq!(s[0].text, "use ");
928        assert_eq!(s[0].code, None);
929        assert_eq!(s[1].text, "cargo build");
930        assert_eq!(s[1].code, Some(true));
931        assert_eq!(s[2].text, " now");
932    }
933
934    #[test]
935    fn unclosed_code_is_literal_backtick() {
936        let s = parse_inline_markdown("a `b c");
937        assert_eq!(texts(&s), "a `b c");
938        assert!(s.iter().all(|sp| sp.code.is_none()));
939    }
940
941    #[test]
942    fn adjacent_same_marks_coalesce() {
943        // "**a****b**" → bold a then bold b; adjacent identical marks coalesce.
944        let s = parse_inline_markdown("**a****b**");
945        assert_eq!(s.len(), 1);
946        assert_eq!(s[0].text, "ab");
947        assert_eq!(s[0].font_weight, bold());
948    }
949}