Skip to main content

rustdown_md/
parse.rs

1#![forbid(unsafe_code)]
2//! Markdown parsing: converts source text into a flat list of render blocks.
3
4use std::rc::Rc;
5
6use pulldown_cmark::{Event, HeadingLevel, Options, Parser, Tag, TagEnd};
7
8/// A single renderable block produced by parsing.
9///
10/// Large variants (`Table`) are boxed to keep the enum compact (~56 bytes
11/// instead of ~72), improving cache locality during block iteration.
12///
13/// Immutable string fields use `Box<str>` to avoid the 8-byte capacity
14/// overhead of `String` — these values are never modified after construction.
15#[derive(Clone, Debug)]
16pub enum Block {
17    Heading {
18        level: u8,
19        text: StyledText,
20    },
21    Paragraph(StyledText),
22    Code {
23        /// Language tag from fenced code blocks (e.g. "rust", "python").
24        language: Box<str>,
25        code: Box<str>,
26    },
27    Quote(Vec<Self>),
28    UnorderedList(Vec<ListItem>),
29    OrderedList {
30        start: u64,
31        items: Vec<ListItem>,
32    },
33    ThematicBreak,
34    Table(Box<TableData>),
35    Image {
36        url: Box<str>,
37        alt: Box<str>,
38    },
39}
40
41/// Table block data, boxed inside `Block::Table` to keep enum size down.
42#[derive(Clone, Debug)]
43pub struct TableData {
44    pub header: Vec<StyledText>,
45    pub alignments: Vec<Alignment>,
46    pub rows: Vec<Vec<StyledText>>,
47}
48
/// Horizontal alignment of a table column.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment specified.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
57
58/// A single list item (may contain nested blocks).
59#[derive(Clone, Debug)]
60pub struct ListItem {
61    pub content: StyledText,
62    pub children: Vec<Block>,
63    /// Task-list checkbox state: `Some(true)` = checked, `Some(false)` = unchecked, `None` = normal item.
64    pub checked: Option<bool>,
65}
66
67/// Styled text: a string with inline formatting spans.
68#[derive(Clone, Debug)]
69pub struct StyledText {
70    pub text: String,
71    pub spans: Vec<Span>,
72    /// Deduplicated link URLs referenced by `SpanStyle::link_idx`.
73    pub links: Vec<Rc<str>>,
74    /// Cached character count (avoids repeated O(n) UTF-8 scans for non-ASCII text).
75    pub char_count: u32,
76    /// Whether any span has a link (avoids linear scan in render path).
77    pub has_links: bool,
78    /// Whether the accumulated text is entirely ASCII.
79    pub is_ascii: bool,
80}
81
/// Inline formatting flags that can be combined (e.g., bold + italic).
///
/// `Default` is implemented by hand rather than derived: a derived impl would
/// zero `link_idx`, producing a style that claims to reference link 0 instead
/// of no link at all.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SpanStyle {
    /// Bitfield: bit 0 = strong, 1 = emphasis, 2 = strikethrough, 3 = code.
    flags: u8,
    /// Index into `StyledText::links`, or `NO_LINK` if no link.
    pub(crate) link_idx: u8,
}

impl Default for SpanStyle {
    /// Plain, unlinked style: no flags set and `link_idx == NO_LINK`.
    fn default() -> Self {
        Self {
            flags: 0,
            link_idx: NO_LINK,
        }
    }
}

/// Bit 0: strong (bold) text.
const FLAG_STRONG: u8 = 1;
/// Bit 1: emphasised (italic) text.
const FLAG_EMPHASIS: u8 = 2;
/// Bit 2: struck-through text.
const FLAG_STRIKETHROUGH: u8 = 4;
/// Bit 3: inline code.
const FLAG_CODE: u8 = 8;
/// Sentinel `link_idx` meaning "this span is not a link".
const NO_LINK: u8 = u8::MAX;
96
97impl SpanStyle {
98    #[cfg(test)]
99    #[must_use]
100    pub const fn plain() -> Self {
101        Self {
102            flags: 0,
103            link_idx: NO_LINK,
104        }
105    }
106
107    #[inline]
108    #[must_use]
109    pub const fn has_link(self) -> bool {
110        self.link_idx != NO_LINK
111    }
112
113    #[inline]
114    #[must_use]
115    pub const fn strong(self) -> bool {
116        self.flags & FLAG_STRONG != 0
117    }
118
119    #[cfg(test)]
120    pub const fn set_strong(&mut self) {
121        self.flags |= FLAG_STRONG;
122    }
123
124    #[inline]
125    #[must_use]
126    pub const fn emphasis(self) -> bool {
127        self.flags & FLAG_EMPHASIS != 0
128    }
129
130    #[cfg(test)]
131    pub const fn set_emphasis(&mut self) {
132        self.flags |= FLAG_EMPHASIS;
133    }
134
135    #[inline]
136    #[must_use]
137    pub const fn strikethrough(self) -> bool {
138        self.flags & FLAG_STRIKETHROUGH != 0
139    }
140
141    #[inline]
142    #[must_use]
143    pub const fn code(self) -> bool {
144        self.flags & FLAG_CODE != 0
145    }
146
147    #[cfg(test)]
148    pub const fn set_code(&mut self) {
149        self.flags |= FLAG_CODE;
150    }
151}
152
153/// An inline formatting span within a `StyledText`.
154///
155/// Uses `u32` offsets and a 2-byte `SpanStyle` (flags + link index) to keep
156/// the struct at 12 bytes, improving cache locality during block iteration.
157#[derive(Clone, Debug)]
158pub struct Span {
159    pub start: u32,
160    pub end: u32,
161    pub style: SpanStyle,
162}
163
164impl StyledText {
165    /// Pre-allocate both text and spans storage.
166    fn with_capacity(text_cap: usize, span_cap: usize) -> Self {
167        Self {
168            text: String::with_capacity(text_cap),
169            spans: Vec::with_capacity(span_cap),
170            ..Self::default()
171        }
172    }
173
174    #[inline]
175    #[allow(clippy::cast_possible_truncation)] // Saturates at u32::MAX
176    fn push_text(&mut self, s: &str, style: SpanStyle) {
177        let start = u32::try_from(self.text.len()).unwrap_or(u32::MAX);
178        self.text.push_str(s);
179        let end = u32::try_from(self.text.len()).unwrap_or(u32::MAX);
180        let fragment_is_ascii = s.is_ascii();
181        let char_count = if fragment_is_ascii {
182            s.len()
183        } else {
184            s.chars().count()
185        };
186        self.char_count = self.char_count.saturating_add(char_count as u32);
187        self.is_ascii &= fragment_is_ascii;
188        if style.has_link() {
189            self.has_links = true;
190        }
191        if start < end {
192            // Merge adjacent spans of the same style.
193            if let Some(last) = self.spans.last_mut()
194                && last.end == start
195                && last.style == style
196            {
197                last.end = end;
198                return;
199            }
200            self.spans.push(Span { start, end, style });
201        }
202    }
203
204    /// Look up a link URL by index, returning `None` for `NO_LINK`.
205    #[inline]
206    #[must_use]
207    pub fn link_url(&self, link_idx: u8) -> Option<&Rc<str>> {
208        if link_idx == NO_LINK {
209            None
210        } else {
211            self.links.get(link_idx as usize)
212        }
213    }
214
215    /// Return the displayed character count, reusing the parser-populated cache
216    /// when available and falling back safely for manually-constructed values.
217    #[inline]
218    #[must_use]
219    pub(crate) fn char_len(&self) -> usize {
220        if self.text.is_empty() {
221            0
222        } else if self.char_count > 0 {
223            self.char_count as usize
224        } else if self.text.is_ascii() {
225            self.text.len()
226        } else {
227            self.text.chars().count()
228        }
229    }
230
231    /// Intern a link URL, reusing an existing entry if the same URL is already stored.
232    #[inline]
233    fn intern_link(&mut self, url: Rc<str>) -> u8 {
234        if let Some((last, rest)) = self.links.split_last() {
235            let idx = rest.len();
236            if Rc::ptr_eq(last, &url) || **last == *url {
237                return idx as u8;
238            }
239        }
240        for (i, existing) in self
241            .links
242            .iter()
243            .enumerate()
244            .take(self.links.len().saturating_sub(1))
245        {
246            if Rc::ptr_eq(existing, &url) || **existing == *url {
247                return i as u8;
248            }
249        }
250        let idx = self.links.len();
251        if idx >= NO_LINK as usize {
252            return NO_LINK; // saturate — won't create hyperlink but won't crash
253        }
254        self.links.push(url);
255        idx as u8
256    }
257}
258
259impl Default for StyledText {
260    fn default() -> Self {
261        Self {
262            text: String::new(),
263            spans: Vec::new(),
264            links: Vec::new(),
265            char_count: 0,
266            has_links: false,
267            is_ascii: true,
268        }
269    }
270}
271
/// Test convenience: parse `source` and return the resulting blocks in a
/// freshly allocated `Vec`.
#[cfg(test)]
pub fn parse_markdown(source: &str) -> Vec<Block> {
    let mut parsed = Vec::new();
    parse_markdown_into(source, &mut parsed);
    parsed
}
279
280/// Maximum source size (in bytes) that the parser will process.
281///
282/// Documents above this limit are silently truncated at the last newline
283/// boundary within the limit, preventing denial-of-service via enormous
284/// inputs. 64 MiB is generous for any realistic markdown document.
285const MAX_PARSE_BYTES: usize = 64 * 1024 * 1024;
286
287/// Parse markdown source, appending blocks to an existing `Vec`.
288/// Reuses the existing allocation when possible.
289///
290/// Sources larger than [`MAX_PARSE_BYTES`] are truncated at the last
291/// newline within the limit to prevent unbounded memory allocation.
292pub fn parse_markdown_into(source: &str, blocks: &mut Vec<Block>) {
293    let source = if source.len() > MAX_PARSE_BYTES {
294        // Truncate at the last newline within the limit for clean output.
295        match source[..MAX_PARSE_BYTES].rfind('\n') {
296            Some(pos) => &source[..pos],
297            None => &source[..MAX_PARSE_BYTES],
298        }
299    } else {
300        source
301    };
302    let opts = Options::ENABLE_STRIKETHROUGH
303        | Options::ENABLE_TABLES
304        | Options::ENABLE_HEADING_ATTRIBUTES
305        | Options::ENABLE_TASKLISTS
306        | Options::ENABLE_SMART_PUNCTUATION
307        | Options::ENABLE_GFM;
308    let parser = Parser::new_ext(source, opts);
309    // Collect into Vec — required for our indexed recursive descent.
310    // Pre-allocate based on source size heuristic.
311    let events: Vec<Event<'_>> = {
312        let capacity = source.len() / 20 + 16;
313        let mut v = Vec::with_capacity(capacity);
314        v.extend(parser);
315        v
316    };
317    blocks.reserve(events.len() / 4 + 4);
318    let mut fmt = InlineState::new();
319    let mut pos = 0;
320    while pos < events.len() {
321        parse_block(&events, &mut pos, blocks, &mut fmt);
322    }
323}
324
325/// Lightweight scan: sum byte lengths of text/code events until `end_tag`.
326fn estimate_text_capacity(events: &[Event<'_>], end_tag: TagEnd) -> usize {
327    let mut cap = 0;
328    for ev in events {
329        match ev {
330            Event::End(tag) if *tag == end_tag => break,
331            Event::Text(t) | Event::Code(t) => cap += t.len(),
332            Event::SoftBreak | Event::HardBreak => cap += 1,
333            _ => {}
334        }
335    }
336    cap.max(16)
337}
338
339/// Collect alt text from inline events following a `Start(Image)`.
340///
341/// Advances `pos` past the `End(Image)` event.
342fn collect_image_alt(events: &[Event<'_>], pos: &mut usize) -> String {
343    let mut alt = String::with_capacity(64);
344    while *pos < events.len() {
345        match &events[*pos] {
346            Event::End(TagEnd::Image) => {
347                *pos += 1;
348                break;
349            }
350            Event::Text(t) => {
351                alt.push_str(t);
352                *pos += 1;
353            }
354            Event::Code(c) => {
355                alt.push_str(c);
356                *pos += 1;
357            }
358            Event::SoftBreak | Event::HardBreak => {
359                alt.push(' ');
360                *pos += 1;
361            }
362            _ => *pos += 1,
363        }
364    }
365    alt
366}
367
368fn parse_block(
369    events: &[Event<'_>],
370    pos: &mut usize,
371    blocks: &mut Vec<Block>,
372    fmt: &mut InlineState,
373) {
374    match &events[*pos] {
375        Event::Start(Tag::Heading { level, .. }) => {
376            let level = *level;
377            parse_heading(events, pos, level, blocks, fmt);
378        }
379        Event::Start(Tag::Paragraph) => parse_paragraph(events, pos, blocks, fmt),
380        Event::Start(Tag::CodeBlock(kind)) => {
381            let lang: Box<str> = match kind {
382                pulldown_cmark::CodeBlockKind::Fenced(l) if !l.is_empty() => l.as_ref().into(),
383                _ => Box::from(""),
384            };
385            parse_code_block(events, pos, lang, blocks);
386        }
387        Event::Start(Tag::BlockQuote(_)) => parse_blockquote(events, pos, blocks, fmt),
388        Event::Start(Tag::List(start)) => {
389            let start = *start;
390            parse_list(events, pos, start, blocks, fmt);
391        }
392        Event::Start(Tag::Table(aligns)) => {
393            let aligns = aligns.clone();
394            parse_table(events, pos, &aligns, blocks, fmt);
395        }
396        Event::Start(Tag::Image { dest_url, .. }) => {
397            let url: Box<str> = dest_url.as_ref().into();
398            *pos += 1;
399            let alt = collect_image_alt(events, pos);
400            blocks.push(Block::Image {
401                url,
402                alt: alt.into_boxed_str(),
403            });
404        }
405        Event::Rule => {
406            blocks.push(Block::ThematicBreak);
407            *pos += 1;
408        }
409        // Skip events not handled at block level (e.g. stray End tags,
410        // FootnoteDefinition, metadata blocks).  Consuming 1 event advances
411        // the cursor past the unknown token.
412        _ => *pos += 1,
413    }
414}
415
416fn parse_heading(
417    events: &[Event<'_>],
418    pos: &mut usize,
419    level: HeadingLevel,
420    blocks: &mut Vec<Block>,
421    fmt: &mut InlineState,
422) {
423    let lvl = heading_level_to_u8(level);
424    let mut styled = StyledText::with_capacity(64, 4);
425    *pos += 1;
426    fmt.clear();
427    while *pos < events.len() {
428        match &events[*pos] {
429            Event::End(TagEnd::Heading(_)) => {
430                *pos += 1;
431                break;
432            }
433            ev => {
434                consume_inline(ev, &mut styled, fmt);
435                *pos += 1;
436            }
437        }
438    }
439    blocks.push(Block::Heading {
440        level: lvl,
441        text: styled,
442    });
443}
444
445fn parse_paragraph(
446    events: &[Event<'_>],
447    pos: &mut usize,
448    blocks: &mut Vec<Block>,
449    fmt: &mut InlineState,
450) {
451    // Check if this paragraph is a standalone image (the only inline content
452    // inside the paragraph is a single Image tag). If so, emit Block::Image
453    // instead of a paragraph containing alt text.
454    if try_parse_standalone_image(events, pos, blocks) {
455        return;
456    }
457
458    let text_cap = estimate_text_capacity(&events[*pos + 1..], TagEnd::Paragraph);
459    let mut styled = StyledText::with_capacity(text_cap, text_cap / 20 + 2);
460    *pos += 1;
461    fmt.clear();
462    while *pos < events.len() {
463        match &events[*pos] {
464            Event::End(TagEnd::Paragraph) => {
465                *pos += 1;
466                break;
467            }
468            ev => {
469                consume_inline(ev, &mut styled, fmt);
470                *pos += 1;
471            }
472        }
473    }
474    blocks.push(Block::Paragraph(styled));
475}
476
477/// If the paragraph's *only* child is a single `Image` tag, emit
478/// `Block::Image` and advance `pos` past the closing `End(Paragraph)`.
479/// Returns `true` if consumed, `false` if the caller should parse normally.
480fn try_parse_standalone_image(
481    events: &[Event<'_>],
482    pos: &mut usize,
483    blocks: &mut Vec<Block>,
484) -> bool {
485    // events[*pos] is Start(Paragraph). Expect:
486    //   [pos+0] Start(Paragraph)
487    //   [pos+1] Start(Image { dest_url, .. })
488    //   ... inline text events (alt text) ...
489    //   [k] End(Image)
490    //   [k+1] End(Paragraph)
491    let start = *pos;
492    if events.len() - start < 4 {
493        return false;
494    }
495    let dest_url: Box<str> = match &events[start + 1] {
496        Event::Start(Tag::Image { dest_url, .. }) => dest_url.as_ref().into(),
497        _ => return false,
498    };
499
500    // Reuse shared alt-text collector starting after Start(Image).
501    let mut scan = start + 2;
502    let alt = collect_image_alt(events, &mut scan);
503
504    // The very next event must be End(Paragraph).
505    if scan >= events.len() || !matches!(&events[scan], Event::End(TagEnd::Paragraph)) {
506        return false;
507    }
508
509    blocks.push(Block::Image {
510        url: dest_url,
511        alt: alt.into_boxed_str(),
512    });
513    *pos = scan + 1; // +1 to consume End(Paragraph)
514    true
515}
516
517fn parse_code_block(
518    events: &[Event<'_>],
519    pos: &mut usize,
520    language: Box<str>,
521    blocks: &mut Vec<Block>,
522) {
523    let mut code = String::with_capacity(256);
524    *pos += 1;
525    while *pos < events.len() {
526        match &events[*pos] {
527            Event::End(TagEnd::CodeBlock) => {
528                *pos += 1;
529                break;
530            }
531            Event::Text(t) => {
532                code.push_str(t);
533                *pos += 1;
534            }
535            _ => *pos += 1,
536        }
537    }
538    blocks.push(Block::Code {
539        language,
540        code: code.into_boxed_str(),
541    });
542}
543
544fn parse_blockquote(
545    events: &[Event<'_>],
546    pos: &mut usize,
547    blocks: &mut Vec<Block>,
548    fmt: &mut InlineState,
549) {
550    let mut inner = Vec::with_capacity(4);
551    *pos += 1;
552    while *pos < events.len() {
553        if let Event::End(TagEnd::BlockQuote(_)) = &events[*pos] {
554            *pos += 1;
555            break;
556        }
557        parse_block(events, pos, &mut inner, fmt);
558    }
559    blocks.push(Block::Quote(inner));
560}
561
562fn parse_list(
563    events: &[Event<'_>],
564    pos: &mut usize,
565    start: Option<u64>,
566    blocks: &mut Vec<Block>,
567    fmt: &mut InlineState,
568) {
569    let mut items = Vec::with_capacity(8);
570    *pos += 1;
571    while *pos < events.len() {
572        match &events[*pos] {
573            Event::End(TagEnd::List(_)) => {
574                *pos += 1;
575                break;
576            }
577            Event::Start(Tag::Item) => {
578                *pos += 1;
579                let mut item_text = StyledText::with_capacity(128, 4);
580                let mut children = Vec::new();
581                fmt.clear();
582                let mut checked: Option<bool> = None;
583                // Track whether the first paragraph has been fully consumed.
584                // In loose lists, pulldown-cmark wraps each item's text in
585                // Paragraph start/end events; subsequent paragraphs become
586                // child blocks.
587                let mut first_para_done = false;
588                // Collect inline text for a secondary paragraph inside the
589                // item, to be flushed as `Block::Paragraph` into `children`.
590                let mut extra_para: Option<StyledText> = None;
591                while *pos < events.len() {
592                    match &events[*pos] {
593                        Event::End(TagEnd::Item) => {
594                            // Flush any trailing extra paragraph.
595                            if let Some(ep) = extra_para.take()
596                                && !ep.text.is_empty()
597                            {
598                                children.push(Block::Paragraph(ep));
599                            }
600                            *pos += 1;
601                            break;
602                        }
603                        Event::Start(Tag::Paragraph) => {
604                            *pos += 1;
605                            if first_para_done {
606                                // Start collecting a new paragraph into
607                                // `extra_para`; it will be flushed on
608                                // `End(Paragraph)` or `End(Item)`.
609                                extra_para = Some(StyledText::with_capacity(128, 4));
610                                fmt.clear();
611                            }
612                        }
613                        Event::End(TagEnd::Paragraph) => {
614                            *pos += 1;
615                            if let Some(ep) = extra_para.take()
616                                && !ep.text.is_empty()
617                            {
618                                children.push(Block::Paragraph(ep));
619                            }
620                            first_para_done = true;
621                        }
622                        // Block-level children: delegate to `parse_block`.
623                        Event::Start(
624                            Tag::List(_)
625                            | Tag::CodeBlock(_)
626                            | Tag::BlockQuote(_)
627                            | Tag::Heading { .. }
628                            | Tag::Table(_)
629                            | Tag::HtmlBlock,
630                        ) => {
631                            // Flush any in-progress extra paragraph first.
632                            if let Some(ep) = extra_para.take()
633                                && !ep.text.is_empty()
634                            {
635                                children.push(Block::Paragraph(ep));
636                            }
637                            parse_block(events, pos, &mut children, fmt);
638                        }
639                        Event::Rule => {
640                            if let Some(ep) = extra_para.take()
641                                && !ep.text.is_empty()
642                            {
643                                children.push(Block::Paragraph(ep));
644                            }
645                            children.push(Block::ThematicBreak);
646                            *pos += 1;
647                        }
648                        Event::TaskListMarker(is_checked) => {
649                            checked = Some(*is_checked);
650                            *pos += 1;
651                        }
652                        ev => {
653                            // Inline content: route to current paragraph
654                            // target (extra_para if active, else item_text).
655                            if let Some(ref mut ep) = extra_para {
656                                consume_inline(ev, ep, fmt);
657                            } else {
658                                consume_inline(ev, &mut item_text, fmt);
659                            }
660                            *pos += 1;
661                        }
662                    }
663                }
664                items.push(ListItem {
665                    content: item_text,
666                    children,
667                    checked,
668                });
669            }
670            _ => *pos += 1,
671        }
672    }
673    if let Some(s) = start {
674        blocks.push(Block::OrderedList { start: s, items });
675    } else {
676        blocks.push(Block::UnorderedList(items));
677    }
678}
679
680fn parse_table(
681    events: &[Event<'_>],
682    pos: &mut usize,
683    aligns: &[pulldown_cmark::Alignment],
684    blocks: &mut Vec<Block>,
685    fmt: &mut InlineState,
686) {
687    let alignments: Vec<Alignment> = aligns
688        .iter()
689        .map(|a| match a {
690            pulldown_cmark::Alignment::None => Alignment::None,
691            pulldown_cmark::Alignment::Left => Alignment::Left,
692            pulldown_cmark::Alignment::Center => Alignment::Center,
693            pulldown_cmark::Alignment::Right => Alignment::Right,
694        })
695        .collect();
696
697    let num_cols = aligns.len();
698    let mut header = Vec::with_capacity(num_cols);
699    let mut rows: Vec<Vec<StyledText>> = Vec::with_capacity(16);
700    let mut in_head = false;
701    let mut current_row: Vec<StyledText> = Vec::with_capacity(num_cols);
702    let mut current_cell = StyledText::with_capacity(32, 2);
703    fmt.clear();
704    *pos += 1;
705
706    while *pos < events.len() {
707        match &events[*pos] {
708            Event::End(TagEnd::Table) => {
709                *pos += 1;
710                break;
711            }
712            Event::Start(Tag::TableHead) => {
713                in_head = true;
714                *pos += 1;
715            }
716            Event::End(TagEnd::TableHead) => {
717                in_head = false;
718                header = std::mem::replace(&mut current_row, Vec::with_capacity(num_cols));
719                *pos += 1;
720            }
721            Event::Start(Tag::TableRow) => {
722                current_row.clear();
723                *pos += 1;
724            }
725            Event::End(TagEnd::TableRow) => {
726                if in_head {
727                    current_row.clear();
728                } else {
729                    rows.push(std::mem::replace(
730                        &mut current_row,
731                        Vec::with_capacity(num_cols),
732                    ));
733                }
734                *pos += 1;
735            }
736            Event::Start(Tag::TableCell) => {
737                current_cell = StyledText::with_capacity(32, 2);
738                fmt.clear();
739                *pos += 1;
740            }
741            Event::End(TagEnd::TableCell) => {
742                current_row.push(std::mem::take(&mut current_cell));
743                *pos += 1;
744            }
745            ev => {
746                consume_inline(ev, &mut current_cell, fmt);
747                *pos += 1;
748            }
749        }
750    }
751
752    blocks.push(Block::Table(Box::new(TableData {
753        header,
754        alignments,
755        rows,
756    })));
757}
758
/// One open inline construct on the formatting stack.
#[derive(Clone, Debug, PartialEq, Eq)]
enum InlineFlag {
    /// Inside strong (bold) text.
    Strong,
    /// Inside emphasised (italic) text.
    Emphasis,
    /// Inside struck-through text.
    Strikethrough,
    /// Inside a link with the given destination URL.
    Link(Rc<str>),
}
767
/// A link that is currently open while inline content is being consumed.
#[derive(Clone, Debug)]
struct ActiveLink {
    /// Destination URL of the link.
    url: Rc<str>,
    /// Cached index into `StyledText::links`; `NO_LINK` until first interned.
    idx: u8,
}
773
774/// Maintains the inline formatting stack with counter-based state updates.
775///
776/// Balanced markdown closes inline tags in stack order, so the hot path is a
777/// plain `Vec::pop()` plus counter updates. We keep a fallback search for
778/// malformed or unexpected nesting so parsing remains resilient.
779struct InlineState {
780    stack: Vec<InlineFlag>,
781    /// Per-flag reference counts — avoids O(n) rebuild on pop.
782    strong_count: u8,
783    emphasis_count: u8,
784    strikethrough_count: u8,
785    /// Active links in nesting order. The cached `idx` avoids repeatedly
786    /// interning the same URL for every text fragment inside one link span.
787    link_stack: Vec<ActiveLink>,
788}
789
790impl InlineState {
791    fn new() -> Self {
792        Self {
793            stack: Vec::with_capacity(4),
794            strong_count: 0,
795            emphasis_count: 0,
796            strikethrough_count: 0,
797            link_stack: Vec::new(),
798        }
799    }
800
801    fn clear(&mut self) {
802        self.stack.clear();
803        self.link_stack.clear();
804        self.strong_count = 0;
805        self.emphasis_count = 0;
806        self.strikethrough_count = 0;
807    }
808
809    /// Compute the flags bitfield from counters — O(1).
810    #[inline]
811    const fn flags(&self) -> u8 {
812        let mut f = 0u8;
813        if self.strong_count > 0 {
814            f |= FLAG_STRONG;
815        }
816        if self.emphasis_count > 0 {
817            f |= FLAG_EMPHASIS;
818        }
819        if self.strikethrough_count > 0 {
820            f |= FLAG_STRIKETHROUGH;
821        }
822        f
823    }
824
825    #[inline]
826    fn push(&mut self, flag: InlineFlag) {
827        match &flag {
828            InlineFlag::Strong => self.strong_count += 1,
829            InlineFlag::Emphasis => self.emphasis_count += 1,
830            InlineFlag::Strikethrough => self.strikethrough_count += 1,
831            InlineFlag::Link(url) => {
832                self.link_stack.push(ActiveLink {
833                    url: Rc::clone(url),
834                    idx: NO_LINK,
835                });
836            }
837        }
838        self.stack.push(flag);
839    }
840
841    #[inline]
842    fn pop(&mut self, flag: &InlineFlag) {
843        if self.stack.last().is_some_and(|last| last == flag) {
844            if let Some(removed) = self.stack.pop() {
845                self.decrement(&removed);
846            }
847            return;
848        }
849        if let Some(pos) = self.stack.iter().rposition(|k| k == flag) {
850            let removed = self.stack.swap_remove(pos);
851            self.decrement(&removed);
852        }
853    }
854
855    #[inline]
856    fn pop_link(&mut self) {
857        if matches!(self.stack.last(), Some(InlineFlag::Link(_))) {
858            self.stack.pop();
859        } else if let Some(pos) = self
860            .stack
861            .iter()
862            .rposition(|k| matches!(k, InlineFlag::Link(_)))
863        {
864            self.stack.swap_remove(pos);
865        }
866        self.link_stack.pop();
867    }
868
869    #[inline]
870    fn current_link_idx(&mut self, styled: &mut StyledText) -> u8 {
871        let Some(link) = self.link_stack.last_mut() else {
872            return NO_LINK;
873        };
874        if link.idx == NO_LINK {
875            link.idx = styled.intern_link(Rc::clone(&link.url));
876        }
877        link.idx
878    }
879
880    /// Decrement the counter for a removed flag.
881    #[inline]
882    fn decrement(&mut self, flag: &InlineFlag) {
883        match flag {
884            InlineFlag::Strong => self.strong_count = self.strong_count.saturating_sub(1),
885            InlineFlag::Emphasis => self.emphasis_count = self.emphasis_count.saturating_sub(1),
886            InlineFlag::Strikethrough => {
887                self.strikethrough_count = self.strikethrough_count.saturating_sub(1);
888            }
889            InlineFlag::Link(_) => {
890                self.link_stack.pop();
891            }
892        }
893    }
894}
895
896#[inline]
897fn consume_inline(event: &Event<'_>, styled: &mut StyledText, state: &mut InlineState) {
898    /// Build the current `SpanStyle` with optional extra flags.
899    #[inline]
900    fn current_style(state: &mut InlineState, styled: &mut StyledText, extra: u8) -> SpanStyle {
901        SpanStyle {
902            flags: state.flags() | extra,
903            link_idx: state.current_link_idx(styled),
904        }
905    }
906
907    match event {
908        Event::Text(t) => {
909            let s = current_style(state, styled, 0);
910            styled.push_text(t, s);
911        }
912        Event::Code(c) => {
913            let s = current_style(state, styled, FLAG_CODE);
914            styled.push_text(c, s);
915        }
916        Event::SoftBreak => {
917            let s = current_style(state, styled, 0);
918            styled.push_text(" ", s);
919        }
920        Event::HardBreak => {
921            let s = current_style(state, styled, 0);
922            styled.push_text("\n", s);
923        }
924        Event::Start(Tag::Strong) => state.push(InlineFlag::Strong),
925        Event::End(TagEnd::Strong) => state.pop(&InlineFlag::Strong),
926        Event::Start(Tag::Emphasis) => state.push(InlineFlag::Emphasis),
927        Event::End(TagEnd::Emphasis) => state.pop(&InlineFlag::Emphasis),
928        Event::Start(Tag::Strikethrough) => state.push(InlineFlag::Strikethrough),
929        Event::End(TagEnd::Strikethrough) => state.pop(&InlineFlag::Strikethrough),
930        Event::Start(Tag::Link { dest_url, .. }) => {
931            state.push(InlineFlag::Link(Rc::from(dest_url.as_ref())));
932        }
933        Event::End(TagEnd::Link) => state.pop_link(),
934        Event::FootnoteReference(label) => {
935            let s = current_style(state, styled, 0);
936            styled.push_text("[", s);
937            styled.push_text(label, s);
938            styled.push_text("]", s);
939        }
940        Event::InlineHtml(html) | Event::Html(html) => {
941            let s = current_style(state, styled, FLAG_CODE);
942            styled.push_text(html, s);
943        }
944        Event::InlineMath(math) | Event::DisplayMath(math) => {
945            let s = current_style(state, styled, FLAG_CODE);
946            styled.push_text(math, s);
947        }
948        _ => {}
949    }
950}
951
952#[inline]
953#[must_use]
954pub const fn heading_level_to_u8(level: HeadingLevel) -> u8 {
955    match level {
956        HeadingLevel::H1 => 1,
957        HeadingLevel::H2 => 2,
958        HeadingLevel::H3 => 3,
959        HeadingLevel::H4 => 4,
960        HeadingLevel::H5 => 5,
961        HeadingLevel::H6 => 6,
962    }
963}
964
965#[cfg(test)]
966#[allow(clippy::panic, clippy::expect_used)]
967mod tests {
968    use super::*;
969    use std::fmt::Write;
970
971    fn validate_styled_text(st: &StyledText) {
972        let text_len = st.text.len() as u32;
973        if st.text.is_empty() {
974            assert!(st.spans.is_empty(), "empty text should have no spans");
975            return;
976        }
977        assert!(!st.spans.is_empty(), "non-empty text should have spans");
978        for (i, span) in st.spans.iter().enumerate() {
979            assert!(span.start < span.end, "span {i}: start >= end");
980            assert!(span.end <= text_len, "span {i}: end exceeds text len");
981        }
982        assert_eq!(st.spans[0].start, 0, "first span should start at 0");
983        assert_eq!(
984            st.spans.last().expect("non-empty").end,
985            text_len,
986            "last span should end at text len"
987        );
988        for i in 1..st.spans.len() {
989            assert_eq!(
990                st.spans[i].start,
991                st.spans[i - 1].end,
992                "gap between span {} and {i}",
993                i - 1
994            );
995        }
996    }
997
998    fn parse_paragraph(md: &str) -> StyledText {
999        let blocks = parse_markdown(md);
1000        match blocks.into_iter().next() {
1001            Some(Block::Paragraph(st)) => st,
1002            other => panic!("expected Paragraph, got {other:?}"),
1003        }
1004    }
1005
1006    // ── Type size assertions ──────────────────────────────────────
1007
1008    #[test]
1009    fn type_sizes_are_compact() {
1010        assert_eq!(
1011            std::mem::size_of::<SpanStyle>(),
1012            2,
1013            "SpanStyle should be 2 bytes"
1014        );
1015        assert_eq!(std::mem::size_of::<Span>(), 12, "Span should be 12 bytes");
1016        // Block enum is 88 bytes due to StyledText containing String + Vec<Span> + Vec<Rc<str>>.
1017        // The Span size reduction (24→12) more than compensates at typical span-per-block ratios.
1018        let block_size = std::mem::size_of::<Block>();
1019        assert!(
1020            block_size <= 96,
1021            "Block is {block_size} bytes, should be <= 96"
1022        );
1023    }
1024
1025    // ── Heading parsing ──────────────────────────────────────────
1026
1027    #[test]
1028    fn heading_parsing() {
1029        for (label, md, expected) in [
1030            ("simple", "# Hello World", vec![(1_u8, "Hello World")]),
1031            (
1032                "levels_1_to_6",
1033                "# H1\n## H2\n### H3\n#### H4\n##### H5\n###### H6\n",
1034                vec![
1035                    (1, "H1"),
1036                    (2, "H2"),
1037                    (3, "H3"),
1038                    (4, "H4"),
1039                    (5, "H5"),
1040                    (6, "H6"),
1041                ],
1042            ),
1043            (
1044                "unicode",
1045                "# 你好世界\n## 🚀 Rocket\n",
1046                vec![(1, "你好世界"), (2, "🚀 Rocket")],
1047            ),
1048            ("trailing_hashes", "## Title ##\n", vec![(2, "Title")]),
1049        ] {
1050            let blocks = parse_markdown(md);
1051            let headings: Vec<_> = blocks
1052                .iter()
1053                .filter_map(|b| match b {
1054                    Block::Heading { level, text } => Some((*level, text.text.as_str())),
1055                    _ => None,
1056                })
1057                .collect();
1058            assert_eq!(headings.len(), expected.len(), "{label}: count");
1059            for (i, ((gl, gt), (el, et))) in headings.iter().zip(expected.iter()).enumerate() {
1060                assert_eq!(gl, el, "{label}[{i}]: level");
1061                assert!(
1062                    gt.trim().contains(et),
1063                    "{label}[{i}]: text {gt:?} missing {et:?}"
1064                );
1065            }
1066        }
1067        // Inline formatting in headings
1068        let has_style = |text: &StyledText, check: &str| -> bool {
1069            match check {
1070                "strong" => text.spans.iter().any(|s| s.style.strong()),
1071                "emphasis" => text.spans.iter().any(|s| s.style.emphasis()),
1072                "code" => text.spans.iter().any(|s| s.style.code()),
1073                "link" => text.spans.iter().any(|s| s.style.has_link()),
1074                "strikethrough" => text.spans.iter().any(|s| s.style.strikethrough()),
1075                _ => false,
1076            }
1077        };
1078        for (label, md, checks) in [
1079            (
1080                "mixed",
1081                "# **bold** and *italic*\n",
1082                &["strong", "emphasis"] as &[&str],
1083            ),
1084            (
1085                "all_inline",
1086                "## **bold** *italic* `code` [link](url) ~~strike~~\n",
1087                &["strong", "emphasis", "code", "link", "strikethrough"] as &[&str],
1088            ),
1089            (
1090                "link_and_code",
1091                "### [`parse`](https://docs.rs) function\n",
1092                &["code", "link"] as &[&str],
1093            ),
1094        ] {
1095            match &parse_markdown(md)[0] {
1096                Block::Heading { text, .. } => {
1097                    for check in checks {
1098                        assert!(has_style(text, check), "{label}: {check}");
1099                    }
1100                    validate_styled_text(text);
1101                }
1102                other => panic!("{label}: expected heading, got {other:?}"),
1103            }
1104        }
1105    }
1106
1107    // ── Inline formatting ────────────────────────────────────────
1108
1109    #[test]
1110    fn inline_formatting_parsing() {
1111        // (label, md, text_sub, strong, emphasis, strikethrough, strong_and_emph)
1112        for (label, md, text_sub, strong, emph, strike, combined) in [
1113            (
1114                "emphasis_and_bold",
1115                "Hello **world** and *italic*",
1116                "world",
1117                true,
1118                true,
1119                false,
1120                false,
1121            ),
1122            (
1123                "strikethrough",
1124                "This is ~~deleted~~ text",
1125                "deleted",
1126                false,
1127                false,
1128                true,
1129                false,
1130            ),
1131            (
1132                "triple_emphasis",
1133                "***bold and italic***",
1134                "bold and italic",
1135                true,
1136                true,
1137                false,
1138                true,
1139            ),
1140            (
1141                "strike_with_code",
1142                "~~deleted `code` deleted~~",
1143                "code",
1144                false,
1145                false,
1146                true,
1147                false,
1148            ),
1149            (
1150                "gfm_strike",
1151                "~~deleted~~\n",
1152                "deleted",
1153                false,
1154                false,
1155                true,
1156                false,
1157            ),
1158        ] {
1159            let blocks = parse_markdown(md);
1160            match &blocks[0] {
1161                Block::Paragraph(st) => {
1162                    assert!(st.text.contains(text_sub), "{label}: text");
1163                    if strong {
1164                        assert!(st.spans.iter().any(|s| s.style.strong()), "{label}: strong");
1165                    }
1166                    if emph {
1167                        assert!(st.spans.iter().any(|s| s.style.emphasis()), "{label}: emph");
1168                    }
1169                    if strike {
1170                        assert!(
1171                            st.spans.iter().any(|s| s.style.strikethrough()),
1172                            "{label}: strike"
1173                        );
1174                    }
1175                    if combined {
1176                        assert!(
1177                            st.spans
1178                                .iter()
1179                                .any(|s| s.style.strong() && s.style.emphasis()),
1180                            "{label}: combined"
1181                        );
1182                    }
1183                }
1184                other => panic!("{label}: expected paragraph, got {other:?}"),
1185            }
1186        }
1187    }
1188
1189    // ── Code block parsing ───────────────────────────────────────
1190
1191    #[test]
1192    fn code_block_parsing() {
1193        for (label, md, lang, code_sub) in [
1194            (
1195                "fenced_rust",
1196                "```rust\nfn main() {}\n```",
1197                "rust",
1198                "fn main()",
1199            ),
1200            ("indented", "    fn foo() {}\n    bar()\n", "", "fn foo()"),
1201            ("empty_fenced", "```\n```\n", "", ""),
1202            ("unclosed", "```rust\ncode\n", "rust", "code"),
1203            (
1204                "indented_two",
1205                "    code line 1\n    code line 2\n",
1206                "",
1207                "code line 1",
1208            ),
1209        ] {
1210            let blocks = parse_markdown(md);
1211            assert_eq!(blocks.len(), 1, "{label}");
1212            match &blocks[0] {
1213                Block::Code { language, code } => {
1214                    assert_eq!(&**language, lang, "{label}: lang");
1215                    if code_sub.is_empty() {
1216                        assert!(code.is_empty(), "{label}: empty");
1217                    } else {
1218                        assert!(code.contains(code_sub), "{label}: code");
1219                    }
1220                }
1221                other => panic!("{label}: expected Code, got {other:?}"),
1222            }
1223        }
1224        // Nested backtick fences (4-tick wrapping 3-tick)
1225        let b = parse_markdown("````\n```rust\nfn main() {}\n```\n````\n");
1226        match &b[0] {
1227            Block::Code { code, .. } => {
1228                assert!(code.contains("```rust"));
1229                assert!(code.contains("fn main()"));
1230            }
1231            other => panic!("nested_fence: expected Code, got {other:?}"),
1232        }
1233        // 5-tick wrapping 3+4 tick
1234        let b = parse_markdown("`````\n```\n````\nsome code\n`````\n");
1235        match &b[0] {
1236            Block::Code { code, .. } => {
1237                assert!(code.contains("```"));
1238                assert!(code.contains("````"));
1239            }
1240            other => panic!("nested_fence_5: expected Code, got {other:?}"),
1241        }
1242    }
1243
1244    // ── List parsing ─────────────────────────────────────────────
1245
1246    #[test]
1247    fn list_parsing() {
1248        // Unordered lists
1249        for (label, md, count, first) in [
1250            ("basic", "- one\n- two\n- three", 3, "one"),
1251            ("empty_items", "- \n- text\n", 2, ""),
1252        ] {
1253            let blocks = parse_markdown(md);
1254            match &blocks[0] {
1255                Block::UnorderedList(items) => {
1256                    assert_eq!(items.len(), count, "{label}: count");
1257                    assert_eq!(items[0].content.text, first, "{label}: first");
1258                }
1259                other => panic!("{label}: expected UL, got {other:?}"),
1260            }
1261        }
1262        // Ordered lists
1263        for (label, md, start, count, first) in [
1264            ("basic", "1. first\n2. second", 1_u64, 2, "first"),
1265            ("start_zero", "0. zero\n1. one\n", 0, 2, "zero"),
1266            ("high_start", "42. answer\n43. next\n", 42, 2, "answer"),
1267        ] {
1268            let blocks = parse_markdown(md);
1269            match &blocks[0] {
1270                Block::OrderedList { start: s, items } => {
1271                    assert_eq!(*s, start, "{label}: start");
1272                    assert_eq!(items.len(), count, "{label}: count");
1273                    assert_eq!(items[0].content.text, first, "{label}: first");
1274                }
1275                other => panic!("{label}: expected OL, got {other:?}"),
1276            }
1277        }
1278        // Nesting
1279        for (label, md) in [
1280            ("nested_ul", "- parent\n  - child\n  - child2\n- sibling"),
1281            (
1282                "mixed",
1283                "- bullet\n  1. ordered a\n  2. ordered b\n- bullet2\n",
1284            ),
1285        ] {
1286            let blocks = parse_markdown(md);
1287            match &blocks[0] {
1288                Block::UnorderedList(items) => {
1289                    assert_eq!(items.len(), 2, "{label}");
1290                    assert!(!items[0].children.is_empty(), "{label}: children");
1291                }
1292                other => panic!("{label}: expected UL, got {other:?}"),
1293            }
1294        }
1295    }
1296
1297    // ── Image parsing ────────────────────────────────────────────
1298
1299    #[test]
1300    fn image_parsing() {
1301        for (label, md, url, alt) in [
1302            (
1303                "full",
1304                "![alt text](https://img.png \"title\")",
1305                "https://img.png",
1306                "alt text",
1307            ),
1308            ("no_alt", "![](image.png)", "image.png", ""),
1309            (
1310                "from_brackets",
1311                "![alt text](img.png)",
1312                "img.png",
1313                "alt text",
1314            ),
1315            ("empty_url", "![alt text]()\n", "", "alt text"),
1316        ] {
1317            let blocks = parse_markdown(md);
1318            match &blocks[0] {
1319                Block::Image { url: u, alt: a } => {
1320                    assert_eq!(&**u, url, "{label}: url");
1321                    assert_eq!(&**a, alt, "{label}: alt");
1322                }
1323                other => panic!("{label}: expected Image, got {other:?}"),
1324            }
1325        }
1326        // Inline with text stays as paragraph
1327        assert!(matches!(
1328            &parse_markdown("See ![pic](img.png) text.")[0],
1329            Block::Paragraph(_)
1330        ));
1331        // Multiple standalone images
1332        let imgs = parse_markdown("![a](1.png)\n\n![b](2.png)\n\n![c](3.png)\n")
1333            .iter()
1334            .filter(|b| matches!(b, Block::Image { .. }))
1335            .count();
1336        assert_eq!(imgs, 3);
1337        // Long alt text with formatting
1338        let long_alt = "A".repeat(500);
1339        let md = format!("![**bold** *italic* {long_alt}](img.png)");
1340        match &parse_markdown(&md)[0] {
1341            Block::Image { alt, url } => {
1342                assert_eq!(&**url, "img.png");
1343                assert!(alt.contains(&long_alt) && alt.contains("bold") && alt.contains("italic"));
1344            }
1345            other => panic!("expected Image, got {other:?}"),
1346        }
1347    }
1348
1349    // ── Link parsing ─────────────────────────────────────────────
1350
1351    #[test]
1352    fn link_parsing() {
1353        for (label, md, url) in [
1354            (
1355                "basic",
1356                "[link](https://example.com)",
1357                "https://example.com",
1358            ),
1359            ("with_title", "[text](url \"title\")\n", "url"),
1360            (
1361                "reference",
1362                "[text][ref]\n\n[ref]: https://example.com\n",
1363                "https://example.com",
1364            ),
1365            (
1366                "autolink",
1367                "Visit <https://example.com> for more.",
1368                "https://example.com",
1369            ),
1370        ] {
1371            let blocks = parse_markdown(md);
1372            match &blocks[0] {
1373                Block::Paragraph(st) => {
1374                    let has = st
1375                        .spans
1376                        .iter()
1377                        .any(|s| st.link_url(s.style.link_idx).map(Rc::as_ref) == Some(url));
1378                    assert!(has, "{label}: no span with URL {url:?}");
1379                }
1380                other => panic!("{label}: expected paragraph, got {other:?}"),
1381            }
1382        }
1383        // Multiple links
1384        let blocks = parse_markdown("Visit [a](https://a.com) and [b](https://b.com) today.");
1385        match &blocks[0] {
1386            Block::Paragraph(st) => {
1387                let n = st.spans.iter().filter(|s| s.style.has_link()).count();
1388                assert!(n >= 2, "expected >=2 links, got {n}");
1389            }
1390            other => panic!("expected paragraph, got {other:?}"),
1391        }
1392        // URL edge cases: encoded spaces, unicode, parentheses
1393        for (md, frag) in [
1394            (
1395                "[spaces](https://example.com/path%20with%20spaces)",
1396                "spaces",
1397            ),
1398            ("[unicode](https://example.com/日本語)", "日本語"),
1399            (
1400                "[parens](https://en.wikipedia.org/wiki/Rust_(programming_language))",
1401                "Rust_",
1402            ),
1403        ] {
1404            match &parse_markdown(md)[0] {
1405                Block::Paragraph(st) => {
1406                    let span = st
1407                        .spans
1408                        .iter()
1409                        .find(|s| s.style.has_link())
1410                        .unwrap_or_else(|| panic!("link span for {md:?}"));
1411                    let url = st.link_url(span.style.link_idx).expect("url");
1412                    assert!(
1413                        url.contains(frag),
1414                        "URL should contain {frag:?}, got {url:?}"
1415                    );
1416                }
1417                other => panic!("expected paragraph, got {other:?}"),
1418            }
1419        }
1420    }
1421
1422    // ── Table parsing ────────────────────────────────────────────
1423
1424    #[test]
1425    fn table_parsing() {
1426        for (label, md, hdr, rows, aligns) in [
1427            (
1428                "basic",
1429                "| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |",
1430                2,
1431                2,
1432                vec![Alignment::None, Alignment::None],
1433            ),
1434            (
1435                "alignment",
1436                "| L | C | R |\n|:---|:---:|---:|\n| a | b | c |\n",
1437                3,
1438                1,
1439                vec![Alignment::Left, Alignment::Center, Alignment::Right],
1440            ),
1441            (
1442                "header_only",
1443                "| A | B |\n|---|---|\n",
1444                2,
1445                0,
1446                vec![Alignment::None, Alignment::None],
1447            ),
1448            (
1449                "col_mismatch",
1450                "| A | B | C |\n|---|---|---|\n| 1 | 2 |\n",
1451                3,
1452                1,
1453                vec![Alignment::None, Alignment::None, Alignment::None],
1454            ),
1455        ] {
1456            let blocks = parse_markdown(md);
1457            match &blocks[0] {
1458                Block::Table(t) => {
1459                    assert_eq!(t.header.len(), hdr, "{label}: hdr");
1460                    assert_eq!(t.rows.len(), rows, "{label}: rows");
1461                    assert_eq!(t.alignments.len(), aligns.len(), "{label}: aligns len");
1462                    for (i, (g, e)) in t.alignments.iter().zip(aligns.iter()).enumerate() {
1463                        assert_eq!(g, e, "{label}: align[{i}]");
1464                    }
1465                }
1466                other => panic!("{label}: expected table, got {other:?}"),
1467            }
1468        }
1469        // Escaped pipe
1470        let blocks = parse_markdown("| A |\n|---|\n| a \\| b |\n");
1471        match &blocks[0] {
1472            Block::Table(t) => {
1473                assert!(
1474                    t.rows[0][0].text.contains("a | b") || t.rows[0][0].text.contains("a \\| b")
1475                );
1476            }
1477            other => panic!("expected Table, got {other:?}"),
1478        }
1479    }
1480
1481    // ── Blockquote parsing ───────────────────────────────────────
1482
1483    #[test]
1484    fn blockquote_parsing() {
1485        // Simple
1486        assert!(matches!(&parse_markdown("> quoted")[0], Block::Quote(_)));
1487        // Nested: 2 levels
1488        match &parse_markdown("> outer\n>> inner\n")[0] {
1489            Block::Quote(outer) => assert!(outer.iter().any(|b| matches!(b, Block::Quote(_)))),
1490            other => panic!("expected Quote, got {other:?}"),
1491        }
1492        // 3 levels
1493        match &parse_markdown("> > > deep\n")[0] {
1494            Block::Quote(l1) => {
1495                for b in l1 {
1496                    if let Block::Quote(l2) = b {
1497                        assert!(l2.iter().any(|b2| matches!(b2, Block::Quote(_))));
1498                    }
1499                }
1500            }
1501            other => panic!("expected Quote, got {other:?}"),
1502        }
1503        // Inner blocks
1504        for (label, md) in [
1505            ("code", "> ```rust\n> fn main() {}\n> ```\n"),
1506            ("table", "> | H1 | H2 |\n> |---|---|\n> | a | b |\n"),
1507            (
1508                "code_and_list",
1509                "> ```python\n> print('hi')\n> ```\n>\n> - item 1\n> - item 2\n",
1510            ),
1511        ] {
1512            match &parse_markdown(md)[0] {
1513                Block::Quote(inner) => assert!(!inner.is_empty(), "{label}"),
1514                other => panic!("{label}: expected Quote, got {other:?}"),
1515            }
1516        }
1517        // 5 levels deep
1518        let md = "> level 1\n>> level 2\n>>> level 3\n>>>> level 4\n>>>>> level 5\n";
1519        fn max_depth(blocks: &[Block]) -> usize {
1520            blocks
1521                .iter()
1522                .map(|b| {
1523                    if let Block::Quote(inner) = b {
1524                        1 + max_depth(inner)
1525                    } else {
1526                        0
1527                    }
1528                })
1529                .max()
1530                .unwrap_or(0)
1531        }
1532        assert!(max_depth(&parse_markdown(md)) >= 5);
1533    }
1534
1535    // ── Span coverage ────────────────────────────────────────────
1536
1537    #[test]
1538    fn spans_cover_all_block_types() {
1539        // Paragraphs with various inline formatting (also covers validate_various_inputs)
1540        for md in [
1541            "Hello world",
1542            "Hello **bold** world",
1543            "**bold** *italic* ~~strike~~ `code`",
1544            "A [link](https://x.com) here",
1545            "**bold *bold-italic* bold**",
1546            "Mixed **bold** and *italic* with `code` and [link](url)",
1547            "**你好** *世界* `🚀`",
1548            "plain **bold** *italic* ~~strike~~ `code` [link](url) ***bi*** end",
1549            "***~~all~~***",
1550        ] {
1551            for block in &parse_markdown(md) {
1552                if let Block::Paragraph(st) = block {
1553                    validate_styled_text(st);
1554                }
1555            }
1556        }
1557        for block in &parse_markdown("# Simple\n## **Bold** heading\n### `Code` in heading") {
1558            if let Block::Heading { text, .. } = block {
1559                validate_styled_text(text);
1560            }
1561        }
1562        for block in &parse_markdown("- Item with **bold**\n- Item with `code`\n- [Link](url) item")
1563        {
1564            if let Block::UnorderedList(items) = block {
1565                for item in items {
1566                    validate_styled_text(&item.content);
1567                }
1568            }
1569        }
1570        // Tables with formatting (simple + complex cells)
1571        for table_md in [
1572            "| **Bold** | `Code` | [Link](url) |\n|---|---|---|\n| a | b | c |",
1573            "| **Bold** `code` | *it* ~~s~~ [lnk](u) |\n|---|---|\n| **x** *y* | `a` ~~b~~ |",
1574        ] {
1575            for block in &parse_markdown(table_md) {
1576                if let Block::Table(t) = block {
1577                    for cell in &t.header {
1578                        validate_styled_text(cell);
1579                    }
1580                    for row in &t.rows {
1581                        for cell in row {
1582                            validate_styled_text(cell);
1583                        }
1584                    }
1585                }
1586            }
1587        }
1588    }
1589
1590    // ── Edge cases ───────────────────────────────────────────────
1591
1592    #[test]
1593    fn parse_edge_cases() {
1594        for (label, md, empty) in [
1595            ("empty", "", true),
1596            ("whitespace", "   \n\n   \n", true),
1597            ("newlines", "\n\n\n\n\n\n\n\n", true),
1598        ] {
1599            assert_eq!(parse_markdown(md).is_empty(), empty, "{label}");
1600        }
1601        // CRLF
1602        let b = parse_markdown("# Hello\r\n\r\nParagraph\r\n");
1603        assert!(matches!(&b[0], Block::Heading { level: 1, .. }));
1604        assert!(matches!(&b[1], Block::Paragraph(_)));
1605        // Multiple blank lines
1606        let p = parse_markdown("para1\n\n\n\n\npara2")
1607            .iter()
1608            .filter(|b| matches!(b, Block::Paragraph(_)))
1609            .count();
1610        assert_eq!(p, 2);
1611    }
1612
1613    #[test]
1614    fn parse_large_document_perf() {
1615        let mut doc = String::with_capacity(50_000);
1616        for i in 0..200 {
1617            write!(doc, "## Heading {i}\n\n").ok();
1618            doc.push_str("Lorem ipsum dolor sit amet, consectetur adipiscing elit. ");
1619            doc.push_str("Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n\n");
1620            if i % 5 == 0 {
1621                doc.push_str("```rust\nfn example() { /* code */ }\n```\n\n");
1622            }
1623            if i % 3 == 0 {
1624                doc.push_str("- item one\n- item two\n- item three\n\n");
1625            }
1626        }
1627        let start = std::time::Instant::now();
1628        for _ in 0..100_u32 {
1629            assert!(!parse_markdown(&doc).is_empty());
1630        }
1631        let per_iter = start.elapsed() / 100;
1632        if cfg!(not(debug_assertions)) {
1633            assert!(per_iter.as_millis() < 5, "too slow: {per_iter:?}");
1634        }
1635    }
1636
1637    #[test]
1638    fn parse_task_lists() {
1639        // Unordered
1640        match &parse_markdown("- [x] checked\n- [ ] unchecked\n- normal\n")[0] {
1641            Block::UnorderedList(items) => {
1642                assert_eq!(items[0].checked, Some(true));
1643                assert_eq!(items[1].checked, Some(false));
1644                assert_eq!(items[2].checked, None);
1645            }
1646            other => panic!("expected UL, got {other:?}"),
1647        }
1648        // Ordered
1649        match &parse_markdown("1. [x] Done\n2. [ ] Todo\n3. Normal\n")[0] {
1650            Block::OrderedList { items, .. } => {
1651                assert_eq!(items[0].checked, Some(true));
1652                assert_eq!(items[1].checked, Some(false));
1653                assert_eq!(items[2].checked, None);
1654            }
1655            other => panic!("expected OL, got {other:?}"),
1656        }
1657        // Nested task lists
1658        let md = "- [x] parent done\n  - [ ] child todo\n  - [x] child done\n- [ ] parent todo\n  - [ ] nested todo\n";
1659        match &parse_markdown(md)[0] {
1660            Block::UnorderedList(items) => {
1661                assert_eq!(items[0].checked, Some(true));
1662                assert_eq!(items[1].checked, Some(false));
1663                if let Some(Block::UnorderedList(n)) = items[0].children.first() {
1664                    assert_eq!(n[0].checked, Some(false));
1665                    assert_eq!(n[1].checked, Some(true));
1666                } else {
1667                    panic!("nested list");
1668                }
1669                if let Some(Block::UnorderedList(n)) = items[1].children.first() {
1670                    assert_eq!(n[0].checked, Some(false));
1671                } else {
1672                    panic!("nested list");
1673                }
1674            }
1675            other => panic!("expected UL, got {other:?}"),
1676        }
1677    }
1678
1679    #[test]
1680    fn parse_misc_block_types() {
1681        // Thematic break
1682        assert!(matches!(&parse_markdown("---")[0], Block::ThematicBreak));
1683        // Setext headings
1684        let h: Vec<_> = parse_markdown("H1\n===\n\nH2\n---\n")
1685            .iter()
1686            .filter_map(|b| {
1687                if let Block::Heading { level, .. } = b {
1688                    Some(*level)
1689                } else {
1690                    None
1691                }
1692            })
1693            .collect();
1694        assert_eq!(h, vec![1, 2]);
1695        // Escaped characters
1696        for md in [
1697            "\\# Not a heading\n\n\\* Not a bullet\n",
1698            "\\*not bold\\* and \\[not link\\]\n",
1699        ] {
1700            assert!(
1701                parse_markdown(md)
1702                    .iter()
1703                    .all(|b| matches!(b, Block::Paragraph(_)))
1704            );
1705        }
1706        // Line breaks
1707        for md in ["Line one  \nLine two\n", "Line one\nLine two\n"] {
1708            match &parse_markdown(md)[0] {
1709                Block::Paragraph(t) => {
1710                    assert!(t.text.contains("Line one") && t.text.contains("Line two"));
1711                }
1712                other => panic!("expected Paragraph, got {other:?}"),
1713            }
1714        }
1715        // HTML entities
1716        match &parse_markdown("&amp; &lt; &gt; &#123;\n")[0] {
1717            Block::Paragraph(t) => {
1718                assert!(t.text.contains('&') && t.text.contains('<') && t.text.contains('>'));
1719            }
1720            other => panic!("expected Paragraph, got {other:?}"),
1721        }
1722        // Inline HTML
1723        match &parse_markdown("Text with <strong>html</strong> inline.\n")[0] {
1724            Block::Paragraph(t) => assert!(t.text.contains("html")),
1725            other => panic!("expected Paragraph, got {other:?}"),
1726        }
1727        // Smart punctuation
1728        match &parse_markdown("\"Hello\" -- world... 'single' --- em")[0] {
1729            Block::Paragraph(st) => {
1730                let t = &st.text;
1731                assert!(t.contains('\u{201c}') || t.contains('\u{201d}') || t.contains('"'));
1732                assert!(t.contains('\u{2026}') || t.contains("..."));
1733            }
1734            other => panic!("expected paragraph, got {other:?}"),
1735        }
1736    }
1737
1738    #[test]
1739    fn inline_merge_behavior() {
1740        // Pop without push is safe
1741        let mut state = InlineState::new();
1742        state.pop(&InlineFlag::Strong);
1743        assert!(state.stack.is_empty());
1744        assert_eq!(state.flags(), 0);
1745        assert!(state.link_stack.is_empty());
1746
1747        // Adjacent same-style spans merge
1748        let mut st = StyledText::default();
1749        st.push_text("hello", SpanStyle::plain());
1750        st.push_text(" world", SpanStyle::plain());
1751        assert_eq!(st.spans.len(), 1);
1752        assert_eq!(st.spans[0].end, 11);
1753
1754        // Adjacent bold merges
1755        let mut st = StyledText::default();
1756        let mut bold = SpanStyle::plain();
1757        bold.set_strong();
1758        st.push_text("bold1", bold);
1759        st.push_text("bold2", bold);
1760        assert_eq!(st.spans.len(), 1);
1761        assert!(st.spans[0].style.strong());
1762        validate_styled_text(&st);
1763
1764        // Different styles don't merge
1765        let st = parse_paragraph("*italic*normal*italic*");
1766        assert!(st.spans.len() >= 3);
1767        assert!(st.spans[0].style.emphasis());
1768        assert!(!st.spans[1].style.emphasis());
1769        assert!(st.spans[2].style.emphasis());
1770        validate_styled_text(&st);
1771
1772        // Plain fragments merge
1773        let mut st = StyledText::default();
1774        st.push_text("aaa", SpanStyle::plain());
1775        st.push_text("bbb", SpanStyle::plain());
1776        st.push_text("ccc", SpanStyle::plain());
1777        assert_eq!(st.spans.len(), 1);
1778        assert_eq!(st.text, "aaabbbccc");
1779        assert!(st.is_ascii);
1780        assert_eq!(st.char_len(), 9);
1781
1782        let mut st = StyledText::default();
1783        st.push_text("hello", SpanStyle::plain());
1784        st.push_text("世界", SpanStyle::plain());
1785        assert!(!st.is_ascii);
1786        assert_eq!(st.char_len(), 7);
1787
1788        let st = StyledText {
1789            text: "hello".to_owned(),
1790            ..StyledText::default()
1791        };
1792        assert_eq!(st.char_len(), 5);
1793
1794        let st = StyledText {
1795            text: "世界".to_owned(),
1796            ..StyledText::default()
1797        };
1798        assert_eq!(st.char_len(), 2);
1799
1800        // Active links resolve their interned index once per link span.
1801        let mut state = InlineState::new();
1802        state.push(InlineFlag::Link(Rc::from("https://example.com")));
1803        let mut st = StyledText::default();
1804        assert_eq!(state.current_link_idx(&mut st), 0);
1805        assert_eq!(state.current_link_idx(&mut st), 0);
1806        assert_eq!(st.links.len(), 1);
1807        state.pop_link();
1808        assert!(state.link_stack.is_empty());
1809    }
1810
1811    #[test]
1812    fn parse_list_with_child_blocks() {
1813        for (md, label) in [
1814            (
1815                "- Item:\n\n  ```rust\n  fn main() {}\n  ```\n\n- Next\n",
1816                "Code",
1817            ),
1818            ("- Item:\n\n  > Quoted text\n\n- Next\n", "Quote"),
1819            ("- First para\n\n  Second para\n\n- Another\n", "Paragraph"),
1820            ("- Item\n\n  ## Sub-heading\n\n- Next\n", "Heading"),
1821            ("- Item\n\n  ---\n\n- Next\n", "ThematicBreak"),
1822            (
1823                "- Item\n\n  | A | B |\n  |---|---|\n  | 1 | 2 |\n\n- Next\n",
1824                "Table",
1825            ),
1826            (
1827                "1. First item\n\n   ```rust\n   let x = 1;\n   ```\n\n2. Second item\n",
1828                "OL+Code",
1829            ),
1830        ] {
1831            let blocks = parse_markdown(md);
1832            let has_children = match &blocks[0] {
1833                Block::UnorderedList(items) | Block::OrderedList { items, .. } => {
1834                    !items[0].children.is_empty()
1835                }
1836                _ => false,
1837            };
1838            assert!(has_children, "{label}: should have children");
1839        }
1840        // Deeply nested lists (10 levels)
1841        let mut md = String::with_capacity(512);
1842        for depth in 0..10 {
1843            let indent = "  ".repeat(depth);
1844            writeln!(md, "{indent}- level {depth}").ok();
1845        }
1846        let blocks = parse_markdown(&md);
1847        fn count_depth(block: &Block) -> usize {
1848            match block {
1849                Block::UnorderedList(items) => {
1850                    items[0].children.first().map_or(1, |c| 1 + count_depth(c))
1851                }
1852                _ => 0,
1853            }
1854        }
1855        assert!(count_depth(&blocks[0]) >= 10);
1856        // Mixed ordered/unordered nesting
1857        let md = "- bullet A\n  1. ordered 1\n     - nested bullet\n       1. deep ordered\n  2. ordered 2\n- bullet B\n";
1858        let blocks = parse_markdown(md);
1859        match &blocks[0] {
1860            Block::UnorderedList(items) => {
1861                assert_eq!(items.len(), 2);
1862                assert!(
1863                    items[0]
1864                        .children
1865                        .iter()
1866                        .any(|b| matches!(b, Block::OrderedList { .. }))
1867                );
1868                for child in &items[0].children {
1869                    if let Block::OrderedList { items: ol, .. } = child
1870                        && let Some(Block::UnorderedList(ul)) = ol[0]
1871                            .children
1872                            .iter()
1873                            .find(|b| matches!(b, Block::UnorderedList(_)))
1874                    {
1875                        assert!(
1876                            ul[0]
1877                                .children
1878                                .iter()
1879                                .any(|b| matches!(b, Block::OrderedList { .. }))
1880                        );
1881                    }
1882                }
1883            }
1884            other => panic!("expected UL, got {other:?}"),
1885        }
1886    }
1887
1888    // ── Inline merge and nesting ─────────────────────────────────
1889
1890    #[test]
1891    fn inline_deep_nesting() {
1892        for (label, md, text, strong, emph, strike, link) in [
1893            (
1894                "bold_italic",
1895                "***bold-italic***",
1896                "bold-italic",
1897                true,
1898                true,
1899                false,
1900                None,
1901            ),
1902            (
1903                "bold_italic_strike",
1904                "***~~bold-italic-strike~~***",
1905                "bold-italic-strike",
1906                true,
1907                true,
1908                true,
1909                None,
1910            ),
1911            (
1912                "bold_italic_link",
1913                "[***bold-italic link***](url)",
1914                "bold-italic link",
1915                true,
1916                true,
1917                false,
1918                Some("url"),
1919            ),
1920            (
1921                "all_in_link",
1922                "[***~~all~~***](url)",
1923                "all",
1924                true,
1925                true,
1926                true,
1927                Some("url"),
1928            ),
1929        ] {
1930            let st = parse_paragraph(md);
1931            assert_eq!(st.text, text, "{label}");
1932            assert_eq!(st.spans.len(), 1, "{label}: span count");
1933            let s = &st.spans[0];
1934            assert_eq!(s.style.strong(), strong, "{label}: strong");
1935            assert_eq!(s.style.emphasis(), emph, "{label}: emph");
1936            assert_eq!(s.style.strikethrough(), strike, "{label}: strike");
1937            assert_eq!(
1938                st.link_url(s.style.link_idx).map(Rc::as_ref),
1939                link,
1940                "{label}: link"
1941            );
1942            validate_styled_text(&st);
1943        }
1944
1945        // Deeply interleaved: all formatting types nested
1946        let st = parse_paragraph("**bold *italic ~~strike `code` strike~~ italic* bold**");
1947        validate_styled_text(&st);
1948        assert!(st.spans.iter().any(|s| s.style.strong()));
1949        assert!(st.spans.iter().any(|s| s.style.emphasis()));
1950        assert!(st.spans.iter().any(|s| s.style.strikethrough()));
1951        assert!(st.spans.iter().any(|s| s.style.code()));
1952    }
1953
1954    #[test]
1955    fn inline_code_and_link_contexts() {
1956        // Code inside bold inherits strong
1957        let st = parse_paragraph("**bold `code` bold**");
1958        validate_styled_text(&st);
1959        let code: Vec<_> = st.spans.iter().filter(|s| s.style.code()).collect();
1960        assert_eq!(code.len(), 1);
1961        assert!(code[0].style.strong());
1962        assert_eq!(
1963            &st.text[code[0].start as usize..code[0].end as usize],
1964            "code"
1965        );
1966
1967        // Backtick sequences (also covers single inline code)
1968        for md in ["Use `code` here", "`a`b`c`", "`` `inner` ``"] {
1969            let st = parse_paragraph(md);
1970            validate_styled_text(&st);
1971            assert!(st.spans.iter().any(|s| s.style.code()));
1972        }
1973
1974        // Formatted text in link
1975        let st = parse_paragraph("[**bold** and *italic*](url)");
1976        validate_styled_text(&st);
1977        for span in &st.spans {
1978            assert_eq!(
1979                st.link_url(span.style.link_idx).map(Rc::as_ref),
1980                Some("url")
1981            );
1982        }
1983        assert!(st.spans.iter().any(|s| s.style.strong()));
1984        assert!(st.spans.iter().any(|s| s.style.emphasis()));
1985
1986        // Multiple links
1987        let st = parse_paragraph("[aaa](url1) [bbb](url2)");
1988        validate_styled_text(&st);
1989        let urls: Vec<_> = st
1990            .spans
1991            .iter()
1992            .filter_map(|s| st.link_url(s.style.link_idx).map(Rc::as_ref))
1993            .collect();
1994        assert!(urls.contains(&"url1") && urls.contains(&"url2"));
1995
1996        // Code in link
1997        let st = parse_paragraph("[`code` in link](url)");
1998        validate_styled_text(&st);
1999        assert!(
2000            st.spans
2001                .iter()
2002                .any(|s| s.style.code() && s.style.has_link())
2003        );
2004
2005        // Adjacent different links don't merge
2006        let st = parse_paragraph("[a](u1)[b](u2)");
2007        validate_styled_text(&st);
2008        assert!(st.spans.iter().filter(|s| s.style.has_link()).count() >= 2);
2009
2010        // Emphasis across softbreak
2011        let st = parse_paragraph("*italic\nacross lines*");
2012        validate_styled_text(&st);
2013        assert!(st.spans.iter().any(|s| s.style.emphasis()));
2014
2015        // Empty and unclosed markers
2016        for md in [
2017            "****",
2018            "__",
2019            "[](url)",
2020            "**unclosed",
2021            "*unclosed",
2022            "`unclosed",
2023            "~~unclosed",
2024        ] {
2025            let blocks = parse_markdown(md);
2026            if let Some(Block::Paragraph(st)) = blocks.first() {
2027                validate_styled_text(st);
2028            }
2029        }
2030    }
2031
2032    #[test]
2033    fn inline_long_sequences() {
2034        // 100 alternating bold/normal
2035        let mut md = String::new();
2036        for i in 0..100 {
2037            if i % 2 == 0 {
2038                write!(md, "**bold{i}** ").ok();
2039            } else {
2040                write!(md, "normal{i} ").ok();
2041            }
2042        }
2043        let st = parse_paragraph(&md);
2044        validate_styled_text(&st);
2045        assert_eq!(st.spans.iter().filter(|s| s.style.strong()).count(), 50);
2046
2047        // 50 links
2048        md.clear();
2049        for i in 0..50 {
2050            write!(md, "[link{i}](https://example.com/{i}) ").ok();
2051        }
2052        let st = parse_paragraph(&md);
2053        validate_styled_text(&st);
2054        assert!(st.spans.iter().filter(|s| s.style.has_link()).count() >= 50);
2055
2056        // 100 code spans
2057        md.clear();
2058        for i in 0..100 {
2059            write!(md, "`code{i}` ").ok();
2060        }
2061        let st = parse_paragraph(&md);
2062        validate_styled_text(&st);
2063        assert_eq!(st.spans.iter().filter(|s| s.style.code()).count(), 100);
2064    }
2065
2066    // ── Stress tests ─────────────────────────────────────────────
2067
2068    #[test]
2069    fn stress_table_edge_cases() {
2070        // Extra cols
2071        match &parse_markdown("| A | B |\n|---|---|\n| 1 | 2 | 3 | 4 |\n")[0] {
2072            Block::Table(t) => {
2073                assert_eq!(t.header.len(), 2);
2074                assert_eq!(t.rows.len(), 1);
2075            }
2076            other => panic!("expected table, got {other:?}"),
2077        }
2078        // Fewer cols
2079        match &parse_markdown("| A | B | C | D |\n|---|---|---|---|\n| 1 |\n| x | y |\n")[0] {
2080            Block::Table(t) => {
2081                assert_eq!(t.header.len(), 4);
2082                assert_eq!(t.rows.len(), 2);
2083            }
2084            other => panic!("expected table, got {other:?}"),
2085        }
2086        // Empty cells
2087        match &parse_markdown("| A | B | C |\n|---|---|---|\n|  |  |  |\n| x |  | z |\n")[0] {
2088            Block::Table(t) => {
2089                assert!(t.rows[0].iter().all(|c| c.text.is_empty()));
2090                assert_eq!(t.rows[1][0].text, "x");
2091                assert_eq!(t.rows[1][2].text, "z");
2092            }
2093            other => panic!("expected table, got {other:?}"),
2094        }
2095        // Headers only
2096        match &parse_markdown("| H1 | H2 | H3 |\n|---|---|---|\n")[0] {
2097            Block::Table(t) => {
2098                assert_eq!(
2099                    t.header.iter().map(|c| c.text.as_str()).collect::<Vec<_>>(),
2100                    vec!["H1", "H2", "H3"]
2101                );
2102                assert!(t.rows.is_empty());
2103            }
2104            other => panic!("expected table, got {other:?}"),
2105        }
2106        // Adjacent tables
2107        assert!(
2108            parse_markdown("| A |\n|---|\n| 1 |\n| B |\n|---|\n| 2 |\n")
2109                .iter()
2110                .any(|b| matches!(b, Block::Table(_)))
2111        );
2112        assert_eq!(
2113            parse_markdown("| A |\n|---|\n| 1 |\n\n| B |\n|---|\n| 2 |\n")
2114                .iter()
2115                .filter(|b| matches!(b, Block::Table(_)))
2116                .count(),
2117            2
2118        );
2119        // Long headers with alignment
2120        let (la, lb) = ("A".repeat(200), "B".repeat(300));
2121        let md = format!("| {la} | {lb} | Short |\n|:---|:---:|---:|\n| x | y | z |\n");
2122        match &parse_markdown(&md)[0] {
2123            Block::Table(t) => {
2124                assert_eq!(t.header[0].text, la);
2125                assert_eq!(t.header[1].text, lb);
2126                assert_eq!(
2127                    t.alignments,
2128                    vec![Alignment::Left, Alignment::Center, Alignment::Right]
2129                );
2130            }
2131            other => panic!("expected table, got {other:?}"),
2132        }
2133        // Cell with all formatting types
2134        let md = "| Cell |\n|---|\n| **bold** *italic* `code` [link](url) ~~strike~~ |\n";
2135        match &parse_markdown(md)[0] {
2136            Block::Table(t) => {
2137                let c = &t.rows[0][0];
2138                assert!(c.spans.iter().any(|s| s.style.strong()));
2139                assert!(c.spans.iter().any(|s| s.style.emphasis()));
2140                assert!(c.spans.iter().any(|s| s.style.code()));
2141                assert!(c.spans.iter().any(|s| s.style.has_link()));
2142                assert!(c.spans.iter().any(|s| s.style.strikethrough()));
2143            }
2144            other => panic!("expected table, got {other:?}"),
2145        }
2146    }
2147
2148    #[test]
2149    fn stress_large_table_100_rows_20_cols() {
2150        let mut md = String::with_capacity(100_000);
2151        md.push('|');
2152        for c in 0..20 {
2153            write!(md, " H{c} |").ok();
2154        }
2155        md.push('\n');
2156        md.push('|');
2157        for _ in 0..20 {
2158            md.push_str("---|");
2159        }
2160        md.push('\n');
2161        for r in 0..100 {
2162            md.push('|');
2163            for c in 0..20 {
2164                write!(md, " r{r}c{c} |").ok();
2165            }
2166            md.push('\n');
2167        }
2168        match &parse_markdown(&md)[0] {
2169            Block::Table(t) => {
2170                assert_eq!(t.header.len(), 20);
2171                assert_eq!(t.rows.len(), 100);
2172                assert_eq!(t.rows[0][0].text, "r0c0");
2173                assert_eq!(t.rows[99][19].text, "r99c19");
2174            }
2175            other => panic!("expected table, got {other:?}"),
2176        }
2177    }
2178
2179    #[test]
2180    fn stress_parse_no_panic() {
2181        for (label, md) in [
2182            (
2183                "footnote",
2184                "Text with a footnote[^1].\n\n[^1]: The footnote content.\n".to_string(),
2185            ),
2186            ("huge_para", "word ".repeat(20_000)),
2187            (
2188                "thematic_breaks",
2189                "---\n\n***\n\n___\n\n---\n\n***\n".to_string(),
2190            ),
2191            ("long_heading", format!("# {}\n", "X".repeat(1200))),
2192        ] {
2193            assert!(!parse_markdown(&md).is_empty(), "{label}");
2194        }
2195        assert_eq!(
2196            parse_markdown("---\n\n***\n\n___\n\n---\n\n***\n")
2197                .iter()
2198                .filter(|b| matches!(b, Block::ThematicBreak))
2199                .count(),
2200            5
2201        );
2202        // Mixed block types in one document
2203        let md = "# Heading\nParagraph.\n\n---\n\n- list\n\n> quote\n\n```\ncode\n```\n\n| T |\n|---|\n| v |\n\n![img](x.png)\n";
2204        let b = parse_markdown(md);
2205        assert!(b.iter().any(|b| matches!(b, Block::Heading { .. })));
2206        assert!(b.iter().any(|b| matches!(b, Block::Paragraph(_))));
2207        assert!(b.iter().any(|b| matches!(b, Block::ThematicBreak)));
2208        assert!(b.iter().any(|b| matches!(b, Block::UnorderedList(_))));
2209        assert!(b.iter().any(|b| matches!(b, Block::Quote(_))));
2210        assert!(b.iter().any(|b| matches!(b, Block::Code { .. })));
2211        assert!(b.iter().any(|b| matches!(b, Block::Table(_))));
2212        assert!(b.iter().any(|b| matches!(b, Block::Image { .. })));
2213        // Mixed adversarial: every construct interleaved
2214        let md = "# **~~`heading`~~**\n\n> > > deeply quoted **bold** ~~strike~~ `code`\n\n| a | b |\n|---|---|\n| [link](http://x) | ![img](y) |\n\n- [ ] task 1\n  - [x] sub task\n    - normal\n      1. ordered\n\n```rust\nfn main() {}\n```\n\n---\n\ntext with [link](url \"title\") and ![image](img.png)\n\n<div>raw html</div>\n\n&amp; &lt; &gt; entities\n";
2215        assert!(!parse_markdown(md).is_empty());
2216    }
2217
2218    // ── Chaos / fuzz tests ──────────────────────────────────────────
2219
2220    #[test]
2221    fn chaos_deep_nesting_no_stack_overflow() {
2222        // 500 levels of nested blockquotes
2223        let md = "> ".repeat(500) + "leaf text\n";
2224        assert!(!parse_markdown(&md).is_empty());
2225
2226        // 500 levels of nested list indentation
2227        let mut md = String::new();
2228        for depth in 0..500 {
2229            let indent = "  ".repeat(depth);
2230            writeln!(md, "{indent}- level {depth}").ok();
2231        }
2232        assert!(!parse_markdown(&md).is_empty());
2233    }
2234
2235    #[test]
2236    fn chaos_huge_text_u32_saturation() {
2237        let mut st = StyledText::default();
2238        let chunk = "a".repeat(50_000);
2239        for _ in 0..100 {
2240            st.push_text(&chunk, SpanStyle::plain());
2241        }
2242        assert_eq!(st.char_count, 5_000_000);
2243        assert!(st.text.len() == 5_000_000);
2244    }
2245
2246    #[test]
2247    fn chaos_empty_table_no_columns() {
2248        let md = "|||\n||\n||\n";
2249        let blocks = parse_markdown(md);
2250        assert!(blocks.len() <= 5);
2251    }
2252
2253    #[test]
2254    fn chaos_large_content() {
2255        // Unclosed fence: rest of doc treated as code
2256        let md = format!("```\n{}\n", "x".repeat(100_000));
2257        assert!(!parse_markdown(&md).is_empty());
2258
2259        // 10K list items
2260        let mut md = String::with_capacity(20 * 10_000);
2261        for i in 0..10_000 {
2262            let _ = writeln!(md, "- item {i}");
2263        }
2264        assert!(!parse_markdown(&md).is_empty());
2265
2266        // Single 1MB line
2267        assert!(!parse_markdown(&"x".repeat(1_000_000)).is_empty());
2268
2269        // Long heading
2270        assert!(!parse_markdown(&format!("# {}", "A".repeat(100_000))).is_empty());
2271
2272        // Long link URL
2273        assert!(!parse_markdown(&format!("[text]({})", "a".repeat(100_000))).is_empty());
2274    }
2275
2276    // ── Security / Fuzz Tests ────────────────────────────────────────
2277
2278    /// Adversarial input: null bytes, control characters, and extreme unicode.
2279    #[test]
2280    fn fuzz_adversarial_characters() {
2281        let owned_emoji = "🦀".repeat(10_000);
2282        for input in [
2283            "\0",
2284            "# Hello\0World",
2285            "text\0\0\0more",
2286            "\0\0\0\0\0\0\0\0",
2287            "```\0rust\0\ncode\0\n```",
2288            "| col\0 |\n|---|\n| val\0 |",
2289            "\u{FEFF}# BOM heading",
2290            "text\u{200B}zero\u{200B}width",
2291            "\u{202E}RTL override\u{202C}",
2292            "\u{FFFD}\u{FFFD}\u{FFFD}",
2293            &owned_emoji,
2294            "\t\t\t\t\t\t\t\t\t\t",
2295            "\r\r\r\r\r\r\r\r",
2296            "\x01\x02\x03\x04\x05\x06\x07",
2297        ] {
2298            let _ = parse_markdown(input).len();
2299        }
2300    }
2301
2302    /// Adversarial input: alternating markers and enormous tables.
2303    #[test]
2304    fn fuzz_adversarial_patterns() {
2305        // Alternating open/close markers
2306        for input in [
2307            "**".repeat(5_000),
2308            "~~".repeat(5_000),
2309            "`".repeat(10_000),
2310            "```\n".repeat(1_000),
2311            "[".repeat(5_000),
2312            "](".repeat(5_000),
2313            "![".repeat(5_000),
2314        ] {
2315            let _ = parse_markdown(&input).len();
2316        }
2317        // Enormous table (100 cols × 500 rows)
2318        use std::fmt::Write;
2319        let mut table = String::with_capacity(200_000);
2320        table.push('|');
2321        for c in 0..100 {
2322            let _ = write!(table, " col{c} |");
2323        }
2324        table.push('\n');
2325        table.push('|');
2326        for _ in 0..100 {
2327            table.push_str(" --- |");
2328        }
2329        table.push('\n');
2330        for r in 0..500 {
2331            table.push('|');
2332            for c in 0..100 {
2333                let _ = write!(table, " r{r}c{c} |");
2334            }
2335            table.push('\n');
2336        }
2337        assert!(!parse_markdown(&table).is_empty());
2338    }
2339
2340    // ── Rendering parity diagnostic tests ────────────────────────
2341
2342    /// GFM bare-URL autolinks (no angle brackets) are NOT parsed as
2343    /// clickable links.  Despite `ENABLE_GFM` being set, pulldown-cmark
2344    /// 0.13 does not include autolink detection in that flag — it only
2345    /// enables blockquote admonition tags.
2346    ///
2347    /// This means bare URLs like `https://example.com` in paragraphs
2348    /// (including verification.md §2.3) render as plain text, not links.
2349    #[test]
2350    fn gfm_bare_url_autolinks_not_parsed() {
2351        // Bare https:// URL should become a link per GFM spec,
2352        // but pulldown-cmark 0.13 does NOT parse them.
2353        let st = parse_paragraph("Visit https://example.com for details.");
2354        validate_styled_text(&st);
2355        let link_span = st.spans.iter().find(|s| s.style.has_link());
2356        // BUG / LIMITATION: bare URLs are plain text, not links.
2357        assert!(
2358            link_span.is_none(),
2359            "pulldown-cmark 0.13 does NOT parse bare URLs as links (known limitation)"
2360        );
2361    }
2362
2363    /// Bold text inside a link paragraph produces spans with both
2364    /// strong + link flags — confirming the rendering path divergence
2365    /// between `build_layout_job` (`strengthen_color`) and
2366    /// `render_text_with_links` (`RichText::strong`).
2367    #[test]
2368    fn bold_inside_link_has_both_flags() {
2369        let st = parse_paragraph("[**bold link**](https://example.com)");
2370        validate_styled_text(&st);
2371        let bold_link = st
2372            .spans
2373            .iter()
2374            .find(|s| s.style.strong() && s.style.has_link());
2375        assert!(
2376            bold_link.is_some(),
2377            "should have a span that is both strong and a link"
2378        );
2379    }
2380
2381    /// Inline HTML is rendered with code styling (monospace), not as actual
2382    /// HTML elements.  This documents the current behaviour.
2383    #[test]
2384    fn inline_html_renders_as_code_styled_text() {
2385        let st = parse_paragraph("Text with <br> and <em>emphasis</em> tags.");
2386        validate_styled_text(&st);
2387        // The <br>, <em>, </em> fragments should have the CODE flag.
2388        let html_spans: Vec<_> = st
2389            .spans
2390            .iter()
2391            .filter(|s| s.style.code())
2392            .map(|s| &st.text[s.start as usize..s.end as usize])
2393            .collect();
2394        assert!(
2395            html_spans.iter().any(|t| t.contains('<')),
2396            "HTML tags should be rendered as code-styled spans, got: {html_spans:?}"
2397        );
2398    }
2399
2400    /// Input size limit: documents above `MAX_PARSE_BYTES` are truncated.
2401    #[test]
2402    fn parse_truncates_oversized_input() {
2403        use std::fmt::Write;
2404        // Create input larger than 64 MiB.
2405        let line = "x".repeat(1024) + "\n";
2406        let mut big = String::with_capacity(65 * 1024 * 1024 + 1024);
2407        while big.len() < 65 * 1024 * 1024 {
2408            let _ = write!(big, "{line}");
2409        }
2410        // Must not panic or OOM — parser truncates.
2411        let blocks = parse_markdown(&big);
2412        let _ = blocks.len();
2413    }
2414}