streamdown_parser/
lib.rs

1//! Streamdown Parser
2//!
3//! A streaming markdown parser designed for real-time rendering of markdown
4//! content as it arrives. This is the core parsing engine for streamdown.
5//!
6//! # Overview
7//!
8//! The parser is designed to handle byte-by-byte input for streaming scenarios
9//! (like LLM output) while also working efficiently with complete documents.
10//!
11//! # Example
12//!
13//! ```
14//! use streamdown_parser::{Parser, ParseEvent};
15//!
16//! let mut parser = Parser::new();
17//!
18//! // Feed lines and get events
19//! for event in parser.parse_line("# Hello World") {
20//!     match event {
21//!         ParseEvent::Heading { level, content } => {
22//!             println!("H{}: {}", level, content);
23//!         }
24//!         _ => {}
25//!     }
26//! }
27//! ```
28
29pub mod entities;
30pub mod inline;
31pub mod tokenizer;
32
33pub use entities::decode_html_entities;
34pub use inline::{format_line, InlineElement, InlineParser};
35pub use tokenizer::{cjk_count, is_cjk, not_text, Token, Tokenizer};
36
37use regex::Regex;
38use std::sync::LazyLock;
39use streamdown_core::{BlockType, Code, ListType, ParseState};
40
41// =============================================================================
42// Regex patterns
43// =============================================================================
44
/// Matches a line that opens a code block: optional leading whitespace, then
/// a run of three or more backticks, a run of three or more tildes, or a
/// literal `<pre>`.
///
/// Capture 1 is the fence marker. Capture 2 is the (possibly empty) run of
/// non-whitespace characters after the marker, used as the language tag.
static CODE_FENCE_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*(```+|~~~+|<pre>)\s*([^\s]*)\s*$").unwrap());

/// Matches a line that may close a code block: a backtick run, a tilde run,
/// or `</pre>`, with nothing but whitespace around it.
///
/// Capture 1 is the closing marker.
static CODE_FENCE_END_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*(```+|~~~+|</pre>)\s*$").unwrap());

/// Matches a candidate line for space-indented code: four literal spaces,
/// any further whitespace, then a character that is neither whitespace nor
/// `*` (so indented `*` list items are not mistaken for code).
static SPACE_CODE_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^    \s*[^\s*]").unwrap());

/// Matches an ATX heading at the very start of the line.
///
/// Capture 1 is the run of one to six `#` characters; capture 2 is the
/// heading text after the mandatory whitespace.
static HEADING_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(#{1,6})\s+(.*)$").unwrap());

/// Matches a list item.
///
/// Capture 1 is the leading whitespace, capture 2 the marker (`+`, `*`, `-`,
/// a `+` followed by dashes, or digits followed by `.`), and capture 3 the
/// item text after the mandatory whitespace.
static LIST_ITEM_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)([+*-]|\+-+|\d+\.)\s+(.*)$").unwrap());

/// Matches blockquote prefixes and think-block tags (ASCII and the `◁`/`▷`
/// unicode variants).
///
/// Capture 1 is the marker (one or more `>` or a think tag); capture 3 is
/// the remainder of the line after the marker.
static BLOCK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*((>\s*)+|[◁<].?think[>▷]|</?.?think[>▷]?)(.*)$").unwrap());

/// Matches a horizontal rule: three or more `-`, `*`, or `_` of the same
/// kind, followed only by optional whitespace.
static HR_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(---+|\*\*\*+|___+)\s*$").unwrap());

/// Matches a table row: a line that starts and ends with `|` (ignoring
/// surrounding whitespace).
///
/// Capture 1 is everything between the first and the last `|`.
static TABLE_ROW_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*\|(.+)\|\s*$").unwrap());

/// Matches text made up solely of whitespace, `|`, `:` and `-`; applied to
/// the inner part of a table row to recognise the header separator.
static TABLE_SEP_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^[\s|:-]+$").unwrap());
80
81// =============================================================================
82// Types
83// =============================================================================
84
/// The marker that introduces a list item.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListBullet {
    /// Dash bullet: -
    Dash,
    /// Asterisk bullet: *
    Asterisk,
    /// Plus bullet: +
    Plus,
    /// Expandable plus: +---
    PlusExpand,
    /// Ordered item, carrying its number
    Ordered(usize),
}

impl ListBullet {
    /// Parse a bullet marker such as `-`, `*`, `+`, `+---` or `12.`.
    ///
    /// Surrounding whitespace is ignored. Returns `None` when the text is not
    /// a recognised marker. An ordered marker must be one or more ASCII
    /// digits followed by exactly one trailing `.`; inputs such as `+5.` or
    /// `3..` are rejected, as is a number that does not fit in `usize`.
    pub fn parse(s: &str) -> Option<Self> {
        let s = s.trim();

        match s {
            "-" => return Some(Self::Dash),
            "*" => return Some(Self::Asterisk),
            "+" => return Some(Self::Plus),
            _ => {}
        }

        // `+` followed by nothing but dashes is the expandable form.
        let is_expandable = s
            .strip_prefix('+')
            .is_some_and(|rest| !rest.is_empty() && rest.chars().all(|c| c == '-'));
        if is_expandable {
            return Some(Self::PlusExpand);
        }

        // Check the digits explicitly: integer parsing on its own would
        // accept a leading `+` sign, and only a single dot may be removed.
        let digits = s.strip_suffix('.')?;
        if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        digits.parse().ok().map(Self::Ordered)
    }

    /// Check if this is an ordered bullet.
    pub fn is_ordered(&self) -> bool {
        matches!(self, Self::Ordered(_))
    }
}
124
/// Table parsing state.
///
/// The parser keeps this in an `Option`; `None` there means no table is
/// currently open.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TableState {
    /// Parsing header row(s); no separator row has been seen yet
    Header,
    /// Saw separator, now in body
    Body,
}
133
/// Events emitted by the parser.
///
/// Inline variants are produced from the inline parser's elements for plain
/// text lines; block-level variants describe structure detected per line.
#[derive(Debug, Clone, PartialEq)]
pub enum ParseEvent {
    // === Inline elements ===
    /// Plain text run.
    Text(String),
    /// Inline code span.
    InlineCode(String),
    /// Bold text.
    Bold(String),
    /// Italic text.
    Italic(String),
    /// Underlined text.
    Underline(String),
    /// Struck-out text.
    Strikeout(String),
    /// Text that is both bold and italic.
    BoldItalic(String),
    /// Hyperlink with its display text and target.
    Link { text: String, url: String },
    /// Image with its alt text and source.
    Image { alt: String, url: String },
    /// Footnote reference.
    Footnote(String),

    // === Block-level elements ===
    /// Heading; `level` is the number of leading `#` characters (1 to 6).
    Heading { level: u8, content: String },
    /// A code block opened. `language` is the tag written after the fence
    /// (`None` when absent; `Some("text")` for space-indented code) and
    /// `indent` is the byte length of the fence line's leading whitespace
    /// (4 for space-indented code).
    CodeBlockStart { language: Option<String>, indent: usize },
    /// One line of code block content.
    CodeBlockLine(String),
    /// The current code block closed.
    CodeBlockEnd,
    /// A list item. `indent` is the byte length of the leading whitespace;
    /// ordered bullets carry the number assigned by the parser state.
    ListItem { indent: usize, bullet: ListBullet, content: String },
    /// The current list (all nesting levels) ended.
    ListEnd,
    /// Cells of a table header row.
    TableHeader(Vec<String>),
    /// Cells of a table body row.
    TableRow(Vec<String>),
    /// The separator row between table header and body.
    TableSeparator,
    /// The current table ended.
    TableEnd,
    /// A blockquote opened or deepened; `depth` is the number of `>` markers.
    BlockquoteStart { depth: usize },
    /// Text of one blockquote line, without the `>` markers.
    BlockquoteLine(String),
    /// The current blockquote ended.
    BlockquoteEnd,
    /// A think block opened.
    ThinkBlockStart,
    /// One line of think block content.
    ThinkBlockLine(String),
    /// The current think block closed.
    ThinkBlockEnd,
    /// Horizontal rule.
    HorizontalRule,
    /// A blank line; consecutive blank lines are collapsed into one event.
    EmptyLine,
    /// End of a line of inline content.
    Newline,
    /// NOTE(review): not produced by the parser code in this file —
    /// presumably emitted or consumed elsewhere; confirm.
    Prompt(String),
    /// A batch of inline elements.
    /// NOTE(review): not produced by the parser code in this file — confirm
    /// where it is used.
    InlineElements(Vec<InlineElement>),
}
172
173impl ParseEvent {
174    pub fn is_block(&self) -> bool {
175        !self.is_inline()
176    }
177
178    pub fn is_inline(&self) -> bool {
179        matches!(
180            self,
181            ParseEvent::Text(_)
182                | ParseEvent::InlineCode(_)
183                | ParseEvent::Bold(_)
184                | ParseEvent::Italic(_)
185                | ParseEvent::Underline(_)
186                | ParseEvent::Strikeout(_)
187                | ParseEvent::BoldItalic(_)
188                | ParseEvent::Link { .. }
189                | ParseEvent::Image { .. }
190                | ParseEvent::Footnote(_)
191        )
192    }
193}
194
195// =============================================================================
196// Parser
197// =============================================================================
198
/// Streaming markdown parser.
///
/// Feed it one line at a time with `parse_line`; each call returns the
/// events produced by that line. Call `finalize` at end of input to close
/// any blocks that are still open.
#[derive(Debug)]
pub struct Parser {
    /// Parse state carried from line to line (code/list/table/quote flags).
    state: ParseState,
    /// Inline parser used for lines that are not block constructs.
    inline_parser: InlineParser,
    /// Marker that opened the current fenced code block, if any.
    code_fence: Option<String>,
    /// Progress through the current table; `None` when no table is open.
    table_state: Option<TableState>,
    /// Buffer collecting the events of the call in progress.
    events: Vec<ParseEvent>,
    /// Track previous empty line for collapsing
    prev_was_empty: bool,
}
210
211impl Default for Parser {
212    fn default() -> Self {
213        Self::new()
214    }
215}
216
217impl Parser {
218    /// Create a new parser with default settings.
219    pub fn new() -> Self {
220        Self {
221            state: ParseState::new(),
222            inline_parser: InlineParser::new(),
223            code_fence: None,
224            table_state: None,
225            events: Vec::new(),
226            prev_was_empty: false,
227        }
228    }
229
230    /// Create a parser with a custom ParseState.
231    pub fn with_state(state: ParseState) -> Self {
232        let inline_parser = InlineParser::with_settings(state.links, state.images);
233        Self {
234            state,
235            inline_parser,
236            code_fence: None,
237            table_state: None,
238            events: Vec::new(),
239            prev_was_empty: false,
240        }
241    }
242
243    pub fn state(&self) -> &ParseState { &self.state }
244    pub fn state_mut(&mut self) -> &mut ParseState { &mut self.state }
245
246    pub fn set_process_links(&mut self, enabled: bool) {
247        self.state.links = enabled;
248        self.inline_parser.process_links = enabled;
249    }
250
251    pub fn set_process_images(&mut self, enabled: bool) {
252        self.state.images = enabled;
253        self.inline_parser.process_images = enabled;
254    }
255
256    /// Enable space-indented code blocks (4 spaces = code).
257    pub fn set_code_spaces(&mut self, enabled: bool) {
258        self.state.code_spaces = enabled;
259    }
260
261    /// Parse a single line and return events.
262    pub fn parse_line(&mut self, line: &str) -> Vec<ParseEvent> {
263        self.events.clear();
264
265        // Handle code blocks first (they consume everything)
266        if self.state.is_in_code() {
267            self.parse_in_code_block(line);
268            return std::mem::take(&mut self.events);
269        }
270
271        // Handle think blocks
272        if self.state.block_type == Some(BlockType::Think) {
273            self.parse_in_think_block(line);
274            return std::mem::take(&mut self.events);
275        }
276
277        // Check for empty line (with collapsing) - BEFORE indent stripping
278        if line.trim().is_empty() {
279            return self.handle_empty_line();
280        }
281
282        // Track that previous line wasn't empty
283        let was_prev_empty = self.prev_was_empty;
284        self.prev_was_empty = false;
285        self.state.last_line_empty = false;
286
287        // Check for space-indented code BEFORE first-indent stripping
288        // (so we don't accidentally strip the 4-space indent)
289        if self.try_parse_space_code(line, was_prev_empty) { return self.take_events(); }
290
291        // Now apply first-indent stripping for other constructs
292        let line = self.strip_first_indent(line);
293
294        // Try block-level constructs in order
295        if self.try_parse_code_fence(&line) { return self.take_events(); }
296        if self.try_parse_block(&line) { return self.take_events(); }
297        if self.try_parse_heading(&line) { return self.take_events(); }
298        if self.try_parse_hr(&line) { return self.take_events(); }
299        if self.try_parse_list_item(&line) { return self.take_events(); }
300        if self.try_parse_table(&line) { return self.take_events(); }
301
302        // Exit special contexts for plain text
303        self.exit_block_contexts();
304
305        // Parse as inline content
306        self.parse_inline_content(&line);
307        self.take_events()
308    }
309
310    fn take_events(&mut self) -> Vec<ParseEvent> {
311        std::mem::take(&mut self.events)
312    }
313
314    /// Strip first-indent from line if configured.
315    /// This handles markdown that's indented in the input stream.
316    fn strip_first_indent(&mut self, line: &str) -> String {
317        // Set first_indent from the very first non-empty line
318        if self.state.first_indent.is_none() && !line.trim().is_empty() {
319            let indent = line.len() - line.trim_start().len();
320            self.state.first_indent = Some(indent);
321        }
322
323        // Only strip if first_indent is > 0
324        if let Some(first_indent) = self.state.first_indent {
325            if first_indent > 0 {
326                let current_indent = line.len() - line.trim_start().len();
327                if current_indent >= first_indent {
328                    return line[first_indent..].to_string();
329                }
330            }
331        }
332
333        line.to_string()
334    }
335
336    /// Handle empty line with collapsing.
337    fn handle_empty_line(&mut self) -> Vec<ParseEvent> {
338        // Collapse consecutive empty lines
339        if self.prev_was_empty {
340            return vec![]; // Skip this empty line
341        }
342
343        self.prev_was_empty = true;
344        self.state.last_line_empty = true;
345
346        // End blockquote if in one
347        if self.state.block_depth > 0 && self.state.block_type == Some(BlockType::Quote) {
348            while self.state.block_depth > 0 {
349                self.state.exit_block();
350            }
351            self.events.push(ParseEvent::BlockquoteEnd);
352        }
353
354        // End list if in one
355        if self.state.in_list {
356            self.exit_list_context();
357        }
358
359        // End table if in one
360        if self.table_state.is_some() {
361            self.table_state = None;
362            self.state.in_table = None;
363            self.events.push(ParseEvent::TableEnd);
364        }
365
366        self.events.push(ParseEvent::EmptyLine);
367        self.take_events()
368    }
369
370    /// Exit block contexts when encountering plain text.
371    fn exit_block_contexts(&mut self) {
372        if self.state.in_list {
373            self.exit_list_context();
374        }
375        if self.table_state.is_some() {
376            self.table_state = None;
377            self.state.in_table = None;
378            self.events.push(ParseEvent::TableEnd);
379        }
380    }
381
382    // =========================================================================
383    // Code block parsing
384    // =========================================================================
385
386    fn parse_in_code_block(&mut self, line: &str) {
387        // Check for closing fence
388        if let Some(ref fence) = self.code_fence.clone() {
389            if let Some(caps) = CODE_FENCE_END_RE.captures(line) {
390                let end_fence = caps.get(1).map(|m| m.as_str()).unwrap_or("");
391                // Match fence type: ``` with ```, </pre> with <pre>
392                let matches = (fence.starts_with('`') && end_fence.starts_with('`'))
393                    || (fence.starts_with('~') && end_fence.starts_with('~'))
394                    || (fence == "<pre>" && end_fence == "</pre>");
395
396                if matches {
397                    self.events.push(ParseEvent::CodeBlockEnd);
398                    self.state.exit_code_block();
399                    self.code_fence = None;
400                    return;
401                }
402            }
403        }
404
405        // For space-indented code, check if we've dedented
406        if self.state.in_code == Some(Code::Spaces) {
407            let indent = line.len() - line.trim_start().len();
408            if indent < 4 && !line.trim().is_empty() {
409                self.events.push(ParseEvent::CodeBlockEnd);
410                self.state.exit_code_block();
411                // Re-parse this line - need to do it after we return
412                // For now, just parse inline content
413                self.parse_inline_content(line);
414                return;
415            }
416        }
417
418        // Emit code line (strip indent for space-indented code)
419        let code_line = if self.state.in_code == Some(Code::Spaces) {
420            if line.len() >= 4 {
421                line[4..].to_string()
422            } else {
423                line.to_string()
424            }
425        } else {
426            line.to_string()
427        };
428
429        self.events.push(ParseEvent::CodeBlockLine(code_line));
430    }
431
432    fn try_parse_code_fence(&mut self, line: &str) -> bool {
433        if let Some(caps) = CODE_FENCE_RE.captures(line) {
434            let fence = caps.get(1).map(|m| m.as_str()).unwrap_or("```");
435            let lang = caps.get(2).map(|m| m.as_str()).filter(|s| !s.is_empty());
436            let indent = line.len() - line.trim_start().len();
437
438            self.code_fence = Some(fence.to_string());
439            self.state.code_indent = indent;
440            self.state.enter_code_block(
441                Code::Backtick,
442                lang.map(|s| s.to_string()).or_else(|| Some("text".to_string())),
443            );
444
445            self.events.push(ParseEvent::CodeBlockStart {
446                language: lang.map(|s| s.to_string()),
447                indent,
448            });
449            true
450        } else {
451            false
452        }
453    }
454
455    fn try_parse_space_code(&mut self, line: &str, was_prev_empty: bool) -> bool {
456        // Space-indented code only when CodeSpaces is enabled
457        if !self.state.code_spaces {
458            return false;
459        }
460
461        // Only after empty line, and not in a list
462        if !was_prev_empty || self.state.in_list {
463            return false;
464        }
465
466        if SPACE_CODE_RE.is_match(line) {
467            self.state.enter_code_block(Code::Spaces, Some("text".to_string()));
468            self.events.push(ParseEvent::CodeBlockStart {
469                language: Some("text".to_string()),
470                indent: 4,
471            });
472            // Also emit the first line
473            let code_line = if line.len() >= 4 { &line[4..] } else { line };
474            self.events.push(ParseEvent::CodeBlockLine(code_line.to_string()));
475            true
476        } else {
477            false
478        }
479    }
480
481    // =========================================================================
482    // Think/blockquote parsing
483    // =========================================================================
484
485    fn parse_in_think_block(&mut self, line: &str) {
486        // Check for end of think block (various formats)
487        if line.trim() == "</think>" || line.trim() == "</think▷" || line.trim() == "◁/think▷" {
488            self.events.push(ParseEvent::ThinkBlockEnd);
489            self.state.exit_block();
490        } else {
491            self.events.push(ParseEvent::ThinkBlockLine(line.to_string()));
492        }
493    }
494
495    fn try_parse_block(&mut self, line: &str) -> bool {
496        if let Some(caps) = BLOCK_RE.captures(line) {
497            let marker = caps.get(1).map(|m| m.as_str()).unwrap_or("");
498            let content = caps.get(3).map(|m| m.as_str()).unwrap_or("");
499
500            // Check for think block variants
501            if marker.contains("think") {
502                if marker.contains('/') {
503                    // End of think block
504                    if self.state.block_type == Some(BlockType::Think) {
505                        self.events.push(ParseEvent::ThinkBlockEnd);
506                        self.state.exit_block();
507                    }
508                    return true;
509                } else {
510                    // Start of think block
511                    self.state.enter_block(BlockType::Think);
512                    self.events.push(ParseEvent::ThinkBlockStart);
513                    if !content.trim().is_empty() {
514                        self.events.push(ParseEvent::ThinkBlockLine(content.to_string()));
515                    }
516                    return true;
517                }
518            }
519
520            // Regular blockquote
521            let depth = marker.matches('>').count();
522            if depth > 0 {
523                if self.state.block_depth != depth {
524                    if depth > self.state.block_depth {
525                        for _ in self.state.block_depth..depth {
526                            self.state.enter_block(BlockType::Quote);
527                        }
528                        self.events.push(ParseEvent::BlockquoteStart { depth });
529                    } else {
530                        for _ in depth..self.state.block_depth {
531                            self.state.exit_block();
532                        }
533                    }
534                }
535                self.events.push(ParseEvent::BlockquoteLine(content.to_string()));
536                return true;
537            }
538        }
539
540        // End blockquote if we were in one and this line doesn't continue it
541        if self.state.block_depth > 0 && self.state.block_type == Some(BlockType::Quote) {
542            while self.state.block_depth > 0 {
543                self.state.exit_block();
544            }
545            self.events.push(ParseEvent::BlockquoteEnd);
546        }
547
548        false
549    }
550
551    // =========================================================================
552    // Other block parsing
553    // =========================================================================
554
555    fn try_parse_heading(&mut self, line: &str) -> bool {
556        if let Some(caps) = HEADING_RE.captures(line) {
557            let hashes = caps.get(1).map(|m| m.as_str()).unwrap_or("");
558            let content = caps.get(2).map(|m| m.as_str()).unwrap_or("");
559            let level = hashes.len().min(6) as u8;
560
561            self.events.push(ParseEvent::Heading {
562                level,
563                content: content.to_string(),
564            });
565            true
566        } else {
567            false
568        }
569    }
570
571    fn try_parse_hr(&mut self, line: &str) -> bool {
572        if HR_RE.is_match(line.trim()) {
573            self.events.push(ParseEvent::HorizontalRule);
574            true
575        } else {
576            false
577        }
578    }
579
580    fn try_parse_list_item(&mut self, line: &str) -> bool {
581        if let Some(caps) = LIST_ITEM_RE.captures(line) {
582            let indent_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
583            let bullet_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
584            let content = caps.get(3).map(|m| m.as_str()).unwrap_or("");
585
586            let indent = indent_str.len();
587            let bullet = ListBullet::parse(bullet_str).unwrap_or(ListBullet::Dash);
588
589            // Update list_indent_text (width of bullet + space)
590            self.state.list_indent_text = bullet_str.len();
591
592            let list_type = if bullet.is_ordered() {
593                ListType::Ordered
594            } else {
595                ListType::Bullet
596            };
597
598            // Pop items with greater or equal indent (for same-level items)
599            while let Some((stack_indent, _)) = self.state.list_item_stack.last() {
600                if *stack_indent > indent {
601                    self.state.pop_list();
602                } else {
603                    break;
604                }
605            }
606
607            // Push new level if indented further than current, or if stack is empty
608            let need_push = self.state.list_item_stack.last()
609                .map(|(i, _)| indent > *i)
610                .unwrap_or(true);
611
612            if need_push {
613                self.state.push_list(indent, list_type);
614            }
615
616            // For ordered lists, get the next number
617            let final_bullet = if let ListBullet::Ordered(_) = bullet {
618                ListBullet::Ordered(self.state.next_list_number().unwrap_or(1))
619            } else {
620                bullet
621            };
622
623            self.events.push(ParseEvent::ListItem {
624                indent,
625                bullet: final_bullet,
626                content: content.to_string(),
627            });
628            true
629        } else {
630            false
631        }
632    }
633
634    fn exit_list_context(&mut self) {
635        while self.state.in_list {
636            self.state.pop_list();
637        }
638        self.events.push(ParseEvent::ListEnd);
639    }
640
641    fn try_parse_table(&mut self, line: &str) -> bool {
642        if let Some(caps) = TABLE_ROW_RE.captures(line) {
643            let inner = caps.get(1).map(|m| m.as_str()).unwrap_or("");
644
645            // Check if this is a separator row
646            if TABLE_SEP_RE.is_match(inner) {
647                if self.table_state == Some(TableState::Header) {
648                    self.table_state = Some(TableState::Body);
649                    self.state.in_table = Some(Code::Body);
650                    self.events.push(ParseEvent::TableSeparator);
651                    return true;
652                }
653            }
654
655            let cells: Vec<String> = inner.split('|').map(|s| s.trim().to_string()).collect();
656
657            match self.table_state {
658                None => {
659                    // First row is header
660                    self.table_state = Some(TableState::Header);
661                    self.state.in_table = Some(Code::Header);
662                    self.events.push(ParseEvent::TableHeader(cells));
663                }
664                Some(TableState::Header) => {
665                    // If we see another row before separator, it's still header
666                    // (some tables have multi-line headers)
667                    self.events.push(ParseEvent::TableHeader(cells));
668                }
669                Some(TableState::Body) => {
670                    self.events.push(ParseEvent::TableRow(cells));
671                }
672            }
673            return true;
674        }
675
676        // End table if we were in one
677        if self.table_state.is_some() {
678            self.table_state = None;
679            self.state.in_table = None;
680            self.events.push(ParseEvent::TableEnd);
681        }
682
683        false
684    }
685
686    fn parse_inline_content(&mut self, line: &str) {
687        let elements = self.inline_parser.parse(line);
688
689        for element in elements {
690            let event = match element {
691                InlineElement::Text(s) => ParseEvent::Text(s),
692                InlineElement::Bold(s) => ParseEvent::Bold(s),
693                InlineElement::Italic(s) => ParseEvent::Italic(s),
694                InlineElement::BoldItalic(s) => ParseEvent::BoldItalic(s),
695                InlineElement::Underline(s) => ParseEvent::Underline(s),
696                InlineElement::Strikeout(s) => ParseEvent::Strikeout(s),
697                InlineElement::Code(s) => ParseEvent::InlineCode(s),
698                InlineElement::Link { text, url } => ParseEvent::Link { text, url },
699                InlineElement::Image { alt, url } => ParseEvent::Image { alt, url },
700                InlineElement::Footnote(s) => ParseEvent::Footnote(s),
701            };
702            self.events.push(event);
703        }
704
705        self.events.push(ParseEvent::Newline);
706    }
707
708    /// Parse a complete document.
709    pub fn parse_document(&mut self, content: &str) -> Vec<ParseEvent> {
710        let mut all_events = Vec::new();
711        for line in content.lines() {
712            all_events.extend(self.parse_line(line));
713        }
714        all_events.extend(self.finalize());
715        all_events
716    }
717
718    /// Finalize parsing, closing any open blocks.
719    pub fn finalize(&mut self) -> Vec<ParseEvent> {
720        self.events.clear();
721
722        if self.state.is_in_code() {
723            self.events.push(ParseEvent::CodeBlockEnd);
724            self.state.exit_code_block();
725            self.code_fence = None;
726        }
727
728        if self.state.block_type == Some(BlockType::Think) {
729            self.events.push(ParseEvent::ThinkBlockEnd);
730            self.state.exit_block();
731        }
732
733        if self.state.block_depth > 0 {
734            self.events.push(ParseEvent::BlockquoteEnd);
735            while self.state.block_depth > 0 {
736                self.state.exit_block();
737            }
738        }
739
740        if self.state.in_list {
741            self.exit_list_context();
742        }
743
744        if self.table_state.is_some() {
745            self.table_state = None;
746            self.state.in_table = None;
747            self.events.push(ParseEvent::TableEnd);
748        }
749
750        self.take_events()
751    }
752
753    /// Reset the parser to initial state.
754    pub fn reset(&mut self) {
755        self.state = ParseState::new();
756        self.inline_parser.reset();
757        self.code_fence = None;
758        self.table_state = None;
759        self.events.clear();
760        self.prev_was_empty = false;
761    }
762}
763
764// =============================================================================
765// Tests
766// =============================================================================
767
/// Unit tests driving the parser line by line and checking emitted events.
#[cfg(test)]
mod tests {
    use super::*;

    /// A `#` line yields a level-1 heading with the trailing text as content.
    #[test]
    fn test_parse_heading() {
        let mut parser = Parser::new();
        let events = parser.parse_line("# Hello World");
        assert!(events.iter().any(|e| matches!(
            e, ParseEvent::Heading { level: 1, content } if content == "Hello World"
        )));
    }

    /// A backtick fence opens a block with its language tag, passes lines
    /// through verbatim, and is closed by a matching fence.
    #[test]
    fn test_parse_code_block() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("```rust");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::CodeBlockStart { language: Some(l), .. } if l == "rust")));
        let e2 = parser.parse_line("let x = 1;");
        assert!(e2.iter().any(|e| matches!(e, ParseEvent::CodeBlockLine(s) if s == "let x = 1;")));
        let e3 = parser.parse_line("```");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    /// `<pre>` / `</pre>` behave like a code fence pair.
    #[test]
    fn test_parse_pre_tag() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("<pre>");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        let e2 = parser.parse_line("code");
        assert!(e2.iter().any(|e| matches!(e, ParseEvent::CodeBlockLine(_))));
        let e3 = parser.parse_line("</pre>");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    /// With the feature enabled, a four-space line after a blank line starts
    /// a code block and is emitted without its indent.
    #[test]
    fn test_space_indented_code() {
        let mut parser = Parser::new();
        parser.set_code_spaces(true);
        parser.parse_line(""); // Empty line first
        let events = parser.parse_line("    let x = 1;");
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockLine(s) if s == "let x = 1;")));
    }

    /// Only the first blank line of a run produces `EmptyLine`; a non-blank
    /// line re-arms the collapsing.
    #[test]
    fn test_empty_line_collapsing() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::EmptyLine)));
        let e2 = parser.parse_line("");
        assert!(e2.is_empty()); // Collapsed
        let e3 = parser.parse_line("text");
        assert!(!e3.is_empty());
        let e4 = parser.parse_line("");
        assert!(e4.iter().any(|e| matches!(e, ParseEvent::EmptyLine)));
    }

    /// The unicode-delimited think tag opens a think block.
    #[test]
    fn test_parse_think_block_unicode() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("◁think▷");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::ThinkBlockStart)));
    }

    /// A dash item yields a `ListItem` with a dash bullet and its content.
    #[test]
    fn test_parse_list() {
        let mut parser = Parser::new();
        let events = parser.parse_line("- Item one");
        assert!(events.iter().any(|e| matches!(
            e, ParseEvent::ListItem { bullet: ListBullet::Dash, content, .. } if content == "Item one"
        )));
    }

    /// A nested item reports the width of its leading whitespace as indent.
    #[test]
    fn test_parse_nested_list() {
        let mut parser = Parser::new();
        parser.parse_line("- Item 1");
        let e2 = parser.parse_line("  - Nested");
        // Nested item should have indent 2
        assert!(e2.iter().any(|e| matches!(
            e, ParseEvent::ListItem { indent: 2, .. }
        )));
    }

    /// Consecutive ordered items are numbered 1, 2, ... by the parser.
    #[test]
    fn test_parse_ordered_list_numbering() {
        let mut parser = Parser::new();
        parser.parse_line("1. First");
        let e2 = parser.parse_line("2. Second");
        // Should auto-number
        assert!(e2.iter().any(|e| matches!(
            e, ParseEvent::ListItem { bullet: ListBullet::Ordered(2), .. }
        )));
    }

    /// A `>` line yields its text without the marker.
    #[test]
    fn test_parse_blockquote() {
        let mut parser = Parser::new();
        let events = parser.parse_line("> Quote text");
        assert!(events.iter().any(|e| matches!(e, ParseEvent::BlockquoteLine(s) if s == "Quote text")));
    }

    /// Two `>` markers open a quote at depth 2.
    #[test]
    fn test_parse_nested_blockquote() {
        let mut parser = Parser::new();
        let events = parser.parse_line(">> Nested quote");
        assert!(events.iter().any(|e| matches!(e, ParseEvent::BlockquoteStart { depth: 2 })));
    }

    /// All three rule styles are recognised.
    #[test]
    fn test_parse_hr() {
        let mut parser = Parser::new();
        assert!(parser.parse_line("---").iter().any(|e| matches!(e, ParseEvent::HorizontalRule)));
        assert!(parser.parse_line("***").iter().any(|e| matches!(e, ParseEvent::HorizontalRule)));
        assert!(parser.parse_line("___").iter().any(|e| matches!(e, ParseEvent::HorizontalRule)));
    }

    /// Header row, separator row and body row produce their own events.
    #[test]
    fn test_parse_table() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("| A | B | C |");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::TableHeader(_))));
        let e2 = parser.parse_line("|---|---|---|");
        assert!(e2.iter().any(|e| matches!(e, ParseEvent::TableSeparator)));
        let e3 = parser.parse_line("| 1 | 2 | 3 |");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::TableRow(_))));
    }

    /// An ASCII think block: start tag, content line, end tag.
    #[test]
    fn test_parse_think_block() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("<think>");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::ThinkBlockStart)));
        let e2 = parser.parse_line("Thinking...");
        assert!(e2.iter().any(|e| matches!(e, ParseEvent::ThinkBlockLine(s) if s == "Thinking...")));
        let e3 = parser.parse_line("</think>");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::ThinkBlockEnd)));
    }

    /// The indent of the first line is stripped before block detection.
    #[test]
    fn test_first_indent_stripping() {
        let mut parser = Parser::new();
        // First line has 4 spaces indent
        let e1 = parser.parse_line("    # Hello");
        // Should strip the 4 spaces and parse as heading
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::Heading { level: 1, content } if content == "Hello")));
    }

    /// A whole document yields heading and code block events.
    #[test]
    fn test_parse_document() {
        let mut parser = Parser::new();
        let doc = "# Title\n\nSome text.\n\n```\ncode\n```";
        let events = parser.parse_document(doc);
        assert!(events.iter().any(|e| matches!(e, ParseEvent::Heading { level: 1, .. })));
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    /// `finalize` closes a code block that was never closed in the input.
    #[test]
    fn test_finalize_closes_blocks() {
        let mut parser = Parser::new();
        parser.parse_line("```");
        parser.parse_line("code");
        let events = parser.finalize();
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    /// Spot-check the block/inline classification helpers.
    #[test]
    fn test_is_block_is_inline() {
        assert!(ParseEvent::Heading { level: 1, content: "x".to_string() }.is_block());
        assert!(ParseEvent::CodeBlockStart { language: None, indent: 0 }.is_block());
        assert!(ParseEvent::Text("x".to_string()).is_inline());
        assert!(ParseEvent::Bold("x".to_string()).is_inline());
    }
}