1pub mod entities;
30pub mod inline;
31pub mod tokenizer;
32
33pub use entities::decode_html_entities;
34pub use inline::{format_line, InlineElement, InlineParser};
35pub use tokenizer::{cjk_count, is_cjk, not_text, Token, Tokenizer};
36
37use regex::Regex;
38use std::sync::LazyLock;
39use streamdown_core::{BlockType, Code, ListType, ParseState};
40
/// Opening line of a fenced code block: optional leading whitespace, a fence
/// (three or more backticks, three or more tildes, or a literal `<pre>`), then
/// an optional whitespace-free language tag.
/// Group 1 is the fence, group 2 the (possibly empty) tag.
static CODE_FENCE_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*(```+|~~~+|<pre>)\s*([^\s]*)\s*$").unwrap());

/// Closing line of a fenced code block: a fence (or `</pre>`) with nothing but
/// whitespace around it. Group 1 is the fence.
static CODE_FENCE_END_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*(```+|~~~+|</pre>)\s*$").unwrap());

/// First line of a space-indented code block: a literal leading space, any
/// further whitespace, then a character that is neither whitespace nor `*`.
/// NOTE(review): the code that uses this strips four leading characters, so
/// the literal space run here was presumably four spaces — confirm.
static SPACE_CODE_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^ \s*[^\s*]").unwrap());

/// ATX-style heading: one to six `#`, at least one whitespace character, then
/// the heading text. Group 1 is the hashes, group 2 the text.
static HEADING_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(#{1,6})\s+(.*)$").unwrap());

/// List item: group 1 is the leading whitespace, group 2 the bullet (`+`, `*`,
/// `-`, `+` followed by dashes, or digits followed by `.`), group 3 the text.
static LIST_ITEM_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)([+*-]|\+-+|\d+\.)\s+(.*)$").unwrap());

/// Blockquote or think-block marker. Group 1 is the marker (a run of `>` or a
/// think tag in ASCII or `◁ ▷` form), group 3 the remainder of the line.
static BLOCK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*((>\s*)+|[◁<].?think[>▷]|</?.?think[>▷]?)(.*)$").unwrap());

/// Horizontal rule: three or more `-`, `*` or `_`, optionally followed by
/// whitespace.
static HR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(---+|\*\*\*+|___+)\s*$").unwrap());

/// Table row: a line that starts and ends with `|` (ignoring surrounding
/// whitespace). Group 1 is everything between the outer pipes.
static TABLE_ROW_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\|(.+)\|\s*$").unwrap());

/// Table separator content: only whitespace, `|`, `:` and `-`.
static TABLE_SEP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[\s|:-]+$").unwrap());
75
/// The marker that introduces a list item.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListBullet {
    /// A `-` bullet.
    Dash,
    /// A `*` bullet.
    Asterisk,
    /// A `+` bullet.
    Plus,
    /// A `+` followed by one or more `-` (for example `+-` or `+--`).
    PlusExpand,
    /// Digits followed by a single `.`; carries the parsed number.
    Ordered(usize),
}

impl ListBullet {
    /// Parses a list marker, ignoring surrounding whitespace.
    ///
    /// Returns `None` for anything that is not one of the recognized markers.
    /// Ordered markers must be ASCII digits followed by exactly one `.`:
    /// inputs such as `"+5."` (which integer parsing alone would accept
    /// because of the leading sign) or `"1.."` are rejected, as are numbers
    /// that do not fit in `usize`.
    pub fn parse(s: &str) -> Option<Self> {
        let marker = s.trim();

        match marker {
            "-" => return Some(Self::Dash),
            "*" => return Some(Self::Asterisk),
            "+" => return Some(Self::Plus),
            _ => {}
        }

        // A bare "+" was handled above, so a successful strip leaves at least
        // one character to check.
        if let Some(tail) = marker.strip_prefix('+') {
            if tail.chars().all(|c| c == '-') {
                return Some(Self::PlusExpand);
            }
        }

        // `strip_suffix` removes exactly one dot, so "1.." leaves "1." which
        // then fails the digit check.
        let digits = marker.strip_suffix('.')?;
        if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        digits.parse().ok().map(Self::Ordered)
    }

    /// Returns `true` for [`ListBullet::Ordered`] markers.
    pub fn is_ordered(&self) -> bool {
        matches!(self, Self::Ordered(_))
    }
}
119
/// Which part of a table the parser is currently inside.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TableState {
    /// A header row has been seen and the separator row has not.
    Header,
    /// The separator row has been seen; further rows are body rows.
    Body,
}
128
/// An event produced by [`Parser`] while consuming input line by line.
///
/// The first group of variants carries inline content; the remaining variants
/// describe block structure (see [`ParseEvent::is_inline`]).
#[derive(Debug, Clone, PartialEq)]
pub enum ParseEvent {
    /// Plain text.
    Text(String),
    /// Inline code span.
    InlineCode(String),
    /// Bold text.
    Bold(String),
    /// Italic text.
    Italic(String),
    /// Underlined text.
    Underline(String),
    /// Struck-out text.
    Strikeout(String),
    /// Text that is both bold and italic.
    BoldItalic(String),
    /// Hyperlink with its display text and target.
    Link {
        text: String,
        url: String,
    },
    /// Image with its alt text and source.
    Image {
        alt: String,
        url: String,
    },
    /// Footnote reference.
    Footnote(String),

    /// Heading; `level` is the number of leading `#` characters (1 to 6).
    Heading {
        level: u8,
        content: String,
    },
    /// Start of a code block; `language` is the tag given on the fence, if
    /// any, and `indent` the leading whitespace count of the opening line.
    CodeBlockStart {
        language: Option<String>,
        indent: usize,
    },
    /// One line inside a code block.
    CodeBlockLine(String),
    /// End of the current code block.
    CodeBlockEnd,
    /// A list item; `indent` is the count of leading whitespace characters.
    ListItem {
        indent: usize,
        bullet: ListBullet,
        content: String,
    },
    /// End of the current list (all nesting levels).
    ListEnd,
    /// Cells of a table header row.
    TableHeader(Vec<String>),
    /// Cells of a table body row.
    TableRow(Vec<String>),
    /// The separator row between table header and body.
    TableSeparator,
    /// End of the current table.
    TableEnd,
    /// The blockquote nesting grew to `depth`.
    BlockquoteStart {
        depth: usize,
    },
    /// Content of one blockquote line, with the `>` markers removed.
    BlockquoteLine(String),
    /// End of the current blockquote (all nesting levels).
    BlockquoteEnd,
    /// Start of a think block.
    ThinkBlockStart,
    /// One line inside a think block.
    ThinkBlockLine(String),
    /// End of the current think block.
    ThinkBlockEnd,
    /// Horizontal rule.
    HorizontalRule,
    /// A blank line (consecutive blank lines are reported once).
    EmptyLine,
    /// End of a line of inline content.
    Newline,
    /// NOTE(review): not emitted by the parser code in this file; presumably a
    /// prompt string supplied by another component — confirm.
    Prompt(String),
    /// NOTE(review): not emitted by the parser code in this file; presumably a
    /// pre-parsed batch of inline elements — confirm.
    InlineElements(Vec<InlineElement>),
}
185
186impl ParseEvent {
187 pub fn is_block(&self) -> bool {
188 !self.is_inline()
189 }
190
191 pub fn is_inline(&self) -> bool {
192 matches!(
193 self,
194 ParseEvent::Text(_)
195 | ParseEvent::InlineCode(_)
196 | ParseEvent::Bold(_)
197 | ParseEvent::Italic(_)
198 | ParseEvent::Underline(_)
199 | ParseEvent::Strikeout(_)
200 | ParseEvent::BoldItalic(_)
201 | ParseEvent::Link { .. }
202 | ParseEvent::Image { .. }
203 | ParseEvent::Footnote(_)
204 )
205 }
206}
207
/// Line-oriented markdown parser that turns input lines into [`ParseEvent`]s.
#[derive(Debug)]
pub struct Parser {
    /// Shared parse state (code/list/table/block tracking and settings).
    state: ParseState,
    /// Parser used for the inline content of ordinary text lines.
    inline_parser: InlineParser,
    /// The fence string that opened the current fenced code block, if any.
    code_fence: Option<String>,
    /// Position inside the current table, or `None` when not in a table.
    table_state: Option<TableState>,
    /// Events collected for the line currently being parsed.
    events: Vec<ParseEvent>,
    /// Whether the previous line was blank; used to collapse blank-line runs.
    prev_was_empty: bool,
}
223
impl Default for Parser {
    /// Equivalent to [`Parser::new`].
    fn default() -> Self {
        Self::new()
    }
}
229
230impl Parser {
/// Creates a parser with a fresh [`ParseState`] and a default inline parser,
/// outside of any code block or table.
pub fn new() -> Self {
    Self {
        state: ParseState::new(),
        inline_parser: InlineParser::new(),
        code_fence: None,
        table_state: None,
        events: Vec::new(),
        prev_was_empty: false,
    }
}
242
243 pub fn with_state(state: ParseState) -> Self {
245 let inline_parser = InlineParser::with_settings(state.links, state.images);
246 Self {
247 state,
248 inline_parser,
249 code_fence: None,
250 table_state: None,
251 events: Vec::new(),
252 prev_was_empty: false,
253 }
254 }
255
/// Returns a shared reference to the parser's state.
pub fn state(&self) -> &ParseState {
    &self.state
}
/// Returns a mutable reference to the parser's state.
pub fn state_mut(&mut self) -> &mut ParseState {
    &mut self.state
}
262
/// Enables or disables link processing, updating both the parse state and
/// the inline parser so the two stay in agreement.
pub fn set_process_links(&mut self, enabled: bool) {
    self.state.links = enabled;
    self.inline_parser.process_links = enabled;
}
267
/// Enables or disables image processing, updating both the parse state and
/// the inline parser so the two stay in agreement.
pub fn set_process_images(&mut self, enabled: bool) {
    self.state.images = enabled;
    self.inline_parser.process_images = enabled;
}
272
/// Enables or disables recognition of space-indented code blocks.
pub fn set_code_spaces(&mut self, enabled: bool) {
    self.state.code_spaces = enabled;
}
277
278 pub fn parse_line(&mut self, line: &str) -> Vec<ParseEvent> {
280 self.events.clear();
281
282 if self.state.is_in_code() {
284 self.parse_in_code_block(line);
285 return std::mem::take(&mut self.events);
286 }
287
288 if self.state.block_type == Some(BlockType::Think) {
290 self.parse_in_think_block(line);
291 return std::mem::take(&mut self.events);
292 }
293
294 if line.trim().is_empty() {
296 return self.handle_empty_line();
297 }
298
299 let was_prev_empty = self.prev_was_empty;
301 self.prev_was_empty = false;
302 self.state.last_line_empty = false;
303
304 if self.try_parse_space_code(line, was_prev_empty) {
307 return self.take_events();
308 }
309
310 let line = self.strip_first_indent(line);
312
313 if self.try_parse_code_fence(&line) {
315 return self.take_events();
316 }
317 if self.try_parse_block(&line) {
318 return self.take_events();
319 }
320 if self.try_parse_heading(&line) {
321 return self.take_events();
322 }
323 if self.try_parse_hr(&line) {
324 return self.take_events();
325 }
326 if self.try_parse_list_item(&line) {
327 return self.take_events();
328 }
329 if self.try_parse_table(&line) {
330 return self.take_events();
331 }
332
333 self.exit_block_contexts();
335
336 self.parse_inline_content(&line);
338 self.take_events()
339 }
340
/// Moves the buffered events out, leaving an empty buffer behind.
fn take_events(&mut self) -> Vec<ParseEvent> {
    std::mem::take(&mut self.events)
}
344
345 fn strip_first_indent(&mut self, line: &str) -> String {
348 if self.state.first_indent.is_none() && !line.trim().is_empty() {
351 let indent = line.chars().take_while(|c| c.is_whitespace()).count();
352 self.state.first_indent = Some(indent);
353 }
354
355 if let Some(first_indent) = self.state.first_indent {
357 if first_indent > 0 {
358 let current_indent = line.chars().take_while(|c| c.is_whitespace()).count();
359 if current_indent >= first_indent {
360 return line.chars().skip(first_indent).collect();
362 }
363 }
364 }
365
366 line.to_string()
367 }
368
369 fn handle_empty_line(&mut self) -> Vec<ParseEvent> {
371 if self.prev_was_empty {
373 return vec![]; }
375
376 self.prev_was_empty = true;
377 self.state.last_line_empty = true;
378
379 if self.state.block_depth > 0 && self.state.block_type == Some(BlockType::Quote) {
381 while self.state.block_depth > 0 {
382 self.state.exit_block();
383 }
384 self.events.push(ParseEvent::BlockquoteEnd);
385 }
386
387 if self.state.in_list {
389 self.exit_list_context();
390 }
391
392 if self.table_state.is_some() {
394 self.table_state = None;
395 self.state.in_table = None;
396 self.events.push(ParseEvent::TableEnd);
397 }
398
399 self.events.push(ParseEvent::EmptyLine);
400 self.take_events()
401 }
402
403 fn exit_block_contexts(&mut self) {
405 if self.state.in_list {
406 self.exit_list_context();
407 }
408 if self.table_state.is_some() {
409 self.table_state = None;
410 self.state.in_table = None;
411 self.events.push(ParseEvent::TableEnd);
412 }
413 }
414
415 fn parse_in_code_block(&mut self, line: &str) {
420 if let Some(ref fence) = self.code_fence.clone() {
422 if let Some(caps) = CODE_FENCE_END_RE.captures(line) {
423 let end_fence = caps.get(1).map(|m| m.as_str()).unwrap_or("");
424 let matches = (fence.starts_with('`') && end_fence.starts_with('`'))
426 || (fence.starts_with('~') && end_fence.starts_with('~'))
427 || (fence == "<pre>" && end_fence == "</pre>");
428
429 if matches {
430 self.events.push(ParseEvent::CodeBlockEnd);
431 self.state.exit_code_block();
432 self.code_fence = None;
433 return;
434 }
435 }
436 }
437
438 if self.state.in_code == Some(Code::Spaces) {
440 let indent = line.chars().take_while(|c| c.is_whitespace()).count();
441 if indent < 4 && !line.trim().is_empty() {
442 self.events.push(ParseEvent::CodeBlockEnd);
443 self.state.exit_code_block();
444 self.parse_inline_content(line);
447 return;
448 }
449 }
450
451 let code_line = if self.state.in_code == Some(Code::Spaces) {
453 line.chars().skip(4).collect()
454 } else {
455 line.to_string()
456 };
457
458 self.events.push(ParseEvent::CodeBlockLine(code_line));
459 }
460
461 fn try_parse_code_fence(&mut self, line: &str) -> bool {
462 if let Some(caps) = CODE_FENCE_RE.captures(line) {
463 let fence = caps.get(1).map(|m| m.as_str()).unwrap_or("```");
464 let lang = caps.get(2).map(|m| m.as_str()).filter(|s| !s.is_empty());
465 let indent = line.chars().take_while(|c| c.is_whitespace()).count();
466
467 self.code_fence = Some(fence.to_string());
468 self.state.code_indent = indent;
469 self.state.enter_code_block(
470 Code::Backtick,
471 lang.map(|s| s.to_string())
472 .or_else(|| Some("text".to_string())),
473 );
474
475 self.events.push(ParseEvent::CodeBlockStart {
476 language: lang.map(|s| s.to_string()),
477 indent,
478 });
479 true
480 } else {
481 false
482 }
483 }
484
485 fn try_parse_space_code(&mut self, line: &str, was_prev_empty: bool) -> bool {
486 if !self.state.code_spaces {
488 return false;
489 }
490
491 if !was_prev_empty || self.state.in_list {
493 return false;
494 }
495
496 if SPACE_CODE_RE.is_match(line) {
497 self.state
498 .enter_code_block(Code::Spaces, Some("text".to_string()));
499 self.events.push(ParseEvent::CodeBlockStart {
500 language: Some("text".to_string()),
501 indent: 4,
502 });
503 let code_line: String = line.chars().skip(4).collect();
505 self.events.push(ParseEvent::CodeBlockLine(code_line));
506 true
507 } else {
508 false
509 }
510 }
511
512 fn parse_in_think_block(&mut self, line: &str) {
517 if line.trim() == "</think>" || line.trim() == "</think▷" || line.trim() == "◁/think▷"
519 {
520 self.events.push(ParseEvent::ThinkBlockEnd);
521 self.state.exit_block();
522 } else {
523 self.events
524 .push(ParseEvent::ThinkBlockLine(line.to_string()));
525 }
526 }
527
528 fn try_parse_block(&mut self, line: &str) -> bool {
529 if let Some(caps) = BLOCK_RE.captures(line) {
530 let marker = caps.get(1).map(|m| m.as_str()).unwrap_or("");
531 let content = caps.get(3).map(|m| m.as_str()).unwrap_or("");
532
533 if marker.contains("think") {
535 if marker.contains('/') {
536 if self.state.block_type == Some(BlockType::Think) {
538 self.events.push(ParseEvent::ThinkBlockEnd);
539 self.state.exit_block();
540 }
541 return true;
542 } else {
543 self.state.enter_block(BlockType::Think);
545 self.events.push(ParseEvent::ThinkBlockStart);
546 if !content.trim().is_empty() {
547 self.events
548 .push(ParseEvent::ThinkBlockLine(content.to_string()));
549 }
550 return true;
551 }
552 }
553
554 let depth = marker.matches('>').count();
556 if depth > 0 {
557 if self.state.block_depth != depth {
558 if depth > self.state.block_depth {
559 for _ in self.state.block_depth..depth {
560 self.state.enter_block(BlockType::Quote);
561 }
562 self.events.push(ParseEvent::BlockquoteStart { depth });
563 } else {
564 for _ in depth..self.state.block_depth {
565 self.state.exit_block();
566 }
567 }
568 }
569 self.events
570 .push(ParseEvent::BlockquoteLine(content.to_string()));
571 return true;
572 }
573 }
574
575 if self.state.block_depth > 0 && self.state.block_type == Some(BlockType::Quote) {
577 while self.state.block_depth > 0 {
578 self.state.exit_block();
579 }
580 self.events.push(ParseEvent::BlockquoteEnd);
581 }
582
583 false
584 }
585
586 fn try_parse_heading(&mut self, line: &str) -> bool {
591 if let Some(caps) = HEADING_RE.captures(line) {
592 let hashes = caps.get(1).map(|m| m.as_str()).unwrap_or("");
593 let content = caps.get(2).map(|m| m.as_str()).unwrap_or("");
594 let level = hashes.len().min(6) as u8;
595
596 self.events.push(ParseEvent::Heading {
597 level,
598 content: content.to_string(),
599 });
600 true
601 } else {
602 false
603 }
604 }
605
606 fn try_parse_hr(&mut self, line: &str) -> bool {
607 if HR_RE.is_match(line.trim()) {
608 self.events.push(ParseEvent::HorizontalRule);
609 true
610 } else {
611 false
612 }
613 }
614
615 fn try_parse_list_item(&mut self, line: &str) -> bool {
616 if let Some(caps) = LIST_ITEM_RE.captures(line) {
617 let indent_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
618 let bullet_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
619 let content = caps.get(3).map(|m| m.as_str()).unwrap_or("");
620
621 let indent = indent_str.chars().count();
623 let bullet = ListBullet::parse(bullet_str).unwrap_or(ListBullet::Dash);
624
625 self.state.list_indent_text = bullet_str.chars().count();
627
628 let list_type = if bullet.is_ordered() {
629 ListType::Ordered
630 } else {
631 ListType::Bullet
632 };
633
634 while let Some((stack_indent, _)) = self.state.list_item_stack.last() {
636 if *stack_indent > indent {
637 self.state.pop_list();
638 } else {
639 break;
640 }
641 }
642
643 let need_push = self
645 .state
646 .list_item_stack
647 .last()
648 .map(|(i, _)| indent > *i)
649 .unwrap_or(true);
650
651 if need_push {
652 self.state.push_list(indent, list_type);
653 }
654
655 let final_bullet = if let ListBullet::Ordered(_) = bullet {
657 ListBullet::Ordered(self.state.next_list_number().unwrap_or(1))
658 } else {
659 bullet
660 };
661
662 self.events.push(ParseEvent::ListItem {
663 indent,
664 bullet: final_bullet,
665 content: content.to_string(),
666 });
667 true
668 } else {
669 false
670 }
671 }
672
/// Pops list levels until the state reports no open list, then emits a
/// single [`ParseEvent::ListEnd`].
fn exit_list_context(&mut self) {
    // NOTE(review): relies on `pop_list` eventually clearing `in_list`.
    while self.state.in_list {
        self.state.pop_list();
    }
    self.events.push(ParseEvent::ListEnd);
}
679
680 fn try_parse_table(&mut self, line: &str) -> bool {
681 if let Some(caps) = TABLE_ROW_RE.captures(line) {
682 let inner = caps.get(1).map(|m| m.as_str()).unwrap_or("");
683
684 if TABLE_SEP_RE.is_match(inner) && self.table_state == Some(TableState::Header) {
686 self.table_state = Some(TableState::Body);
687 self.state.in_table = Some(Code::Body);
688 self.events.push(ParseEvent::TableSeparator);
689 return true;
690 }
691
692 let cells: Vec<String> = inner.split('|').map(|s| s.trim().to_string()).collect();
693
694 match self.table_state {
695 None => {
696 self.table_state = Some(TableState::Header);
698 self.state.in_table = Some(Code::Header);
699 self.events.push(ParseEvent::TableHeader(cells));
700 }
701 Some(TableState::Header) => {
702 self.events.push(ParseEvent::TableHeader(cells));
705 }
706 Some(TableState::Body) => {
707 self.events.push(ParseEvent::TableRow(cells));
708 }
709 }
710 return true;
711 }
712
713 if self.table_state.is_some() {
715 self.table_state = None;
716 self.state.in_table = None;
717 self.events.push(ParseEvent::TableEnd);
718 }
719
720 false
721 }
722
723 fn parse_inline_content(&mut self, line: &str) {
724 let elements = self.inline_parser.parse(line);
725
726 for element in elements {
727 let event = match element {
728 InlineElement::Text(s) => ParseEvent::Text(s),
729 InlineElement::Bold(s) => ParseEvent::Bold(s),
730 InlineElement::Italic(s) => ParseEvent::Italic(s),
731 InlineElement::BoldItalic(s) => ParseEvent::BoldItalic(s),
732 InlineElement::Underline(s) => ParseEvent::Underline(s),
733 InlineElement::Strikeout(s) => ParseEvent::Strikeout(s),
734 InlineElement::Code(s) => ParseEvent::InlineCode(s),
735 InlineElement::Link { text, url } => ParseEvent::Link { text, url },
736 InlineElement::Image { alt, url } => ParseEvent::Image { alt, url },
737 InlineElement::Footnote(s) => ParseEvent::Footnote(s),
738 };
739 self.events.push(event);
740 }
741
742 self.events.push(ParseEvent::Newline);
743 }
744
745 pub fn parse_document(&mut self, content: &str) -> Vec<ParseEvent> {
747 let mut all_events = Vec::new();
748 for line in content.lines() {
749 all_events.extend(self.parse_line(line));
750 }
751 all_events.extend(self.finalize());
752 all_events
753 }
754
755 pub fn finalize(&mut self) -> Vec<ParseEvent> {
757 self.events.clear();
758
759 if self.state.is_in_code() {
760 self.events.push(ParseEvent::CodeBlockEnd);
761 self.state.exit_code_block();
762 self.code_fence = None;
763 }
764
765 if self.state.block_type == Some(BlockType::Think) {
766 self.events.push(ParseEvent::ThinkBlockEnd);
767 self.state.exit_block();
768 }
769
770 if self.state.block_depth > 0 {
771 self.events.push(ParseEvent::BlockquoteEnd);
772 while self.state.block_depth > 0 {
773 self.state.exit_block();
774 }
775 }
776
777 if self.state.in_list {
778 self.exit_list_context();
779 }
780
781 if self.table_state.is_some() {
782 self.table_state = None;
783 self.state.in_table = None;
784 self.events.push(ParseEvent::TableEnd);
785 }
786
787 self.take_events()
788 }
789
/// Returns the parser to its initial condition: fresh parse state, reset
/// inline parser, no open fence or table, no buffered events.
///
/// NOTE(review): the state is replaced with `ParseState::new()`, so settings
/// stored on it (links, images, code_spaces) revert to that constructor's
/// defaults — confirm this is intended.
pub fn reset(&mut self) {
    self.state = ParseState::new();
    self.inline_parser.reset();
    self.code_fence = None;
    self.table_state = None;
    self.events.clear();
    self.prev_was_empty = false;
}
799}
800
// Unit tests for the line parser.
//
// NOTE(review): several literals below depend on the exact leading whitespace
// of the input (including, judging by the assertions, multi-byte whitespace
// characters). Verify each literal against its assertions before relying on
// these tests.
#[cfg(test)]
mod tests {
    use super::*;

    // A single `#` heading yields a level-1 heading with its text.
    #[test]
    fn test_parse_heading() {
        let mut parser = Parser::new();
        let events = parser.parse_line("# Hello World");
        assert!(events.iter().any(|e| matches!(
            e, ParseEvent::Heading { level: 1, content } if content == "Hello World"
        )));
    }

    // A backtick fence opens a code block, passes lines through, and closes.
    #[test]
    fn test_parse_code_block() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("```rust");
        assert!(e1.iter().any(
            |e| matches!(e, ParseEvent::CodeBlockStart { language: Some(l), .. } if l == "rust")
        ));
        let e2 = parser.parse_line("let x = 1;");
        assert!(e2
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockLine(s) if s == "let x = 1;")));
        let e3 = parser.parse_line("```");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    // `<pre>` / `</pre>` behave like a code fence.
    #[test]
    fn test_parse_pre_tag() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("<pre>");
        assert!(e1
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        let e2 = parser.parse_line("code");
        assert!(e2.iter().any(|e| matches!(e, ParseEvent::CodeBlockLine(_))));
        let e3 = parser.parse_line("</pre>");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    // With code_spaces enabled, an indented line after a blank line starts a
    // code block and the indent is stripped from the emitted line.
    #[test]
    fn test_space_indented_code() {
        let mut parser = Parser::new();
        parser.set_code_spaces(true);
        // The blank line is what allows the next line to start a code block.
        parser.parse_line("");
        // NOTE(review): the assertion expects the indent to be stripped to
        // "let x = 1;", which needs a four-character indent — confirm literal.
        let events = parser.parse_line(" let x = 1;");
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockLine(s) if s == "let x = 1;")));
    }

    // Consecutive blank lines produce a single EmptyLine event.
    #[test]
    fn test_empty_line_collapsing() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::EmptyLine)));
        let e2 = parser.parse_line("");
        // The second blank line in a row is swallowed.
        assert!(e2.is_empty());
        let e3 = parser.parse_line("text");
        assert!(!e3.is_empty());
        let e4 = parser.parse_line("");
        assert!(e4.iter().any(|e| matches!(e, ParseEvent::EmptyLine)));
    }

    // The `◁think▷` form of the tag opens a think block.
    #[test]
    fn test_parse_think_block_unicode() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("◁think▷");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::ThinkBlockStart)));
    }

    // A dash bullet yields a ListItem with the Dash bullet and its text.
    #[test]
    fn test_parse_list() {
        let mut parser = Parser::new();
        let events = parser.parse_line("- Item one");
        assert!(events.iter().any(|e| matches!(
            e, ParseEvent::ListItem { bullet: ListBullet::Dash, content, .. } if content == "Item one"
        )));
    }

    // A nested item reports its own indent.
    #[test]
    fn test_parse_nested_list() {
        let mut parser = Parser::new();
        parser.parse_line("- Item 1");
        // NOTE(review): the assertion expects indent 2 — confirm the literal's
        // leading whitespace.
        let e2 = parser.parse_line(" - Nested");
        assert!(e2
            .iter()
            .any(|e| matches!(e, ParseEvent::ListItem { indent: 2, .. })));
    }

    // The second ordered item is numbered 2.
    #[test]
    fn test_parse_ordered_list_numbering() {
        let mut parser = Parser::new();
        parser.parse_line("1. First");
        let e2 = parser.parse_line("2. Second");
        assert!(e2.iter().any(|e| matches!(
            e,
            ParseEvent::ListItem {
                bullet: ListBullet::Ordered(2),
                ..
            }
        )));
    }

    // A `>` line yields a BlockquoteLine without the marker.
    #[test]
    fn test_parse_blockquote() {
        let mut parser = Parser::new();
        let events = parser.parse_line("> Quote text");
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::BlockquoteLine(s) if s == "Quote text")));
    }

    // `>>` opens a blockquote at depth 2.
    #[test]
    fn test_parse_nested_blockquote() {
        let mut parser = Parser::new();
        let events = parser.parse_line(">> Nested quote");
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::BlockquoteStart { depth: 2 })));
    }

    // All three horizontal-rule spellings are recognized.
    #[test]
    fn test_parse_hr() {
        let mut parser = Parser::new();
        assert!(parser
            .parse_line("---")
            .iter()
            .any(|e| matches!(e, ParseEvent::HorizontalRule)));
        assert!(parser
            .parse_line("***")
            .iter()
            .any(|e| matches!(e, ParseEvent::HorizontalRule)));
        assert!(parser
            .parse_line("___")
            .iter()
            .any(|e| matches!(e, ParseEvent::HorizontalRule)));
    }

    // Header row, separator row and body row are distinguished.
    #[test]
    fn test_parse_table() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("| A | B | C |");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::TableHeader(_))));
        let e2 = parser.parse_line("|---|---|---|");
        assert!(e2.iter().any(|e| matches!(e, ParseEvent::TableSeparator)));
        let e3 = parser.parse_line("| 1 | 2 | 3 |");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::TableRow(_))));
    }

    // `<think>` ... `</think>` opens, carries lines, and closes.
    #[test]
    fn test_parse_think_block() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("<think>");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::ThinkBlockStart)));
        let e2 = parser.parse_line("Thinking...");
        assert!(e2
            .iter()
            .any(|e| matches!(e, ParseEvent::ThinkBlockLine(s) if s == "Thinking...")));
        let e3 = parser.parse_line("</think>");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::ThinkBlockEnd)));
    }

    // Leading indentation on the first line is stripped before parsing.
    #[test]
    fn test_first_indent_stripping() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line(" # Hello");
        assert!(e1
            .iter()
            .any(|e| matches!(e, ParseEvent::Heading { level: 1, content } if content == "Hello")));
    }

    // A small document produces heading and code-block events end to end.
    #[test]
    fn test_parse_document() {
        let mut parser = Parser::new();
        let doc = "# Title\n\nSome text.\n\n```\ncode\n```";
        let events = parser.parse_document(doc);
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::Heading { level: 1, .. })));
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    // finalize() closes a code block that was never terminated.
    #[test]
    fn test_finalize_closes_blocks() {
        let mut parser = Parser::new();
        parser.parse_line("```");
        parser.parse_line("code");
        let events = parser.finalize();
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    // Block events report is_block, inline events report is_inline.
    #[test]
    fn test_is_block_is_inline() {
        assert!(ParseEvent::Heading {
            level: 1,
            content: "x".to_string()
        }
        .is_block());
        assert!(ParseEvent::CodeBlockStart {
            language: None,
            indent: 0
        }
        .is_block());
        assert!(ParseEvent::Text("x".to_string()).is_inline());
        assert!(ParseEvent::Bold("x".to_string()).is_inline());
    }

    // Indent stripping must not panic when a later line starts with
    // multi-byte whitespace.
    #[test]
    fn test_first_indent_stripping_multibyte_whitespace() {
        let mut parser = Parser::new();

        // NOTE(review): the assertion below expects two bytes of leading
        // whitespace — confirm the literal.
        let line1 = " # Hello";
        assert_eq!(line1.len() - line1.trim_start().len(), 2);
        let _ = parser.parse_line(line1);

        // NOTE(review): the assertion below expects byte offset 2 to fall
        // inside a multi-byte character — confirm the literal.
        let line2 = " World";
        assert!(!line2.is_char_boundary(2));
        let events = parser.parse_line(line2);

        assert!(!events.is_empty());
    }

    // Stripping the code indent must not panic when the fourth byte is not a
    // character boundary.
    #[test]
    fn test_space_indented_code_strip_with_fullwidth() {
        let mut parser = Parser::new();
        parser.set_code_spaces(true);

        parser.parse_line("");

        let line1 = " first line of code";
        let events1 = parser.parse_line(line1);
        assert!(events1.iter().any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));

        // NOTE(review): the assertion below expects byte offset 4 to fall
        // inside a multi-byte character — confirm the literal.
        let line2 = " second line";
        assert!(!line2.is_char_boundary(4));
        let events2 = parser.parse_line(line2);

        assert!(!events2.is_empty());
    }

    // List indent is measured in characters, not bytes.
    #[test]
    fn test_list_item_indent_with_fullwidth_spaces() {
        let mut parser = Parser::new();

        let events1 = parser.parse_line("- top level");
        assert!(events1.iter().any(|e| matches!(e, ParseEvent::ListItem { indent: 0, .. })));

        // NOTE(review): per the assertion message the indent is presumably a
        // single three-byte whitespace character — confirm the literal.
        let line2 = " - nested item";
        let events2 = parser.parse_line(line2);

        let list_item = events2.iter().find(|e| matches!(e, ParseEvent::ListItem { .. }));
        assert!(list_item.is_some(), "Should have parsed list item");

        if let Some(ParseEvent::ListItem { indent, .. }) = list_item {
            assert_eq!(
                *indent, 1,
                "Indent should be 1 (char-based), not 3 (byte-based)"
            );
        }
    }

    // A line whose indent is wide in bytes but short in characters still ends
    // a space-indented code block.
    #[test]
    fn test_space_indented_code_dedent_with_fullwidth() {
        let mut parser = Parser::new();
        parser.set_code_spaces(true);

        parser.parse_line("");

        let events1 = parser.parse_line(" code line");
        assert!(events1.iter().any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));

        // NOTE(review): the assertions below expect six bytes / two characters
        // of leading whitespace — confirm the literal.
        let line2 = " not code anymore";
        let byte_indent = line2.len() - line2.trim_start().len();
        let char_indent = line2.chars().take_while(|c| c.is_whitespace()).count();
        assert_eq!(byte_indent, 6);
        assert_eq!(char_indent, 2);
        let events2 = parser.parse_line(line2);

        assert!(
            events2.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)),
            "Should have exited code block with only 2-char indent"
        );
    }
}