pub mod entities;
pub mod inline;
pub mod tokenizer;
pub use entities::decode_html_entities;
pub use inline::{format_line, InlineElement, InlineParser};
pub use tokenizer::{cjk_count, is_cjk, not_text, Token, Tokenizer};
use regex::Regex;
use std::sync::LazyLock;
use streamdown_core::{BlockType, Code, ListType, ParseState};
// Line-classification patterns. Every pattern is a constant literal, so the
// `unwrap()` calls can only fail if the literal itself is edited incorrectly.

/// Opening code fence: a run of three or more backticks or tildes, or a
/// literal `<pre>`, optionally followed by a language tag (capture 2).
static CODE_FENCE_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*(```+|~~~+|<pre>)\s*([^\s]*)\s*$").unwrap());

/// Closing code fence: a run of three or more backticks or tildes, or a
/// literal `</pre>`, with nothing else on the line.
static CODE_FENCE_END_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*(```+|~~~+|</pre>)\s*$").unwrap());

/// First line of a space-indented code block.
///
/// Requires the full four-space prefix, because `try_parse_space_code` strips
/// four characters and `parse_in_code_block` leaves the block as soon as the
/// indent drops below four. The first visible character must not be `*`.
static SPACE_CODE_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^ {4}\s*[^\s*]").unwrap());

/// ATX heading: one to six `#` (capture 1), whitespace, then the text (capture 2).
static HEADING_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(#{1,6})\s+(.*)$").unwrap());

/// List item: indent (capture 1), bullet marker (capture 2), content (capture 3).
static LIST_ITEM_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^(\s*)([+*-]|\+-+|\d+\.)\s+(.*)$").unwrap());

/// Blockquote markers or a think-block tag (capture 1) followed by the rest of
/// the line (capture 3).
static BLOCK_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^\s*((>\s*)+|[◁<].?think[>▷]|</?.?think[>▷]?)(.*)$").unwrap());

/// Horizontal rule: three or more `-`, `*` or `_` and optional trailing whitespace.
static HR_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(---+|\*\*\*+|___+)\s*$").unwrap());

/// Table row: text enclosed by a leading and a trailing `|` (capture 1 is the inside).
static TABLE_ROW_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\s*\|(.+)\|\s*$").unwrap());

/// Inside of a table separator row: only whitespace, `|`, `:` and `-`.
static TABLE_SEP_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[\s|:-]+$").unwrap());
/// The marker that introduces a list item.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListBullet {
    /// A `-` marker.
    Dash,
    /// A `*` marker.
    Asterisk,
    /// A `+` marker.
    Plus,
    /// A `+` followed by one or more `-` (for example `+--`).
    PlusExpand,
    /// A decimal number followed by a single `.`; carries the number.
    Ordered(usize),
}

impl ListBullet {
    /// Parses a bullet marker, ignoring surrounding whitespace.
    ///
    /// Returns `None` for anything that is not exactly one of the markers
    /// described on the variants. Ordered markers must be ASCII digits followed
    /// by exactly one `.`, so inputs such as `"1.."` or `"+1."` are rejected,
    /// as are numbers that do not fit in `usize`.
    pub fn parse(s: &str) -> Option<Self> {
        let s = s.trim();
        match s {
            "-" => return Some(Self::Dash),
            "*" => return Some(Self::Asterisk),
            "+" => return Some(Self::Plus),
            _ => {}
        }

        // A bare "+" was handled above, so `tail` is never empty here.
        if let Some(tail) = s.strip_prefix('+') {
            return tail.chars().all(|c| c == '-').then_some(Self::PlusExpand);
        }

        let digits = s.strip_suffix('.')?;
        // `usize::from_str` tolerates a leading '+', so check the digits explicitly.
        if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        digits.parse().ok().map(Self::Ordered)
    }

    /// Returns `true` for [`ListBullet::Ordered`].
    pub fn is_ordered(&self) -> bool {
        matches!(self, Self::Ordered(_))
    }
}
/// Which part of a pipe table the parser is currently reading.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TableState {
/// Rows seen so far are header rows; no separator row has been consumed yet.
Header,
/// A separator row has been consumed; further rows are emitted as body rows.
Body,
}
/// One unit of parser output.
///
/// `Parser::parse_line` returns zero or more of these for every input line.
#[derive(Debug, Clone, PartialEq)]
pub enum ParseEvent {
/// Plain text run (converted from `InlineElement::Text`).
Text(String),
/// Inline code span (converted from `InlineElement::Code`).
InlineCode(String),
/// Bold text run.
Bold(String),
/// Italic text run.
Italic(String),
/// Underlined text run.
Underline(String),
/// Struck-out text run.
Strikeout(String),
/// Text run that is both bold and italic.
BoldItalic(String),
/// Hyperlink with its display text and target.
Link {
text: String,
url: String,
},
/// Image with its alt text and source.
Image {
alt: String,
url: String,
},
/// Footnote (converted from `InlineElement::Footnote`).
Footnote(String),
/// Heading line; `level` is the number of leading `#` characters (1 to 6).
Heading {
level: u8,
content: String,
},
/// Start of a code block. `language` is the tag after the fence, `None` when
/// the fence has no tag, and `Some("text")` for space-indented blocks.
CodeBlockStart {
language: Option<String>,
indent: usize,
},
/// One line of code block content.
CodeBlockLine(String),
/// End of the current code block.
CodeBlockEnd,
/// List item; `indent` is the number of leading whitespace characters.
ListItem {
indent: usize,
bullet: ListBullet,
content: String,
},
/// Emitted when the parser leaves list context.
ListEnd,
/// Cells of a table row read before the separator row.
TableHeader(Vec<String>),
/// Cells of a table row read after the separator row.
TableRow(Vec<String>),
/// The separator row between table header and body.
TableSeparator,
/// Emitted when the parser leaves a table.
TableEnd,
/// Emitted when the blockquote depth increases; `depth` is the number of `>`
/// markers on the line.
BlockquoteStart {
depth: usize,
},
/// Content of a blockquote line with the markers removed.
BlockquoteLine(String),
/// Emitted when all blockquote levels are closed.
BlockquoteEnd,
/// Opening think tag.
ThinkBlockStart,
/// One line inside a think block.
ThinkBlockLine(String),
/// Closing think tag, or end of input while a think block is open.
ThinkBlockEnd,
/// Horizontal rule line.
HorizontalRule,
/// First blank line of a run; further consecutive blank lines emit nothing.
EmptyLine,
/// Pushed after the inline events of a text line.
Newline,
// NOTE(review): `Prompt` and `InlineElements` are never constructed in this
// file; presumably they are produced or consumed elsewhere. Confirm.
Prompt(String),
InlineElements(Vec<InlineElement>),
}
impl ParseEvent {
pub fn is_block(&self) -> bool {
!self.is_inline()
}
pub fn is_inline(&self) -> bool {
matches!(
self,
ParseEvent::Text(_)
| ParseEvent::InlineCode(_)
| ParseEvent::Bold(_)
| ParseEvent::Italic(_)
| ParseEvent::Underline(_)
| ParseEvent::Strikeout(_)
| ParseEvent::BoldItalic(_)
| ParseEvent::Link { .. }
| ParseEvent::Image { .. }
| ParseEvent::Footnote(_)
)
}
}
/// Line-oriented Markdown parser that turns each input line into `ParseEvent`s.
#[derive(Debug)]
pub struct Parser {
// Block, list, code and table flags shared with the rest of the crate.
state: ParseState,
// Converts a text line into inline elements.
inline_parser: InlineParser,
// Opening marker of the fenced code block currently open, if any.
code_fence: Option<String>,
// Table position while inside a table, `None` otherwise.
table_state: Option<TableState>,
// Events produced for the line being parsed; drained when the line is done.
events: Vec<ParseEvent>,
// Whether the previous line was blank (collapses blank runs and gates
// space-indented code blocks).
prev_was_empty: bool,
}
impl Default for Parser {
fn default() -> Self {
Self::new()
}
}
impl Parser {
/// Creates a parser with a fresh `ParseState` and a default inline parser.
pub fn new() -> Self {
    let state = ParseState::new();
    let inline_parser = InlineParser::new();
    Self {
        state,
        inline_parser,
        code_fence: None,
        table_state: None,
        events: Vec::new(),
        prev_was_empty: false,
    }
}
/// Creates a parser around an existing `ParseState`.
///
/// The inline parser is configured from the state's `links` and `images` flags.
pub fn with_state(state: ParseState) -> Self {
    // Read the flags before `state` is moved into the struct.
    let (links, images) = (state.links, state.images);
    Self {
        inline_parser: InlineParser::with_settings(links, images),
        state,
        code_fence: None,
        table_state: None,
        events: Vec::new(),
        prev_was_empty: false,
    }
}
pub fn state(&self) -> &ParseState {
&self.state
}
pub fn state_mut(&mut self) -> &mut ParseState {
&mut self.state
}
/// Turns link processing on or off in both the inline parser and the state.
pub fn set_process_links(&mut self, enabled: bool) {
    self.inline_parser.process_links = enabled;
    self.state.links = enabled;
}
/// Turns image processing on or off in both the inline parser and the state.
pub fn set_process_images(&mut self, enabled: bool) {
    self.inline_parser.process_images = enabled;
    self.state.images = enabled;
}
/// Enables or disables recognition of space-indented code blocks.
pub fn set_code_spaces(&mut self, enabled: bool) {
    let state = &mut self.state;
    state.code_spaces = enabled;
}
/// Parses a single line and returns the events it produced.
///
/// Lines inside an open code block or think block are handed straight to the
/// matching handler. Blank lines go to `handle_empty_line`. Any other line is
/// tried, in order, as space-indented code, code fence, blockquote/think tag,
/// heading, horizontal rule, list item and table row; the first match wins.
/// A line that matches none of them closes any open list or table and is
/// parsed as inline content.
pub fn parse_line(&mut self, line: &str) -> Vec<ParseEvent> {
    self.events.clear();

    if self.state.is_in_code() {
        self.parse_in_code_block(line);
        return std::mem::take(&mut self.events);
    }
    if self.state.block_type == Some(BlockType::Think) {
        self.parse_in_think_block(line);
        return std::mem::take(&mut self.events);
    }
    if line.trim().is_empty() {
        return self.handle_empty_line();
    }

    // Remember whether a blank line preceded this one before clearing the flags.
    let followed_blank = self.prev_was_empty;
    self.prev_was_empty = false;
    self.state.last_line_empty = false;

    // Space-indented code is checked on the raw line, before the document
    // indent is stripped.
    if self.try_parse_space_code(line, followed_blank) {
        return self.take_events();
    }

    let stripped = self.strip_first_indent(line);

    // `||` short-circuits, so each recogniser runs only if all earlier ones
    // declined the line.
    let recognised = self.try_parse_code_fence(&stripped)
        || self.try_parse_block(&stripped)
        || self.try_parse_heading(&stripped)
        || self.try_parse_hr(&stripped)
        || self.try_parse_list_item(&stripped)
        || self.try_parse_table(&stripped);

    if !recognised {
        self.exit_block_contexts();
        self.parse_inline_content(&stripped);
    }
    self.take_events()
}
/// Hands the buffered events to the caller and leaves the buffer empty.
fn take_events(&mut self) -> Vec<ParseEvent> {
    let buffered = &mut self.events;
    std::mem::take(buffered)
}
/// Removes the document-wide base indent from `line`.
///
/// The base indent is the number of leading whitespace characters of the
/// first non-blank line seen, recorded in `state.first_indent`. Lines indented
/// at least that much lose exactly that many leading characters; all other
/// lines are returned unchanged. Counts are in characters, not bytes.
fn strip_first_indent(&mut self, line: &str) -> String {
    let leading_ws = |text: &str| text.chars().take_while(|c| c.is_whitespace()).count();

    if self.state.first_indent.is_none() && !line.trim().is_empty() {
        self.state.first_indent = Some(leading_ws(line));
    }

    match self.state.first_indent {
        Some(base) if base > 0 && leading_ws(line) >= base => line.chars().skip(base).collect(),
        _ => line.to_string(),
    }
}
/// Handles a blank line.
///
/// The first blank line of a run closes any open blockquote, list and table
/// and then emits `EmptyLine`. Further consecutive blank lines emit nothing.
fn handle_empty_line(&mut self) -> Vec<ParseEvent> {
    if self.prev_was_empty {
        return Vec::new();
    }
    self.prev_was_empty = true;
    self.state.last_line_empty = true;

    let inside_quote =
        self.state.block_depth > 0 && self.state.block_type == Some(BlockType::Quote);
    if inside_quote {
        while self.state.block_depth > 0 {
            self.state.exit_block();
        }
        self.events.push(ParseEvent::BlockquoteEnd);
    }

    // Closes the list first and the table second.
    self.exit_block_contexts();

    self.events.push(ParseEvent::EmptyLine);
    self.take_events()
}
/// Closes an open list and then an open table, emitting their end events.
fn exit_block_contexts(&mut self) {
    if self.state.in_list {
        self.exit_list_context();
    }
    // `take()` clears the table state and reports whether a table was open.
    if self.table_state.take().is_some() {
        self.state.in_table = None;
        self.events.push(ParseEvent::TableEnd);
    }
}
fn parse_in_code_block(&mut self, line: &str) {
if let Some(ref fence) = self.code_fence.clone() {
if let Some(caps) = CODE_FENCE_END_RE.captures(line) {
let end_fence = caps.get(1).map(|m| m.as_str()).unwrap_or("");
let matches = (fence.starts_with('`') && end_fence.starts_with('`'))
|| (fence.starts_with('~') && end_fence.starts_with('~'))
|| (fence == "<pre>" && end_fence == "</pre>");
if matches {
self.events.push(ParseEvent::CodeBlockEnd);
self.state.exit_code_block();
self.code_fence = None;
return;
}
}
}
if self.state.in_code == Some(Code::Spaces) {
let indent = line.chars().take_while(|c| c.is_whitespace()).count();
if indent < 4 && !line.trim().is_empty() {
self.events.push(ParseEvent::CodeBlockEnd);
self.state.exit_code_block();
self.parse_inline_content(line);
return;
}
}
let code_line = if self.state.in_code == Some(Code::Spaces) {
line.chars().skip(4).collect()
} else {
line.to_string()
};
self.events.push(ParseEvent::CodeBlockLine(code_line));
}
/// Opens a fenced code block if `line` is an opening fence.
///
/// Records the fence marker and the line's indent, enters the code state
/// (with language `"text"` when the fence carries no tag) and emits
/// `CodeBlockStart` with the tag exactly as written. Returns `true` when the
/// line was consumed.
fn try_parse_code_fence(&mut self, line: &str) -> bool {
    let Some(caps) = CODE_FENCE_RE.captures(line) else {
        return false;
    };

    let fence = caps.get(1).map_or("```", |m| m.as_str());
    let language: Option<String> = caps
        .get(2)
        .map(|m| m.as_str())
        .filter(|tag| !tag.is_empty())
        .map(str::to_owned);
    let indent = line.chars().take_while(|c| c.is_whitespace()).count();

    self.code_fence = Some(fence.to_string());
    self.state.code_indent = indent;

    // The state always gets a language; the event keeps `None` for "no tag".
    let state_language = language.clone().unwrap_or_else(|| "text".to_string());
    self.state
        .enter_code_block(Code::Backtick, Some(state_language));

    self.events
        .push(ParseEvent::CodeBlockStart { language, indent });
    true
}
/// Opens a space-indented code block if `line` qualifies.
///
/// This only happens when the feature is enabled, the previous line was
/// blank, no list is open and the line matches `SPACE_CODE_RE`. Emits
/// `CodeBlockStart` followed by the line with its first four characters
/// removed. Returns `true` when the line was consumed.
fn try_parse_space_code(&mut self, line: &str, was_prev_empty: bool) -> bool {
    let eligible = self.state.code_spaces && was_prev_empty && !self.state.in_list;
    if !eligible || !SPACE_CODE_RE.is_match(line) {
        return false;
    }

    let language = "text".to_string();
    self.state
        .enter_code_block(Code::Spaces, Some(language.clone()));
    self.events.push(ParseEvent::CodeBlockStart {
        language: Some(language),
        indent: 4,
    });

    let body: String = line.chars().skip(4).collect();
    self.events.push(ParseEvent::CodeBlockLine(body));
    true
}
/// Handles one line while a think block is open.
///
/// A line that is exactly one of the closing tags (ignoring surrounding
/// whitespace) ends the block; any other line is emitted as `ThinkBlockLine`.
fn parse_in_think_block(&mut self, line: &str) {
    let is_closing_tag = matches!(line.trim(), "</think>" | "</think▷" | "◁/think▷");
    if is_closing_tag {
        self.events.push(ParseEvent::ThinkBlockEnd);
        self.state.exit_block();
        return;
    }
    self.events
        .push(ParseEvent::ThinkBlockLine(line.to_string()));
}
/// Recognises think tags and blockquote lines.
///
/// * Closing think tag: ends the think block if one is open; the line is
///   consumed either way.
/// * Opening think tag: enters a think block and emits any text after the
///   tag as the first `ThinkBlockLine`.
/// * Blockquote: adjusts the nesting to the number of `>` markers (emitting
///   `BlockquoteStart` only when the depth grows) and emits `BlockquoteLine`.
///
/// A line that is none of these closes an open blockquote and returns `false`.
fn try_parse_block(&mut self, line: &str) -> bool {
    if let Some(caps) = BLOCK_RE.captures(line) {
        let marker = caps.get(1).map_or("", |m| m.as_str());
        let rest = caps.get(3).map_or("", |m| m.as_str());

        if marker.contains("think") {
            let is_closing = marker.contains('/');
            if is_closing {
                if self.state.block_type == Some(BlockType::Think) {
                    self.events.push(ParseEvent::ThinkBlockEnd);
                    self.state.exit_block();
                }
            } else {
                self.state.enter_block(BlockType::Think);
                self.events.push(ParseEvent::ThinkBlockStart);
                if !rest.trim().is_empty() {
                    self.events
                        .push(ParseEvent::ThinkBlockLine(rest.to_string()));
                }
            }
            return true;
        }

        let depth = marker.matches('>').count();
        if depth > 0 {
            // Snapshot the depth: the loops below change it while running.
            let current = self.state.block_depth;
            match depth.cmp(&current) {
                std::cmp::Ordering::Greater => {
                    for _ in current..depth {
                        self.state.enter_block(BlockType::Quote);
                    }
                    self.events.push(ParseEvent::BlockquoteStart { depth });
                }
                std::cmp::Ordering::Less => {
                    for _ in depth..current {
                        self.state.exit_block();
                    }
                }
                std::cmp::Ordering::Equal => {}
            }
            self.events
                .push(ParseEvent::BlockquoteLine(rest.to_string()));
            return true;
        }
    }

    // Not a quote line: close a blockquote that is still open.
    let inside_quote =
        self.state.block_depth > 0 && self.state.block_type == Some(BlockType::Quote);
    if inside_quote {
        while self.state.block_depth > 0 {
            self.state.exit_block();
        }
        self.events.push(ParseEvent::BlockquoteEnd);
    }
    false
}
/// Emits a `Heading` event if `line` is an ATX heading.
///
/// The level is the number of leading `#` characters, capped at 6. Returns
/// `true` when the line was consumed.
fn try_parse_heading(&mut self, line: &str) -> bool {
    let Some(caps) = HEADING_RE.captures(line) else {
        return false;
    };
    let hash_count = caps.get(1).map_or(0, |m| m.as_str().len());
    let text = caps.get(2).map_or("", |m| m.as_str());

    self.events.push(ParseEvent::Heading {
        level: hash_count.min(6) as u8,
        content: text.to_string(),
    });
    true
}
/// Emits `HorizontalRule` if the trimmed line is a horizontal rule.
/// Returns `true` when the line was consumed.
fn try_parse_hr(&mut self, line: &str) -> bool {
    let is_rule = HR_RE.is_match(line.trim());
    if is_rule {
        self.events.push(ParseEvent::HorizontalRule);
    }
    is_rule
}
/// Emits a `ListItem` event if `line` is a list item.
///
/// List levels deeper than the item's indent are popped, and a new level is
/// pushed when the item is indented further than the innermost open level
/// (or when no list is open). Ordered items take their number from the
/// state's counter instead of the number written in the source. Returns
/// `true` when the line was consumed.
fn try_parse_list_item(&mut self, line: &str) -> bool {
    let Some(caps) = LIST_ITEM_RE.captures(line) else {
        return false;
    };
    let group = |idx: usize| caps.get(idx).map_or("", |m| m.as_str());
    let (leading, marker, text) = (group(1), group(2), group(3));

    // Indent and marker width are measured in characters, not bytes.
    let indent = leading.chars().count();
    let parsed = ListBullet::parse(marker).unwrap_or(ListBullet::Dash);
    self.state.list_indent_text = marker.chars().count();

    let list_type = match parsed {
        ListBullet::Ordered(_) => ListType::Ordered,
        _ => ListType::Bullet,
    };

    // Leave every level that is indented deeper than this item.
    while self
        .state
        .list_item_stack
        .last()
        .is_some_and(|(depth, _)| *depth > indent)
    {
        self.state.pop_list();
    }

    let opens_level = match self.state.list_item_stack.last() {
        Some((depth, _)) => indent > *depth,
        None => true,
    };
    if opens_level {
        self.state.push_list(indent, list_type);
    }

    let bullet = match parsed {
        ListBullet::Ordered(_) => ListBullet::Ordered(self.state.next_list_number().unwrap_or(1)),
        other => other,
    };

    self.events.push(ParseEvent::ListItem {
        indent,
        bullet,
        content: text.to_string(),
    });
    true
}
/// Pops list levels until the state reports no open list, then emits `ListEnd`.
///
/// `ListEnd` is emitted even when no list was open on entry.
fn exit_list_context(&mut self) {
    loop {
        if !self.state.in_list {
            break;
        }
        self.state.pop_list();
    }
    self.events.push(ParseEvent::ListEnd);
}
/// Handles a pipe-table row.
///
/// While in the header, a row made only of separator characters switches the
/// table to the body and emits `TableSeparator`. Any other row is split on
/// `|` into trimmed cells and emitted as `TableHeader` (before the separator)
/// or `TableRow` (after it). A line that is not a table row closes an open
/// table and returns `false`.
fn try_parse_table(&mut self, line: &str) -> bool {
    let Some(caps) = TABLE_ROW_RE.captures(line) else {
        if self.table_state.take().is_some() {
            self.state.in_table = None;
            self.events.push(ParseEvent::TableEnd);
        }
        return false;
    };
    let inner = caps.get(1).map_or("", |m| m.as_str());

    let in_header = self.table_state == Some(TableState::Header);
    if in_header && TABLE_SEP_RE.is_match(inner) {
        self.table_state = Some(TableState::Body);
        self.state.in_table = Some(Code::Body);
        self.events.push(ParseEvent::TableSeparator);
        return true;
    }

    let cells: Vec<String> = inner
        .split('|')
        .map(|cell| cell.trim().to_string())
        .collect();

    let event = match self.table_state {
        Some(TableState::Body) => ParseEvent::TableRow(cells),
        Some(TableState::Header) => ParseEvent::TableHeader(cells),
        None => {
            // First row of a new table.
            self.table_state = Some(TableState::Header);
            self.state.in_table = Some(Code::Header);
            ParseEvent::TableHeader(cells)
        }
    };
    self.events.push(event);
    true
}
/// Parses `line` as inline content.
///
/// Each inline element is converted to the matching `ParseEvent`, and a
/// `Newline` event is appended after them.
fn parse_inline_content(&mut self, line: &str) {
    let elements = self.inline_parser.parse(line);
    self.events
        .extend(elements.into_iter().map(|element| match element {
            InlineElement::Text(s) => ParseEvent::Text(s),
            InlineElement::Code(s) => ParseEvent::InlineCode(s),
            InlineElement::Bold(s) => ParseEvent::Bold(s),
            InlineElement::Italic(s) => ParseEvent::Italic(s),
            InlineElement::BoldItalic(s) => ParseEvent::BoldItalic(s),
            InlineElement::Underline(s) => ParseEvent::Underline(s),
            InlineElement::Strikeout(s) => ParseEvent::Strikeout(s),
            InlineElement::Link { text, url } => ParseEvent::Link { text, url },
            InlineElement::Image { alt, url } => ParseEvent::Image { alt, url },
            InlineElement::Footnote(s) => ParseEvent::Footnote(s),
        }));
    self.events.push(ParseEvent::Newline);
}
/// Parses a whole document: every line in order, followed by `finalize`.
///
/// Returns all events in the order they were produced.
pub fn parse_document(&mut self, content: &str) -> Vec<ParseEvent> {
    let mut collected: Vec<ParseEvent> = content
        .lines()
        .flat_map(|line| self.parse_line(line))
        .collect();
    collected.extend(self.finalize());
    collected
}
/// Closes everything still open at end of input and returns the closing events.
///
/// In order: code block, think block, blockquote, list, table.
pub fn finalize(&mut self) -> Vec<ParseEvent> {
    self.events.clear();

    if self.state.is_in_code() {
        self.events.push(ParseEvent::CodeBlockEnd);
        self.state.exit_code_block();
        self.code_fence = None;
    }

    if self.state.block_type == Some(BlockType::Think) {
        self.events.push(ParseEvent::ThinkBlockEnd);
        self.state.exit_block();
    }

    if self.state.block_depth > 0 {
        self.events.push(ParseEvent::BlockquoteEnd);
        while self.state.block_depth > 0 {
            self.state.exit_block();
        }
    }

    // Closes the list first and the table second.
    self.exit_block_contexts();

    self.take_events()
}
/// Returns the parser to its initial condition.
///
/// Installs a fresh `ParseState`, resets the inline parser and clears the
/// fence, table, event buffer and blank-line bookkeeping.
pub fn reset(&mut self) {
    self.events.clear();
    self.prev_was_empty = false;
    self.code_fence = None;
    self.table_state = None;
    self.inline_parser.reset();
    self.state = ParseState::new();
}
}
// Unit tests for the line parser.
//
// Whitespace-sensitive fixtures spell out their indentation explicitly and
// write the ideographic space as `\u{3000}`, so that an editor or formatter
// cannot silently alter what the tests exercise.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_heading() {
        let mut parser = Parser::new();
        let events = parser.parse_line("# Hello World");
        assert!(events.iter().any(|e| matches!(
            e, ParseEvent::Heading { level: 1, content } if content == "Hello World"
        )));
    }

    #[test]
    fn test_parse_code_block() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("```rust");
        assert!(e1.iter().any(
            |e| matches!(e, ParseEvent::CodeBlockStart { language: Some(l), .. } if l == "rust")
        ));
        let e2 = parser.parse_line("let x = 1;");
        assert!(e2
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockLine(s) if s == "let x = 1;")));
        let e3 = parser.parse_line("```");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    #[test]
    fn test_parse_pre_tag() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("<pre>");
        assert!(e1
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        let e2 = parser.parse_line("code");
        assert!(e2.iter().any(|e| matches!(e, ParseEvent::CodeBlockLine(_))));
        let e3 = parser.parse_line("</pre>");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    #[test]
    fn test_space_indented_code() {
        let mut parser = Parser::new();
        parser.set_code_spaces(true);
        // Space-indented code is only recognised after a blank line.
        parser.parse_line("");
        let events = parser.parse_line("    let x = 1;");
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockLine(s) if s == "let x = 1;")));
    }

    #[test]
    fn test_empty_line_collapsing() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::EmptyLine)));
        // A second consecutive blank line is swallowed.
        let e2 = parser.parse_line("");
        assert!(e2.is_empty());
        let e3 = parser.parse_line("text");
        assert!(!e3.is_empty());
        let e4 = parser.parse_line("");
        assert!(e4.iter().any(|e| matches!(e, ParseEvent::EmptyLine)));
    }

    #[test]
    fn test_parse_think_block_unicode() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("◁think▷");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::ThinkBlockStart)));
    }

    #[test]
    fn test_parse_list() {
        let mut parser = Parser::new();
        let events = parser.parse_line("- Item one");
        assert!(events.iter().any(|e| matches!(
            e, ParseEvent::ListItem { bullet: ListBullet::Dash, content, .. } if content == "Item one"
        )));
    }

    #[test]
    fn test_parse_nested_list() {
        let mut parser = Parser::new();
        parser.parse_line("- Item 1");
        // Two spaces of indentation.
        let e2 = parser.parse_line("  - Nested");
        assert!(e2
            .iter()
            .any(|e| matches!(e, ParseEvent::ListItem { indent: 2, .. })));
    }

    #[test]
    fn test_parse_ordered_list_numbering() {
        let mut parser = Parser::new();
        parser.parse_line("1. First");
        let e2 = parser.parse_line("2. Second");
        assert!(e2.iter().any(|e| matches!(
            e,
            ParseEvent::ListItem {
                bullet: ListBullet::Ordered(2),
                ..
            }
        )));
    }

    #[test]
    fn test_parse_blockquote() {
        let mut parser = Parser::new();
        let events = parser.parse_line("> Quote text");
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::BlockquoteLine(s) if s == "Quote text")));
    }

    #[test]
    fn test_parse_nested_blockquote() {
        let mut parser = Parser::new();
        let events = parser.parse_line(">> Nested quote");
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::BlockquoteStart { depth: 2 })));
    }

    #[test]
    fn test_parse_hr() {
        let mut parser = Parser::new();
        for rule in ["---", "***", "___"] {
            assert!(parser
                .parse_line(rule)
                .iter()
                .any(|e| matches!(e, ParseEvent::HorizontalRule)));
        }
    }

    #[test]
    fn test_parse_table() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("| A | B | C |");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::TableHeader(_))));
        let e2 = parser.parse_line("|---|---|---|");
        assert!(e2.iter().any(|e| matches!(e, ParseEvent::TableSeparator)));
        let e3 = parser.parse_line("| 1 | 2 | 3 |");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::TableRow(_))));
    }

    #[test]
    fn test_parse_think_block() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("<think>");
        assert!(e1.iter().any(|e| matches!(e, ParseEvent::ThinkBlockStart)));
        let e2 = parser.parse_line("Thinking...");
        assert!(e2
            .iter()
            .any(|e| matches!(e, ParseEvent::ThinkBlockLine(s) if s == "Thinking...")));
        let e3 = parser.parse_line("</think>");
        assert!(e3.iter().any(|e| matches!(e, ParseEvent::ThinkBlockEnd)));
    }

    #[test]
    fn test_first_indent_stripping() {
        let mut parser = Parser::new();
        let e1 = parser.parse_line("  # Hello");
        assert!(e1
            .iter()
            .any(|e| matches!(e, ParseEvent::Heading { level: 1, content } if content == "Hello")));
    }

    #[test]
    fn test_parse_document() {
        let mut parser = Parser::new();
        let doc = "# Title\n\nSome text.\n\n```\ncode\n```";
        let events = parser.parse_document(doc);
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::Heading { level: 1, .. })));
        assert!(events
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    #[test]
    fn test_finalize_closes_blocks() {
        let mut parser = Parser::new();
        parser.parse_line("```");
        parser.parse_line("code");
        let events = parser.finalize();
        assert!(events.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)));
    }

    #[test]
    fn test_is_block_is_inline() {
        assert!(ParseEvent::Heading {
            level: 1,
            content: "x".to_string()
        }
        .is_block());
        assert!(ParseEvent::CodeBlockStart {
            language: None,
            indent: 0
        }
        .is_block());
        assert!(ParseEvent::Text("x".to_string()).is_inline());
        assert!(ParseEvent::Bold("x".to_string()).is_inline());
    }

    // The remaining tests guard against byte-based indentation handling:
    // U+3000 (ideographic space) is one whitespace character but three bytes,
    // so slicing by a byte count derived from ASCII indentation would panic.

    #[test]
    fn test_first_indent_stripping_multibyte_whitespace() {
        let mut parser = Parser::new();
        // Two ASCII spaces establish a base indent of 2.
        let line1 = "  # Hello";
        assert_eq!(line1.len() - line1.trim_start().len(), 2);
        let _ = parser.parse_line(line1);
        // Byte offset 2 falls inside the three-byte ideographic space.
        let line2 = "\u{3000}World";
        assert!(!line2.is_char_boundary(2));
        let events = parser.parse_line(line2);
        assert!(!events.is_empty());
    }

    #[test]
    fn test_space_indented_code_strip_with_fullwidth() {
        let mut parser = Parser::new();
        parser.set_code_spaces(true);
        parser.parse_line("");
        let line1 = "    first line of code";
        let events1 = parser.parse_line(line1);
        assert!(events1
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        // Four whitespace characters, but byte offset 4 is not a char boundary.
        let line2 = "\u{3000}\u{3000}  second line";
        assert!(!line2.is_char_boundary(4));
        let events2 = parser.parse_line(line2);
        assert!(!events2.is_empty());
    }

    #[test]
    fn test_list_item_indent_with_fullwidth_spaces() {
        let mut parser = Parser::new();
        let events1 = parser.parse_line("- top level");
        assert!(events1
            .iter()
            .any(|e| matches!(e, ParseEvent::ListItem { indent: 0, .. })));
        // One ideographic space: 1 character, 3 bytes.
        let line2 = "\u{3000}- nested item";
        let events2 = parser.parse_line(line2);
        let list_item = events2
            .iter()
            .find(|e| matches!(e, ParseEvent::ListItem { .. }));
        assert!(list_item.is_some(), "Should have parsed list item");
        if let Some(ParseEvent::ListItem { indent, .. }) = list_item {
            assert_eq!(
                *indent, 1,
                "Indent should be 1 (char-based), not 3 (byte-based)"
            );
        }
    }

    #[test]
    fn test_space_indented_code_dedent_with_fullwidth() {
        let mut parser = Parser::new();
        parser.set_code_spaces(true);
        parser.parse_line("");
        let events1 = parser.parse_line("    code line");
        assert!(events1
            .iter()
            .any(|e| matches!(e, ParseEvent::CodeBlockStart { .. })));
        // Two ideographic spaces: 6 bytes but only 2 characters of indent.
        let line2 = "\u{3000}\u{3000}not code anymore";
        let byte_indent = line2.len() - line2.trim_start().len();
        let char_indent = line2.chars().take_while(|c| c.is_whitespace()).count();
        assert_eq!(byte_indent, 6);
        assert_eq!(char_indent, 2);
        let events2 = parser.parse_line(line2);
        assert!(
            events2.iter().any(|e| matches!(e, ParseEvent::CodeBlockEnd)),
            "Should have exited code block with only 2-char indent"
        );
    }
}