use ox_content_allocator::{Allocator, Vec};
use ox_content_ast::{
AlignKind, BlockQuote, Document, Html, Image, Link, List, ListItem, Node, Paragraph, Span,
Table, TableCell, TableRow, Text,
};
use crate::error::{ParseError, ParseResult};
/// Feature switches and limits consulted by [`Parser`].
///
/// The derived `Default` leaves every flag `false` and `max_nesting_depth` at `0`;
/// [`ParserOptions::gfm`] returns a preset with everything switched on.
#[derive(Debug, Clone, Default)]
pub struct ParserOptions {
    /// GitHub Flavored Markdown switch. The parsing code visible in this file never
    /// reads it; presumably an umbrella flag for callers — TODO confirm.
    pub gfm: bool,
    /// Footnote switch. Not read by the parsing code visible in this file.
    pub footnotes: bool,
    /// When `true`, a `[ ]` / `[x]` / `[X]` prefix on a bullet item is parsed as
    /// task-list state.
    pub task_lists: bool,
    /// When `true`, pipe tables are recognised as blocks.
    pub tables: bool,
    /// When `true`, `~~text~~` is parsed as a delete node.
    pub strikethrough: bool,
    /// Autolink switch. Not read by the parsing code visible in this file.
    pub autolinks: bool,
    /// Limit that the parser's nesting counter is compared against before each block.
    pub max_nesting_depth: usize,
}

impl ParserOptions {
    /// Returns the preset with every feature flag enabled and a nesting limit of 100.
    #[must_use]
    pub fn gfm() -> Self {
        Self {
            max_nesting_depth: 100,
            autolinks: true,
            strikethrough: true,
            tables: true,
            task_lists: true,
            footnotes: true,
            gfm: true,
        }
    }
}
/// Markdown parser that walks `source` with a byte cursor and builds AST nodes whose
/// collections come from `allocator`.
pub struct Parser<'a> {
/// Allocator used for every node collection and for copied block-quote text.
allocator: &'a Allocator,
/// The complete input text; AST string values are slices of it where possible.
source: &'a str,
/// Feature switches and limits for this parse.
options: ParserOptions,
/// Current byte offset into `source`.
position: usize,
/// Counter compared against `options.max_nesting_depth` before each block is parsed.
nesting_depth: usize,
}
/// Result of recognising a single list-item line (marker plus inline content).
struct ParsedListItem<'a> {
/// `true` for a numeric marker (`1.` / `1)`), `false` for a bullet (`-`, `*`, `+`).
ordered: bool,
/// Parsed number of an ordered marker; `None` for bullets or when parsing the digits fails.
start: Option<u32>,
/// Item text after the marker and any task-list prefix.
content: &'a str,
/// Byte offset of `content` within the parser's source.
content_offset: usize,
/// Task-list state: `Some(true)` checked, `Some(false)` unchecked, `None` if not a task item.
checked: Option<bool>,
}
impl<'a> Parser<'a> {
/// Creates a parser over `source` using `ParserOptions::default()`, with the cursor
/// and nesting counter at zero.
#[must_use]
pub fn new(allocator: &'a Allocator, source: &'a str) -> Self {
    let options = ParserOptions::default();
    Self { position: 0, nesting_depth: 0, allocator, source, options }
}
/// Creates a parser over `source` with caller-supplied `options`, with the cursor and
/// nesting counter at zero.
#[must_use]
pub fn with_options(allocator: &'a Allocator, source: &'a str, options: ParserOptions) -> Self {
    Self { position: 0, nesting_depth: 0, allocator, source, options }
}
/// Consumes the parser and parses the whole source into a [`Document`].
///
/// Blocks are parsed one after another until the cursor reaches the end of the
/// source; the document span always covers `0..source.len()`.
///
/// # Errors
///
/// Propagates the first error returned by block parsing.
pub fn parse(mut self) -> ParseResult<Document<'a>> {
    let mut children = self.allocator.new_vec();
    loop {
        if self.is_at_end() {
            break;
        }
        // `None` means the block parser consumed input without producing a node.
        let Some(node) = self.parse_block()? else {
            continue;
        };
        children.push(node);
    }
    let end = self.source.len() as u32;
    Ok(Document { children, span: Span::new(0, end) })
}
/// Returns `true` once the cursor has reached (or passed) the end of the source.
fn is_at_end(&self) -> bool {
    self.source.len() <= self.position
}
/// Returns the unparsed tail of the source, starting at the cursor.
fn remaining(&self) -> &'a str {
    let source: &'a str = self.source;
    &source[self.position..]
}
/// Returns the character at the cursor without consuming it, or `None` at end of input.
fn peek(&self) -> Option<char> {
    let rest = self.remaining();
    rest.chars().next()
}
/// Consumes and returns the character at the cursor, moving the cursor past all of
/// its UTF-8 bytes. Returns `None` (and does not move) at end of input.
fn advance(&mut self) -> Option<char> {
    self.peek().map(|ch| {
        self.position += ch.len_utf8();
        ch
    })
}
/// Moves the cursor past any run of spaces and tabs (newlines are not skipped).
fn skip_whitespace(&mut self) {
    while matches!(self.peek(), Some(' ' | '\t')) {
        self.advance();
    }
}
/// Skips lines that contain only spaces/tabs followed by `\n`.
///
/// The cursor ends at the start of the first line that is not blank; leading
/// whitespace of that line is left unconsumed.
fn skip_blank_lines(&mut self) {
    loop {
        if self.is_at_end() {
            return;
        }
        let line_start = self.position;
        self.skip_whitespace();
        match self.peek() {
            Some('\n') => {
                self.advance();
            }
            _ => {
                // Not a blank line: rewind so the caller sees the indentation too.
                self.position = line_start;
                return;
            }
        }
    }
}
/// Parses the next block-level construct at the cursor.
///
/// Blank lines are skipped first. The block kinds are probed in a fixed order —
/// heading, thematic break, block quote, fenced code, HTML block, table (only when
/// `options.tables` is set), list — and anything else is parsed as a paragraph.
///
/// Returns `Ok(None)` when the input is exhausted or the chosen parser produced no node.
///
/// # Errors
///
/// Returns `ParseError::NestingTooDeep` when the nesting counter exceeds
/// `options.max_nesting_depth`, and propagates errors from the block parsers.
fn parse_block(&mut self) -> ParseResult<Option<Node<'a>>> {
    self.skip_blank_lines();
    if self.is_at_end() {
        return Ok(None);
    }

    let max_depth = self.options.max_nesting_depth;
    if self.nesting_depth > max_depth {
        let here = self.position as u32;
        return Err(ParseError::NestingTooDeep { span: Span::new(here, here), max_depth });
    }

    let start = self.position;
    if self.try_parse_heading() {
        self.parse_heading(start)
    } else if self.try_parse_thematic_break() {
        self.parse_thematic_break(start)
    } else if self.try_parse_block_quote() {
        self.parse_block_quote(start)
    } else if self.try_parse_fenced_code() {
        self.parse_fenced_code(start)
    } else if self.try_parse_html_block() {
        self.parse_html_block(start)
    } else if self.options.tables && self.try_parse_table() {
        self.parse_table(start)
    } else if self.try_parse_list() {
        self.parse_list(start)
    } else {
        self.parse_paragraph(start)
    }
}
/// Reports whether the current line opens an HTML block: either an HTML comment
/// (`<!--`) or a tag accepted by `parse_html_block_tag_name`.
fn try_parse_html_block(&self) -> bool {
    let first_line = self.remaining().lines().next().unwrap_or("");
    if first_line.trim_start().starts_with("<!--") {
        return true;
    }
    Self::parse_html_block_tag_name(first_line).is_some()
}
/// Parses a raw HTML block beginning at `start` into a `Node::Html`.
///
/// A comment block runs through the first line containing `-->`; a tag block runs
/// through the first line containing `</tagname` (compared case-insensitively). Both
/// stop at end of input if no terminator is found. The node value is the raw source
/// text from `start` to the cursor.
///
/// Returns `Ok(None)` without consuming anything when the line is neither a comment
/// nor a supported tag.
fn parse_html_block(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
    let first_line = self.remaining().lines().next().unwrap_or("");

    if first_line.trim_start().starts_with("<!--") {
        loop {
            let closed = self.consume_line().contains("-->");
            if closed || self.is_at_end() {
                break;
            }
        }
    } else {
        let Some(tag_name) = Self::parse_html_block_tag_name(first_line) else {
            return Ok(None);
        };
        // `tag_name` is already lowercase, so each line is lowercased before matching.
        let closing_tag = format!("</{tag_name}");
        loop {
            let lowered = self.consume_line().to_ascii_lowercase();
            if lowered.contains(&closing_tag) || self.is_at_end() {
                break;
            }
        }
    }

    let end = self.position;
    let html = Html { value: &self.source[start..end], span: Span::new(start as u32, end as u32) };
    Ok(Some(Node::Html(html)))
}
/// Extracts the lowercase tag name when `line` starts (after leading whitespace) with
/// an opening or closing tag from the supported block-tag list.
///
/// The name is the run of ASCII alphanumerics and `-` after `<` or `</`. It must be
/// followed by a space, tab, `>`, `/`, or the end of the line; otherwise `None`.
fn parse_html_block_tag_name(line: &str) -> Option<String> {
    let after_open = line.trim_start().strip_prefix('<')?;
    let body = after_open.strip_prefix('/').unwrap_or(after_open);

    let name_len =
        body.bytes().take_while(|byte| byte.is_ascii_alphanumeric() || *byte == b'-').count();
    if name_len == 0 {
        return None;
    }

    // Every byte before `name_len` is ASCII, so this split is on a char boundary.
    let (tag_name, rest) = body.split_at(name_len);
    match rest.bytes().next() {
        None | Some(b' ' | b'\t' | b'>' | b'/') => {}
        Some(_) => return None,
    }

    Self::is_supported_html_block_tag(tag_name).then(|| tag_name.to_ascii_lowercase())
}
/// Returns `true` when `tag_name` is one of the block-level HTML tags this parser
/// treats as raw HTML blocks. The comparison ignores ASCII case.
fn is_supported_html_block_tag(tag_name: &str) -> bool {
    const BLOCK_TAGS: &[&str] = &[
        "article",
        "aside",
        "blockquote",
        "details",
        "dialog",
        "div",
        "figcaption",
        "figure",
        "footer",
        "header",
        "main",
        "nav",
        "ol",
        "p",
        "pre",
        "section",
        "summary",
        "table",
        "tbody",
        "td",
        "tfoot",
        "th",
        "thead",
        "tr",
        "ul",
    ];
    for known in BLOCK_TAGS {
        if known.eq_ignore_ascii_case(tag_name) {
            return true;
        }
    }
    false
}
/// Reports whether the current line, ignoring leading whitespace, starts with `>`.
fn try_parse_block_quote(&self) -> bool {
    match self.remaining().lines().next() {
        Some(line) => line.trim_start().starts_with('>'),
        None => false,
    }
}
/// Parses consecutive `>`-prefixed lines into a `Node::BlockQuote`.
///
/// Each line has its `>` marker and one optional following space removed; the
/// stripped lines are joined with `\n`, copied into the allocator, and parsed by a
/// fresh sub-parser whose top-level nodes become the quote's children. The quote ends
/// at a blank line, at end of input, or at the first line without a `>` marker.
///
/// The returned span covers the quoted lines in this parser's source. Child nodes are
/// produced from the copied text, so their spans are relative to that copy.
///
/// # Errors
///
/// Propagates any error from parsing the quoted content.
fn parse_block_quote(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
    self.nesting_depth += 1;

    let mut quoted = String::new();
    while !self.is_at_end() {
        let line_start = self.position;
        self.skip_whitespace();
        let blank = self.peek() == Some('\n') || self.is_at_end();
        // The probe above is look-ahead only; always rewind to the line start.
        self.position = line_start;
        if blank {
            break;
        }

        let line = self.remaining().lines().next().unwrap_or("");
        let Some(after_marker) = line.trim_start().strip_prefix('>') else {
            break;
        };
        quoted.push_str(after_marker.strip_prefix(' ').unwrap_or(after_marker));
        quoted.push('\n');

        self.position += line.len();
        if self.peek() == Some('\n') {
            self.advance();
        }
    }

    // NOTE(review): `with_options` starts the sub-parser at nesting depth 0, so the
    // increment above is not visible to it — confirm whether depth should carry over.
    let inner_source = self.allocator.alloc_str(&quoted);
    let inner_doc =
        Parser::with_options(self.allocator, inner_source, self.options.clone()).parse()?;
    self.nesting_depth -= 1;

    let block_quote = BlockQuote {
        children: inner_doc.children,
        span: Span::new(start as u32, self.position as u32),
    };
    Ok(Some(Node::BlockQuote(block_quote)))
}
/// Reports whether the current line, ignoring leading whitespace, starts a list item:
/// a `-`, `*` or `+` followed by a space, or one or more ASCII digits followed by
/// `.` or `)` and a space.
fn try_parse_list(&self) -> bool {
    let line = self.remaining().lines().next().unwrap_or("");
    let bytes = line.trim_start().as_bytes();

    if matches!(bytes, [b'-' | b'*' | b'+', b' ', ..]) {
        return true;
    }

    let digits = bytes.iter().take_while(|byte| byte.is_ascii_digit()).count();
    if digits == 0 {
        return false;
    }
    matches!(&bytes[digits..], [b'.' | b')', b' ', ..])
}
/// Measures the indentation width at byte offset `start`: each leading space counts
/// as 1 and each leading tab as 4, stopping at the first other byte.
fn calc_indentation(&self, start: usize) -> usize {
    self.source
        .as_bytes()
        .iter()
        .skip(start)
        .map_while(|byte| match byte {
            b' ' => Some(1usize),
            b'\t' => Some(4usize),
            _ => None,
        })
        .sum()
}
/// Recognises a task-list checkbox at the start of `content`.
///
/// Returns `(checked, consumed_bytes)` for `[x]`, `[X]` (checked) or `[ ]`
/// (unchecked). The checkbox must either be the whole content (3 bytes consumed) or
/// be followed by a space (4 bytes consumed). Returns `None` otherwise, or when
/// `options.task_lists` is off.
fn parse_task_list_prefix(&self, content: &'a str) -> Option<(bool, usize)> {
    if !self.options.task_lists {
        return None;
    }

    let bytes = content.as_bytes();
    let checked = match bytes.get(..3)? {
        b"[x]" | b"[X]" => true,
        b"[ ]" => false,
        _ => return None,
    };

    match bytes.get(3) {
        None => Some((checked, 3)),
        Some(b' ') => Some((checked, 4)),
        Some(_) => None,
    }
}
/// Parses the line beginning at byte offset `line_start` as a single list item.
///
/// Bullet items (`- `, `* `, `+ `) may carry a task-list checkbox, which is removed
/// from the content and reported in `checked`. Ordered items need one or more ASCII
/// digits, a `.` or `)`, and a space. Returns `None` when the line is not a list item.
fn parse_list_item_line(&self, line_start: usize) -> Option<ParsedListItem<'a>> {
    let rest: &'a str = &self.source[line_start..];
    let line = rest.lines().next().unwrap_or("");
    let trimmed = line.trim_start();
    // Absolute offset of the marker = line start + stripped indentation.
    let marker_offset = line_start + (line.len() - trimmed.len());
    let bytes = trimmed.as_bytes();

    if matches!(bytes, [b'-' | b'*' | b'+', b' ', ..]) {
        let after_marker = &trimmed[2..];
        let (checked, prefix_len) = match self.parse_task_list_prefix(after_marker) {
            Some((done, consumed)) => (Some(done), consumed),
            None => (None, 0),
        };
        return Some(ParsedListItem {
            ordered: false,
            start: None,
            content: &after_marker[prefix_len..],
            content_offset: marker_offset + 2 + prefix_len,
            checked,
        });
    }

    let digits = bytes.iter().take_while(|byte| byte.is_ascii_digit()).count();
    if digits == 0 {
        return None;
    }
    // Both the delimiter and the space after it must be present.
    let content_at = digits + 2;
    if !matches!(bytes.get(digits..content_at)?, [b'.' | b')', b' ']) {
        return None;
    }

    Some(ParsedListItem {
        ordered: true,
        start: trimmed[..digits].parse().ok(),
        content: &trimmed[content_at..],
        content_offset: marker_offset + content_at,
        checked: None,
    })
}
/// Parses a run of list-item lines starting at `start` into a `Node::List`.
///
/// The indentation measured at `start` is the list's baseline. Lines at the baseline
/// become items of this list; more-indented list lines are parsed recursively and
/// attached to the most recent item; more-indented non-list lines are skipped. The
/// list ends at a blank line, end of input, a less-indented line, a line that is not
/// a list item, or an item whose ordered/unordered kind differs from the first item.
///
/// Returns `Ok(None)` when the first line is not a list item. Errors from inline
/// parsing and from nested lists are propagated.
fn parse_list(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
let baseline_indent = self.calc_indentation(start);
let first_line_start = self.position;
// The first item fixes the list kind and the recorded start number.
let Some(first_item) = self.parse_list_item_line(first_line_start) else {
return Ok(None);
};
let ordered = first_item.ordered;
let list_start = first_item.start;
let mut children: Vec<'a, ListItem<'a>> = self.allocator.new_vec();
loop {
if self.is_at_end() {
break;
}
let line_start = self.position;
// Look ahead past indentation only to detect a blank line; the cursor is rewound below.
self.skip_whitespace();
if self.peek() == Some('\n') || self.is_at_end() {
self.position = line_start; break;
}
let current_indent = self.calc_indentation(line_start);
// A shallower line belongs to an enclosing construct.
if current_indent < baseline_indent {
self.position = line_start;
break;
}
if current_indent > baseline_indent {
self.position = line_start; if self.try_parse_list() {
// Deeper list line: parse it as a nested list and hang it off the previous item.
if let Some(Node::List(nested_list)) = self.parse_list(line_start)? {
if let Some(last_item) = children.last_mut() {
last_item.span = last_item.span.merge(nested_list.span);
last_item.children.push(Node::List(nested_list));
}
}
} else {
// Deeper non-list line: consumed through its newline without producing a node.
while let Some(ch) = self.peek() {
self.advance();
if ch == '\n' {
break;
}
}
}
continue;
}
self.position = line_start;
let remaining = self.remaining();
let line = remaining.lines().next().unwrap_or("");
let Some(item) = self.parse_list_item_line(line_start) else {
break;
};
// Switching between ordered and unordered markers ends this list.
if item.ordered != ordered {
break;
}
// Consume the item line and its trailing newline, if any.
self.position += line.len();
if self.peek() == Some('\n') {
self.advance();
}
let item_children_inline = self.parse_inline(item.content, item.content_offset)?;
let mut para_children = self.allocator.new_vec();
for child in item_children_inline {
para_children.push(child);
}
// The item's inline content is wrapped in a single paragraph spanning just the content.
let para = Paragraph {
children: para_children,
span: Span::new(
item.content_offset as u32,
(item.content_offset + item.content.len()) as u32,
),
};
let mut list_item_children = self.allocator.new_vec();
list_item_children.push(Node::Paragraph(para));
let list_item = ListItem {
checked: item.checked,
spread: false,
children: list_item_children,
span: Span::new(line_start as u32, self.position as u32),
};
children.push(list_item);
}
let span = Span::new(start as u32, self.position as u32);
Ok(Some(Node::List(List { ordered, start: list_start, spread: false, children, span })))
}
/// Reports whether the cursor sits on an ATX heading: one to six `#` characters
/// followed by a space, tab, newline, or the end of input.
fn try_parse_heading(&self) -> bool {
    let rest = self.remaining().as_bytes();
    // Seven hashes are already too many, so there is no need to count further.
    let hashes = rest.iter().take(7).take_while(|&&byte| byte == b'#').count();
    if hashes == 0 || hashes > 6 {
        return false;
    }
    matches!(rest.get(hashes), None | Some(b' ' | b'\t' | b'\n'))
}
/// Reports whether the current line is a thematic break: after trimming, it starts
/// with `-`, `*` or `_`, contains only that marker plus spaces/tabs, and has at
/// least three markers.
fn try_parse_thematic_break(&self) -> bool {
    let line = self.remaining().lines().next().unwrap_or("").trim();
    if line.len() < 3 {
        return false;
    }

    let Some(marker) = line.chars().next().filter(|ch| matches!(ch, '-' | '*' | '_')) else {
        return false;
    };

    let mut marker_count = 0usize;
    for ch in line.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false;
        }
    }
    marker_count >= 3
}
/// Reports whether the text at the cursor begins with a code fence (three backticks
/// or three tildes). Leading whitespace is not skipped.
fn try_parse_fenced_code(&self) -> bool {
    let rest = self.remaining();
    ["```", "~~~"].iter().any(|fence| rest.starts_with(*fence))
}
/// Reports whether the next two lines look like a table header plus delimiter row.
///
/// The first line must contain `|`. The second must contain both `|` and `-`, and
/// every `|`-separated cell of it may hold only `-` and `:` once trimmed.
fn try_parse_table(&self) -> bool {
    let mut lines = self.remaining().lines();
    let (Some(header), Some(delimiter)) = (lines.next(), lines.next()) else {
        return false;
    };

    if !header.contains('|') {
        return false;
    }

    let delimiter = delimiter.trim();
    if !(delimiter.contains('|') && delimiter.contains('-')) {
        return false;
    }

    // Empty cells (from leading/trailing pipes) pass trivially.
    delimiter.split('|').all(|cell| cell.trim().chars().all(|ch| matches!(ch, '-' | ':')))
}
/// Parses an ATX heading starting at `start` into a `Node::Heading`.
///
/// The number of leading `#` characters becomes the depth. The rest of the line,
/// after skipping spaces/tabs, is the heading text; trailing whitespace is removed
/// and so is an optional closing run of `#`. The line's newline is consumed and
/// included in the span.
///
/// A trailing `#` run counts as a closing sequence only when it is preceded by a
/// space or tab, or when it makes up the whole text. So `# C#` keeps its `#`, while
/// `# Title ##` yields `Title`.
///
/// # Errors
///
/// Propagates errors from inline parsing of the heading text.
fn parse_heading(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
    let mut depth = 0u8;
    while self.peek() == Some('#') {
        depth += 1;
        self.advance();
    }
    self.skip_whitespace();

    let content_start = self.position;
    while let Some(ch) = self.peek() {
        if ch == '\n' {
            break;
        }
        self.advance();
    }
    let content_end = self.position;

    let raw = self.source[content_start..content_end].trim_end();
    let without_closing = raw.trim_end_matches('#');
    let has_closing_sequence = without_closing.len() < raw.len()
        && (without_closing.is_empty()
            || without_closing.ends_with(|ch: char| ch == ' ' || ch == '\t'));
    // Trimming only ever removes a suffix, so `content` still begins at `content_start`.
    let content = if has_closing_sequence { without_closing.trim_end() } else { raw };

    if self.peek() == Some('\n') {
        self.advance();
    }
    let span = Span::new(start as u32, self.position as u32);

    let children = if content.is_empty() {
        self.allocator.new_vec()
    } else {
        self.parse_inline(content, content_start)?
    };
    Ok(Some(Node::Heading(ox_content_ast::Heading { depth, children, span })))
}
/// Consumes the current line (through its newline, or to end of input) and returns a
/// `Node::ThematicBreak` spanning from `start` to the cursor.
fn parse_thematic_break(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
    loop {
        match self.advance() {
            Some('\n') | None => break,
            Some(_) => {}
        }
    }
    let thematic_break =
        ox_content_ast::ThematicBreak { span: Span::new(start as u32, self.position as u32) };
    Ok(Some(Node::ThematicBreak(thematic_break)))
}
/// Parses a fenced code block starting at `start` into a `Node::CodeBlock`.
///
/// The opening fence is the run of identical characters at the cursor. The rest of
/// that line is the info string: the text before its first space is `lang` and the
/// text after it is `meta`. The block's value is every following line up to, but not
/// including, the first line that starts with at least as many fence characters as
/// the opener; that closing line is consumed in full. Without a closing fence the
/// value runs to the end of input.
///
/// # Panics
///
/// Panics if called at end of input (callers check `try_parse_fenced_code` first).
fn parse_fenced_code(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
    let fence_char = self.peek().unwrap();
    let fence_len = self.remaining().chars().take_while(|&ch| ch == fence_char).count();
    self.position += fence_len * fence_char.len_utf8();

    // Info string: everything up to (not including) the newline.
    self.skip_whitespace();
    let info_line = self.remaining();
    let info_len = info_line.find('\n').unwrap_or(info_line.len());
    self.position += info_len;
    let info = info_line[..info_len].trim();
    let (lang, meta) = if info.is_empty() {
        (None, None)
    } else {
        match info.split_once(' ') {
            Some((lang, meta)) => (Some(lang), Some(meta)),
            None => (Some(info), None),
        }
    };
    if self.peek() == Some('\n') {
        self.advance();
    }

    let content_start = self.position;
    let mut content_end = content_start;
    while !self.is_at_end() {
        let line_start = self.position;
        let fence_run = self.remaining().chars().take_while(|&ch| ch == fence_char).count();
        // Either way the whole line is consumed; only where the content ends differs.
        self.consume_line();
        if fence_run >= fence_len {
            content_end = line_start;
            break;
        }
        content_end = self.position;
    }

    let code_block = ox_content_ast::CodeBlock {
        lang,
        meta,
        value: &self.source[content_start..content_end],
        span: Span::new(start as u32, self.position as u32),
    };
    Ok(Some(Node::CodeBlock(code_block)))
}
fn parse_table(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
let mut rows: std::vec::Vec<std::vec::Vec<&str>> = std::vec::Vec::new();
let mut align: Vec<'a, AlignKind> = self.allocator.new_vec();
let header_line = self.consume_line();
let header_cells = Self::parse_table_row_cells(header_line);
rows.push(header_cells);
let delimiter_line = self.consume_line();
for cell in delimiter_line.split('|').filter(|s| !s.trim().is_empty()) {
let cell = cell.trim();
let starts_colon = cell.starts_with(':');
let ends_colon = cell.ends_with(':');
let alignment = match (starts_colon, ends_colon) {
(true, true) => AlignKind::Center,
(true, false) => AlignKind::Left,
(false, true) => AlignKind::Right,
(false, false) => AlignKind::None,
};
align.push(alignment);
}
loop {
if self.is_at_end() {
break;
}
let line_start = self.position;
self.skip_whitespace();
if self.peek() == Some('\n') || self.is_at_end() {
self.position = line_start;
break;
}
let remaining = self.remaining();
let line = remaining.lines().next().unwrap_or("");
if !line.contains('|') {
self.position = line_start;
break;
}
self.position = line_start;
let row_line = self.consume_line();
let row_cells = Self::parse_table_row_cells(row_line);
rows.push(row_cells);
}
let mut children: Vec<'a, TableRow<'a>> = self.allocator.new_vec();
for row_cells in rows {
let mut cells: Vec<'a, TableCell<'a>> = self.allocator.new_vec();
for cell_content in row_cells {
let cell_children = self.parse_inline(cell_content, 0)?;
let cell = TableCell { children: cell_children, span: Span::new(0, 0) };
cells.push(cell);
}
let row = TableRow { children: cells, span: Span::new(0, 0) };
children.push(row);
}
let span = Span::new(start as u32, self.position as u32);
Ok(Some(Node::Table(Table { align, children, span })))
}
/// Consumes the rest of the current line, including its `\n` if present, and returns
/// the consumed text without that newline.
fn consume_line(&mut self) -> &'a str {
    let rest = self.remaining();
    let (line, consumed) = match rest.find('\n') {
        Some(newline_at) => (&rest[..newline_at], newline_at + 1),
        None => (rest, rest.len()),
    };
    self.position += consumed;
    line
}
/// Splits one table row into its trimmed cell texts.
///
/// The line is trimmed, one leading and one trailing `|` are dropped if present, and
/// the remainder is split on `|`. Each returned cell is a sub-slice of `line`.
fn parse_table_row_cells(line: &'a str) -> std::vec::Vec<&'a str> {
    let mut inner = line.trim();
    if let Some(rest) = inner.strip_prefix('|') {
        inner = rest;
    }
    if let Some(rest) = inner.strip_suffix('|') {
        inner = rest;
    }

    let mut cells = std::vec::Vec::new();
    for cell in inner.split('|') {
        cells.push(cell.trim());
    }
    cells
}
/// Parses a paragraph starting at `start` into a `Node::Paragraph`.
///
/// Lines are gathered until a blank line, end of input, or a line that starts another
/// block (heading, thematic break, block quote, fenced code, HTML block, table when
/// enabled, or list). The gathered text is trimmed and parsed as inline content.
///
/// The paragraph span covers the raw gathered lines. The inline nodes are given the
/// offset of the first non-whitespace byte, so their spans stay in source coordinates
/// even when the paragraph is indented.
///
/// Returns `Ok(None)` when the gathered text is empty after trimming.
///
/// # Errors
///
/// Propagates errors from inline parsing.
fn parse_paragraph(&mut self, start: usize) -> ParseResult<Option<Node<'a>>> {
    let mut content_end = start;
    while !self.is_at_end() {
        let line_start = self.position;
        self.skip_whitespace();
        let blank = self.peek() == Some('\n') || self.is_at_end();
        // The whitespace skip is look-ahead only.
        self.position = line_start;
        if blank {
            break;
        }

        let interrupted = self.try_parse_heading()
            || self.try_parse_thematic_break()
            || self.try_parse_block_quote()
            || self.try_parse_fenced_code()
            || self.try_parse_html_block()
            || (self.options.tables && self.try_parse_table())
            || self.try_parse_list();
        if interrupted {
            break;
        }

        // Take the whole line, newline included.
        while let Some(ch) = self.peek() {
            self.advance();
            if ch == '\n' {
                break;
            }
        }
        content_end = self.position;
    }

    let raw = &self.source[start..content_end];
    let without_indent = raw.trim_start();
    let content = without_indent.trim_end();
    if content.is_empty() {
        return Ok(None);
    }
    // `start` points at the raw text; shift by the whitespace trimmed from the front
    // so inline spans line up with the bytes they describe.
    let content_start = start + (raw.len() - without_indent.len());

    let span = Span::new(start as u32, content_end as u32);
    let children = self.parse_inline(content, content_start)?;
    Ok(Some(Node::Paragraph(Paragraph { children, span })))
}
fn parse_inline(&self, content: &'a str, offset: usize) -> ParseResult<Vec<'a, Node<'a>>> {
let mut children = self.allocator.new_vec();
let mut pos = 0;
let bytes = content.as_bytes();
while pos < content.len() {
let start = pos;
while pos < content.len() {
let ch = bytes[pos];
if matches!(ch, b'*' | b'_' | b'`' | b'[' | b'!' | b'~' | b'\\') {
break;
}
pos += 1;
}
if pos > start {
let text_content = &content[start..pos];
let text = Text {
value: text_content,
span: Span::new((offset + start) as u32, (offset + pos) as u32),
};
children.push(Node::Text(text));
}
if pos >= content.len() {
break;
}
let ch = bytes[pos];
match ch {
b'\\' if pos + 1 < content.len() && bytes[pos + 1] == b'\n' => {
let break_node = ox_content_ast::Break {
span: Span::new((offset + pos) as u32, (offset + pos + 2) as u32),
};
children.push(Node::Break(break_node));
pos += 2;
}
b'\\' if pos + 1 < content.len() => {
pos += 1;
let escaped = &content[pos..pos + 1];
let text = Text {
value: escaped,
span: Span::new((offset + pos - 1) as u32, (offset + pos + 1) as u32),
};
children.push(Node::Text(text));
pos += 1;
}
b'~' if self.options.strikethrough
&& pos + 1 < content.len()
&& bytes[pos + 1] == b'~' =>
{
let inner_start = pos + 2;
let mut inner_end = inner_start;
let mut found = false;
while inner_end + 1 < content.len() {
if bytes[inner_end] == b'~' && bytes[inner_end + 1] == b'~' {
found = true;
break;
}
inner_end += 1;
}
if found {
let inner_content = &content[inner_start..inner_end];
let inner_children =
self.parse_inline(inner_content, offset + inner_start)?;
let span =
Span::new((offset + pos) as u32, (offset + inner_end + 2) as u32);
let delete = ox_content_ast::Delete { children: inner_children, span };
children.push(Node::Delete(delete));
pos = inner_end + 2;
} else {
let text = Text {
value: &content[pos..pos + 2],
span: Span::new((offset + pos) as u32, (offset + pos + 2) as u32),
};
children.push(Node::Text(text));
pos += 2;
}
}
b'*' | b'_' => {
let marker = ch;
let mut count = 1;
while pos + count < content.len() && bytes[pos + count] == marker {
count += 1;
}
let inner_start = pos + count;
let mut inner_end = inner_start;
let mut found = false;
while inner_end < content.len() {
if bytes[inner_end] == marker {
let mut end_count = 1;
while inner_end + end_count < content.len()
&& bytes[inner_end + end_count] == marker
{
end_count += 1;
}
if end_count >= count {
found = true;
break;
}
inner_end += end_count;
} else {
inner_end += 1;
}
}
if found {
let inner_content = &content[inner_start..inner_end];
let inner_children =
self.parse_inline(inner_content, offset + inner_start)?;
let span =
Span::new((offset + pos) as u32, (offset + inner_end + count) as u32);
if count >= 2 {
let strong = ox_content_ast::Strong { children: inner_children, span };
children.push(Node::Strong(strong));
pos = inner_end + count;
} else {
let emphasis =
ox_content_ast::Emphasis { children: inner_children, span };
children.push(Node::Emphasis(emphasis));
pos = inner_end + count;
}
} else {
let text = Text {
value: &content[pos..pos + count],
span: Span::new((offset + pos) as u32, (offset + pos + count) as u32),
};
children.push(Node::Text(text));
pos += count;
}
}
b'`' => {
pos += 1;
let code_start = pos;
while pos < content.len() && bytes[pos] != b'`' {
pos += 1;
}
if pos < content.len() {
let code_content = &content[code_start..pos];
let inline_code = ox_content_ast::InlineCode {
value: code_content,
span: Span::new(
(offset + code_start - 1) as u32,
(offset + pos + 1) as u32,
),
};
children.push(Node::InlineCode(inline_code));
pos += 1;
} else {
let text = Text {
value: &content[code_start - 1..],
span: Span::new(
(offset + code_start - 1) as u32,
(offset + content.len()) as u32,
),
};
children.push(Node::Text(text));
}
}
b'[' => {
let link_start = pos;
pos += 1;
let text_start = pos;
let mut bracket_depth = 1;
while pos < content.len() && bracket_depth > 0 {
match bytes[pos] {
b'[' => bracket_depth += 1,
b']' => bracket_depth -= 1,
_ => {} }
if bracket_depth > 0 {
pos += 1;
}
}
if pos < content.len()
&& bytes[pos] == b']'
&& pos + 1 < content.len()
&& bytes[pos + 1] == b'('
{
let link_text = &content[text_start..pos];
pos += 2;
let url_start = pos;
let mut paren_depth = 1;
while pos < content.len() && paren_depth > 0 {
match bytes[pos] {
b'(' => paren_depth += 1,
b')' => paren_depth -= 1,
_ => {} }
if paren_depth > 0 {
pos += 1;
}
}
if pos < content.len() && bytes[pos] == b')' {
let url = &content[url_start..pos];
pos += 1;
let link_children =
self.parse_inline(link_text, offset + text_start)?;
let link = Link {
url,
title: None,
children: link_children,
span: Span::new(
(offset + link_start) as u32,
(offset + pos) as u32,
),
};
children.push(Node::Link(link));
} else {
let text = Text {
value: &content[link_start..pos],
span: Span::new(
(offset + link_start) as u32,
(offset + pos) as u32,
),
};
children.push(Node::Text(text));
}
} else {
let text = Text {
value: "[",
span: Span::new(
(offset + link_start) as u32,
(offset + link_start + 1) as u32,
),
};
children.push(Node::Text(text));
pos = link_start + 1;
}
}
b'!' => {
if pos + 1 < content.len() && bytes[pos + 1] == b'[' {
let image_start = pos;
pos += 2; let alt_start = pos;
let mut bracket_depth = 1;
while pos < content.len() && bracket_depth > 0 {
match bytes[pos] {
b'[' => bracket_depth += 1,
b']' => bracket_depth -= 1,
_ => {}
}
if bracket_depth > 0 {
pos += 1;
}
}
if pos < content.len()
&& bytes[pos] == b']'
&& pos + 1 < content.len()
&& bytes[pos + 1] == b'('
{
let alt_text = &content[alt_start..pos];
pos += 2;
let url_start = pos;
let mut paren_depth = 1;
while pos < content.len() && paren_depth > 0 {
match bytes[pos] {
b'(' => paren_depth += 1,
b')' => paren_depth -= 1,
_ => {}
}
if paren_depth > 0 {
pos += 1;
}
}
if pos < content.len() && bytes[pos] == b')' {
let url = &content[url_start..pos];
pos += 1;
let image = Image {
url,
alt: alt_text,
title: None,
span: Span::new(
(offset + image_start) as u32,
(offset + pos) as u32,
),
};
children.push(Node::Image(image));
} else {
let text = Text {
value: &content[image_start..pos],
span: Span::new(
(offset + image_start) as u32,
(offset + pos) as u32,
),
};
children.push(Node::Text(text));
}
} else {
let text = Text {
value: "![",
span: Span::new(
(offset + image_start) as u32,
(offset + image_start + 2) as u32,
),
};
children.push(Node::Text(text));
pos = image_start + 2;
}
} else {
let text = Text {
value: "!",
span: Span::new((offset + pos) as u32, (offset + pos + 1) as u32),
};
children.push(Node::Text(text));
pos += 1;
}
}
_ => {
let text = Text {
value: &content[pos..pos + 1],
span: Span::new((offset + pos) as u32, (offset + pos + 1) as u32),
};
children.push(Node::Text(text));
pos += 1;
}
}
}
Ok(children)
}
}
/// Unit tests covering one representative input per block and inline construct.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_image() {
        let allocator = Allocator::new();
        // The input must actually contain the image that the assertions below describe.
        let doc = Parser::new(&allocator, "![Alt text](/path/to/image.png)").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::Paragraph(p) => {
                assert_eq!(p.children.len(), 1);
                match &p.children[0] {
                    Node::Image(img) => {
                        assert_eq!(img.alt, "Alt text");
                        assert_eq!(img.url, "/path/to/image.png");
                    }
                    _ => panic!("expected image, got {:?}", &p.children[0]),
                }
            }
            _ => panic!("expected paragraph"),
        }
    }

    #[test]
    fn test_parse_heading() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "# Hello\n").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::Heading(h) => {
                assert_eq!(h.depth, 1);
            }
            _ => panic!("expected heading"),
        }
    }

    #[test]
    fn test_parse_paragraph() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "Hello world").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        assert!(matches!(&doc.children[0], Node::Paragraph(_)));
    }

    #[test]
    fn test_parse_thematic_break() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "---").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        assert!(matches!(&doc.children[0], Node::ThematicBreak(_)));
    }

    #[test]
    fn test_parse_fenced_code() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "```rust\nfn main() {}\n```").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::CodeBlock(cb) => {
                assert_eq!(cb.lang, Some("rust"));
            }
            _ => panic!("expected code block"),
        }
    }

    #[test]
    fn test_parse_inline_code() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "Use `code` here").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::Paragraph(p) => {
                assert!(p.children.iter().any(|n| matches!(n, Node::InlineCode(_))));
            }
            _ => panic!("expected paragraph"),
        }
    }

    #[test]
    fn test_parse_strikethrough() {
        let allocator = Allocator::new();
        let doc =
            Parser::with_options(&allocator, "~~done~~", ParserOptions::gfm()).parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::Paragraph(p) => {
                assert!(matches!(&p.children[0], Node::Delete(_)));
            }
            _ => panic!("expected paragraph"),
        }
    }

    #[test]
    fn test_parse_hard_break() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "line 1\\\nline 2").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::Paragraph(p) => {
                assert!(p.children.iter().any(|n| matches!(n, Node::Break(_))));
            }
            _ => panic!("expected paragraph"),
        }
    }

    #[test]
    fn test_parse_table() {
        let allocator = Allocator::new();
        let table_md = "| Header 1 | Header 2 |\n|----------|----------|\n| Cell 1 | Cell 2 |";
        let parser = Parser::with_options(&allocator, table_md, ParserOptions::gfm());
        let doc = parser.parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::Table(t) => {
                // Header row plus one body row.
                assert_eq!(t.children.len(), 2);
            }
            _ => panic!("expected table, got {:?}", &doc.children[0]),
        }
    }

    #[test]
    fn test_parse_unordered_list() {
        let allocator = Allocator::new();
        let list_md = "- Item 1\n- Item 2\n- Item 3";
        let parser = Parser::new(&allocator, list_md);
        let doc = parser.parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::List(list) => {
                assert!(!list.ordered);
                assert_eq!(list.children.len(), 3);
            }
            _ => panic!("expected list, got {:?}", &doc.children[0]),
        }
    }

    #[test]
    fn test_parse_ordered_list() {
        let allocator = Allocator::new();
        let list_md = "1. First\n2. Second\n3. Third";
        let parser = Parser::new(&allocator, list_md);
        let doc = parser.parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::List(list) => {
                assert!(list.ordered);
                assert_eq!(list.children.len(), 3);
            }
            _ => panic!("expected list, got {:?}", &doc.children[0]),
        }
    }

    #[test]
    fn test_parse_block_quote() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "> Hello world").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::BlockQuote(bq) => {
                assert_eq!(bq.children.len(), 1);
                assert!(matches!(&bq.children[0], Node::Paragraph(_)));
            }
            _ => panic!("expected block quote, got {:?}", &doc.children[0]),
        }
    }

    #[test]
    fn test_parse_block_quote_multiline() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "> line 1\n> line 2").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::BlockQuote(bq) => {
                assert_eq!(bq.children.len(), 1);
            }
            _ => panic!("expected block quote, got {:?}", &doc.children[0]),
        }
    }

    #[test]
    fn test_parse_nested_block_quote() {
        let allocator = Allocator::new();
        let doc = Parser::new(&allocator, "> > nested").parse().unwrap();
        assert_eq!(doc.children.len(), 1);
        match &doc.children[0] {
            Node::BlockQuote(bq) => {
                assert_eq!(bq.children.len(), 1);
                assert!(matches!(&bq.children[0], Node::BlockQuote(_)));
            }
            _ => panic!("expected block quote, got {:?}", &doc.children[0]),
        }
    }
}