use crate::error::Result;
use crate::model::{
Align, Block, BlockImage, Cell, ColSpec, Column, Columns, Document, ImageSource, List, ListItem,
ListKind, Table, TableStyle,
};
mod attrs;
mod inline;
pub(crate) use attrs::{parse_attrs, Attr};
/// Parses `src` into a [`Document`] by splitting it into lines and handing them to the
/// block parser.
///
/// This function itself never produces an `Err`; the `Result` wrapper is part of the signature.
pub fn parse(src: &str) -> Result<Document> {
    let lines = src.lines().map(str::to_owned).collect::<Vec<String>>();
    let blocks = parse_blocks(&lines);
    Ok(Document { blocks })
}
/// Returns the number of bytes of leading whitespace in `s`.
fn indent_of(s: &str) -> usize {
    let without_indent = s.trim_start();
    s.len() - without_indent.len()
}
/// Removes up to `n` leading space characters from `s`.
///
/// Only ASCII spaces count; a tab or any other character stops the stripping.
fn dedent(s: &str, n: usize) -> String {
    let leading_spaces = s.bytes().take_while(|&b| b == b' ').count();
    let cut = leading_spaces.min(n);
    s[cut..].to_owned()
}
/// Parses a run of lines into a sequence of block-level elements.
///
/// Blank lines are skipped between blocks. Each non-blank line, with its leading whitespace
/// removed, is tried against the block forms in a fixed order: fenced code, `:::` container,
/// heading, divider, block quote, standalone image, list, table. A line that matches none of
/// them starts a paragraph. Containers, quotes and list items are parsed by recursing on
/// their inner lines.
fn parse_blocks(lines: &[String]) -> Vec<Block> {
    let mut blocks = Vec::new();
    let mut i = 0;
    while i < lines.len() {
        let line = &lines[i];
        if line.trim().is_empty() {
            i += 1;
            continue;
        }
        let ind = indent_of(line);
        let content = &line[ind..];

        // Fenced code: every line up to the next fence is kept verbatim.
        if let Some(lang) = content.strip_prefix("```") {
            let lang = lang.trim();
            let mut text = Vec::new();
            i += 1;
            while i < lines.len() && !lines[i].trim_start().starts_with("```") {
                text.push(lines[i].as_str());
                i += 1;
            }
            // Step over the closing fence. If the fence was never closed this overshoots the
            // end of `lines`, which simply ends the outer loop.
            i += 1;
            blocks.push(Block::Code {
                lang: if lang.is_empty() { None } else { Some(lang.to_string()) },
                text: text.join("\n"),
            });
            continue;
        }

        // `::: word` container; `gather_div` moves `i` past the matching `:::`.
        if is_fence_open(content) {
            let word = content[3..].trim();
            let inner = gather_div(lines, &mut i);
            if word == "columns" {
                blocks.push(Block::Columns(Columns { cols: parse_columns(&inner), gap: None }));
            } else if let Some(align) = align_from_word(word) {
                let mut sub = parse_blocks(&inner);
                apply_align(&mut sub, align);
                blocks.extend(sub);
            } else {
                // Any other word is transparent: its content is spliced into this level.
                blocks.extend(parse_blocks(&inner));
            }
            continue;
        }

        if let Some((level, rest)) = heading(content) {
            let (text, align) = split_trailing_attrs(rest);
            blocks.push(Block::Heading { level, inlines: inline::parse_inlines(&text), align });
            i += 1;
            continue;
        }

        if is_hr(content) {
            blocks.push(Block::Divider);
            i += 1;
            continue;
        }

        // Block quote: consecutive lines starting with `>`; one space after the marker is
        // dropped and the remainder is parsed recursively.
        if content.starts_with('>') {
            let mut inner = Vec::new();
            while i < lines.len() {
                let t = lines[i].trim_start();
                let Some(r) = t.strip_prefix('>') else { break };
                inner.push(r.strip_prefix(' ').unwrap_or(r).to_string());
                i += 1;
            }
            blocks.push(Block::Quote(parse_blocks(&inner)));
            continue;
        }

        if let Some(img) = block_image(content) {
            blocks.push(Block::Image(img));
            i += 1;
            continue;
        }

        if list_marker(content).is_some() {
            let (list, next) = parse_list(lines, i, ind);
            blocks.push(Block::List(list));
            i = next;
            continue;
        }

        // A table needs a delimiter row directly below its header row.
        if content.contains('|') && i + 1 < lines.len() && is_table_delim(lines[i + 1].trim()) {
            let (table, next) = parse_table(lines, i);
            blocks.push(Block::Table(table));
            i = next;
            continue;
        }

        // Paragraph: runs until a blank line or a line that starts another block. The first
        // line can never be a block start (every such form was tested above), so this loop
        // always consumes at least one line.
        let mut para = String::new();
        while i < lines.len() {
            let l = &lines[i];
            if l.trim().is_empty() {
                break;
            }
            let c = &l[indent_of(l)..];
            if is_block_start(c) {
                break;
            }
            // A trailing backslash marks a hard line break.
            let trimmed = c.trim();
            let (piece, hard) = match trimmed.strip_suffix('\\') {
                Some(p) => (p.trim_end(), true),
                None => (trimmed, false),
            };
            append_soft(&mut para, piece);
            if hard {
                para.push('\n');
            }
            i += 1;
        }
        let (text, align) = split_trailing_attrs(&para);
        blocks.push(Block::Paragraph { inlines: inline::parse_inlines(&text), align });
    }
    blocks
}
/// Reports whether `c` (a line with its indentation already removed) begins a block other
/// than a paragraph: code fence, `:::` container, divider, quote, heading, list item or
/// standalone image.
fn is_block_start(c: &str) -> bool {
    if c.starts_with("```") || is_fence_open(c) || is_hr(c) || c.starts_with('>') {
        return true;
    }
    heading(c).is_some() || list_marker(c).is_some() || block_image(c).is_some()
}
/// Reports whether `c` is a divider line: three or more repetitions of a single `-`, `*` or
/// `_` character and nothing else.
fn is_hr(c: &str) -> bool {
    match c.as_bytes() {
        [first @ (b'-' | b'*' | b'_'), rest @ ..] if rest.len() >= 2 => {
            rest.iter().all(|x| x == first)
        }
        _ => false,
    }
}
/// Parses the list whose first item starts at `lines[start]`.
///
/// `base` is the indentation of the list's markers. Items must sit at exactly that indentation
/// and share the first item's kind (ordered or unordered). Lines indented deeper than `base`
/// belong to the current item and are parsed recursively as its blocks.
///
/// Returns the list together with the index of the first line that was not consumed.
///
/// # Panics
///
/// Panics if `lines[start][base..]` does not begin with a list marker.
fn parse_list(lines: &[String], start: usize, base: usize) -> (List, usize) {
    let (ordered, first_start, _) = list_marker(&lines[start][base..]).unwrap();
    let kind = if ordered { ListKind::Ordered } else { ListKind::Unordered };
    let mut items = Vec::new();
    let mut pos = start;
    while let Some(current) = lines.get(pos) {
        if current.trim().is_empty() {
            // A blank line between items is tolerated only when the next non-blank line is
            // indented at least as far as the list itself.
            match next_nonblank_indent(lines, pos + 1) {
                Some(n) if n >= base => {
                    pos += 1;
                    continue;
                }
                _ => break,
            }
        }
        // The next item must carry a marker of the same kind at exactly `base`.
        if indent_of(current) != base {
            break;
        }
        let Some((ord, _, off)) = list_marker(&current[base..]) else { break };
        if ord != ordered {
            break;
        }

        let content_indent = base + off;
        let (first_line, check) = split_task_mark(&current[content_indent..]);
        let mut item_lines = vec![first_line];
        pos += 1;

        // Gather continuation lines: everything indented deeper than the marker.
        while let Some(next) = lines.get(pos) {
            if next.trim().is_empty() {
                // Blank lines stay inside the item only if more item content follows.
                match next_nonblank_indent(lines, pos + 1) {
                    Some(n) if n > base => {
                        item_lines.push(String::new());
                        pos += 1;
                        continue;
                    }
                    _ => break,
                }
            }
            if indent_of(next) <= base {
                break;
            }
            item_lines.push(dedent(next, content_indent));
            pos += 1;
        }
        items.push(ListItem { blocks: parse_blocks(&item_lines), check });
    }
    (List { kind, start: first_start.max(1), items }, pos)
}
/// Splits a leading task checkbox (`[ ]`, `[x]` or `[X]`) off a list item's text.
///
/// Returns the remaining text and `Some(done)` when the checkbox is followed by a whitespace
/// character or by nothing at all. Otherwise returns `s` unchanged together with `None`.
fn split_task_mark(s: &str) -> (String, Option<bool>) {
    let unmarked = || (s.to_string(), None);
    let (done, tail) = if let Some(t) = s.strip_prefix("[ ]") {
        (false, t)
    } else if let Some(t) = s.strip_prefix("[x]").or_else(|| s.strip_prefix("[X]")) {
        (true, t)
    } else {
        return unmarked();
    };
    let mut after = tail.chars();
    match after.next() {
        None => (String::new(), Some(done)),
        // Exactly one whitespace character separating the box from the text is dropped.
        Some(sep) if sep.is_whitespace() => (after.as_str().to_string(), Some(done)),
        Some(_) => unmarked(),
    }
}
fn next_nonblank_indent(lines: &[String], from: usize) -> Option<usize> {
lines[from..].iter().find(|l| !l.trim().is_empty()).map(|l| indent_of(l))
}
/// Recognizes a heading line: one to six `#` characters followed by a space.
///
/// Returns the heading level and the trimmed text after the marker, or `None` when `c` is not
/// a heading.
fn heading(c: &str) -> Option<(u8, &str)> {
    let after_hashes = c.trim_start_matches('#');
    let hashes = c.len() - after_hashes.len();
    if !(1..=6).contains(&hashes) {
        return None;
    }
    let text = after_hashes.strip_prefix(' ')?;
    Some((hashes as u8, text.trim()))
}
/// Recognizes a list marker at the start of `c`.
///
/// A bullet is `-`, `*` or `+`; an ordered marker is a run of ASCII digits followed by `.` or
/// `)`. Either must be followed by a space or a tab.
///
/// Returns `(ordered, number, offset)`, where `offset` is the byte length of the marker plus
/// its separator. `number` is `0` for bullets and falls back to `1` when the digits do not
/// fit in a `u32`.
fn list_marker(c: &str) -> Option<(bool, u32, usize)> {
    let bytes = c.as_bytes();
    let is_gap = |at: usize| matches!(bytes.get(at), Some(b' ' | b'\t'));

    if let Some(b'-' | b'*' | b'+') = bytes.first() {
        if is_gap(1) {
            return Some((false, 0, 2));
        }
    }

    let digits = bytes.iter().take_while(|x| x.is_ascii_digit()).count();
    let has_delim = matches!(bytes.get(digits), Some(b'.' | b')'));
    if digits == 0 || !has_delim || !is_gap(digits + 1) {
        return None;
    }
    let number = c[..digits].parse::<u32>().unwrap_or(1);
    Some((true, number, digits + 2))
}
fn block_image(c: &str) -> Option<BlockImage> {
let c = c.trim();
let rest = c.strip_prefix("?;
if !c.ends_with(')') {
return None;
}
let alt = &rest[..close_alt];
let src = &rest[close_alt + 2..rest.len() - 1];
if src.is_empty() {
return None;
}
Some(BlockImage {
src: image_source(src),
width: None,
align: Align::Left,
caption: if alt.trim().is_empty() { None } else { Some(inline::parse_inlines(alt.trim())) },
decor: crate::model::ImageDecor::default(),
})
}
/// Classifies an image source string: a leading `@` selects a named source (the name is the
/// text after the `@`); anything else is treated as a path.
pub(crate) fn image_source(src: &str) -> ImageSource {
    if let Some(name) = src.strip_prefix('@') {
        ImageSource::Named(name.to_string())
    } else {
        ImageSource::Path(src.into())
    }
}
/// Maps an alignment keyword (`left`, `center`/`centre`, `right`, `justify`) to an [`Align`].
///
/// The match is exact and case-sensitive; any other word yields `None`.
fn align_from_word(w: &str) -> Option<Align> {
    let align = match w {
        "left" => Align::Left,
        "center" | "centre" => Align::Center,
        "right" => Align::Right,
        "justify" => Align::Justify,
        _ => return None,
    };
    Some(align)
}
/// Reports whether `c` opens a `:::` container, i.e. starts with `:::` followed by at least
/// one non-whitespace character. A bare `:::` is a closer, not an opener.
fn is_fence_open(c: &str) -> bool {
    match c.strip_prefix(":::") {
        Some(word) => !word.trim().is_empty(),
        None => false,
    }
}
/// Collects the lines inside a `:::` container.
///
/// On entry `*i` is the index of the opening line. On return `*i` is just past the matching
/// bare `:::`, or at the end of `lines` if the container is never closed. Nested containers
/// are tracked by depth, and their opening and closing lines are included in the result.
fn gather_div(lines: &[String], i: &mut usize) -> Vec<String> {
    *i += 1;
    let mut depth = 1usize;
    let mut inner = Vec::new();
    while let Some(line) = lines.get(*i) {
        *i += 1;
        let t = line.trim();
        if t == ":::" {
            depth -= 1;
            if depth == 0 {
                // The closer that matches our own opener is consumed but not collected.
                return inner;
            }
        } else if is_fence_open(t) {
            depth += 1;
        }
        inner.push(line.clone());
    }
    inner
}
/// Parses the body of a `columns` container into its columns.
///
/// Each `::: col [weight]` section becomes one column. Lines outside such sections are
/// ignored. A weight that is missing, unparsable or not greater than zero falls back to `1.0`.
fn parse_columns(inner: &[String]) -> Vec<Column> {
    let mut cols = Vec::new();
    let mut idx = 0;
    while let Some(line) = inner.get(idx) {
        let header = line.trim().strip_prefix(":::").unwrap_or("");
        let mut words = header.split_whitespace();
        if words.next() != Some("col") {
            idx += 1;
            continue;
        }
        let weight = match words.next().and_then(|w| w.parse::<f32>().ok()) {
            Some(w) if w > 0.0 => w,
            _ => 1.0,
        };
        // `gather_div` advances `idx` past this column's closing `:::`.
        let col_lines = gather_div(inner, &mut idx);
        cols.push(Column { blocks: parse_blocks(&col_lines), weight });
    }
    cols
}
/// Reports whether `t` is a table delimiter row: every cell consists only of `-` and `:`
/// characters and contains at least one `-`.
fn is_table_delim(t: &str) -> bool {
    let cells = split_row(t);
    if cells.is_empty() {
        return false;
    }
    cells.iter().all(|cell| {
        let only_marks = cell.bytes().all(|b| matches!(b, b'-' | b':'));
        only_marks && cell.contains('-')
    })
}
/// Splits a table row into trimmed cell strings.
///
/// One leading and one trailing `|` are dropped first. A `|` inside a backtick span, or one
/// preceded by a backslash, does not split; the backslash and backticks stay in the cell text.
/// The result always holds at least one cell.
fn split_row(line: &str) -> Vec<String> {
    let trimmed = line.trim();
    let body = trimmed.strip_prefix('|').unwrap_or(trimmed);
    let body = body.strip_suffix('|').unwrap_or(body);

    let mut cells = Vec::new();
    let mut cell = String::new();
    let mut in_code = false;
    let mut rest = body.chars();
    while let Some(ch) = rest.next() {
        if ch == '`' {
            in_code = !in_code;
            cell.push(ch);
        } else if in_code {
            // Inside a code span every character is literal.
            cell.push(ch);
        } else if ch == '\\' {
            // Keep the escape and whatever it escapes together in the cell.
            cell.push(ch);
            cell.extend(rest.next());
        } else if ch == '|' {
            cells.push(cell.trim().to_string());
            cell.clear();
        } else {
            cell.push(ch);
        }
    }
    cells.push(cell.trim().to_string());
    cells
}
/// Derives per-column alignment from a table delimiter row.
///
/// A cell ending in `:` is right-aligned, or centered when it also starts with `:`. Every
/// other cell is left-aligned.
fn parse_align_row(line: &str) -> Vec<Align> {
    let mut aligns = Vec::new();
    for cell in split_row(line) {
        let align = if !cell.ends_with(':') {
            Align::Left
        } else if cell.starts_with(':') {
            Align::Center
        } else {
            Align::Right
        };
        aligns.push(align);
    }
    aligns
}
/// Parses a table whose header row is `lines[start]` and whose delimiter row is
/// `lines[start + 1]`.
///
/// Body rows continue for as long as a line contains a `|`. Returns the table and the index
/// of the first line after it.
fn parse_table(lines: &[String], start: usize) -> (Table, usize) {
    /// Turns one row of text into cells with parsed inline content and no background.
    fn cells_of(row: &str) -> Vec<Cell> {
        split_row(row)
            .iter()
            .map(|text| Cell { inlines: inline::parse_inlines(text), bg: None })
            .collect()
    }

    let header = cells_of(lines[start].trim());

    let mut cols: Vec<ColSpec> = Vec::new();
    for align in parse_align_row(lines[start + 1].trim()) {
        cols.push(ColSpec { align, width: None });
    }

    let body_start = start + 2;
    let rows: Vec<Vec<Cell>> = lines[body_start..]
        .iter()
        .map(|l| l.trim())
        .take_while(|t| !t.is_empty() && t.contains('|'))
        .map(cells_of)
        .collect();
    let next = body_start + rows.len();

    (Table { header: Some(header), rows, cols, style: TableStyle::default() }, next)
}
/// Sets `align` on every heading and paragraph in `blocks`, descending into quotes and list
/// items. Other block kinds are left untouched.
fn apply_align(blocks: &mut [Block], align: Align) {
    for block in blocks.iter_mut() {
        match block {
            Block::Heading { align: slot, .. } | Block::Paragraph { align: slot, .. } => {
                *slot = align;
            }
            Block::Quote(children) => apply_align(children, align),
            Block::List(list) => {
                list.items.iter_mut().for_each(|item| apply_align(&mut item.blocks, align));
            }
            _ => {}
        }
    }
}
/// Splits a trailing `{...}` attribute group off `s` and extracts an alignment from it.
///
/// The group is recognized only when it ends the (right-trimmed) text and is either preceded
/// by a space or makes up the whole text. The first `align=<word>` pair or bare flag that
/// names an alignment wins; if none does, the alignment is `Align::Left`. When a group is
/// recognized it is removed from the returned text even if it carried no alignment.
fn split_trailing_attrs(s: &str) -> (String, Align) {
    let t = s.trim_end();
    let untouched = || (t.to_string(), Align::Left);

    let Some(body) = t.strip_suffix('}') else { return untouched() };
    let Some(open) = body.rfind('{') else { return untouched() };
    let before = &body[..open];
    if !(before.is_empty() || before.ends_with(' ')) {
        return untouched();
    }

    let inside = &body[open + 1..];
    let found = parse_attrs(inside).iter().find_map(|a| match a {
        Attr::Kv(k, v) if k == "align" => align_from_word(v),
        Attr::Flag(f) => align_from_word(f),
        _ => None,
    });
    (before.trim_end().to_string(), found.unwrap_or(Align::Left))
}
/// Appends `next` to `buf` as a soft-wrapped continuation.
///
/// A single space is inserted between the two unless `buf` is empty, `buf` ends in a newline,
/// or `needs_space` says the adjoining characters should touch. An empty `next` is a no-op.
fn append_soft(buf: &mut String, next: &str) {
    let Some(head) = next.chars().next() else { return };
    let wants_gap = match buf.chars().next_back() {
        Some(tail) => tail != '\n' && needs_space(tail, head),
        None => false,
    };
    if wants_gap {
        buf.push(' ');
    }
    buf.push_str(next);
}
/// Reports whether a space belongs between adjoining characters `a` and `b` when two lines
/// are joined.
///
/// Returns `false` as soon as either character lies in U+2E80..=U+9FFF or U+FF00..=U+FFEF.
fn needs_space(a: char, b: char) -> bool {
    let in_cjk_ranges = |c: char| {
        ('\u{2E80}'..='\u{9FFF}').contains(&c) || ('\u{FF00}'..='\u{FFEF}').contains(&c)
    };
    !(in_cjk_ranges(a) || in_cjk_ranges(b))
}