use crate::ast::node::TextSpan;
use super::inline::parse_inline_markdown;
/// The marker style of a list item.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ListKind {
    /// Item introduced by a bullet character (`-`, `*` or `+`).
    Unordered,
    /// Item introduced by a decimal number followed by `. `.
    Ordered,
}
/// A single block-level Markdown element.
///
/// Text-bearing variants carry their content as inline spans; `CodeBlock`
/// carries its content as a raw string.
#[derive(Debug, Clone, PartialEq)]
pub enum MdBlock {
    /// ATX heading; the parser in this file produces `level` values 1 through 6
    /// (the number of leading `#` characters).
    Heading { level: u8, spans: Vec<TextSpan> },
    /// Run of consecutive plain lines; the parser joins them with single spaces.
    Paragraph { spans: Vec<TextSpan> },
    /// Run of consecutive `>`-prefixed lines; the parser joins them with single spaces.
    Blockquote { spans: Vec<TextSpan> },
    /// One list item. Items are emitted flat, one block per line.
    ListItem {
        /// Bullet or numbered.
        kind: ListKind,
        /// Nesting level; the parser computes it as leading spaces divided by two.
        depth: u32,
        /// The written number for ordered items, `None` for unordered ones.
        ordinal: Option<u32>,
        /// Inline content following the list marker.
        spans: Vec<TextSpan>,
    },
    /// Fenced code block delimited by triple backticks.
    CodeBlock {
        /// Text following the opening fence, or `None` when there was none.
        lang: Option<String>,
        /// Lines between the fences, joined with `\n`.
        content: String,
    },
    /// Thematic break such as `---`, `***` or `___`.
    HorizontalRule,
}
/// The multi-line block currently being accumulated by the block parser.
#[derive(Debug)]
enum Open {
    /// Nothing is being accumulated.
    None,
    /// Lines collected so far for a paragraph.
    Paragraph(Vec<String>),
    /// Lines collected so far for a blockquote, already stripped of their `>` prefix.
    Blockquote(Vec<String>),
    /// A fenced code block whose closing fence has not been seen yet.
    Code {
        /// Text that followed the opening fence, if any.
        lang: Option<String>,
        /// Lines collected so far, kept verbatim.
        lines: Vec<String>,
    },
}
pub fn parse_block_markdown(input: &str) -> Vec<MdBlock> {
let mut out: Vec<MdBlock> = Vec::new();
let mut open: Open = Open::None;
for raw_line in input.split('\n') {
let line = raw_line.trim_end_matches('\r');
if let Open::Code { lang, lines } = &mut open {
if line.trim() == "```" {
let content = lines.join("\n");
let lang_out = lang.take();
out.push(MdBlock::CodeBlock {
lang: lang_out,
content,
});
open = Open::None;
} else {
lines.push(line.to_owned());
}
continue;
}
if line.trim().is_empty() {
flush(&mut open, &mut out);
continue;
}
let line_trimmed_start = line.trim_start();
if line_trimmed_start.starts_with("```") {
flush(&mut open, &mut out);
let after_backticks = line_trimmed_start.get(3..).unwrap_or("").trim();
let lang = if after_backticks.is_empty() {
None
} else {
Some(after_backticks.to_owned())
};
open = Open::Code {
lang,
lines: Vec::new(),
};
continue;
}
if is_horizontal_rule(line) {
flush(&mut open, &mut out);
out.push(MdBlock::HorizontalRule);
continue;
}
if let Some((level, text)) = parse_atx_heading(line) {
flush(&mut open, &mut out);
let spans = parse_inline_markdown(text);
out.push(MdBlock::Heading { level, spans });
continue;
}
if let Some(inner) = strip_blockquote_prefix(line) {
match &mut open {
Open::Blockquote(lines) => {
lines.push(inner.to_owned());
}
Open::None | Open::Paragraph(_) | Open::Code { .. } => {
flush(&mut open, &mut out);
open = Open::Blockquote(vec![inner.to_owned()]);
}
}
continue;
}
if matches!(&open, Open::Blockquote(_)) {
flush(&mut open, &mut out);
}
if let Some(item) = parse_list_item(line) {
flush(&mut open, &mut out);
let spans = parse_inline_markdown(item.text);
out.push(MdBlock::ListItem {
kind: item.kind,
depth: item.depth,
ordinal: item.ordinal,
spans,
});
continue;
}
match &mut open {
Open::Paragraph(lines) => {
lines.push(line.to_owned());
}
Open::None | Open::Blockquote(_) | Open::Code { .. } => {
open = Open::Paragraph(vec![line.to_owned()]);
}
}
}
flush(&mut open, &mut out);
out
}
/// Emits the block being accumulated in `open` (if any) into `out` and
/// resets `open` to [`Open::None`].
///
/// Paragraph and blockquote lines are joined with a single space before
/// inline parsing; an empty line list emits nothing. Code lines are joined
/// with `\n` and emitted even when there are none.
fn flush(open: &mut Open, out: &mut Vec<MdBlock>) {
    let finished = match std::mem::replace(open, Open::None) {
        Open::None => return,
        Open::Paragraph(lines) if lines.is_empty() => return,
        Open::Blockquote(lines) if lines.is_empty() => return,
        Open::Paragraph(lines) => MdBlock::Paragraph {
            spans: parse_inline_markdown(&lines.join(" ")),
        },
        Open::Blockquote(lines) => MdBlock::Blockquote {
            spans: parse_inline_markdown(&lines.join(" ")),
        },
        Open::Code { lang, lines } => MdBlock::CodeBlock {
            lang,
            content: lines.join("\n"),
        },
    };
    out.push(finished);
}
/// Returns `true` when `line` is a thematic break.
///
/// After trimming surrounding whitespace the line must start with `-`, `*`
/// or `_`, consist solely of that character and ASCII spaces, and contain
/// the character at least three times.
fn is_horizontal_rule(line: &str) -> bool {
    let body = line.trim();
    let marker = match body.chars().next() {
        Some(c) if "-*_".contains(c) => c,
        _ => return false,
    };
    // Anything other than the marker or a plain space disqualifies the line.
    let clean = body.chars().all(|c| c == marker || c == ' ');
    clean && body.chars().filter(|&c| c == marker).count() >= 3
}
/// Parses an ATX heading line such as `# Title` or `## Title ##`.
///
/// Returns `Some((level, text))` where `level` is the number of leading `#`
/// characters (1 through 6) and `text` is the heading content borrowed from
/// `line`. Returns `None` when the line does not start (after leading
/// whitespace) with one to six `#`, or when the `#` run is followed by
/// anything other than a space or end of line.
///
/// Exactly one space after the opening `#` run is consumed and trailing
/// whitespace is trimmed. An optional closing run of `#` is removed only
/// when it is preceded by whitespace or makes up the whole content, so a
/// `#` attached to the text (as in `# C#`) is kept.
fn parse_atx_heading(line: &str) -> Option<(u8, &str)> {
    let s = line.trim_start();
    let hash_count = s.bytes().take_while(|&b| b == b'#').count();
    if hash_count == 0 || hash_count > 6 {
        return None;
    }
    let rest = s.get(hash_count..)?;
    // `#hashtag` is not a heading: the marker must be followed by a space
    // (or nothing at all, for an empty heading).
    let inner = if rest.is_empty() {
        ""
    } else {
        rest.strip_prefix(' ')?
    };
    let inner = inner.trim_end();
    let without_closing = inner.trim_end_matches('#');
    // A trailing `#` run is a closing sequence only when separated from the
    // text by whitespace, or when nothing else remains.
    let is_closing_sequence =
        without_closing.is_empty() || without_closing.ends_with(char::is_whitespace);
    let text = if is_closing_sequence {
        without_closing.trim_end()
    } else {
        inner
    };
    // `hash_count` is in 1..=6 here, so the cast cannot truncate.
    Some((hash_count as u8, text))
}
/// Removes a blockquote marker from the start of `line`.
///
/// Leading whitespace is skipped; if the next character is `>` the marker
/// and at most one following space are removed and the remainder is
/// returned. Returns `None` when the line is not a blockquote line.
fn strip_blockquote_prefix(line: &str) -> Option<&str> {
    let quoted = line.trim_start().strip_prefix('>')?;
    // Only a single space after `>` belongs to the marker.
    Some(quoted.strip_prefix(' ').unwrap_or(quoted))
}
/// The pieces of a recognised list-item line, borrowing its text from the
/// source line.
struct ListItemData<'a> {
    /// Bullet or numbered.
    kind: ListKind,
    /// Nesting level derived from the line's leading spaces.
    depth: u32,
    /// The written number for ordered items, `None` for unordered ones.
    ordinal: Option<u32>,
    /// Content after the list marker, with trailing whitespace removed.
    text: &'a str,
}
/// Recognises a single list-item line.
///
/// Two forms are accepted after leading whitespace:
///
/// * a bullet (`-`, `*` or `+`) followed by a space, yielding an unordered item;
/// * one or more ASCII digits followed by `. `, yielding an ordered item whose
///   ordinal is the parsed number.
///
/// `depth` is the number of leading ASCII spaces divided by two (integer
/// division). The returned text has trailing whitespace removed. Returns
/// `None` for any other line, including a numbered line whose number does
/// not fit in a `u32`.
fn parse_list_item(line: &str) -> Option<ListItemData<'_>> {
    let depth = (line.count_ascii_lead_spaces() / 2) as u32;
    let body = line.trim_start();

    // Bullet form: one marker character, then a mandatory space.
    let after_bullet = body
        .strip_prefix(|c: char| matches!(c, '-' | '*' | '+'))
        .and_then(|rest| rest.strip_prefix(' '));
    if let Some(text) = after_bullet {
        return Some(ListItemData {
            kind: ListKind::Unordered,
            depth,
            ordinal: None,
            text: text.trim_end(),
        });
    }

    // Numbered form: digits, then ". ".
    let digit_len = body.bytes().take_while(u8::is_ascii_digit).count();
    if digit_len == 0 {
        return None;
    }
    let text = body.get(digit_len..)?.strip_prefix(". ")?;
    let ordinal = body.get(..digit_len)?.parse::<u32>().ok()?;
    Some(ListItemData {
        kind: ListKind::Ordered,
        depth,
        ordinal: Some(ordinal),
        text: text.trim_end(),
    })
}
/// Extension trait for counting the ASCII spaces at the start of a string.
trait CountAsciiLeadSpaces {
    /// Returns how many consecutive `' '` (U+0020) bytes begin the value.
    /// Tabs and other whitespace are not counted and stop the count.
    fn count_ascii_lead_spaces(&self) -> usize;
}

impl CountAsciiLeadSpaces for str {
    fn count_ascii_lead_spaces(&self) -> usize {
        // Index of the first non-space byte; the whole length if there is none.
        self.bytes()
            .position(|b| b != b' ')
            .unwrap_or(self.len())
    }
}