use markdown2pdf::markdown::{Lexer, Token};
use std::fs;
use std::process;
/// Command-line entry point: lexes a markdown file and dumps its token tree.
///
/// The markdown path is taken from the first command-line argument. When it is
/// missing, a usage line is printed to stderr and the process exits with
/// status 2. When the file cannot be read or the lexer returns an error, the
/// failure is printed to stderr and the process exits with status 1.
///
/// On success the output is, in order: a header naming the file, one line per
/// top-level token (rendered by `summarize`), the top-level token count, and a
/// recursive tally of HTML block / inline / comment tokens versus all others.
fn main() {
    // Running totals for the recursive tally printed at the end.
    #[derive(Default)]
    struct HtmlTally {
        html_blocks: u32,
        html_inlines: u32,
        html_comments: u32,
        others: u32,
    }

    // Classifies every token in `tokens`, then descends into the children of
    // container tokens so nested HTML constructs are counted as well.
    fn tally(tokens: &[Token], counts: &mut HtmlTally) {
        for token in tokens {
            // Every visited token lands in exactly one bucket.
            match token {
                Token::HtmlBlock(_) => counts.html_blocks += 1,
                Token::HtmlInline(_) => counts.html_inlines += 1,
                Token::HtmlComment(_) => counts.html_comments += 1,
                _ => counts.others += 1,
            }

            // Containers are counted above and their children counted here.
            match token {
                Token::Heading(children, _)
                | Token::Emphasis { content: children, .. }
                | Token::StrongEmphasis(children)
                | Token::BlockQuote(children)
                | Token::ListItem { content: children, .. }
                | Token::Link { content: children, .. }
                | Token::Image { alt: children, .. }
                | Token::Strikethrough(children) => tally(children, counts),
                Token::Table { headers, rows, .. } => {
                    for header_cell in headers {
                        tally(header_cell, counts);
                    }
                    for body_cell in rows.iter().flatten() {
                        tally(body_cell, counts);
                    }
                }
                _ => {}
            }
        }
    }

    let args: Vec<String> = std::env::args().collect();
    let path = match args.get(1) {
        Some(p) => p,
        None => {
            eprintln!("usage: dump_ast <markdown-file>");
            process::exit(2);
        }
    };

    let input = fs::read_to_string(path).unwrap_or_else(|e| {
        eprintln!("failed to read {}: {}", path, e);
        process::exit(1)
    });

    let mut lexer = Lexer::new(input);
    let tokens = lexer.parse().unwrap_or_else(|e| {
        eprintln!("lexer error: {:?}", e);
        process::exit(1)
    });

    println!("=== AST for {} ===\n", path);
    for (index, token) in tokens.iter().enumerate() {
        println!("[{}] {}", index, summarize(token, 0));
    }
    println!("\n=== {} top-level tokens ===", tokens.len());

    let mut counts = HtmlTally::default();
    tally(&tokens, &mut counts);

    println!();
    println!("=== HTML construct tally (recursive) ===");
    println!(" HtmlBlock: {}", counts.html_blocks);
    println!(" HtmlInline: {}", counts.html_inlines);
    println!(" HtmlComment: {}", counts.html_comments);
    println!(" Other: {}", counts.others);
}
/// Renders a single token as a human-readable debug string.
///
/// `depth` is the nesting level of `tok`; children of container tokens are
/// rendered at `depth + 1`. The indentation padding derived from `depth` is
/// only used by the multi-line layouts (`Heading`, `BlockQuote`, `ListItem`);
/// the other containers render their children inline, separated by `", "`.
///
/// `Table` is summarized by its header count, row count and alignments only;
/// cell contents are not printed. The three HTML variants have their variant
/// name wrapped in the ANSI cyan escape sequence so they stand out in a
/// terminal. String payloads are rendered through `show`.
fn summarize(tok: &Token, depth: usize) -> String {
    // Renders each child token one nesting level below its parent.
    fn render_children(items: &[Token], parent_depth: usize) -> Vec<String> {
        items
            .iter()
            .map(|child| summarize(child, parent_depth + 1))
            .collect()
    }

    let pad = " ".repeat(depth);

    match tok {
        // Leaf tokens.
        Token::Text(text) => format!("Text({})", show(text)),
        Token::Newline => String::from("Newline"),
        Token::HardBreak => String::from("HardBreak"),
        Token::HorizontalRule => String::from("HorizontalRule"),
        Token::DelimRun { ch, count } => format!("DelimRun({}, {})", ch, count),

        // Multi-line container: children are separated by ",\n" plus padding.
        Token::Heading(content, level) => {
            let separator = format!(",\n{}", pad);
            let body = render_children(content, depth).join(&separator);
            format!("Heading(level={}, [\n{}{}\n{}])", level, pad, body, pad)
        }

        // Inline containers: children on one line.
        Token::Emphasis { level, content } => {
            let body = render_children(content, depth).join(", ");
            format!("Emphasis(level={}, [{}])", level, body)
        }
        Token::StrongEmphasis(body) => {
            let parts = render_children(body, depth);
            format!("StrongEmphasis([{}])", parts.join(", "))
        }

        Token::Code {
            language,
            content,
            block,
        } => format!(
            "Code(lang={:?}, block={}, content={})",
            language,
            block,
            show(content)
        ),

        // Multi-line containers whose children get one extra space of indent.
        Token::BlockQuote(body) => {
            let separator = format!(",\n{} ", pad);
            let parts = render_children(body, depth).join(&separator);
            format!("BlockQuote([\n{} {}\n{}])", pad, parts, pad)
        }
        Token::ListItem {
            ordered,
            number,
            marker,
            checked,
            loose,
            content,
        } => {
            let separator = format!(",\n{} ", pad);
            let parts = render_children(content, depth).join(&separator);
            format!(
                "ListItem(ordered={}, number={:?}, marker={:?}, checked={:?}, loose={}, [\n{} {}\n{}])",
                ordered, number, marker, checked, loose, pad, parts, pad
            )
        }

        Token::Link {
            content,
            url,
            title,
        } => {
            let body = render_children(content, depth).join(", ");
            format!("Link(url={:?}, title={:?}, [{}])", url, title, body)
        }
        Token::Image { alt, url, title } => {
            let body = render_children(alt, depth).join(", ");
            format!("Image(url={:?}, title={:?}, alt=[{}])", url, title, body)
        }

        // Only the table's shape is reported, not its cell contents.
        Token::Table {
            headers,
            aligns,
            rows,
        } => format!(
            "Table(cols={}, rows={}, aligns={:?})",
            headers.len(),
            rows.len(),
            aligns
        ),
        Token::TableAlignment(alignment) => format!("TableAlignment({:?})", alignment),

        Token::Strikethrough(body) => {
            let parts = render_children(body, depth);
            format!("Strikethrough([{}])", parts.join(", "))
        }

        // HTML variants: the variant name is highlighted in cyan.
        Token::HtmlBlock(html) => format!("\x1b[36mHtmlBlock\x1b[0m({})", show(html)),
        Token::HtmlInline(html) => format!("\x1b[36mHtmlInline\x1b[0m({})", show(html)),
        Token::HtmlComment(html) => format!("\x1b[36mHtmlComment\x1b[0m({})", show(html)),

        Token::Unknown(raw) => format!("Unknown({})", show(raw)),
    }
}
/// Wraps `s` in double quotes, rendering line-structure characters visibly.
///
/// Newline, tab and carriage return are replaced by the two-character
/// sequences `\n`, `\t` and `\r` so the result always fits on one line.
/// Every other character, including `"` and `\`, is copied through unchanged.
fn show(s: &str) -> String {
    // Room for the input plus the two surrounding quotes; grows if escapes occur.
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        match ch {
            '\n' => quoted.push_str("\\n"),
            '\t' => quoted.push_str("\\t"),
            '\r' => quoted.push_str("\\r"),
            other => quoted.push(other),
        }
    }
    quoted.push('"');
    quoted
}