markdown2pdf 0.4.0

Create PDF with Markdown files (a md to pdf transpiler)
Documentation
//! Reads a markdown file and prints the lexer's AST in a compact
//! one-token-per-line format. Useful for verifying what the lexer
//! produces without going through the PDF renderer (which skips
//! HtmlBlock tokens).
//!
//! Usage:
//!   cargo run --example dump_ast -- path/to/file.md
//!
//! Output format: each top-level token on its own line, with nested
//! token bodies indented. HtmlBlock / HtmlInline / HtmlComment payloads
//! are shown with `\n` made visible so you can verify the verbatim
//! content matches your input.

use markdown2pdf::markdown::{Lexer, Token};
use std::fs;
use std::process;

/// Entry point: lexes the markdown file named by the first CLI argument,
/// prints one summary line per top-level token, and finishes with a
/// recursive tally of the HTML-related token kinds.
///
/// Exits with status 2 when no path argument is given and with status 1
/// when the file cannot be read or the lexer reports an error.
fn main() {
    /// Running totals of the token kinds encountered during the tally walk.
    #[derive(Default)]
    struct Tally {
        html_blocks: u32,
        html_inlines: u32,
        html_comments: u32,
        others: u32,
    }

    /// Counts every token in `toks`, descending into any token that
    /// carries nested tokens (including table header and body cells).
    fn tally(toks: &[Token], counts: &mut Tally) {
        for tok in toks {
            // Pick the counter matching this token's kind, then bump it.
            let slot = match tok {
                Token::HtmlBlock(_) => &mut counts.html_blocks,
                Token::HtmlInline(_) => &mut counts.html_inlines,
                Token::HtmlComment(_) => &mut counts.html_comments,
                _ => &mut counts.others,
            };
            *slot += 1;

            // Container tokens are counted themselves (above) and their
            // children are counted as well.
            match tok {
                Token::Heading(nested, _)
                | Token::Emphasis { content: nested, .. }
                | Token::StrongEmphasis(nested)
                | Token::BlockQuote(nested)
                | Token::ListItem { content: nested, .. }
                | Token::Link { content: nested, .. }
                | Token::Image { alt: nested, .. }
                | Token::Strikethrough(nested) => tally(nested, counts),
                Token::Table { headers, rows, .. } => {
                    for header_cell in headers {
                        tally(header_cell, counts);
                    }
                    for row in rows {
                        for body_cell in row {
                            tally(body_cell, counts);
                        }
                    }
                }
                _ => {}
            }
        }
    }

    // The first positional argument is the markdown file to dump.
    let args: Vec<String> = std::env::args().collect();
    let path = match args.get(1) {
        Some(p) => p,
        None => {
            eprintln!("usage: dump_ast <markdown-file>");
            process::exit(2);
        }
    };

    let input = fs::read_to_string(path).unwrap_or_else(|e| {
        eprintln!("failed to read {}: {}", path, e);
        process::exit(1)
    });

    let mut lexer = Lexer::new(input);
    let tokens = lexer.parse().unwrap_or_else(|e| {
        eprintln!("lexer error: {:?}", e);
        process::exit(1)
    });

    println!("=== AST for {} ===\n", path);
    for (index, token) in tokens.iter().enumerate() {
        println!("[{}] {}", index, summarize(token, 0));
    }
    println!("\n=== {} top-level tokens ===", tokens.len());

    // Per-kind tally so a quick glance shows whether HTML constructs
    // got picked up as expected.
    let mut counts = Tally::default();
    tally(&tokens, &mut counts);

    println!();
    println!("=== HTML construct tally (recursive) ===");
    println!("  HtmlBlock:   {}", counts.html_blocks);
    println!("  HtmlInline:  {}", counts.html_inlines);
    println!("  HtmlComment: {}", counts.html_comments);
    println!("  Other:       {}", counts.others);
}

/// Renders `tok` as a compact, human-readable summary string.
///
/// `depth` is the nesting level of `tok`; each level contributes two
/// spaces of indentation. Block-style containers (`Heading`, `BlockQuote`,
/// `ListItem`) put every child on its own line, indented one step deeper
/// than the container, with the closing bracket back at the container's
/// own indentation. Inline containers (`Emphasis`, `StrongEmphasis`,
/// `Link`, `Image`, `Strikethrough`) list their children on one line.
/// `Table` reports only its column count, row count and alignments; cell
/// contents are not printed.
///
/// The three HTML variants have their names wrapped in ANSI colour escape
/// sequences so they stand out in terminal output.
fn summarize(tok: &Token, depth: usize) -> String {
    // Summaries of the child tokens, each rendered one nesting level deeper.
    fn children(toks: &[Token], depth: usize) -> Vec<String> {
        toks.iter().map(|t| summarize(t, depth + 1)).collect()
    }

    // Bracketed multi-line layout shared by all block-style containers:
    // every item sits on its own line at `pad` plus two spaces, and the
    // closing bracket lines up with `pad`.
    fn block_body(items: &[String], pad: &str) -> String {
        let separator = format!(",\n{}  ", pad);
        format!("[\n{}  {}\n{}]", pad, items.join(separator.as_str()), pad)
    }

    let pad = "  ".repeat(depth);
    match tok {
        Token::Text(s) => format!("Text({})", show(s)),
        Token::Newline => "Newline".to_string(),
        Token::HardBreak => "HardBreak".to_string(),
        Token::HorizontalRule => "HorizontalRule".to_string(),
        Token::DelimRun { ch, count } => format!("DelimRun({}, {})", ch, count),
        Token::Heading(content, level) => {
            // Uses the same child indentation as BlockQuote / ListItem so
            // heading children are visibly nested under the heading.
            format!(
                "Heading(level={}, {})",
                level,
                block_body(&children(content, depth), &pad)
            )
        }
        Token::Emphasis { level, content } => {
            format!(
                "Emphasis(level={}, [{}])",
                level,
                children(content, depth).join(", ")
            )
        }
        Token::StrongEmphasis(body) => {
            format!("StrongEmphasis([{}])", children(body, depth).join(", "))
        }
        Token::Code { language, content, block } => {
            format!(
                "Code(lang={:?}, block={}, content={})",
                language,
                block,
                show(content)
            )
        }
        Token::BlockQuote(body) => {
            format!("BlockQuote({})", block_body(&children(body, depth), &pad))
        }
        Token::ListItem {
            ordered,
            number,
            marker,
            checked,
            loose,
            content,
        } => {
            format!(
                "ListItem(ordered={}, number={:?}, marker={:?}, checked={:?}, loose={}, {})",
                ordered,
                number,
                marker,
                checked,
                loose,
                block_body(&children(content, depth), &pad)
            )
        }
        Token::Link { content, url, title } => {
            format!(
                "Link(url={:?}, title={:?}, [{}])",
                url,
                title,
                children(content, depth).join(", ")
            )
        }
        Token::Image { alt, url, title } => {
            format!(
                "Image(url={:?}, title={:?}, alt=[{}])",
                url,
                title,
                children(alt, depth).join(", ")
            )
        }
        Token::Table { headers, aligns, rows } => {
            // Only the table's shape is reported; cells are not expanded.
            format!(
                "Table(cols={}, rows={}, aligns={:?})",
                headers.len(),
                rows.len(),
                aligns
            )
        }
        Token::TableAlignment(a) => format!("TableAlignment({:?})", a),
        Token::Strikethrough(body) => {
            format!("Strikethrough([{}])", children(body, depth).join(", "))
        }
        // HTML variants get a coloured name so they are easy to spot.
        Token::HtmlBlock(s) => format!("\x1b[36mHtmlBlock\x1b[0m({})", show(s)),
        Token::HtmlInline(s) => format!("\x1b[36mHtmlInline\x1b[0m({})", show(s)),
        Token::HtmlComment(s) => format!("\x1b[36mHtmlComment\x1b[0m({})", show(s)),
        Token::Unknown(s) => format!("Unknown({})", show(s)),
    }
}

/// Wraps `s` in double quotes, replacing newline, tab and carriage-return
/// characters with the two-character sequences `\n`, `\t` and `\r` so they
/// are visible on a single output line.
///
/// All other characters — including double quotes and backslashes — are
/// copied through unchanged.
fn show(s: &str) -> String {
    // Room for the input plus the two surrounding quotes; grows if escapes
    // lengthen the text.
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for ch in s.chars() {
        match ch {
            '\n' => quoted.push_str("\\n"),
            '\t' => quoted.push_str("\\t"),
            '\r' => quoted.push_str("\\r"),
            other => quoted.push(other),
        }
    }
    quoted.push('"');
    quoted
}