mdream 1.0.7

Fastest HTML-to-Markdown converter. Zero dependencies, streaming support.
Documentation
use std::io::{self, Read, Write};
use mdream::MarkdownStreamProcessor;
use mdream::types::HTMLToMarkdownOptions;

/// Command-line entry point: reads HTML from stdin and streams Markdown to stdout.
///
/// Recognised options are `-o/--origin <URL>`, `-v/--verbose`, `--clean-urls` and
/// `-h/--help`. An unknown argument, or `--origin` without a value, prints a message
/// to stderr and exits with status 1.
///
/// # Errors
///
/// Returns any I/O error raised while reading stdin or writing stdout, and an
/// `InvalidData` error when the input is not valid UTF-8.
fn main() -> io::Result<()> {
    let mut origin: Option<String> = None;
    let mut verbose = false;
    let mut clean_urls = false;

    // Skip argv[0]; consuming the iterator lets `--origin` pull its value with `next()`.
    let mut args = std::env::args().skip(1);
    while let Some(arg) = args.next() {
        match arg.as_str() {
            "--origin" | "-o" => match args.next() {
                Some(url) => origin = Some(url),
                None => {
                    // A dangling `--origin` is a usage error, not something to ignore.
                    eprintln!("Missing value for option: {arg}");
                    std::process::exit(1);
                }
            },
            "--verbose" | "-v" => verbose = true,
            "--clean-urls" => clean_urls = true,
            "--help" | "-h" => {
                eprintln!("Usage: mdream [OPTIONS]");
                eprintln!("  Reads HTML from stdin, outputs Markdown to stdout");
                eprintln!();
                eprintln!("Options:");
                eprintln!("  -o, --origin <URL>  Base URL for resolving relative links");
                eprintln!("  -v, --verbose       Print conversion stats to stderr");
                eprintln!("  --clean-urls        Strip tracking query params (utm_*, fbclid, etc.)");
                eprintln!("  -h, --help          Show this help");
                return Ok(());
            }
            other => {
                eprintln!("Unknown option: {other}");
                std::process::exit(1);
            }
        }
    }

    let options = HTMLToMarkdownOptions {
        origin,
        clean_urls,
        ..Default::default()
    };

    let mut processor = MarkdownStreamProcessor::new(options);
    let stdin = io::stdin();
    let stdout = io::stdout();
    // Lock both streams once instead of re-acquiring the stdin lock on every read.
    let mut input = stdin.lock();
    let mut out = stdout.lock();

    let mut buf = [0u8; 8192];
    // Bytes at the start of `buf` held back from the previous read because they are the
    // beginning of a UTF-8 sequence that was cut off at the read boundary (at most 3).
    let mut pending: usize = 0;
    let mut total_in: usize = 0;
    let mut total_out: usize = 0;

    loop {
        let n = match input.read(&mut buf[pending..]) {
            Ok(n) => n,
            // A signal interrupted the read before any data arrived; just retry.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        if n == 0 {
            break;
        }
        total_in += n;
        let filled = pending + n;

        // Only the complete-character prefix is converted now; a trailing partial
        // character waits for the next read.
        let text = valid_utf8_prefix(&buf[..filled])?;
        let consumed = text.len();
        if !text.is_empty() {
            let md = processor.process_chunk(text);
            if !md.is_empty() {
                total_out += md.len();
                out.write_all(md.as_bytes())?;
                // Flush per chunk so downstream consumers see output as it is produced.
                out.flush()?;
            }
        }

        // Move the unconsumed tail to the front so the next read appends to it.
        buf.copy_within(consumed..filled, 0);
        pending = filled - consumed;
    }

    if pending != 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "input ends with an incomplete UTF-8 sequence",
        ));
    }

    let remaining = processor.finish();
    if !remaining.is_empty() {
        total_out += remaining.len();
        out.write_all(remaining.as_bytes())?;
    }
    // Flush explicitly so a late write error is reported instead of being dropped at exit.
    out.flush()?;

    if verbose {
        eprintln!("Input: {total_in} bytes");
        eprintln!("Output: {total_out} bytes");
    }

    Ok(())
}

/// Returns the longest prefix of `bytes` that is valid UTF-8.
///
/// A multi-byte sequence that is merely cut off at the end of `bytes` is excluded from
/// the prefix so the caller can retry it once more input is available. Bytes that can
/// never form valid UTF-8 yield an `InvalidData` error.
fn valid_utf8_prefix(bytes: &[u8]) -> io::Result<&str> {
    match std::str::from_utf8(bytes) {
        Ok(text) => Ok(text),
        // `error_len() == None` means the input ended in the middle of a character.
        Err(e) if e.error_len().is_none() => std::str::from_utf8(&bytes[..e.valid_up_to()])
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)),
        Err(e) => Err(io::Error::new(io::ErrorKind::InvalidData, e)),
    }
}