pdf_oxide 0.3.24

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
// Convert PDF pages to Markdown, HTML, and plain text files.
// Run: dotnet run -- document.pdf

using PdfOxide.Core;

// At least one argument (the input PDF path) is required; extra arguments are ignored.
if (args.Length < 1)
{
    Console.Error.WriteLine("Usage: dotnet run -- <file.pdf>");
    return 1;
}

var path = args[0];

// Fail fast with a readable message and the same exit code as the usage
// error, rather than letting a mistyped path surface as an unhandled exception.
if (!File.Exists(path))
{
    Console.Error.WriteLine($"Error: file not found: {path}");
    return 1;
}

using var doc = PdfDocument.Open(path);

// All generated files go into this directory, relative to the current working directory.
const string outputDir = "output";
Directory.CreateDirectory(outputDir);

var pages = doc.PageCount;
Console.WriteLine($"Converting {pages} pages from {path}...");

// Writes one converted page to the output directory and reports the saved path.
void Save(string fileName, string content)
{
    var target = $"{outputDir}/{fileName}";
    File.WriteAllText(target, content);
    Console.WriteLine($"Saved: {target}");
}

for (int i = 0; i < pages; i++)
{
    // The document methods are called with the zero-based index i;
    // file names use the one-based page number n.
    var n = i + 1;
    Save($"page_{n}.md", doc.ToMarkdown(i));
    Save($"page_{n}.html", doc.ToHtml(i));
    Save($"page_{n}.txt", doc.ExtractText(i));
}

Console.WriteLine($"Done. Files written to {outputDir}/");
return 0;