// The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
// Convert PDF pages to Markdown, HTML, and plain text files.
// Run: dotnet run -- document.pdf
using PdfOxide.Core;

// Top-level program: opens the PDF named by the first command-line argument
// and writes each page to output/ three times — as Markdown (.md), HTML
// (.html), and plain text (.txt).
//
// Exit code: 1 when no file argument is supplied, 0 after all pages are written.

// Guard clause: a PDF path is required as the first argument.
if (args.Length < 1)
{
    Console.Error.WriteLine("Usage: dotnet run -- <file.pdf>");
    return 1;
}

var path = args[0];

// `using var` disposes the document when the program's top-level scope exits.
using var doc = PdfDocument.Open(path);

// Relative to the current working directory; does nothing if it already exists.
Directory.CreateDirectory("output");

var pages = doc.PageCount;
Console.WriteLine($"Converting {pages} pages from {path}...");

for (int i = 0; i < pages; i++)
{
    // The conversion calls receive the loop index (starting at 0), while the
    // output file names are numbered from 1.
    var n = i + 1;

    File.WriteAllText($"output/page_{n}.md", doc.ToMarkdown(i));
    Console.WriteLine($"Saved: output/page_{n}.md");

    File.WriteAllText($"output/page_{n}.html", doc.ToHtml(i));
    Console.WriteLine($"Saved: output/page_{n}.html");

    File.WriteAllText($"output/page_{n}.txt", doc.ExtractText(i));
    Console.WriteLine($"Saved: output/page_{n}.txt");
}

Console.WriteLine("Done. Files written to output/");
return 0;