#![allow(clippy::uninlined_format_args, clippy::field_reassign_with_default)]
use std::path::Path;
use transmutation::converters::DocumentConverter;
use transmutation::converters::pdf::PdfConverter;
use transmutation::types::{ConversionOptions, OutputFormat};
/// Runs a single PDF → Markdown conversion through `PdfConverter` with the
/// `use_ffi` option enabled, saves the result, and prints a short preview.
///
/// The input path (`data/1706.03762v7.pdf`) and the output path
/// (`data/output_ffi_test.md`) are hard-coded. If the input PDF is missing,
/// the PDFs found in `data/` are listed on stderr and the program returns
/// `Ok(())` without converting anything.
///
/// # Errors
///
/// Returns an error if `data/` cannot be read while listing available PDFs,
/// if the conversion itself fails, or if the output file cannot be written.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Number of output lines echoed to the terminal after conversion.
    const PREVIEW_LINES: usize = 50;

    println!("\n╔════════════════════════════════════════╗");
    println!("║ 🚀 Testing Docling FFI Pipeline ║");
    println!("╚════════════════════════════════════════╝\n");

    let pdf_path = Path::new("data/1706.03762v7.pdf");
    if !pdf_path.exists() {
        eprintln!("❌ PDF not found: {}", pdf_path.display());
        eprintln!("📁 Available PDFs:");
        for entry in std::fs::read_dir("data")? {
            let candidate = entry?.path();
            if candidate.extension().and_then(|ext| ext.to_str()) == Some("pdf") {
                // Same stream as the header above, so the list stays attached
                // to it when stdout or stderr is redirected.
                eprintln!(" - {}", candidate.display());
            }
        }
        // Nothing to convert; returning Ok(()) makes the process exit with status 0.
        return Ok(());
    }

    println!("📄 Input PDF: {}", pdf_path.display());
    println!("🔧 Features: docling-ffi enabled");
    println!("🧠 Pipeline: Full ML + Post-processing\n");

    let converter = PdfConverter::new();
    // Start from the defaults and only switch on the `use_ffi` option.
    let mut options = ConversionOptions::default();
    options.use_ffi = true;
    let output_format = OutputFormat::Markdown {
        split_pages: false,
        optimize_for_llm: true,
    };

    println!("⏳ Converting PDF...\n");
    let result = converter.convert(pdf_path, output_format, options).await?;

    println!("\n✅ Conversion complete!");
    println!("📊 Pages processed: {}", result.statistics.pages_processed);
    println!("📊 Output chunks: {}", result.content.len());

    // Concatenate every output chunk into one document, one chunk per
    // newline-terminated segment. Invalid UTF-8 is replaced rather than
    // rejected (`from_utf8_lossy`).
    let mut full_output = String::new();
    for output in &result.content {
        full_output.push_str(&String::from_utf8_lossy(&output.data));
        full_output.push('\n');
    }

    // Count lines once; every use below refers to the same document.
    let line_count = full_output.lines().count();
    println!("📏 Total size: {} bytes", full_output.len());
    println!("📏 Lines: {}", line_count);

    let output_path = "data/output_ffi_test.md";
    std::fs::write(output_path, &full_output)?;
    println!("\n💾 Saved to: {}", output_path);

    println!("\n╔════════════════════════════════════════╗");
    println!("║ 📝 First 50 lines of output: ║");
    println!("╚════════════════════════════════════════╝\n");
    for (i, line) in full_output.lines().take(PREVIEW_LINES).enumerate() {
        println!("{:3} | {}", i + 1, line);
    }
    if line_count > PREVIEW_LINES {
        println!("\n... ({} more lines)", line_count - PREVIEW_LINES);
    }

    Ok(())
}