1use std::path::{Path, PathBuf};
5
6use crate::{
7 Result,
8 epub::write_epub,
9 model::{ColumnMode, DocBlock},
10 parse::parse_pdf2xml,
11 reconstruct::reconstruct,
12 report::ConversionReport,
13 tools::PopplerTools,
14};
15
16#[derive(Debug, Clone)]
17pub struct ConvertOptions {
18 pub columns: ColumnMode,
19 pub language: String,
21 pub title: String,
23}
24
25impl Default for ConvertOptions {
26 fn default() -> Self {
27 Self {
28 columns: ColumnMode::Auto,
29 language: "en".to_string(),
30 title: String::new(),
31 }
32 }
33}
34
35pub struct ConvertOutcome {
36 pub output: PathBuf,
37 pub report: ConversionReport,
38}
39
40pub fn convert_pdf(
41 input: &Path,
42 output: &Path,
43 options: &ConvertOptions,
44) -> Result<ConvertOutcome> {
45 let tools = PopplerTools::discover()?;
46
47 let xml = tools.pdf_to_xml(input)?;
48 let pages = parse_pdf2xml(&xml)?;
49 let reconstruction = reconstruct(&pages, options.columns);
50
51 let title = if options.title.is_empty() {
52 input
53 .file_stem()
54 .map(|stem| stem.to_string_lossy().into_owned())
55 .unwrap_or_else(|| "Converted PDF".to_string())
56 } else {
57 options.title.clone()
58 };
59 write_epub(&reconstruction.blocks, &title, &options.language, output)?;
60
61 let baseline = tools.pdf_to_text(input)?;
62 let baseline_chars = baseline.chars().filter(|ch| !ch.is_whitespace()).count();
63 let reconstructed_chars: usize = reconstruction.blocks.iter().map(DocBlock::char_count).sum();
64
65 let report = ConversionReport::build(
66 &input.to_string_lossy(),
67 &output.to_string_lossy(),
68 reconstruction.pages,
69 reconstruction.blocks.len(),
70 reconstructed_chars,
71 baseline_chars,
72 );
73
74 Ok(ConvertOutcome {
75 output: output.to_path_buf(),
76 report,
77 })
78}