Skip to main content

bookforge_pdf/
convert.rs

1//! End-to-end conversion orchestration: poppler → parse → reconstruct →
2//! EPUB + report.
3
4use std::path::{Path, PathBuf};
5
6use crate::{
7    Result,
8    epub::write_epub,
9    model::{ColumnMode, DocBlock},
10    parse::parse_pdf2xml,
11    reconstruct::reconstruct,
12    report::ConversionReport,
13    tools::PopplerTools,
14};
15
16#[derive(Debug, Clone)]
17pub struct ConvertOptions {
18    pub columns: ColumnMode,
19    /// dc:language for the produced EPUB (source language of the PDF).
20    pub language: String,
21    /// dc:title; defaults to the input file stem when empty.
22    pub title: String,
23}
24
25impl Default for ConvertOptions {
26    fn default() -> Self {
27        Self {
28            columns: ColumnMode::Auto,
29            language: "en".to_string(),
30            title: String::new(),
31        }
32    }
33}
34
35pub struct ConvertOutcome {
36    pub output: PathBuf,
37    pub report: ConversionReport,
38}
39
40pub fn convert_pdf(
41    input: &Path,
42    output: &Path,
43    options: &ConvertOptions,
44) -> Result<ConvertOutcome> {
45    let tools = PopplerTools::discover()?;
46
47    let xml = tools.pdf_to_xml(input)?;
48    let pages = parse_pdf2xml(&xml)?;
49    let reconstruction = reconstruct(&pages, options.columns);
50
51    let title = if options.title.is_empty() {
52        input
53            .file_stem()
54            .map(|stem| stem.to_string_lossy().into_owned())
55            .unwrap_or_else(|| "Converted PDF".to_string())
56    } else {
57        options.title.clone()
58    };
59    write_epub(&reconstruction.blocks, &title, &options.language, output)?;
60
61    let baseline = tools.pdf_to_text(input)?;
62    let baseline_chars = baseline.chars().filter(|ch| !ch.is_whitespace()).count();
63    let reconstructed_chars: usize = reconstruction.blocks.iter().map(DocBlock::char_count).sum();
64
65    let report = ConversionReport::build(
66        &input.to_string_lossy(),
67        &output.to_string_lossy(),
68        reconstruction.pages,
69        reconstruction.blocks.len(),
70        reconstructed_chars,
71        baseline_chars,
72    );
73
74    Ok(ConvertOutcome {
75        output: output.to_path_buf(),
76        report,
77    })
78}