Skip to main content

bookforge_pdf/
lib.rs

//! PDF ingestion for BookForge (ROADMAP §9b).
//!
//! Layout extraction is delegated to poppler's command-line tools;
//! everything after the `pdftohtml -xml` output is deterministic Rust:
//! line merging, column detection, reading order, paragraph clustering,
//! heading detection, and synthetic-EPUB assembly. The produced EPUB
//! flows through the ordinary BookForge pipeline — this crate is an
//! ingestion front-end, not a parallel translation path.
9
10pub mod convert;
11pub mod epub;
12pub mod model;
13pub mod parse;
14pub mod reconstruct;
15pub mod report;
16pub mod tools;
17
18pub use convert::{ConvertOptions, ConvertOutcome, convert_pdf};
19pub use model::{ColumnMode, DocBlock, Line, Page, Span};
20pub use parse::parse_pdf2xml;
21pub use reconstruct::reconstruct;
22pub use report::ConversionReport;
23pub use tools::{PopplerTools, ToolError};
24
25#[derive(Debug, thiserror::Error)]
26pub enum PdfError {
27    #[error("poppler tooling: {0}")]
28    Tool(#[from] tools::ToolError),
29
30    #[error("pdftohtml XML parse: {0}")]
31    Xml(#[from] quick_xml::Error),
32
33    #[error("invalid pdftohtml output: {0}")]
34    InvalidInput(String),
35
36    #[error(transparent)]
37    Io(#[from] std::io::Error),
38
39    #[error("zip: {0}")]
40    Zip(#[from] zip::result::ZipError),
41}
42
43pub type Result<T> = std::result::Result<T, PdfError>;