1use serde::Serialize;
5
6use crate::reconstruct::PageStats;
7
/// Serializable summary of one conversion run, assembled by
/// `ConversionReport::build` and rendered as text by `ConversionReport::summary`.
#[derive(Debug, Serialize)]
pub struct ConversionReport {
    /// Input identifier, copied verbatim from the `input` argument of `build`
    /// (presumably the source document path; the in-file test passes `"in.pdf"`).
    pub input: String,
    /// Output identifier, copied verbatim from the `output` argument of `build`
    /// (presumably the generated file path; the in-file test passes `"out.epub"`).
    pub output: String,
    /// Number of entries in `page_stats`.
    pub pages: usize,
    /// Block count as supplied by the caller of `build`.
    pub blocks: usize,
    /// Reconstructed character count as supplied by the caller of `build`.
    pub reconstructed_chars: usize,
    /// Baseline character count as supplied by the caller of `build`; the
    /// generated messages refer to it as the pdftotext baseline.
    pub baseline_chars: usize,
    /// `reconstructed_chars / baseline_chars * 100`, capped at `100.0`;
    /// `100.0` when `baseline_chars` is zero.
    pub coverage_percent: f64,
    /// Number of entries in `page_stats` whose `two_column` flag is set.
    pub two_column_pages: usize,
    /// Per-page statistics, stored exactly as passed to `build`.
    pub page_stats: Vec<PageStats>,
    /// Human-readable warnings produced by `build`: one when coverage is below
    /// 95%, followed by one for every page whose `chars` is zero.
    pub warnings: Vec<String>,
}
26
27impl ConversionReport {
28 pub fn build(
29 input: &str,
30 output: &str,
31 page_stats: Vec<PageStats>,
32 blocks: usize,
33 reconstructed_chars: usize,
34 baseline_chars: usize,
35 ) -> Self {
36 let coverage_percent = if baseline_chars == 0 {
37 100.0
38 } else {
39 (reconstructed_chars as f64 / baseline_chars as f64 * 100.0).min(100.0)
40 };
41
42 let mut warnings = Vec::new();
43 if coverage_percent < 95.0 {
44 warnings.push(format!(
45 "reconstructed text covers only {coverage_percent:.1}% of the pdftotext baseline; some content was not captured"
46 ));
47 }
48 for page in &page_stats {
49 if page.chars == 0 {
50 warnings.push(format!(
51 "page {}: no text reconstructed (image-only page, or extraction failure)",
52 page.page
53 ));
54 }
55 }
56
57 Self {
58 input: input.to_string(),
59 output: output.to_string(),
60 pages: page_stats.len(),
61 blocks,
62 reconstructed_chars,
63 baseline_chars,
64 coverage_percent,
65 two_column_pages: page_stats.iter().filter(|page| page.two_column).count(),
66 page_stats,
67 warnings,
68 }
69 }
70
71 pub fn summary(&self) -> String {
72 let mut out = format!(
73 "Pages: {}\nBlocks: {}\nTwo-column pages: {}\nText coverage vs pdftotext: {:.1}% ({} reconstructed / {} baseline characters)\n",
74 self.pages,
75 self.blocks,
76 self.two_column_pages,
77 self.coverage_percent,
78 self.reconstructed_chars,
79 self.baseline_chars,
80 );
81 if self.warnings.is_empty() {
82 out.push_str("Warnings: none\n");
83 } else {
84 out.push_str("Warnings:\n");
85 for warning in &self.warnings {
86 out.push_str(&format!(" - {warning}\n"));
87 }
88 }
89 out
90 }
91}
92
#[cfg(test)]
mod tests {
    use super::*;

    /// When more characters are reconstructed than the baseline contains, the
    /// percentage is clamped to 100 and the summary shows both raw counts as
    /// "reconstructed / baseline" rather than as "N of M".
    #[test]
    fn summary_does_not_describe_over_baseline_reconstruction_as_of_total() {
        let no_pages = Vec::new();
        let report = ConversionReport::build("in.pdf", "out.epub", no_pages, 1, 101, 100);
        assert_eq!(report.coverage_percent, 100.0);

        // Render once and check both the expected and the forbidden wording.
        let summary = report.summary();
        assert!(summary.contains("101 reconstructed / 100 baseline characters"));
        assert!(!summary.contains("101 of 100"));
    }
}