1use crate::error::Result;
7use crate::verification::parser::{parse_pdf, ParsedPdf};
8use std::collections::HashMap;
9
10#[derive(Debug, Clone)]
12pub struct PdfDifference {
13 pub location: String,
14 pub expected: String,
15 pub actual: String,
16 pub severity: DifferenceSeverity,
17}
18
/// Severity attached to a [`PdfDifference`].
///
/// The enum is a plain tag without payload, so it is `Copy` and `Eq` in
/// addition to the previously derived traits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DifferenceSeverity {
    /// Breaks both structural and content equivalence; similarity penalty of 0.3.
    Critical,
    /// Breaks both structural and content equivalence; similarity penalty of 0.1.
    Important,
    /// Tolerated for structural equivalence but breaks content equivalence;
    /// similarity penalty of 0.05.
    Minor,
    /// Tolerated by both equivalence checks; similarity penalty of 0.01.
    Cosmetic,
}
30
31#[derive(Debug, Clone)]
33pub struct ComparisonResult {
34 pub structurally_equivalent: bool,
35 pub content_equivalent: bool,
36 pub differences: Vec<PdfDifference>,
37 pub similarity_score: f64, }
39
40pub fn compare_pdfs(generated: &[u8], reference: &[u8]) -> Result<ComparisonResult> {
42 let parsed_generated = parse_pdf(generated)?;
43 let parsed_reference = parse_pdf(reference)?;
44
45 let differences = find_differences(&parsed_generated, &parsed_reference);
46 let similarity_score = calculate_similarity_score(&differences);
47
48 let structurally_equivalent = differences.iter().all(|diff| {
49 diff.severity == DifferenceSeverity::Cosmetic || diff.severity == DifferenceSeverity::Minor
50 });
51
52 let content_equivalent = differences
53 .iter()
54 .all(|diff| diff.severity == DifferenceSeverity::Cosmetic);
55
56 Ok(ComparisonResult {
57 structurally_equivalent,
58 content_equivalent,
59 differences,
60 similarity_score,
61 })
62}
63
64fn find_differences(generated: &ParsedPdf, reference: &ParsedPdf) -> Vec<PdfDifference> {
66 let mut differences = Vec::new();
67
68 if generated.version != reference.version {
70 let severity = if generated.version.chars().next() != reference.version.chars().next() {
71 DifferenceSeverity::Important
72 } else {
73 DifferenceSeverity::Minor
74 };
75
76 differences.push(PdfDifference {
77 location: "PDF Version".to_string(),
78 expected: reference.version.clone(),
79 actual: generated.version.clone(),
80 severity,
81 });
82 }
83
84 differences.extend(compare_catalogs(&generated.catalog, &reference.catalog));
86
87 differences.extend(compare_page_trees(
89 &generated.page_tree,
90 &reference.page_tree,
91 ));
92
93 differences.extend(compare_fonts(&generated.fonts, &reference.fonts));
95
96 differences.extend(compare_color_spaces(generated, reference));
98
99 differences.extend(compare_graphics_states(
101 &generated.graphics_states,
102 &reference.graphics_states,
103 ));
104
105 differences.extend(compare_text_objects(
107 &generated.text_objects,
108 &reference.text_objects,
109 ));
110
111 differences.extend(compare_annotations(
113 &generated.annotations,
114 &reference.annotations,
115 ));
116
117 if generated.xref_valid != reference.xref_valid {
119 differences.push(PdfDifference {
120 location: "Cross-reference table".to_string(),
121 expected: reference.xref_valid.to_string(),
122 actual: generated.xref_valid.to_string(),
123 severity: DifferenceSeverity::Critical,
124 });
125 }
126
127 differences
128}
129
130fn compare_catalogs(
132 generated: &Option<HashMap<String, String>>,
133 reference: &Option<HashMap<String, String>>,
134) -> Vec<PdfDifference> {
135 let mut differences = Vec::new();
136
137 match (generated, reference) {
138 (Some(gen_catalog), Some(ref_catalog)) => {
139 for key in ["Type", "Pages"] {
141 match (gen_catalog.get(key), ref_catalog.get(key)) {
142 (Some(gen_val), Some(ref_val)) => {
143 if gen_val != ref_val {
144 differences.push(PdfDifference {
145 location: format!("Catalog/{}", key),
146 expected: ref_val.clone(),
147 actual: gen_val.clone(),
148 severity: DifferenceSeverity::Critical,
149 });
150 }
151 }
152 (None, Some(ref_val)) => {
153 differences.push(PdfDifference {
154 location: format!("Catalog/{}", key),
155 expected: ref_val.clone(),
156 actual: "missing".to_string(),
157 severity: DifferenceSeverity::Critical,
158 });
159 }
160 (Some(gen_val), None) => {
161 differences.push(PdfDifference {
162 location: format!("Catalog/{}", key),
163 expected: "missing".to_string(),
164 actual: gen_val.clone(),
165 severity: DifferenceSeverity::Minor,
166 });
167 }
168 (None, None) => {} }
170 }
171 }
172 (None, Some(_)) => {
173 differences.push(PdfDifference {
174 location: "Document Catalog".to_string(),
175 expected: "present".to_string(),
176 actual: "missing".to_string(),
177 severity: DifferenceSeverity::Critical,
178 });
179 }
180 (Some(_), None) => {
181 differences.push(PdfDifference {
182 location: "Document Catalog".to_string(),
183 expected: "missing".to_string(),
184 actual: "present".to_string(),
185 severity: DifferenceSeverity::Minor,
186 });
187 }
188 (None, None) => {
189 differences.push(PdfDifference {
190 location: "Document Catalog".to_string(),
191 expected: "present".to_string(),
192 actual: "missing".to_string(),
193 severity: DifferenceSeverity::Critical,
194 });
195 }
196 }
197
198 differences
199}
200
201fn compare_page_trees(
203 generated: &Option<crate::verification::parser::PageTree>,
204 reference: &Option<crate::verification::parser::PageTree>,
205) -> Vec<PdfDifference> {
206 let mut differences = Vec::new();
207
208 match (generated, reference) {
209 (Some(gen_tree), Some(ref_tree)) => {
210 if gen_tree.page_count != ref_tree.page_count {
211 differences.push(PdfDifference {
212 location: "Page Tree/Count".to_string(),
213 expected: ref_tree.page_count.to_string(),
214 actual: gen_tree.page_count.to_string(),
215 severity: DifferenceSeverity::Critical,
216 });
217 }
218
219 if gen_tree.root_type != ref_tree.root_type {
220 differences.push(PdfDifference {
221 location: "Page Tree/Type".to_string(),
222 expected: ref_tree.root_type.clone(),
223 actual: gen_tree.root_type.clone(),
224 severity: DifferenceSeverity::Critical,
225 });
226 }
227 }
228 (None, Some(_)) => {
229 differences.push(PdfDifference {
230 location: "Page Tree".to_string(),
231 expected: "present".to_string(),
232 actual: "missing".to_string(),
233 severity: DifferenceSeverity::Critical,
234 });
235 }
236 (Some(_), None) => {
237 differences.push(PdfDifference {
238 location: "Page Tree".to_string(),
239 expected: "missing".to_string(),
240 actual: "present".to_string(),
241 severity: DifferenceSeverity::Minor,
242 });
243 }
244 (None, None) => {} }
246
247 differences
248}
249
250fn compare_fonts(generated: &[String], reference: &[String]) -> Vec<PdfDifference> {
252 let mut differences = Vec::new();
253
254 for ref_font in reference {
256 if !generated.contains(ref_font) {
257 differences.push(PdfDifference {
258 location: format!("Fonts/{}", ref_font),
259 expected: "present".to_string(),
260 actual: "missing".to_string(),
261 severity: DifferenceSeverity::Important,
262 });
263 }
264 }
265
266 for gen_font in generated {
268 if !reference.contains(gen_font) {
269 differences.push(PdfDifference {
270 location: format!("Fonts/{}", gen_font),
271 expected: "missing".to_string(),
272 actual: "present".to_string(),
273 severity: DifferenceSeverity::Minor,
274 });
275 }
276 }
277
278 differences
279}
280
281fn compare_color_spaces(generated: &ParsedPdf, reference: &ParsedPdf) -> Vec<PdfDifference> {
283 let mut differences = Vec::new();
284
285 if generated.uses_device_rgb != reference.uses_device_rgb {
286 differences.push(PdfDifference {
287 location: "Color Spaces/DeviceRGB".to_string(),
288 expected: reference.uses_device_rgb.to_string(),
289 actual: generated.uses_device_rgb.to_string(),
290 severity: DifferenceSeverity::Important,
291 });
292 }
293
294 if generated.uses_device_cmyk != reference.uses_device_cmyk {
295 differences.push(PdfDifference {
296 location: "Color Spaces/DeviceCMYK".to_string(),
297 expected: reference.uses_device_cmyk.to_string(),
298 actual: generated.uses_device_cmyk.to_string(),
299 severity: DifferenceSeverity::Important,
300 });
301 }
302
303 if generated.uses_device_gray != reference.uses_device_gray {
304 differences.push(PdfDifference {
305 location: "Color Spaces/DeviceGray".to_string(),
306 expected: reference.uses_device_gray.to_string(),
307 actual: generated.uses_device_gray.to_string(),
308 severity: DifferenceSeverity::Important,
309 });
310 }
311
312 differences
313}
314
315fn compare_graphics_states(
317 generated: &[crate::verification::parser::GraphicsState],
318 reference: &[crate::verification::parser::GraphicsState],
319) -> Vec<PdfDifference> {
320 let mut differences = Vec::new();
321
322 if generated.len() != reference.len() {
323 differences.push(PdfDifference {
324 location: "Graphics States/Count".to_string(),
325 expected: reference.len().to_string(),
326 actual: generated.len().to_string(),
327 severity: DifferenceSeverity::Important,
328 });
329 }
330
331 let min_len = generated.len().min(reference.len());
333 for i in 0..min_len.min(3) {
334 let gen_state = &generated[i];
336 let ref_state = &reference[i];
337
338 if gen_state.line_width != ref_state.line_width {
339 differences.push(PdfDifference {
340 location: format!("Graphics State {}/LineWidth", i),
341 expected: format!("{:?}", ref_state.line_width),
342 actual: format!("{:?}", gen_state.line_width),
343 severity: DifferenceSeverity::Minor,
344 });
345 }
346 }
347
348 differences
349}
350
351fn compare_text_objects(
353 generated: &[crate::verification::parser::TextObject],
354 reference: &[crate::verification::parser::TextObject],
355) -> Vec<PdfDifference> {
356 let mut differences = Vec::new();
357
358 if generated.len() != reference.len() {
359 differences.push(PdfDifference {
360 location: "Text Objects/Count".to_string(),
361 expected: reference.len().to_string(),
362 actual: generated.len().to_string(),
363 severity: DifferenceSeverity::Important,
364 });
365 }
366
367 let min_len = generated.len().min(reference.len());
369 for i in 0..min_len {
370 let gen_text = &generated[i];
371 let ref_text = &reference[i];
372
373 if gen_text.text_content != ref_text.text_content {
374 differences.push(PdfDifference {
375 location: format!("Text Object {}/Content", i),
376 expected: ref_text.text_content.clone(),
377 actual: gen_text.text_content.clone(),
378 severity: DifferenceSeverity::Important,
379 });
380 }
381 }
382
383 differences
384}
385
386fn compare_annotations(
388 generated: &[crate::verification::parser::Annotation],
389 reference: &[crate::verification::parser::Annotation],
390) -> Vec<PdfDifference> {
391 let mut differences = Vec::new();
392
393 if generated.len() != reference.len() {
394 differences.push(PdfDifference {
395 location: "Annotations/Count".to_string(),
396 expected: reference.len().to_string(),
397 actual: generated.len().to_string(),
398 severity: DifferenceSeverity::Important,
399 });
400 }
401
402 differences
403}
404
405fn calculate_similarity_score(differences: &[PdfDifference]) -> f64 {
407 if differences.is_empty() {
408 return 1.0;
409 }
410
411 let mut penalty = 0.0;
412 for diff in differences {
413 penalty += match diff.severity {
414 DifferenceSeverity::Critical => 0.3,
415 DifferenceSeverity::Important => 0.1,
416 DifferenceSeverity::Minor => 0.05,
417 DifferenceSeverity::Cosmetic => 0.01,
418 };
419 }
420
421 (1.0f64 - penalty).max(0.0)
422}
423
424pub fn pdfs_structurally_equivalent(generated: &[u8], reference: &[u8]) -> bool {
426 match compare_pdfs(generated, reference) {
427 Ok(result) => result.structurally_equivalent,
428 Err(_) => false,
429 }
430}
431
432pub fn extract_pdf_differences(generated: &[u8], reference: &[u8]) -> Result<Vec<PdfDifference>> {
434 let result = compare_pdfs(generated, reference)?;
435 Ok(result.differences)
436}
437
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal PDF-like byte stream with the given header version and
    /// a single object whose `/Type` is `catalog_type`.
    fn create_test_pdf(version: &str, catalog_type: &str) -> Vec<u8> {
        format!(
            "%PDF-{}\n1 0 obj\n<<\n/Type /{}\n>>\nendobj\n%%EOF",
            version, catalog_type
        )
        .into_bytes()
    }

    /// Asserts that two scores agree up to floating-point rounding.
    ///
    /// Scores are computed by subtracting decimal penalties, so comparing them
    /// with `==` would depend on how the individual values happen to round.
    fn assert_score_eq(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < 1e-9,
            "similarity score {} differs from expected {}",
            actual,
            expected
        );
    }

    #[test]
    fn test_identical_pdfs() {
        let pdf1 = create_test_pdf("1.4", "Catalog");
        let pdf2 = create_test_pdf("1.4", "Catalog");

        let result = compare_pdfs(&pdf1, &pdf2).unwrap();
        assert!(result.content_equivalent);
        assert_score_eq(result.similarity_score, 1.0);
    }

    #[test]
    fn test_version_difference() {
        let pdf1 = create_test_pdf("1.4", "Catalog");
        let pdf2 = create_test_pdf("1.7", "Catalog");

        let result = compare_pdfs(&pdf1, &pdf2).unwrap();
        assert!(!result.content_equivalent);
        assert!(result.similarity_score < 1.0);
        assert!(result
            .differences
            .iter()
            .any(|d| d.location == "PDF Version"));
    }

    #[test]
    fn test_structural_difference() {
        let pdf1 = create_test_pdf("1.4", "Catalog");
        let pdf2 = create_test_pdf("1.7", "Catalog");

        let result = compare_pdfs(&pdf1, &pdf2).unwrap();

        // A version mismatch with the same leading character is only minor, so
        // the documents remain structurally equivalent despite the difference.
        assert!(result.structurally_equivalent);
        assert!(!result.differences.is_empty());
        assert!(result
            .differences
            .iter()
            .any(|d| d.location == "PDF Version"));
    }

    #[test]
    fn test_calculate_similarity_score() {
        let differences = vec![PdfDifference {
            location: "test".to_string(),
            expected: "a".to_string(),
            actual: "b".to_string(),
            severity: DifferenceSeverity::Critical,
        }];

        let score = calculate_similarity_score(&differences);
        assert_score_eq(score, 0.7);
    }

    #[test]
    fn test_similarity_score_is_clamped_at_zero() {
        let critical = PdfDifference {
            location: "test".to_string(),
            expected: "a".to_string(),
            actual: "b".to_string(),
            severity: DifferenceSeverity::Critical,
        };

        // Four critical penalties exceed 1.0, so the score must bottom out at 0.
        let score = calculate_similarity_score(&vec![critical; 4]);
        assert_score_eq(score, 0.0);
    }
}