Skip to main content

oxidize_pdf/verification/
mod.rs

1//! PDF Verification Module
2//!
3//! This module provides REAL verification of generated PDFs against ISO 32000-1:2008
4//! standards. Unlike superficial tests, this module:
5//!
6//! 1. Parses the actual PDF bytes generated
7//! 2. Verifies internal object structure
8//! 3. Validates with external tools (qpdf, veraPDF)
9//! 4. Compares against ISO reference PDFs
10//!
11//! The goal is to provide HONEST assessment of ISO compliance, not just "API exists".
12
13pub mod comparators;
14pub mod compliance_report;
15pub mod curated_matrix;
16pub mod iso_matrix;
17pub mod parser;
18pub mod validators;
19
20// Disabled vanity ISO compliance tests - these test PDF syntax rather than functionality
21// See CLAUDE.md: "Focus on practical PDF functionality, not compliance metrics"
22// The 148 vanity ISO tests have been disabled to focus on real functionality
23// #[cfg(test)]
24// pub mod tests;
25
26use crate::error::Result;
27
28/// Verification levels for ISO compliance
29#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
30pub enum VerificationLevel {
31    /// Not implemented (0%)
32    NotImplemented = 0,
33    /// Code exists, doesn't crash (25%)
34    CodeExists = 1,
35    /// Generates valid PDF (50%)
36    GeneratesPdf = 2,
37    /// Content verified with parser (75%)
38    ContentVerified = 3,
39    /// ISO compliant with external validation (100%)
40    IsoCompliant = 4,
41}
42
43impl VerificationLevel {
44    pub fn as_percentage(&self) -> f64 {
45        match self {
46            VerificationLevel::NotImplemented => 0.0,
47            VerificationLevel::CodeExists => 25.0,
48            VerificationLevel::GeneratesPdf => 50.0,
49            VerificationLevel::ContentVerified => 75.0,
50            VerificationLevel::IsoCompliant => 100.0,
51        }
52    }
53
54    pub fn from_u8(level: u8) -> Option<Self> {
55        match level {
56            0 => Some(VerificationLevel::NotImplemented),
57            1 => Some(VerificationLevel::CodeExists),
58            2 => Some(VerificationLevel::GeneratesPdf),
59            3 => Some(VerificationLevel::ContentVerified),
60            4 => Some(VerificationLevel::IsoCompliant),
61            _ => None,
62        }
63    }
64}
65
66/// Result of PDF verification
67#[derive(Debug, Clone)]
68pub struct VerificationResult {
69    pub level: VerificationLevel,
70    pub passed: bool,
71    pub details: String,
72    pub external_validation: Option<ExternalValidationResult>,
73}
74
75/// Result from external validation tools
76#[derive(Debug, Clone)]
77pub struct ExternalValidationResult {
78    pub qpdf_passed: Option<bool>,
79    pub verapdf_passed: Option<bool>,
80    pub adobe_preflight_passed: Option<bool>,
81    pub error_messages: Vec<String>,
82}
83
84/// ISO requirement for tracking compliance
85#[derive(Debug, Clone)]
86pub struct IsoRequirement {
87    pub id: String,
88    pub name: String,
89    pub description: String,
90    pub iso_reference: String,
91    pub implementation: Option<String>,
92    pub test_file: Option<String>,
93    pub level: VerificationLevel,
94    pub verified: bool,
95    pub notes: String,
96}
97
98/// Complete verification of a PDF against an ISO requirement
99pub fn verify_iso_requirement(
100    pdf_bytes: &[u8],
101    requirement: &IsoRequirement,
102) -> Result<VerificationResult> {
103    match requirement.level {
104        VerificationLevel::NotImplemented => Ok(VerificationResult {
105            level: VerificationLevel::NotImplemented,
106            passed: false,
107            details: "Feature not implemented".to_string(),
108            external_validation: None,
109        }),
110        VerificationLevel::CodeExists => {
111            // At this level, we just verify the code doesn't crash
112            // This should be tested in unit tests, not here
113            Ok(VerificationResult {
114                level: VerificationLevel::CodeExists,
115                passed: true,
116                details: "Code exists and executes without crash".to_string(),
117                external_validation: None,
118            })
119        }
120        VerificationLevel::GeneratesPdf => verify_pdf_generation(pdf_bytes),
121        VerificationLevel::ContentVerified => verify_pdf_content(pdf_bytes, requirement),
122        VerificationLevel::IsoCompliant => verify_iso_compliance(pdf_bytes, requirement),
123    }
124}
125
126/// Verify that PDF is generated with basic structure
127fn verify_pdf_generation(pdf_bytes: &[u8]) -> Result<VerificationResult> {
128    if pdf_bytes.is_empty() {
129        return Ok(VerificationResult {
130            level: VerificationLevel::GeneratesPdf,
131            passed: false,
132            details: "PDF is empty".to_string(),
133            external_validation: None,
134        });
135    }
136
137    if !pdf_bytes.starts_with(b"%PDF-") {
138        return Ok(VerificationResult {
139            level: VerificationLevel::GeneratesPdf,
140            passed: false,
141            details: "PDF does not start with PDF header".to_string(),
142            external_validation: None,
143        });
144    }
145
146    if pdf_bytes.len() < 1000 {
147        return Ok(VerificationResult {
148            level: VerificationLevel::GeneratesPdf,
149            passed: false,
150            details: format!("PDF too small: {} bytes", pdf_bytes.len()),
151            external_validation: None,
152        });
153    }
154
155    Ok(VerificationResult {
156        level: VerificationLevel::GeneratesPdf,
157        passed: true,
158        details: format!("Valid PDF generated: {} bytes", pdf_bytes.len()),
159        external_validation: None,
160    })
161}
162
163/// Verify PDF content structure with internal parser
164fn verify_pdf_content(
165    pdf_bytes: &[u8],
166    requirement: &IsoRequirement,
167) -> Result<VerificationResult> {
168    // First check basic generation
169    let gen_result = verify_pdf_generation(pdf_bytes)?;
170    if !gen_result.passed {
171        return Ok(gen_result);
172    }
173
174    // Parse PDF and verify content
175    match parser::parse_pdf(pdf_bytes) {
176        Ok(parsed_pdf) => {
177            let content_check = verify_requirement_content(&parsed_pdf, requirement);
178            Ok(VerificationResult {
179                level: VerificationLevel::ContentVerified,
180                passed: content_check.0,
181                details: content_check.1,
182                external_validation: None,
183            })
184        }
185        Err(e) => Ok(VerificationResult {
186            level: VerificationLevel::ContentVerified,
187            passed: false,
188            details: format!("Failed to parse PDF: {}", e),
189            external_validation: None,
190        }),
191    }
192}
193
194/// Verify full ISO compliance with external validation
195fn verify_iso_compliance(
196    pdf_bytes: &[u8],
197    requirement: &IsoRequirement,
198) -> Result<VerificationResult> {
199    // First check content verification
200    let content_result = verify_pdf_content(pdf_bytes, requirement)?;
201    if !content_result.passed {
202        return Ok(content_result);
203    }
204
205    // Run external validation
206    let external_result = validators::validate_external(pdf_bytes)?;
207
208    let all_passed = external_result.qpdf_passed.unwrap_or(false)
209        && external_result.verapdf_passed.unwrap_or(true); // veraPDF optional
210
211    Ok(VerificationResult {
212        level: VerificationLevel::IsoCompliant,
213        passed: all_passed,
214        details: if all_passed {
215            "Passed all external validation checks".to_string()
216        } else {
217            format!(
218                "External validation failed: {:?}",
219                external_result.error_messages
220            )
221        },
222        external_validation: Some(external_result),
223    })
224}
225
226/// Verify specific requirement content in parsed PDF
227fn verify_requirement_content(
228    parsed_pdf: &parser::ParsedPdf,
229    requirement: &IsoRequirement,
230) -> (bool, String) {
231    // This is where we implement specific verification logic for each ISO requirement
232    // For now, we'll implement a few key ones and expand over time
233
234    match requirement.id.as_str() {
235        "7.5.2.1" => {
236            // Document catalog must have /Type /Catalog
237            if let Some(catalog) = &parsed_pdf.catalog {
238                if catalog.contains_key("Type") {
239                    (true, "Catalog contains required /Type entry".to_string())
240                } else {
241                    (false, "Catalog missing /Type entry".to_string())
242                }
243            } else {
244                (false, "No document catalog found".to_string())
245            }
246        }
247        "8.6.3.1" => {
248            // DeviceRGB color space verification
249            if parsed_pdf.uses_device_rgb {
250                (true, "PDF uses DeviceRGB color space correctly".to_string())
251            } else {
252                (
253                    false,
254                    "DeviceRGB color space not found or incorrect".to_string(),
255                )
256            }
257        }
258        "9.7.1.1" => {
259            // Standard 14 fonts verification
260            let standard_fonts = &[
261                "Helvetica",
262                "Times-Roman",
263                "Courier",
264                "Symbol",
265                "ZapfDingbats",
266            ];
267            let found_fonts: Vec<_> = parsed_pdf
268                .fonts
269                .iter()
270                .filter(|font| standard_fonts.contains(&font.as_str()))
271                .collect();
272
273            if !found_fonts.is_empty() {
274                (true, format!("Found standard fonts: {:?}", found_fonts))
275            } else {
276                (false, "No standard fonts found".to_string())
277            }
278        }
279        _ => {
280            // For requirements we haven't implemented specific verification yet
281            (
282                true,
283                format!(
284                    "Content verification not yet implemented for {}",
285                    requirement.id
286                ),
287            )
288        }
289    }
290}
291
292#[cfg(test)]
293mod unit_tests {
294    use super::*;
295
296    #[test]
297    fn test_verification_level_percentage() {
298        assert_eq!(VerificationLevel::NotImplemented.as_percentage(), 0.0);
299        assert_eq!(VerificationLevel::CodeExists.as_percentage(), 25.0);
300        assert_eq!(VerificationLevel::GeneratesPdf.as_percentage(), 50.0);
301        assert_eq!(VerificationLevel::ContentVerified.as_percentage(), 75.0);
302        assert_eq!(VerificationLevel::IsoCompliant.as_percentage(), 100.0);
303    }
304
305    #[test]
306    fn test_verification_level_from_u8() {
307        assert_eq!(
308            VerificationLevel::from_u8(0),
309            Some(VerificationLevel::NotImplemented)
310        );
311        assert_eq!(
312            VerificationLevel::from_u8(4),
313            Some(VerificationLevel::IsoCompliant)
314        );
315        assert_eq!(VerificationLevel::from_u8(5), None);
316    }
317
318    #[test]
319    fn test_pdf_generation_verification() {
320        // Test empty PDF
321        let empty_pdf = b"";
322        let result = verify_pdf_generation(empty_pdf).unwrap();
323        assert!(!result.passed);
324        assert!(result.details.contains("empty"));
325
326        // Test invalid header
327        let invalid_pdf = b"This is not a PDF";
328        let result = verify_pdf_generation(invalid_pdf).unwrap();
329        assert!(!result.passed);
330        assert!(result.details.contains("PDF header"));
331
332        // Test too small PDF
333        let small_pdf = b"%PDF-1.4\n%%EOF";
334        let result = verify_pdf_generation(small_pdf).unwrap();
335        assert!(!result.passed);
336        assert!(result.details.contains("too small"));
337
338        // Test valid PDF (mock)
339        let valid_pdf = format!("%PDF-1.4\n{}\n%%EOF", "x".repeat(1000));
340        let result = verify_pdf_generation(valid_pdf.as_bytes()).unwrap();
341        assert!(result.passed);
342        assert!(result.details.contains("Valid PDF generated"));
343    }
344}
345
346/// Check if two PDFs are structurally equivalent for ISO compliance
347pub fn pdfs_structurally_equivalent(generated: &[u8], reference: &[u8]) -> bool {
348    comparators::pdfs_structurally_equivalent(generated, reference)
349}
350
351/// Extract structural differences between PDFs
352pub fn extract_pdf_differences(
353    generated: &[u8],
354    reference: &[u8],
355) -> Result<Vec<comparators::PdfDifference>> {
356    comparators::extract_pdf_differences(generated, reference)
357}