Skip to main content

pdf_ast/validation/
mod.rs

1use crate::ast::{NodeId, NodeType, PdfDocument};
2use serde::{Deserialize, Serialize};
3use std::collections::{HashMap, HashSet};
4use std::time::{SystemTime, UNIX_EPOCH};
5
6pub mod constraints;
7pub mod pdf_standards;
8pub mod pdfa;
9pub mod schema;
10
11pub use constraints::*;
12pub use pdf_standards::*;
13pub use pdfa::*;
14pub use schema::*;
15
16/// Validation severity levels
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
18pub enum ValidationSeverity {
19    Info,
20    Warning,
21    Error,
22    Critical,
23}
24
25/// Validation result for a single check
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct ValidationIssue {
28    pub severity: ValidationSeverity,
29    pub code: String,
30    pub message: String,
31    pub node_id: Option<NodeId>,
32    pub location: Option<String>,
33    pub suggestion: Option<String>,
34}
35
36/// Complete validation report
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct ValidationReport {
39    pub schema_name: String,
40    pub schema_version: String,
41    pub is_valid: bool,
42    pub issues: Vec<ValidationIssue>,
43    pub statistics: ValidationStatistics,
44    pub metadata: HashMap<String, String>,
45}
46
47/// Versioned validation report envelope for stable exports
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct ValidationReportEnvelope {
50    pub report_format_version: String,
51    pub generated_at_unix: u64,
52    pub report: ValidationReport,
53}
54
55/// Validation statistics
56#[derive(Debug, Clone, Serialize, Deserialize, Default)]
57pub struct ValidationStatistics {
58    pub total_checks: usize,
59    pub passed_checks: usize,
60    pub failed_checks: usize,
61    pub info_count: usize,
62    pub warning_count: usize,
63    pub error_count: usize,
64    pub critical_count: usize,
65}
66
67impl ValidationReport {
68    pub fn new(schema_name: String, schema_version: String) -> Self {
69        Self {
70            schema_name,
71            schema_version,
72            is_valid: true,
73            issues: Vec::new(),
74            statistics: ValidationStatistics::default(),
75            metadata: HashMap::new(),
76        }
77    }
78
79    pub fn add_issue(&mut self, issue: ValidationIssue) {
80        match issue.severity {
81            ValidationSeverity::Info => self.statistics.info_count += 1,
82            ValidationSeverity::Warning => self.statistics.warning_count += 1,
83            ValidationSeverity::Error => {
84                self.statistics.error_count += 1;
85                self.is_valid = false;
86            }
87            ValidationSeverity::Critical => {
88                self.statistics.critical_count += 1;
89                self.is_valid = false;
90            }
91        }
92
93        self.statistics.failed_checks += 1;
94        self.issues.push(issue);
95    }
96
97    pub fn add_passed_check(&mut self) {
98        self.statistics.passed_checks += 1;
99        self.statistics.total_checks += 1;
100    }
101
102    pub fn finalize(&mut self) {
103        self.statistics.total_checks =
104            self.statistics.passed_checks + self.statistics.failed_checks;
105    }
106
107    pub fn into_envelope(self) -> ValidationReportEnvelope {
108        let generated_at_unix = SystemTime::now()
109            .duration_since(UNIX_EPOCH)
110            .unwrap_or_default()
111            .as_secs();
112        ValidationReportEnvelope {
113            report_format_version: "1.0".to_string(),
114            generated_at_unix,
115            report: self,
116        }
117    }
118}
119
120/// Base trait for PDF schema validation
121pub trait PdfSchema: Send + Sync {
122    /// Get schema name
123    fn name(&self) -> &str;
124
125    /// Get schema version
126    fn version(&self) -> &str;
127
128    /// Validate a complete document
129    fn validate(&self, document: &PdfDocument) -> ValidationReport;
130
131    /// Get all constraints for this schema
132    fn get_constraints(&self) -> Vec<Box<dyn SchemaConstraint>>;
133
134    /// Check if schema supports specific PDF version
135    fn supports_pdf_version(&self, version: &crate::ast::PdfVersion) -> bool;
136
137    /// Get schema description
138    fn description(&self) -> &str {
139        ""
140    }
141
142    /// Get schema URL/reference
143    fn reference_url(&self) -> Option<&str> {
144        None
145    }
146}
147
148/// Base trait for schema constraints
149pub trait SchemaConstraint: Send + Sync {
150    /// Get constraint name
151    fn name(&self) -> &str;
152
153    /// Get constraint description
154    fn description(&self) -> &str;
155
156    /// Check constraint against a document
157    fn check(&self, document: &PdfDocument, report: &mut ValidationReport);
158
159    /// Get constraint category
160    fn category(&self) -> ConstraintCategory {
161        ConstraintCategory::General
162    }
163
164    /// Get required node types for this constraint
165    fn required_node_types(&self) -> Vec<NodeType> {
166        Vec::new()
167    }
168
169    /// ISO 32000-2 reference for audit mapping
170    fn iso_reference(&self) -> Option<&str> {
171        None
172    }
173}
174
/// Label grouping schema constraints by the area they concern.
///
/// This module attaches no behavior to the individual variants beyond using
/// `General` as the default returned by `SchemaConstraint::category`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConstraintCategory {
    /// Default category for constraints that do not override `category()`.
    General,
    Structure,
    Content,
    Metadata,
    Security,
    Accessibility,
    Graphics,
    Fonts,
    Images,
    Annotations,
    Forms,
    JavaScript,
}
191
192/// PDF schema registry
193pub struct SchemaRegistry {
194    schemas: HashMap<String, Box<dyn PdfSchema>>,
195}
196
197impl SchemaRegistry {
198    pub fn new() -> Self {
199        let mut registry = Self {
200            schemas: HashMap::new(),
201        };
202
203        // Register standard schemas
204        registry.register_standard_schemas();
205        registry
206    }
207
208    fn register_standard_schemas(&mut self) {
209        // PDF 2.0 base schema
210        self.register(Box::new(Pdf20Schema::new()));
211
212        // PDF/A schemas
213        self.register(Box::new(PdfASchema::new(PdfALevel::PdfA1a)));
214        self.register(Box::new(PdfASchema::new(PdfALevel::PdfA1b)));
215        self.register(Box::new(PdfASchema::new(PdfALevel::PdfA2a)));
216        self.register(Box::new(PdfASchema::new(PdfALevel::PdfA2b)));
217        self.register(Box::new(PdfASchema::new(PdfALevel::PdfA2u)));
218        self.register(Box::new(PdfASchema::new(PdfALevel::PdfA3a)));
219        self.register(Box::new(PdfASchema::new(PdfALevel::PdfA3b)));
220        self.register(Box::new(PdfASchema::new(PdfALevel::PdfA3u)));
221
222        // PDF/X schemas
223        self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX1a)));
224        self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX3)));
225        self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX4)));
226        self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX4p)));
227        self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX5g)));
228        self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX5n)));
229        self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX5pg)));
230
231        // PDF/UA schema
232        self.register(Box::new(PdfUASchema::new(PdfUALevel::PdfUA1)));
233        self.register(Box::new(PdfUASchema::new(PdfUALevel::PdfUA2)));
234    }
235
236    pub fn register(&mut self, schema: Box<dyn PdfSchema>) {
237        let name = schema.name().to_string();
238        self.schemas.insert(name, schema);
239    }
240
241    pub fn get_schema(&self, name: &str) -> Option<&dyn PdfSchema> {
242        self.schemas.get(name).map(|s| s.as_ref())
243    }
244
245    pub fn list_schemas(&self) -> Vec<&str> {
246        self.schemas.keys().map(|s| s.as_str()).collect()
247    }
248
249    pub fn validate(&self, document: &PdfDocument, schema_name: &str) -> Option<ValidationReport> {
250        self.get_schema(schema_name)
251            .map(|schema| schema.validate(document))
252    }
253
254    pub fn validate_all(&self, document: &PdfDocument) -> HashMap<String, ValidationReport> {
255        let mut results = HashMap::new();
256
257        for (name, schema) in &self.schemas {
258            if schema.supports_pdf_version(&document.version) {
259                let report = schema.validate(document);
260                results.insert(name.clone(), report);
261            }
262        }
263
264        results
265    }
266
267    pub fn verify_report(&self, report: &ValidationReport) -> bool {
268        self.get_schema(&report.schema_name)
269            .map(|schema| schema.version() == report.schema_version)
270            .unwrap_or(false)
271    }
272
273    pub fn verify_envelope(&self, envelope: &ValidationReportEnvelope) -> bool {
274        envelope.report_format_version == "1.0" && self.verify_report(&envelope.report)
275    }
276}
277
278impl Default for SchemaRegistry {
279    fn default() -> Self {
280        Self::new()
281    }
282}
283
284/// Validation context for passing state between constraints
285pub struct ValidationContext<'a> {
286    pub document: &'a PdfDocument,
287    pub report: &'a mut ValidationReport,
288    pub visited_nodes: HashSet<NodeId>,
289    pub context_data: HashMap<String, String>,
290}
291
292impl<'a> ValidationContext<'a> {
293    pub fn new(document: &'a PdfDocument, report: &'a mut ValidationReport) -> Self {
294        Self {
295            document,
296            report,
297            visited_nodes: HashSet::new(),
298            context_data: HashMap::new(),
299        }
300    }
301
302    pub fn mark_visited(&mut self, node_id: NodeId) {
303        self.visited_nodes.insert(node_id);
304    }
305
306    pub fn is_visited(&self, node_id: NodeId) -> bool {
307        self.visited_nodes.contains(&node_id)
308    }
309
310    pub fn set_context(&mut self, key: String, value: String) {
311        self.context_data.insert(key, value);
312    }
313
314    pub fn get_context(&self, key: &str) -> Option<&String> {
315        self.context_data.get(key)
316    }
317}