1use crate::ast::{NodeId, NodeType, PdfDocument};
2use serde::{Deserialize, Serialize};
3use std::collections::{HashMap, HashSet};
4use std::time::{SystemTime, UNIX_EPOCH};
5
6pub mod constraints;
7pub mod pdf_standards;
8pub mod pdfa;
9pub mod schema;
10
11pub use constraints::*;
12pub use pdf_standards::*;
13pub use pdfa::*;
14pub use schema::*;
15
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
18pub enum ValidationSeverity {
19 Info,
20 Warning,
21 Error,
22 Critical,
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct ValidationIssue {
28 pub severity: ValidationSeverity,
29 pub code: String,
30 pub message: String,
31 pub node_id: Option<NodeId>,
32 pub location: Option<String>,
33 pub suggestion: Option<String>,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct ValidationReport {
39 pub schema_name: String,
40 pub schema_version: String,
41 pub is_valid: bool,
42 pub issues: Vec<ValidationIssue>,
43 pub statistics: ValidationStatistics,
44 pub metadata: HashMap<String, String>,
45}
46
47#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct ValidationReportEnvelope {
50 pub report_format_version: String,
51 pub generated_at_unix: u64,
52 pub report: ValidationReport,
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize, Default)]
57pub struct ValidationStatistics {
58 pub total_checks: usize,
59 pub passed_checks: usize,
60 pub failed_checks: usize,
61 pub info_count: usize,
62 pub warning_count: usize,
63 pub error_count: usize,
64 pub critical_count: usize,
65}
66
67impl ValidationReport {
68 pub fn new(schema_name: String, schema_version: String) -> Self {
69 Self {
70 schema_name,
71 schema_version,
72 is_valid: true,
73 issues: Vec::new(),
74 statistics: ValidationStatistics::default(),
75 metadata: HashMap::new(),
76 }
77 }
78
79 pub fn add_issue(&mut self, issue: ValidationIssue) {
80 match issue.severity {
81 ValidationSeverity::Info => self.statistics.info_count += 1,
82 ValidationSeverity::Warning => self.statistics.warning_count += 1,
83 ValidationSeverity::Error => {
84 self.statistics.error_count += 1;
85 self.is_valid = false;
86 }
87 ValidationSeverity::Critical => {
88 self.statistics.critical_count += 1;
89 self.is_valid = false;
90 }
91 }
92
93 self.statistics.failed_checks += 1;
94 self.issues.push(issue);
95 }
96
97 pub fn add_passed_check(&mut self) {
98 self.statistics.passed_checks += 1;
99 self.statistics.total_checks += 1;
100 }
101
102 pub fn finalize(&mut self) {
103 self.statistics.total_checks =
104 self.statistics.passed_checks + self.statistics.failed_checks;
105 }
106
107 pub fn into_envelope(self) -> ValidationReportEnvelope {
108 let generated_at_unix = SystemTime::now()
109 .duration_since(UNIX_EPOCH)
110 .unwrap_or_default()
111 .as_secs();
112 ValidationReportEnvelope {
113 report_format_version: "1.0".to_string(),
114 generated_at_unix,
115 report: self,
116 }
117 }
118}
119
120pub trait PdfSchema: Send + Sync {
122 fn name(&self) -> &str;
124
125 fn version(&self) -> &str;
127
128 fn validate(&self, document: &PdfDocument) -> ValidationReport;
130
131 fn get_constraints(&self) -> Vec<Box<dyn SchemaConstraint>>;
133
134 fn supports_pdf_version(&self, version: &crate::ast::PdfVersion) -> bool;
136
137 fn description(&self) -> &str {
139 ""
140 }
141
142 fn reference_url(&self) -> Option<&str> {
144 None
145 }
146}
147
148pub trait SchemaConstraint: Send + Sync {
150 fn name(&self) -> &str;
152
153 fn description(&self) -> &str;
155
156 fn check(&self, document: &PdfDocument, report: &mut ValidationReport);
158
159 fn category(&self) -> ConstraintCategory {
161 ConstraintCategory::General
162 }
163
164 fn required_node_types(&self) -> Vec<NodeType> {
166 Vec::new()
167 }
168
169 fn iso_reference(&self) -> Option<&str> {
171 None
172 }
173}
174
/// Category returned by `SchemaConstraint::category`.
///
/// Derives `Hash` in addition to the equality traits so that a category can
/// key a `HashMap` or `HashSet`, for example to group constraints by category.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ConstraintCategory {
    /// Default category of `SchemaConstraint::category`.
    General,
    Structure,
    Content,
    Metadata,
    Security,
    Accessibility,
    Graphics,
    Fonts,
    Images,
    Annotations,
    Forms,
    JavaScript,
}
191
192pub struct SchemaRegistry {
194 schemas: HashMap<String, Box<dyn PdfSchema>>,
195}
196
197impl SchemaRegistry {
198 pub fn new() -> Self {
199 let mut registry = Self {
200 schemas: HashMap::new(),
201 };
202
203 registry.register_standard_schemas();
205 registry
206 }
207
208 fn register_standard_schemas(&mut self) {
209 self.register(Box::new(Pdf20Schema::new()));
211
212 self.register(Box::new(PdfASchema::new(PdfALevel::PdfA1a)));
214 self.register(Box::new(PdfASchema::new(PdfALevel::PdfA1b)));
215 self.register(Box::new(PdfASchema::new(PdfALevel::PdfA2a)));
216 self.register(Box::new(PdfASchema::new(PdfALevel::PdfA2b)));
217 self.register(Box::new(PdfASchema::new(PdfALevel::PdfA2u)));
218 self.register(Box::new(PdfASchema::new(PdfALevel::PdfA3a)));
219 self.register(Box::new(PdfASchema::new(PdfALevel::PdfA3b)));
220 self.register(Box::new(PdfASchema::new(PdfALevel::PdfA3u)));
221
222 self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX1a)));
224 self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX3)));
225 self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX4)));
226 self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX4p)));
227 self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX5g)));
228 self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX5n)));
229 self.register(Box::new(PdfXSchema::new(PdfXLevel::PdfX5pg)));
230
231 self.register(Box::new(PdfUASchema::new(PdfUALevel::PdfUA1)));
233 self.register(Box::new(PdfUASchema::new(PdfUALevel::PdfUA2)));
234 }
235
236 pub fn register(&mut self, schema: Box<dyn PdfSchema>) {
237 let name = schema.name().to_string();
238 self.schemas.insert(name, schema);
239 }
240
241 pub fn get_schema(&self, name: &str) -> Option<&dyn PdfSchema> {
242 self.schemas.get(name).map(|s| s.as_ref())
243 }
244
245 pub fn list_schemas(&self) -> Vec<&str> {
246 self.schemas.keys().map(|s| s.as_str()).collect()
247 }
248
249 pub fn validate(&self, document: &PdfDocument, schema_name: &str) -> Option<ValidationReport> {
250 self.get_schema(schema_name)
251 .map(|schema| schema.validate(document))
252 }
253
254 pub fn validate_all(&self, document: &PdfDocument) -> HashMap<String, ValidationReport> {
255 let mut results = HashMap::new();
256
257 for (name, schema) in &self.schemas {
258 if schema.supports_pdf_version(&document.version) {
259 let report = schema.validate(document);
260 results.insert(name.clone(), report);
261 }
262 }
263
264 results
265 }
266
267 pub fn verify_report(&self, report: &ValidationReport) -> bool {
268 self.get_schema(&report.schema_name)
269 .map(|schema| schema.version() == report.schema_version)
270 .unwrap_or(false)
271 }
272
273 pub fn verify_envelope(&self, envelope: &ValidationReportEnvelope) -> bool {
274 envelope.report_format_version == "1.0" && self.verify_report(&envelope.report)
275 }
276}
277
278impl Default for SchemaRegistry {
279 fn default() -> Self {
280 Self::new()
281 }
282}
283
284pub struct ValidationContext<'a> {
286 pub document: &'a PdfDocument,
287 pub report: &'a mut ValidationReport,
288 pub visited_nodes: HashSet<NodeId>,
289 pub context_data: HashMap<String, String>,
290}
291
292impl<'a> ValidationContext<'a> {
293 pub fn new(document: &'a PdfDocument, report: &'a mut ValidationReport) -> Self {
294 Self {
295 document,
296 report,
297 visited_nodes: HashSet::new(),
298 context_data: HashMap::new(),
299 }
300 }
301
302 pub fn mark_visited(&mut self, node_id: NodeId) {
303 self.visited_nodes.insert(node_id);
304 }
305
306 pub fn is_visited(&self, node_id: NodeId) -> bool {
307 self.visited_nodes.contains(&node_id)
308 }
309
310 pub fn set_context(&mut self, key: String, value: String) {
311 self.context_data.insert(key, value);
312 }
313
314 pub fn get_context(&self, key: &str) -> Option<&String> {
315 self.context_data.get(key)
316 }
317}