Skip to main content

pdf_ast/validation/
schema.rs

1use super::*;
2use crate::ast::PdfDocument;
3
/// Basic schema implementation for core PDF validation.
///
/// The schema carries only its own descriptive metadata; the actual checks
/// live in the constraints it hands out through its `PdfSchema` implementation.
#[derive(Debug, Clone)]
pub struct BasicPdfSchema {
    /// Human-readable schema name, returned by `PdfSchema::name`.
    name: String,
    /// Version string of this schema, returned by `PdfSchema::version`.
    version: String,
    /// Short description of the schema, returned by `PdfSchema::description`.
    description: String,
}

impl BasicPdfSchema {
    /// Creates the schema with its fixed metadata: name `"Basic PDF"`,
    /// version `"1.0"` and description `"Basic PDF document validation"`.
    pub fn new() -> Self {
        Self {
            name: String::from("Basic PDF"),
            version: String::from("1.0"),
            description: String::from("Basic PDF document validation"),
        }
    }
}
20
21impl PdfSchema for BasicPdfSchema {
22    fn name(&self) -> &str {
23        &self.name
24    }
25
26    fn version(&self) -> &str {
27        &self.version
28    }
29
30    fn description(&self) -> &str {
31        &self.description
32    }
33
34    fn supports_pdf_version(&self, _version: &crate::ast::PdfVersion) -> bool {
35        true // Basic schema supports all versions
36    }
37
38    fn validate(&self, document: &PdfDocument) -> ValidationReport {
39        let mut report = ValidationReport::new(self.name().to_string(), self.version().to_string());
40
41        // Run basic constraints
42        for constraint in self.get_constraints() {
43            constraint.check(document, &mut report);
44        }
45
46        report.finalize();
47        report
48    }
49
50    fn get_constraints(&self) -> Vec<Box<dyn SchemaConstraint>> {
51        vec![
52            Box::new(BasicStructureConstraint),
53            Box::new(BasicCatalogConstraint),
54        ]
55    }
56}
57
58impl Default for BasicPdfSchema {
59    fn default() -> Self {
60        Self::new()
61    }
62}
63
/// Basic structure constraint.
///
/// Stateless marker type; its behavior comes from its `SchemaConstraint`
/// implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct BasicStructureConstraint;
66
67impl SchemaConstraint for BasicStructureConstraint {
68    fn name(&self) -> &str {
69        "BasicStructure"
70    }
71
72    fn description(&self) -> &str {
73        "Checks basic PDF document structure"
74    }
75
76    fn check(&self, document: &PdfDocument, report: &mut ValidationReport) {
77        // Check if document has nodes
78        if document.ast.get_all_nodes().is_empty() {
79            report.add_issue(ValidationIssue {
80                severity: ValidationSeverity::Critical,
81                code: "EMPTY_DOCUMENT".to_string(),
82                message: "Document has no nodes".to_string(),
83                node_id: None,
84                location: None,
85                suggestion: Some("Ensure the PDF document was parsed correctly".to_string()),
86            });
87        } else {
88            report.add_passed_check();
89        }
90
91        // Check if document has root
92        if document.ast.get_root().is_none() {
93            report.add_issue(ValidationIssue {
94                severity: ValidationSeverity::Critical,
95                code: "NO_ROOT".to_string(),
96                message: "Document has no root node".to_string(),
97                node_id: None,
98                location: None,
99                suggestion: Some("Ensure the document has a valid catalog".to_string()),
100            });
101        } else {
102            report.add_passed_check();
103        }
104    }
105}
106
/// Basic catalog constraint.
///
/// Stateless marker type; its behavior comes from its `SchemaConstraint`
/// implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct BasicCatalogConstraint;
109
110impl SchemaConstraint for BasicCatalogConstraint {
111    fn name(&self) -> &str {
112        "BasicCatalog"
113    }
114
115    fn description(&self) -> &str {
116        "Checks basic catalog requirements"
117    }
118
119    fn check(&self, document: &PdfDocument, report: &mut ValidationReport) {
120        if let Some(root_id) = document.ast.get_root() {
121            if let Some(root_node) = document.ast.get_node(root_id) {
122                // Check if root is catalog type
123                if !matches!(root_node.node_type, crate::ast::NodeType::Catalog) {
124                    report.add_issue(ValidationIssue {
125                        severity: ValidationSeverity::Error,
126                        code: "ROOT_NOT_CATALOG".to_string(),
127                        message: "Root node is not a catalog".to_string(),
128                        node_id: Some(root_id),
129                        location: None,
130                        suggestion: Some("Root node should be of type Catalog".to_string()),
131                    });
132                } else {
133                    report.add_passed_check();
134                }
135
136                // Check catalog content
137                if let crate::types::PdfValue::Dictionary(dict) = &root_node.value {
138                    // Check Type entry
139                    if let Some(type_value) = dict.get("Type") {
140                        if let crate::types::PdfValue::Name(name) = type_value {
141                            if name.as_str() != "/Catalog" {
142                                report.add_issue(ValidationIssue {
143                                    severity: ValidationSeverity::Error,
144                                    code: "CATALOG_WRONG_TYPE".to_string(),
145                                    message: "Catalog Type entry is not 'Catalog'".to_string(),
146                                    node_id: Some(root_id),
147                                    location: Some("Type".to_string()),
148                                    suggestion: Some("Set Type to /Catalog".to_string()),
149                                });
150                            } else {
151                                report.add_passed_check();
152                            }
153                        } else {
154                            report.add_issue(ValidationIssue {
155                                severity: ValidationSeverity::Error,
156                                code: "CATALOG_TYPE_NOT_NAME".to_string(),
157                                message: "Catalog Type entry is not a name".to_string(),
158                                node_id: Some(root_id),
159                                location: Some("Type".to_string()),
160                                suggestion: Some("Set Type to /Catalog".to_string()),
161                            });
162                        }
163                    } else {
164                        report.add_issue(ValidationIssue {
165                            severity: ValidationSeverity::Error,
166                            code: "CATALOG_NO_TYPE".to_string(),
167                            message: "Catalog missing Type entry".to_string(),
168                            node_id: Some(root_id),
169                            location: None,
170                            suggestion: Some("Add Type entry with value /Catalog".to_string()),
171                        });
172                    }
173
174                    // Check Pages entry
175                    if !dict.contains_key("Pages") {
176                        report.add_issue(ValidationIssue {
177                            severity: ValidationSeverity::Error,
178                            code: "CATALOG_NO_PAGES".to_string(),
179                            message: "Catalog missing Pages entry".to_string(),
180                            node_id: Some(root_id),
181                            location: None,
182                            suggestion: Some(
183                                "Add Pages entry referencing the page tree".to_string(),
184                            ),
185                        });
186                    } else {
187                        report.add_passed_check();
188                    }
189                } else {
190                    report.add_issue(ValidationIssue {
191                        severity: ValidationSeverity::Critical,
192                        code: "CATALOG_NOT_DICT".to_string(),
193                        message: "Catalog is not a dictionary".to_string(),
194                        node_id: Some(root_id),
195                        location: None,
196                        suggestion: Some("Catalog must be a dictionary object".to_string()),
197                    });
198                }
199            }
200        }
201    }
202}