Skip to main content

pdfv_core/
validation.rs

1//! End-to-end validation session and validation model graph.
2
3use std::{
4    collections::{BTreeMap, BTreeSet, HashMap, HashSet},
5    io::{Read, Seek, SeekFrom},
6    num::NonZeroU64,
7    path::{Path, PathBuf},
8    sync::Arc,
9    time::Instant,
10};
11
12use crate::{
13    Assertion, BoundedText, BuiltinProfileRepository, ENGINE_VERSION, ErrorArgument, FeatureObject,
14    FeatureReport, FeatureValue, Identifier, IndirectObject, InputKind, InputSummary, ModelValue,
15    ObjectKey, ObjectLocation, ObjectTypeName, ParsedDocument, Parser, PdfName, PdfvError,
16    PolicyOperator, PolicyReport, PolicyRule, PolicyRuleResult, PolicySet, PolicyValue,
17    ProfileReport, ProfileRepository, PropertyName, ResourceLimits, Result, Rule, RuleEvaluator,
18    RuleId, RuleOutcome, TaskDuration, UnsupportedRule, ValidationError, ValidationOptions,
19    ValidationReport, ValidationStatus,
20    profile::DefaultRuleEvaluator,
21    xmp::{FlavourDetector, parse_document_xmp},
22};
23
// Per-family tables of PDF dictionary key names ("direct" properties).
// NOTE(review): the consumers of these tables are outside this view; the family each table
// belongs to is taken from the constant name only.

// Keys listed for the document catalog.
const CATALOG_DIRECT_PROPERTIES: &[&str] = &["Type", "Metadata", "Pages", "OutputIntents"];
// Keys listed for metadata objects.
const METADATA_DIRECT_PROPERTIES: &[&str] = &["Type", "Subtype", "Filter", "Length"];
// Keys listed for page objects.
const PAGE_DIRECT_PROPERTIES: &[&str] = &["Type", "Parent", "Contents", "Resources", "Annots"];
// Keys listed for font objects.
const FONT_DIRECT_PROPERTIES: &[&str] = &[
    "Type",
    "Subtype",
    "BaseFont",
    "FontDescriptor",
    "FirstChar",
    "LastChar",
    "Widths",
    "Encoding",
    "ToUnicode",
    "CIDToGIDMap",
];
// Keys listed for annotation objects.
const ANNOTATION_DIRECT_PROPERTIES: &[&str] = &[
    "Type", "Subtype", "F", "C", "IC", "AP", "FT", "CA", "A", "AA",
];
// Keys listed for output intent objects.
const OUTPUT_INTENT_DIRECT_PROPERTIES: &[&str] = &[
    "Type",
    "S",
    "DestOutputProfile",
    "OutputConditionIdentifier",
    "Info",
];
// Keys listed for generic stream objects.
const STREAM_DIRECT_PROPERTIES: &[&str] = &[
    "Type",
    "Subtype",
    "Filter",
    "DecodeParms",
    "F",
    "FFilter",
    "FDecodeParms",
];
58
// Further per-family tables of PDF dictionary key names.
// NOTE(review): several of these tables are re-exported verbatim under `*_PROPERTIES`
// aliases later in this file; other consumers are outside this view.

// Keys listed for resource dictionaries.
const RESOURCE_DIRECT_PROPERTIES: &[&str] = &[
    "Font",
    "XObject",
    "ColorSpace",
    "ExtGState",
    "Pattern",
    "Shading",
    "Properties",
    "ProcSet",
];
// Keys listed for the interactive form (AcroForm) dictionary.
const ACRO_FORM_DIRECT_PROPERTIES: &[&str] = &[
    "Fields",
    "NeedAppearances",
    "SigFlags",
    "DR",
    "DA",
    "Q",
    "XFA",
];
// Keys listed for structure objects.
const STRUCTURE_DIRECT_PROPERTIES: &[&str] = &[
    "Type",
    "K",
    "ParentTree",
    "ParentTreeNextKey",
    "RoleMap",
    "ClassMap",
    "IDTree",
];
// Keys listed for optional content properties.
const OPTIONAL_CONTENT_DIRECT_PROPERTIES: &[&str] = &["OCGs", "D", "Configs"];
// Keys listed for the names dictionary.
const NAMES_DIRECT_PROPERTIES: &[&str] = &[
    "Dests",
    "AP",
    "JavaScript",
    "Pages",
    "Templates",
    "IDS",
    "URLS",
    "EmbeddedFiles",
    "AlternatePresentations",
    "Renditions",
];
// Keys listed for outline objects.
const OUTLINES_DIRECT_PROPERTIES: &[&str] = &["Type", "First", "Last", "Count"];
// Keys listed for destination objects.
const DESTINATION_DIRECT_PROPERTIES: &[&str] = &["D", "Dest", "A"];
// Keys listed for action objects.
const ACTION_DIRECT_PROPERTIES: &[&str] = &["Type", "S", "D", "URI", "Next", "NewWindow"];
// Keys listed for form field objects.
const FORM_FIELD_DIRECT_PROPERTIES: &[&str] = &[
    "FT", "T", "TU", "TM", "Ff", "V", "DV", "Kids", "Parent", "AA",
];
// Keys listed for image objects.
const IMAGE_DIRECT_PROPERTIES: &[&str] = &[
    "Type",
    "Subtype",
    "Width",
    "Height",
    "ColorSpace",
    "BitsPerComponent",
    "Filter",
    "DecodeParms",
    "SMask",
    "Mask",
    "Intent",
];
// Keys listed for XObject objects.
const XOBJECT_DIRECT_PROPERTIES: &[&str] = &[
    "Type",
    "Subtype",
    "BBox",
    "Matrix",
    "Resources",
    "Group",
    "Filter",
    "DecodeParms",
];
// Keys listed for CMap objects.
const CMAP_DIRECT_PROPERTIES: &[&str] = &["Type", "Subtype", "CMapName", "CIDSystemInfo"];
// Keys listed for colour space objects.
const COLOR_SPACE_DIRECT_PROPERTIES: &[&str] =
    &["Type", "N", "Alternate", "Range", "Metadata", "Filter"];
// Keys listed for extended graphics state objects; note that `CA`/`ca` and `OP`/`op`
// are distinct, case-sensitive entries.
const EXT_GSTATE_DIRECT_PROPERTIES: &[&str] =
    &["Type", "BM", "CA", "ca", "SMask", "AIS", "OP", "op", "OPM"];
// Keys listed for signature objects.
const SIGNATURE_DIRECT_PROPERTIES: &[&str] = &[
    "Type",
    "Filter",
    "SubFilter",
    "ByteRange",
    "Contents",
    "Reference",
    "M",
];
// Keys listed for security objects.
const SECURITY_DIRECT_PROPERTIES: &[&str] = &["Filter", "SubFilter", "V", "R", "Length", "P"];
144
// Flat list of direct property names, kept in ascending byte order (uppercase before
// lowercase, so `ca` and `op` come last).
//
// NOTE(review): this is not an exact union of the per-family `*_DIRECT_PROPERTIES` tables
// above. `CMapName`, `ExtGState`, `NewWindow`, `OutputIntents` and `R` appear in those tables
// but not here, while `Annot` appears here and in none of them. The consumer of this list is
// outside this view — confirm whether the differences are intentional. If the list is
// searched with a binary search, new entries must preserve the ordering.
const DIRECT_PROPERTY_NAMES: &[&str] = &[
    "A",
    "AA",
    "AIS",
    "AP",
    "Alternate",
    "AlternatePresentations",
    "Annot",
    "Annots",
    "BBox",
    "BM",
    "BaseFont",
    "BitsPerComponent",
    "ByteRange",
    "C",
    "CA",
    "CIDSystemInfo",
    "CIDToGIDMap",
    "ClassMap",
    "ColorSpace",
    "Configs",
    "Contents",
    "Count",
    "D",
    "DA",
    "DR",
    "DV",
    "DecodeParms",
    "Dest",
    "DestOutputProfile",
    "Dests",
    "EmbeddedFiles",
    "Encoding",
    "F",
    "FDecodeParms",
    "FFilter",
    "FT",
    "Ff",
    "Fields",
    "Filter",
    "First",
    "FirstChar",
    "Font",
    "FontDescriptor",
    "Group",
    "Height",
    "IC",
    "IDS",
    "IDTree",
    "Info",
    "Intent",
    "JavaScript",
    "K",
    "Kids",
    "Last",
    "LastChar",
    "Length",
    "M",
    "Mask",
    "Matrix",
    "Metadata",
    "N",
    "NeedAppearances",
    "Next",
    "OCGs",
    "OP",
    "OPM",
    "OutputConditionIdentifier",
    "P",
    "Pages",
    "Parent",
    "ParentTree",
    "ParentTreeNextKey",
    "Pattern",
    "ProcSet",
    "Properties",
    "Q",
    "Range",
    "Reference",
    "Renditions",
    "Resources",
    "RoleMap",
    "S",
    "SMask",
    "Shading",
    "SigFlags",
    "SubFilter",
    "Subtype",
    "T",
    "TM",
    "TU",
    "Templates",
    "ToUnicode",
    "Type",
    "URI",
    "URLS",
    "V",
    "Width",
    "Widths",
    "XFA",
    "XObject",
    "ca",
    "op",
];
249
// Per-family property name tables. Many tables list camelCase names first, followed by
// the same dictionary keys as the matching `*_DIRECT_PROPERTIES` table; families with no
// extra names simply alias their direct table.
// NOTE(review): how the camelCase names are computed is outside this view — presumably
// they are derived by the model objects rather than read straight from a dictionary.

// Property names common to generic objects.
const OBJECT_PROPERTIES: &[&str] = &["Type", "Subtype"];
// Property names for the document family.
const DOCUMENT_PROPERTIES: &[&str] = &[
    "headerOffset",
    "postEOFDataSize",
    "header",
    "encrypted",
    "isEncrypted",
    "hasCatalog",
    "containsXRefStream",
    "nrIndirects",
    "containsPDFUAIdentification",
    "containsPDFAIdentification",
    "part",
    "partPrefix",
    "rev",
    "revPrefix",
];
// Property names for the catalog family.
const CATALOG_PROPERTIES: &[&str] = &[
    "hasMetadata",
    "hasAcroForm",
    "hasStructTreeRoot",
    "hasOCProperties",
    "hasLang",
    "hasOutlines",
    "hasNames",
    "hasDests",
    "language",
    "permissions",
    "containsStructTreeRoot",
    "containsOCProperties",
    "containsAcroForm",
    "Marked",
    "Type",
    "Metadata",
    "Pages",
    "OutputIntents",
    "AcroForm",
    "StructTreeRoot",
    "OCProperties",
    "Lang",
    "Perms",
    "Outlines",
    "Names",
    "Dests",
];
// Property names for the metadata family.
const METADATA_PROPERTIES: &[&str] = &[
    "present",
    "catalogMetadata",
    "containsPDFAIdentification",
    "containsPDFUAIdentification",
    "part",
    "partPrefix",
    "conformance",
    "conformancePrefix",
    "rev",
    "revPrefix",
    "amdPrefix",
    "corrPrefix",
    "declarations",
    "Type",
    "Subtype",
    "Filter",
    "Length",
];
// Property names for the page family.
const PAGE_PROPERTIES: &[&str] = &[
    "hasContents",
    "hasResources",
    "annotationCount",
    "Type",
    "Parent",
    "Contents",
    "Resources",
    "Annots",
];
// Property names for page tree nodes.
const PAGE_TREE_PROPERTIES: &[&str] = &["Type", "Kids", "Count", "Parent", "Resources"];
// The following families expose exactly their direct dictionary keys.
const RESOURCE_PROPERTIES: &[&str] = RESOURCE_DIRECT_PROPERTIES;
const NAMES_PROPERTIES: &[&str] = NAMES_DIRECT_PROPERTIES;
const OUTLINE_PROPERTIES: &[&str] = OUTLINES_DIRECT_PROPERTIES;
const DESTINATION_PROPERTIES: &[&str] = DESTINATION_DIRECT_PROPERTIES;
const ACRO_FORM_PROPERTIES: &[&str] = ACRO_FORM_DIRECT_PROPERTIES;
const OPTIONAL_CONTENT_PROPERTIES: &[&str] = OPTIONAL_CONTENT_DIRECT_PROPERTIES;
// Property names for the permissions family.
const PERMISSIONS_PROPERTIES: &[&str] = &["DocMDP", "UR", "UR3"];
// Property names for the font family.
const FONT_PROPERTIES: &[&str] = &[
    "embedded",
    "hasSubtype",
    "Type",
    "Subtype",
    "BaseFont",
    "FontDescriptor",
    "FirstChar",
    "LastChar",
    "Widths",
    "Encoding",
    "ToUnicode",
    "CIDToGIDMap",
];
// These families also expose exactly their direct dictionary keys.
const CMAP_PROPERTIES: &[&str] = CMAP_DIRECT_PROPERTIES;
const IMAGE_PROPERTIES: &[&str] = IMAGE_DIRECT_PROPERTIES;
const XOBJECT_PROPERTIES: &[&str] = XOBJECT_DIRECT_PROPERTIES;
// Property names for content streams.
const CONTENT_STREAM_PROPERTIES: &[&str] = &[
    "lengthMatches",
    "declaredLength",
    "discoveredLength",
    "operatorCount",
    "markedContentCount",
    "Type",
    "Subtype",
    "Filter",
    "DecodeParms",
    "F",
    "FFilter",
    "FDecodeParms",
];
// Property names for undefined content stream operators.
const UNDEFINED_OPERATOR_PROPERTIES: &[&str] = &["name"];
// Property names for the annotation family.
const ANNOTATION_PROPERTIES: &[&str] = &[
    "hasSubtype",
    "Type",
    "Subtype",
    "F",
    "C",
    "IC",
    "AP",
    "FT",
    "CA",
    "A",
    "AA",
];
// More families that expose exactly their direct dictionary keys.
const ACTION_PROPERTIES: &[&str] = ACTION_DIRECT_PROPERTIES;
const FORM_FIELD_PROPERTIES: &[&str] = FORM_FIELD_DIRECT_PROPERTIES;
const COLOR_SPACE_PROPERTIES: &[&str] = COLOR_SPACE_DIRECT_PROPERTIES;
const EXT_GSTATE_PROPERTIES: &[&str] = EXT_GSTATE_DIRECT_PROPERTIES;
const STRUCTURE_PROPERTIES: &[&str] = STRUCTURE_DIRECT_PROPERTIES;
// Property names for structure elements.
const STRUCTURE_ELEMENT_PROPERTIES: &[&str] = &[
    "Type",
    "S",
    "P",
    "K",
    "Pg",
    "Alt",
    "ActualText",
    "Lang",
    "A",
    "C",
    "ID",
    "containsParent",
    "containsRef",
    "parentStandardType",
    "parentStandardTypeNamespaceURL",
    "parentType",
    "parentNamespaceURL",
    "structParentStandardType",
    "structParentType",
    "firstChildStandardTypeNamespaceURL",
    "kidsStandardTypes",
    "hasContentItems",
    "containsLabels",
    "ListNumbering",
    "NoteType",
    "orphanRefs",
    "ghostRefs",
    "isArtifact",
    "isTaggedContent",
    "parentsTags",
    "isNotMappedToStandardType",
    "circularMappingExist",
    "roleMapToSameNamespaceTag",
    "remappedStandardType",
    "hasIntersection",
    "numberOfColumnWithWrongRowSpan",
    "numberOfRowWithWrongColumnSpan",
    "wrongColumnSpan",
    "differentTargetAnnotObjectKey",
];
// Signature and security families expose exactly their direct dictionary keys.
const SIGNATURE_PROPERTIES: &[&str] = SIGNATURE_DIRECT_PROPERTIES;
const SECURITY_PROPERTIES: &[&str] = SECURITY_DIRECT_PROPERTIES;
// Property names for the output intent family.
const OUTPUT_INTENT_PROPERTIES: &[&str] = &[
    "hasDestOutputProfile",
    "Type",
    "S",
    "DestOutputProfile",
    "OutputConditionIdentifier",
    "Info",
];
// Property names for generic streams.
const STREAM_PROPERTIES: &[&str] = &[
    "lengthMatches",
    "declaredLength",
    "discoveredLength",
    "streamKeywordCRLFCompliant",
    "endstreamKeywordEOLCompliant",
    "Type",
    "Subtype",
    "Filter",
    "DecodeParms",
    "F",
    "FFilter",
    "FDecodeParms",
];
447
// Property names treated as "safe" string-valued features, kept in ascending byte order.
// NOTE(review): the consumer is outside this view — presumably `safe_feature_value` only
// reports string values verbatim for these names; confirm before adding entries.
const SAFE_FEATURE_STRING_PROPERTIES: &[&str] = &[
    "BaseFont",
    "CIDToGIDMap",
    "CMapName",
    "Encoding",
    "FT",
    "Filter",
    "S",
    "Subtype",
    "Type",
    "conformance",
    "conformancePrefix",
    "header",
    "partPrefix",
    "revPrefix",
];
464
// Link tables: each entry is a pair of names.
// NOTE(review): judging by the values (e.g. `("pages", "page")`), the first element looks
// like the link name and the second like the linked object's family — confirm against the
// code that reads these tables, which is outside this view.

// Table for families that have no links.
const EMPTY_LINK_NAMES: &[(&str, &str)] = &[];
// Links listed for the document object.
const DOCUMENT_LINKS: &[(&str, &str)] = &[("catalog", "catalog"), ("streams", "stream")];
// Links listed for the catalog object.
const CATALOG_LINKS: &[(&str, &str)] = &[
    ("metadata", "metadata"),
    ("pages", "page"),
    ("outputIntents", "outputIntent"),
    ("acroForm", "acroForm"),
    ("structureTreeRoot", "structureTreeRoot"),
    ("optionalContentProperties", "optionalContentProperties"),
    ("names", "names"),
    ("outlines", "outline"),
    ("destinations", "destination"),
    ("permissions", "permissions"),
];
// Links listed for page objects.
const PAGE_LINKS: &[(&str, &str)] = &[
    ("resources", "resource"),
    ("fonts", "font"),
    ("annotations", "annotation"),
    ("contentStreams", "contentStream"),
];
485
/// Feature extraction selection.
///
/// The default is [`FeatureSelection::None`]. With serde, variant and field names are
/// rendered in camelCase and unknown fields are rejected on deserialization.
#[derive(Clone, Debug, Default, serde::Deserialize, Eq, PartialEq, serde::Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub enum FeatureSelection {
    /// Do not extract features.
    #[default]
    None,
    /// Extract all built-in feature families.
    All,
    /// Extract selected feature families.
    Families {
        /// Selected validation-model family names.
        families: Vec<ObjectTypeName>,
    },
}
502
503impl FeatureSelection {
504    /// Returns true when feature extraction is enabled.
505    #[must_use]
506    pub fn is_enabled(&self) -> bool {
507        !matches!(self, Self::None)
508    }
509}
510
/// Bounded input name used by reader validation.
///
/// Wraps an optional filesystem path: `None` for in-memory inputs and `Some` for inputs
/// that were named by a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InputName(Option<PathBuf>);
514
515impl InputName {
516    /// Creates an in-memory input name.
517    #[must_use]
518    pub fn memory() -> Self {
519        Self(None)
520    }
521
522    /// Creates a filesystem input name.
523    #[must_use]
524    pub fn path(path: impl Into<PathBuf>) -> Self {
525        Self(Some(path.into()))
526    }
527
528    fn summary(&self, kind: InputKind, bytes: Option<u64>) -> InputSummary {
529        InputSummary::new(kind, self.0.clone(), bytes)
530    }
531}
532
/// Validation facade for parser, profile selection, traversal, and reports.
#[derive(Clone)]
pub struct Validator {
    // Options checked by the constructors before the validator is handed out.
    options: ValidationOptions,
    // Shared, thread-safe profile repository used to resolve validation profiles.
    profiles: Arc<dyn ProfileRepository + Send + Sync>,
}
539
540impl std::fmt::Debug for Validator {
541    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
542        formatter
543            .debug_struct("Validator")
544            .field("options", &self.options)
545            .finish_non_exhaustive()
546    }
547}
548
549impl Validator {
550    /// Creates a validator with the built-in profile repository.
551    ///
552    /// # Errors
553    ///
554    /// Returns [`PdfvError`] if profile selection for the supplied options is invalid.
555    pub fn new(options: ValidationOptions) -> Result<Self> {
556        let validator = Self {
557            options,
558            profiles: Arc::new(BuiltinProfileRepository::new()),
559        };
560        validator
561            .profiles
562            .profiles_for(&validator.options.flavour)?;
563        validate_feature_configuration(&validator.options)?;
564        Ok(validator)
565    }
566
567    /// Creates a validator with an explicit profile repository.
568    ///
569    /// # Errors
570    ///
571    /// Returns [`PdfvError`] if profile selection for the supplied options is invalid.
572    pub fn with_profiles(
573        options: ValidationOptions,
574        profiles: Arc<dyn ProfileRepository + Send + Sync>,
575    ) -> Result<Self> {
576        let validator = Self { options, profiles };
577        validator
578            .profiles
579            .profiles_for(&validator.options.flavour)?;
580        validate_feature_configuration(&validator.options)?;
581        Ok(validator)
582    }
583
584    /// Validates a PDF file from disk.
585    ///
586    /// # Errors
587    ///
588    /// Returns [`PdfvError`] for I/O failures or validation engine failures.
589    #[allow(
590        clippy::disallowed_types,
591        reason = "core validation is synchronous per spec; async file I/O belongs to the CLI phase"
592    )]
593    pub fn validate_path(&self, path: impl AsRef<Path>) -> Result<ValidationReport> {
594        let path = path.as_ref();
595        let file = std::fs::File::open(path).map_err(|source| PdfvError::Io {
596            path: Some(path.to_path_buf()),
597            source,
598        })?;
599        let name = InputName::path(path);
600        self.validate_reader_with_kind(file, &name, InputKind::File)
601    }
602
603    /// Validates a seekable PDF reader.
604    ///
605    /// # Errors
606    ///
607    /// Returns [`PdfvError`] for I/O failures or validation engine failures.
608    #[allow(
609        clippy::needless_pass_by_value,
610        reason = "public API owns InputName to match the validation facade contract"
611    )]
612    pub fn validate_reader<R: Read + Seek>(
613        &self,
614        source: R,
615        name: InputName,
616    ) -> Result<ValidationReport> {
617        self.validate_reader_with_kind(source, &name, InputKind::Memory)
618    }
619
620    #[allow(
621        clippy::too_many_lines,
622        reason = "the facade keeps parse, validation, feature, and policy task ordering in one \
623                  place so report construction remains auditable"
624    )]
625    fn validate_reader_with_kind<R: Read + Seek>(
626        &self,
627        mut source: R,
628        name: &InputName,
629        kind: InputKind,
630    ) -> Result<ValidationReport> {
631        let started = Instant::now();
632        let bytes = reader_len(&mut source)?;
633        source
634            .rewind()
635            .map_err(|source| PdfvError::Io { path: None, source })?;
636        let source_summary = name.summary(kind, bytes);
637        let parser = Parser::new(self.options.resource_limits.clone());
638        let parsed = match parser.parse_with_options(
639            source,
640            crate::ParseOptions {
641                password: self.options.password.as_ref(),
642            },
643        ) {
644            Ok(parsed) => parsed,
645            Err(PdfvError::Parse(error)) => {
646                return parse_failed_report(source_summary, &error, started.elapsed());
647            }
648            Err(error) => return Err(error),
649        };
650
651        let mut parsed = parsed;
652        if parsed.is_encrypted() {
653            let xmp = parse_document_xmp(&parsed, &self.options.resource_limits, false)?;
654            parsed.parse_facts.extend(xmp.parse_facts);
655            parsed.warnings.extend(xmp.warnings);
656            return base_report(
657                source_summary,
658                ValidationStatus::Encrypted,
659                Vec::new(),
660                parsed,
661                started.elapsed(),
662            );
663        }
664
665        let profiles = match &self.options.flavour {
666            crate::FlavourSelection::Auto { default } => {
667                let detected = FlavourDetector::new(Arc::clone(&self.profiles)).detect(
668                    &parsed,
669                    default.as_ref(),
670                    &self.options.resource_limits,
671                )?;
672                parsed.parse_facts.extend(detected.parse_facts);
673                parsed.warnings.extend(detected.warnings);
674                detected.profiles
675            }
676            crate::FlavourSelection::Explicit { .. }
677            | crate::FlavourSelection::CustomProfile { .. } => {
678                let xmp = parse_document_xmp(&parsed, &self.options.resource_limits, false)?;
679                parsed.parse_facts.extend(xmp.parse_facts);
680                parsed.warnings.extend(xmp.warnings);
681                self.profiles.profiles_for(&self.options.flavour)?
682            }
683        };
684        if profiles.is_empty() {
685            return base_report(
686                source_summary,
687                ValidationStatus::Incomplete,
688                Vec::new(),
689                parsed,
690                started.elapsed(),
691            );
692        }
693        let mut session = ValidationSession::new(
694            parsed,
695            self.options.resource_limits.clone(),
696            self.options.max_failed_assertions_per_rule.get(),
697            self.options.record_passed_assertions,
698        );
699        let mut profile_reports = Vec::with_capacity(profiles.len());
700        for profile in &profiles {
701            profile_reports.push(session.validate_profile(profile)?);
702        }
703        let mut status = if profile_reports
704            .iter()
705            .any(|report| !report.unsupported_rules.is_empty())
706        {
707            ValidationStatus::Incomplete
708        } else if profile_reports.iter().all(|report| report.is_compliant) {
709            ValidationStatus::Valid
710        } else {
711            ValidationStatus::Invalid
712        };
713        let flavours = profiles
714            .iter()
715            .map(|profile| profile.flavour.clone())
716            .collect::<Vec<_>>();
717
718        let needs_features =
719            self.options.feature_selection.is_enabled() || self.options.policy.is_some();
720        let feature_started = Instant::now();
721        let feature_report = if needs_features {
722            Some(session.extract_features(&self.options.feature_selection)?)
723        } else {
724            None
725        };
726        let feature_duration = needs_features.then(|| {
727            TaskDuration::from_duration(
728                Identifier::unchecked("featureExtraction"),
729                feature_started.elapsed(),
730            )
731        });
732        let policy_started = Instant::now();
733        let policy_report = match (&self.options.policy, feature_report.as_ref()) {
734            (Some(policy), Some(features)) => {
735                policy.validate()?;
736                let report = evaluate_policy(policy, features)?;
737                if !report.is_compliant && matches!(status, ValidationStatus::Valid) {
738                    status = ValidationStatus::Invalid;
739                }
740                Some(report)
741            }
742            (Some(_), None) => {
743                return Err(crate::PolicyError::Evaluation {
744                    reason: BoundedText::unchecked("policy evaluation requires feature report"),
745                }
746                .into());
747            }
748            (None, _) => None,
749        };
750        let policy_duration = self.options.policy.is_some().then(|| {
751            TaskDuration::from_duration(Identifier::unchecked("policy"), policy_started.elapsed())
752        });
753        let parse_facts = session.document.parse_facts.clone();
754        let warnings = session.document.warnings.clone();
755        let mut task_durations = vec![TaskDuration::from_duration(
756            Identifier::new("validate")?,
757            started.elapsed(),
758        )];
759        if let Some(duration) = feature_duration {
760            task_durations.push(duration);
761        }
762        if let Some(duration) = policy_duration {
763            task_durations.push(duration);
764        }
765        Ok(ValidationReport::builder()
766            .engine_version(ENGINE_VERSION.to_owned())
767            .source(source_summary)
768            .status(status)
769            .flavours(flavours)
770            .profile_reports(profile_reports)
771            .parse_facts(parse_facts)
772            .warnings(warnings)
773            .feature_report(feature_report)
774            .policy_report(policy_report)
775            .task_durations(task_durations)
776            .build())
777    }
778}
779
/// Mutable validation state for one input.
#[derive(Debug)]
pub struct ValidationSession {
    // Parsed document that profile validation and feature extraction traverse.
    document: ParsedDocument,
    // Resource limits; `max_objects` bounds both traversals.
    limits: ResourceLimits,
    // Forwarded to each `ProfileState` created during profile validation.
    max_failed_assertions_per_rule: u32,
    // Forwarded to each `ProfileState` created during profile validation.
    record_passed_assertions: bool,
}
788
789impl ValidationSession {
    /// Creates a session that owns the parsed document and the per-run settings.
    fn new(
        document: ParsedDocument,
        limits: ResourceLimits,
        max_failed_assertions_per_rule: u32,
        record_passed_assertions: bool,
    ) -> Self {
        Self {
            document,
            limits,
            max_failed_assertions_per_rule,
            record_passed_assertions,
        }
    }
803
804    fn validate_profile(&mut self, profile: &crate::ValidationProfile) -> Result<ProfileReport> {
805        let index = RuleIndex::new(&profile.rules);
806        let graph = ModelGraph::for_rules(&self.document, &self.limits, &profile.rules);
807        let mut evaluator = DefaultRuleEvaluator::new(self.limits.clone());
808        let mut state = ProfileState::new(
809            profile.identity.clone(),
810            self.max_failed_assertions_per_rule,
811            self.record_passed_assertions,
812        );
813        state.register_static_unsupported_rules(&profile.rules);
814        let mut stack = Vec::from([ModelObjectRef::Document(DocumentModel::new(&self.document))]);
815        let mut visited = HashSet::new();
816        let mut deferred = Vec::new();
817
818        while let Some(object) = stack.pop() {
819            let visited_key = object.identity_key();
820            if !visited.insert(visited_key) {
821                continue;
822            }
823            let object_rules = index.rules_for(&object);
824            for rule in object_rules {
825                if matches!(rule.test, crate::RuleExpr::Unsupported { .. }) {
826                    continue;
827                }
828                if rule.deferred {
829                    deferred.push((object.clone(), rule));
830                } else {
831                    state.apply_rule(&object, rule, &mut evaluator)?;
832                }
833            }
834            if u64::try_from(visited.len()).map_err(|_| ValidationError::LimitExceeded {
835                limit: "max_objects",
836            })? > self.limits.max_objects
837            {
838                return Err(ValidationError::LimitExceeded {
839                    limit: "max_objects",
840                }
841                .into());
842            }
843            let object_budget = remaining_object_budget(&self.limits, visited.len(), stack.len())?;
844            for linked in object.linked_objects(&graph, object_budget)? {
845                stack.push(linked);
846            }
847        }
848        for (object, rule) in deferred {
849            state.apply_rule(&object, rule, &mut evaluator)?;
850        }
851        Ok(state.finish())
852    }
853
854    fn extract_features(&self, selection: &FeatureSelection) -> Result<FeatureReport> {
855        let registry = ModelRegistry::default_registry();
856        let selected = selected_feature_families(selection, &registry)?;
857        let graph = ModelGraph::with_all_families(&self.document, &self.limits);
858        let mut stack = Vec::from([ModelObjectRef::Document(DocumentModel::new(&self.document))]);
859        let mut visited = HashSet::new();
860        let mut objects = Vec::new();
861        let mut truncated = false;
862
863        while let Some(object) = stack.pop() {
864            let visited_key = object.identity_key();
865            if !visited.insert(visited_key) {
866                continue;
867            }
868            let object_type = object.object_type();
869            if selected.contains(&object_type)
870                && let Some(feature) = feature_object(&registry, &object, &object_type)?
871            {
872                objects.push(feature);
873            }
874            if u64::try_from(visited.len()).map_err(|_| ValidationError::LimitExceeded {
875                limit: "max_objects",
876            })? > self.limits.max_objects
877            {
878                truncated = true;
879                break;
880            }
881            let object_budget =
882                match remaining_object_budget(&self.limits, visited.len(), stack.len()) {
883                    Ok(budget) => budget,
884                    Err(error) if is_object_limit_error(&error) => {
885                        truncated = true;
886                        break;
887                    }
888                    Err(error) => return Err(error),
889                };
890            let linked_objects = match object.linked_objects(&graph, object_budget) {
891                Ok(objects) => objects,
892                Err(error) if is_object_limit_error(&error) => {
893                    truncated = true;
894                    break;
895                }
896                Err(error) => return Err(error),
897            };
898            for linked in linked_objects {
899                stack.push(linked);
900            }
901        }
902        let visited_objects = u64::try_from(visited.len()).unwrap_or(u64::MAX);
903        Ok(FeatureReport::builder()
904            .objects(objects)
905            .visited_objects(visited_objects)
906            .selected_families(selected.into_iter().collect())
907            .truncated(truncated)
908            .build())
909    }
910}
911
912fn selected_feature_families(
913    selection: &FeatureSelection,
914    registry: &ModelRegistry,
915) -> Result<BTreeSet<ObjectTypeName>> {
916    match selection {
917        FeatureSelection::None | FeatureSelection::All => {
918            Ok(registry.family_names().cloned().collect())
919        }
920        FeatureSelection::Families { families } => {
921            let mut selected = BTreeSet::new();
922            for family in families {
923                if !registry.has_family(family) {
924                    return Err(crate::ConfigError::InvalidValue {
925                        field: "extract",
926                        reason: BoundedText::unchecked("unknown feature family"),
927                    }
928                    .into());
929                }
930                selected.insert(family.clone());
931            }
932            Ok(selected)
933        }
934    }
935}
936
937fn is_object_limit_error(error: &PdfvError) -> bool {
938    matches!(
939        error,
940        PdfvError::Validation(ValidationError::LimitExceeded {
941            limit: "max_objects"
942        })
943    )
944}
945
946fn validate_feature_configuration(options: &ValidationOptions) -> Result<()> {
947    let registry = ModelRegistry::default_registry();
948    let _selected = selected_feature_families(&options.feature_selection, &registry)?;
949    if let Some(policy) = &options.policy {
950        policy.validate()?;
951        validate_policy_schema(policy, &registry)?;
952    }
953    Ok(())
954}
955
956fn validate_policy_schema(policy: &PolicySet, registry: &ModelRegistry) -> Result<()> {
957    for rule in &policy.rules {
958        if !registry.has_family(&rule.family) {
959            return Err(policy_invalid("family", "unknown policy feature family"));
960        }
961        if !registry.has_family_property(&rule.family, &rule.field) {
962            return Err(policy_invalid(
963                "field",
964                "unknown policy feature field for family",
965            ));
966        }
967        match rule.operator {
968            PolicyOperator::Exists | PolicyOperator::Absent => {
969                if rule.value.is_some() {
970                    return Err(policy_invalid(
971                        "value",
972                        "exists and absent operators do not accept values",
973                    ));
974                }
975            }
976            PolicyOperator::Equals | PolicyOperator::NotEquals => {
977                if rule.value.is_none() {
978                    return Err(policy_invalid(
979                        "value",
980                        "comparison operator requires a value",
981                    ));
982                }
983            }
984            PolicyOperator::Min | PolicyOperator::Max => {
985                if !matches!(rule.value, Some(PolicyValue::Number(_))) {
986                    return Err(policy_invalid(
987                        "value",
988                        "numeric operator requires a number value",
989                    ));
990                }
991            }
992        }
993    }
994    Ok(())
995}
996
997fn policy_invalid(field: &'static str, reason: &'static str) -> PdfvError {
998    crate::PolicyError::InvalidField {
999        field,
1000        reason: BoundedText::unchecked(reason),
1001    }
1002    .into()
1003}
1004
1005fn feature_object(
1006    registry: &ModelRegistry,
1007    object: &ModelObjectRef<'_>,
1008    object_type: &ObjectTypeName,
1009) -> Result<Option<FeatureObject>> {
1010    let Some(properties) = registry.family_property_names(object_type) else {
1011        return Ok(None);
1012    };
1013    let mut values = BTreeMap::new();
1014    for property in properties {
1015        match object.property(&property) {
1016            Ok(value) => {
1017                values.insert(property.clone(), safe_feature_value(&property, value));
1018            }
1019            Err(PdfvError::Profile(crate::ProfileError::UnknownProperty { .. })) => {}
1020            Err(error) => return Err(error),
1021        }
1022    }
1023    Ok(Some(
1024        FeatureObject::builder()
1025            .family(object_type.clone())
1026            .location(object.location())
1027            .context(object.context())
1028            .properties(values)
1029            .build(),
1030    ))
1031}
1032
1033impl From<ModelValue> for FeatureValue {
1034    fn from(value: ModelValue) -> Self {
1035        match value {
1036            ModelValue::Null => Self::Null,
1037            ModelValue::Bool(value) => Self::Bool(value),
1038            ModelValue::Number(value) => Self::Number(value),
1039            ModelValue::String(value) => Self::String(value),
1040            ModelValue::ObjectKey(value) => Self::ObjectKey(value),
1041            ModelValue::List(values) => {
1042                Self::List(values.into_iter().map(FeatureValue::from).collect())
1043            }
1044        }
1045    }
1046}
1047
1048fn safe_feature_value(property: &PropertyName, value: ModelValue) -> FeatureValue {
1049    match value {
1050        ModelValue::String(value)
1051            if !SAFE_FEATURE_STRING_PROPERTIES.contains(&property.as_str()) =>
1052        {
1053            FeatureValue::RedactedString {
1054                bytes: u64::try_from(value.as_str().len()).unwrap_or(u64::MAX),
1055            }
1056        }
1057        ModelValue::List(values) => FeatureValue::List(
1058            values
1059                .into_iter()
1060                .map(|value| safe_feature_value(property, value))
1061                .collect(),
1062        ),
1063        other => FeatureValue::from(other),
1064    }
1065}
1066
1067fn evaluate_policy(policy: &PolicySet, features: &FeatureReport) -> Result<PolicyReport> {
1068    let results = policy
1069        .rules
1070        .iter()
1071        .map(|rule| evaluate_policy_rule(rule, features))
1072        .collect::<Result<Vec<_>>>()?;
1073    let is_compliant = results.iter().all(|result| result.passed);
1074    Ok(PolicyReport::builder()
1075        .name(policy.name.clone())
1076        .is_compliant(is_compliant)
1077        .results(results)
1078        .build())
1079}
1080
1081fn evaluate_policy_rule(rule: &PolicyRule, features: &FeatureReport) -> Result<PolicyRuleResult> {
1082    let matches = features
1083        .objects
1084        .iter()
1085        .filter(|object| object.family == rule.family)
1086        .collect::<Vec<_>>();
1087    let values = matches
1088        .iter()
1089        .filter_map(|object| object.properties.get(&rule.field))
1090        .collect::<Vec<_>>();
1091    let passed = match rule.operator {
1092        PolicyOperator::Exists => !values.is_empty(),
1093        PolicyOperator::Absent => values.is_empty(),
1094        PolicyOperator::Equals => {
1095            let expected = required_policy_value(rule)?;
1096            values
1097                .iter()
1098                .any(|actual| policy_value_matches(actual, expected))
1099        }
1100        PolicyOperator::NotEquals => {
1101            let expected = required_policy_value(rule)?;
1102            values
1103                .iter()
1104                .all(|actual| !policy_value_matches(actual, expected))
1105        }
1106        PolicyOperator::Min => {
1107            let expected = required_policy_number(rule)?;
1108            values
1109                .iter()
1110                .filter_map(|value| feature_number(value))
1111                .any(|actual| actual >= expected)
1112        }
1113        PolicyOperator::Max => {
1114            let expected = required_policy_number(rule)?;
1115            values
1116                .iter()
1117                .filter_map(|value| feature_number(value))
1118                .any(|actual| actual <= expected)
1119        }
1120    };
1121    let matches = u64::try_from(matches.len()).unwrap_or(u64::MAX);
1122    Ok(PolicyRuleResult::builder()
1123        .id(rule.id.clone())
1124        .description(rule.description.clone())
1125        .passed(passed)
1126        .matches(matches)
1127        .message(policy_message(rule, passed, matches)?)
1128        .build())
1129}
1130
1131fn required_policy_value(rule: &PolicyRule) -> Result<&PolicyValue> {
1132    rule.value.as_ref().ok_or_else(|| {
1133        crate::PolicyError::InvalidField {
1134            field: "value",
1135            reason: BoundedText::unchecked("operator requires a comparison value"),
1136        }
1137        .into()
1138    })
1139}
1140
1141fn required_policy_number(rule: &PolicyRule) -> Result<f64> {
1142    match required_policy_value(rule)? {
1143        PolicyValue::Number(value) => Ok(f64::from(*value)),
1144        _ => Err(crate::PolicyError::InvalidField {
1145            field: "value",
1146            reason: BoundedText::unchecked("operator requires a numeric comparison value"),
1147        }
1148        .into()),
1149    }
1150}
1151
1152fn policy_value_matches(actual: &FeatureValue, expected: &PolicyValue) -> bool {
1153    match (actual, expected) {
1154        (FeatureValue::Bool(actual), PolicyValue::Bool(expected)) => actual == expected,
1155        (FeatureValue::Number(actual), PolicyValue::Number(expected)) => {
1156            (*actual - f64::from(*expected)).abs() < f64::EPSILON
1157        }
1158        (FeatureValue::String(actual), PolicyValue::String(expected)) => actual == expected,
1159        _ => false,
1160    }
1161}
1162
1163fn feature_number(value: &FeatureValue) -> Option<f64> {
1164    match value {
1165        FeatureValue::Number(value) if value.is_finite() => Some(*value),
1166        _ => None,
1167    }
1168}
1169
1170fn policy_message(
1171    rule: &PolicyRule,
1172    passed: bool,
1173    matches: u64,
1174) -> std::result::Result<BoundedText, crate::ConfigError> {
1175    let status = if passed { "passed" } else { "failed" };
1176    BoundedText::new(
1177        format!(
1178            "policy rule {} {status} with {matches} matching feature objects",
1179            rule.id.as_str()
1180        ),
1181        256,
1182    )
1183}
1184
1185/// Property exposed by a validation model family.
1186#[derive(Clone, Debug, Eq, PartialEq)]
1187pub(crate) struct PropertySpec {
1188    /// Property name.
1189    pub name: PropertyName,
1190}
1191
1192impl PropertySpec {
1193    fn new(name: &str) -> Self {
1194        Self {
1195            name: PropertyName::unchecked(name),
1196        }
1197    }
1198}
1199
1200/// Link exposed by a validation model family.
1201#[derive(Clone, Debug, Eq, PartialEq)]
1202pub(crate) struct LinkSpec {
1203    /// Link name.
1204    pub name: LinkName,
1205    /// Target model family.
1206    pub target: ObjectTypeName,
1207}
1208
1209impl LinkSpec {
1210    fn new(name: &'static str, target: &'static str) -> Self {
1211        Self {
1212            name: LinkName(Identifier::unchecked(name)),
1213            target: ObjectTypeName::unchecked(target),
1214        }
1215    }
1216}
1217
/// Validation model family schema entry.
///
/// Describes one model family: its name plus the property and link specs the schema allows
/// on objects of that family.
pub(crate) trait ModelFamily {
    /// Family name.
    fn family_name(&self) -> ObjectTypeName;
    /// Properties allowed on this family.
    fn property_schema(&self) -> &[PropertySpec];
    /// Links allowed on this family.
    fn link_schema(&self) -> &[LinkSpec];
}
1227
1228/// Internal registry of validation model family schemas.
1229#[derive(Clone)]
1230pub(crate) struct ModelRegistry {
1231    families: BTreeMap<ObjectTypeName, Arc<dyn ModelFamily + Send + Sync>>,
1232    all_properties: BTreeSet<PropertyName>,
1233}
1234
1235impl std::fmt::Debug for ModelRegistry {
1236    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1237        formatter
1238            .debug_struct("ModelRegistry")
1239            .field("families", &self.families.keys().collect::<Vec<_>>())
1240            .field("all_properties_len", &self.all_properties.len())
1241            .finish()
1242    }
1243}
1244
1245impl ModelRegistry {
1246    /// Builds the default internal registry.
1247    #[must_use]
1248    pub(crate) fn default_registry() -> Self {
1249        let families = [
1250            family("document", DOCUMENT_PROPERTIES, DOCUMENT_LINKS),
1251            family("catalog", CATALOG_PROPERTIES, CATALOG_LINKS),
1252            family("metadata", METADATA_PROPERTIES, EMPTY_LINK_NAMES),
1253            family("page", PAGE_PROPERTIES, PAGE_LINKS),
1254            family("pageTree", PAGE_TREE_PROPERTIES, EMPTY_LINK_NAMES),
1255            family("resource", RESOURCE_PROPERTIES, EMPTY_LINK_NAMES),
1256            family("names", NAMES_PROPERTIES, EMPTY_LINK_NAMES),
1257            family("outline", OUTLINE_PROPERTIES, EMPTY_LINK_NAMES),
1258            family("destination", DESTINATION_PROPERTIES, EMPTY_LINK_NAMES),
1259            family("acroForm", ACRO_FORM_PROPERTIES, EMPTY_LINK_NAMES),
1260            family(
1261                "optionalContentProperties",
1262                OPTIONAL_CONTENT_PROPERTIES,
1263                EMPTY_LINK_NAMES,
1264            ),
1265            family("permissions", PERMISSIONS_PROPERTIES, EMPTY_LINK_NAMES),
1266            family("font", FONT_PROPERTIES, EMPTY_LINK_NAMES),
1267            family("cMap", CMAP_PROPERTIES, EMPTY_LINK_NAMES),
1268            family("embeddedFontFile", STREAM_PROPERTIES, EMPTY_LINK_NAMES),
1269            family("image", IMAGE_PROPERTIES, EMPTY_LINK_NAMES),
1270            family("xObject", XOBJECT_PROPERTIES, EMPTY_LINK_NAMES),
1271            family("contentStream", CONTENT_STREAM_PROPERTIES, EMPTY_LINK_NAMES),
1272            family(
1273                "undefinedOperator",
1274                UNDEFINED_OPERATOR_PROPERTIES,
1275                EMPTY_LINK_NAMES,
1276            ),
1277            family("annotation", ANNOTATION_PROPERTIES, EMPTY_LINK_NAMES),
1278            family("action", ACTION_PROPERTIES, EMPTY_LINK_NAMES),
1279            family("formField", FORM_FIELD_PROPERTIES, EMPTY_LINK_NAMES),
1280            family("colorSpace", COLOR_SPACE_PROPERTIES, EMPTY_LINK_NAMES),
1281            family("extGState", EXT_GSTATE_PROPERTIES, EMPTY_LINK_NAMES),
1282            family("structureTreeRoot", STRUCTURE_PROPERTIES, EMPTY_LINK_NAMES),
1283            family(
1284                "structureElement",
1285                STRUCTURE_ELEMENT_PROPERTIES,
1286                EMPTY_LINK_NAMES,
1287            ),
1288            family("signature", SIGNATURE_PROPERTIES, EMPTY_LINK_NAMES),
1289            family("security", SECURITY_PROPERTIES, EMPTY_LINK_NAMES),
1290            family("outputIntent", OUTPUT_INTENT_PROPERTIES, EMPTY_LINK_NAMES),
1291            family("stream", STREAM_PROPERTIES, EMPTY_LINK_NAMES),
1292            family("object", OBJECT_PROPERTIES, EMPTY_LINK_NAMES),
1293        ];
1294        let mut by_name: BTreeMap<ObjectTypeName, Arc<dyn ModelFamily + Send + Sync>> =
1295            BTreeMap::new();
1296        let mut all_properties = BTreeSet::new();
1297        for family in families {
1298            for property in family.property_schema() {
1299                all_properties.insert(property.name.clone());
1300            }
1301            by_name.insert(family.family_name(), Arc::new(family) as Arc<_>);
1302        }
1303        for family in by_name.values() {
1304            for link in family.link_schema() {
1305                debug_assert!(
1306                    by_name.contains_key(&link.target),
1307                    "model registry link target is not registered"
1308                );
1309            }
1310        }
1311        for property in DIRECT_PROPERTY_NAMES {
1312            all_properties.insert(PropertyName::unchecked(*property));
1313        }
1314        Self {
1315            families: by_name,
1316            all_properties,
1317        }
1318    }
1319
1320    /// Returns true when a family is registered.
1321    #[must_use]
1322    pub(crate) fn has_family(&self, family: &ObjectTypeName) -> bool {
1323        self.families.contains_key(family)
1324    }
1325
1326    /// Returns true when a property is present on a specific registered family schema.
1327    #[must_use]
1328    pub(crate) fn has_family_property(
1329        &self,
1330        family: &ObjectTypeName,
1331        property: &PropertyName,
1332    ) -> bool {
1333        self.families.get(family).is_some_and(|family| {
1334            family
1335                .property_schema()
1336                .iter()
1337                .any(|spec| spec.name == *property)
1338        })
1339    }
1340
1341    fn family_property_names(&self, family: &ObjectTypeName) -> Option<Vec<PropertyName>> {
1342        self.families.get(family).map(|family| {
1343            family
1344                .property_schema()
1345                .iter()
1346                .map(|property| property.name.clone())
1347                .collect()
1348        })
1349    }
1350
1351    /// Iterates registered family names.
1352    pub(crate) fn family_names(&self) -> impl Iterator<Item = &ObjectTypeName> {
1353        self.families.keys()
1354    }
1355}
1356
1357#[derive(Debug)]
1358struct StaticModelFamily {
1359    name: ObjectTypeName,
1360    properties: Vec<PropertySpec>,
1361    links: Vec<LinkSpec>,
1362}
1363
1364impl ModelFamily for StaticModelFamily {
1365    fn family_name(&self) -> ObjectTypeName {
1366        self.name.clone()
1367    }
1368
1369    fn property_schema(&self) -> &[PropertySpec] {
1370        &self.properties
1371    }
1372
1373    fn link_schema(&self) -> &[LinkSpec] {
1374        &self.links
1375    }
1376}
1377
1378fn family(
1379    name: &'static str,
1380    properties: &'static [&'static str],
1381    links: &'static [(&'static str, &'static str)],
1382) -> StaticModelFamily {
1383    StaticModelFamily {
1384        name: ObjectTypeName::unchecked(name),
1385        properties: properties
1386            .iter()
1387            .map(|name| PropertySpec::new(name))
1388            .collect(),
1389        links: links
1390            .iter()
1391            .map(|(name, target)| LinkSpec::new(name, target))
1392            .collect(),
1393    }
1394}
1395
/// Stable object identity used by traversal.
///
/// Wraps a single string key; equality and hashing are derived from that key.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ObjectIdentity {
    // NOTE(review): presumably filled from `ModelObjectRef::identity_key` — confirm at the
    // construction site, which is not visible here.
    key: String,
}
1401
1402/// Validation model link name.
1403#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1404pub struct LinkName(Identifier);
1405
1406impl LinkName {
1407    /// Creates a link name.
1408    ///
1409    /// # Errors
1410    ///
1411    /// Returns [`crate::ConfigError`] when the identifier violates policy.
1412    pub fn new(value: impl Into<String>) -> std::result::Result<Self, crate::ConfigError> {
1413        Ok(Self(Identifier::new(value)?))
1414    }
1415}
1416
/// Validation model object.
///
/// Common interface of the model wrappers: identity, type information, property lookup, and
/// link resolution against a [`ModelGraph`].
pub trait ModelObject {
    /// Optional stable object identity.
    fn id(&self) -> Option<ObjectIdentity>;
    /// Concrete object type.
    fn object_type(&self) -> ObjectTypeName;
    /// Supertype names.
    fn super_types(&self) -> &[ObjectTypeName];
    /// Extra diagnostic context.
    fn extra_context(&self) -> Option<&str>;
    /// Looks up a property.
    ///
    /// # Errors
    ///
    /// Returns [`PdfvError`] when the property is unknown or cannot be materialized.
    fn property(&self, name: &PropertyName) -> Result<ModelValue>;
    /// Link names exposed by this object.
    fn links(&self) -> &[LinkName];
    /// Resolves linked model objects.
    ///
    /// `max_objects` is passed through to link materialization; presumably it caps how many
    /// objects may be produced — confirm against the implementations.
    ///
    /// # Errors
    ///
    /// Returns [`PdfvError`] when link materialization fails.
    fn linked_objects<'a>(
        &self,
        graph: &ModelGraph<'a>,
        max_objects: usize,
    ) -> Result<Vec<ModelObjectRef<'a>>>;
}
1446
/// Validation model object reference.
///
/// One variant per dedicated model wrapper, plus [`ModelObjectRef::Generic`] for families
/// that are represented by a generic dictionary-backed model.
#[derive(Clone, Debug)]
pub enum ModelObjectRef<'a> {
    /// Document root object.
    Document(DocumentModel<'a>),
    /// Catalog object.
    Catalog(CatalogModel<'a>),
    /// Metadata stream object.
    Metadata(MetadataModel<'a>),
    /// Page dictionary object.
    Page(PageModel<'a>),
    /// Font dictionary object.
    Font(FontModel<'a>),
    /// Annotation dictionary object.
    Annotation(AnnotationModel<'a>),
    /// Output intent dictionary object.
    OutputIntent(OutputIntentModel<'a>),
    /// Page content stream object.
    ContentStream(ContentStreamModel<'a>),
    /// Basic stream object.
    Stream(StreamModel<'a>),
    /// Generic dictionary-backed model family object.
    Generic(GenericModel<'a>),
}
1471
1472impl<'a> ModelObjectRef<'a> {
1473    /// Returns the parsed document backing this model object.
1474    #[must_use]
1475    pub fn document(&self) -> &'a ParsedDocument {
1476        match self {
1477            Self::Document(model) => model.document,
1478            Self::Catalog(model) => model.document,
1479            Self::Metadata(model) => model.document,
1480            Self::Page(model) => model.document,
1481            Self::Font(model) => model.document,
1482            Self::Annotation(model) => model.document,
1483            Self::OutputIntent(model) => model.document,
1484            Self::ContentStream(model) => model.document,
1485            Self::Stream(model) => model.document,
1486            Self::Generic(model) => model.document,
1487        }
1488    }
1489
1490    /// Returns this object's type.
1491    #[must_use]
1492    pub fn object_type(&self) -> ObjectTypeName {
1493        match self {
1494            Self::Document(model) => model.object_type(),
1495            Self::Catalog(model) => model.object_type(),
1496            Self::Metadata(model) => model.object_type(),
1497            Self::Page(model) => model.object_type(),
1498            Self::Font(model) => model.object_type(),
1499            Self::Annotation(model) => model.object_type(),
1500            Self::OutputIntent(model) => model.object_type(),
1501            Self::ContentStream(model) => model.object_type(),
1502            Self::Stream(model) => model.object_type(),
1503            Self::Generic(model) => model.object_type(),
1504        }
1505    }
1506
1507    /// Looks up a property.
1508    ///
1509    /// # Errors
1510    ///
1511    /// Returns [`PdfvError`] when the property is unknown.
1512    pub fn property(&self, name: &PropertyName) -> Result<ModelValue> {
1513        match self {
1514            Self::Document(model) => model.property(name),
1515            Self::Catalog(model) => model.property(name),
1516            Self::Metadata(model) => model.property(name),
1517            Self::Page(model) => model.property(name),
1518            Self::Font(model) => model.property(name),
1519            Self::Annotation(model) => model.property(name),
1520            Self::OutputIntent(model) => model.property(name),
1521            Self::ContentStream(model) => model.property(name),
1522            Self::Stream(model) => model.property(name),
1523            Self::Generic(model) => model.property(name),
1524        }
1525    }
1526
1527    fn location(&self) -> ObjectLocation {
1528        match self {
1529            Self::Document(_) => ObjectLocation {
1530                object: None,
1531                offset: None,
1532                path: Some(BoundedText::unchecked("root")),
1533            },
1534            Self::Catalog(model) => ObjectLocation {
1535                object: Some(model.key),
1536                offset: Some(model.offset),
1537                path: Some(BoundedText::unchecked("root/catalog[0]")),
1538            },
1539            Self::Metadata(model) => ObjectLocation {
1540                object: Some(model.key),
1541                offset: Some(model.offset),
1542                path: Some(BoundedText::unchecked("root/catalog[0]/metadata[0]")),
1543            },
1544            Self::Page(model) => ObjectLocation {
1545                object: Some(model.key),
1546                offset: Some(model.offset),
1547                path: Some(BoundedText::unchecked(format!(
1548                    "root/page[{}]",
1549                    model.ordinal
1550                ))),
1551            },
1552            Self::Font(model) => ObjectLocation {
1553                object: model.key,
1554                offset: model.offset,
1555                path: Some(BoundedText::unchecked(format!(
1556                    "root/page[{}]/font[{}]",
1557                    model.page_ordinal,
1558                    String::from_utf8_lossy(model.name.as_bytes())
1559                ))),
1560            },
1561            Self::Annotation(model) => ObjectLocation {
1562                object: model.key,
1563                offset: model.offset,
1564                path: Some(BoundedText::unchecked(format!(
1565                    "root/page[{}]/annotation[{}]",
1566                    model.page_ordinal, model.ordinal
1567                ))),
1568            },
1569            Self::OutputIntent(model) => ObjectLocation {
1570                object: model.key,
1571                offset: model.offset,
1572                path: Some(BoundedText::unchecked(format!(
1573                    "root/catalog[0]/outputIntent[{}]",
1574                    model.ordinal
1575                ))),
1576            },
1577            Self::ContentStream(model) => ObjectLocation {
1578                object: Some(model.key),
1579                offset: Some(model.offset),
1580                path: Some(BoundedText::unchecked(format!(
1581                    "root/page[{}]/contentStream[{}]",
1582                    model.page_ordinal, model.ordinal
1583                ))),
1584            },
1585            Self::Stream(model) => ObjectLocation {
1586                object: Some(model.key),
1587                offset: Some(model.offset),
1588                path: Some(BoundedText::unchecked(format!(
1589                    "root/stream[{}]",
1590                    model.key.number
1591                ))),
1592            },
1593            Self::Generic(model) => ObjectLocation {
1594                object: model.key,
1595                offset: model.offset,
1596                path: Some(BoundedText::unchecked(model.context.clone())),
1597            },
1598        }
1599    }
1600
1601    fn context(&self) -> BoundedText {
1602        match self {
1603            Self::Document(_) => BoundedText::unchecked("root"),
1604            Self::Catalog(_) => BoundedText::unchecked("root/catalog[0]"),
1605            Self::Metadata(_) => BoundedText::unchecked("root/catalog[0]/metadata[0]"),
1606            Self::Page(model) => BoundedText::unchecked(format!("root/page[{}]", model.ordinal)),
1607            Self::Font(model) => BoundedText::unchecked(format!(
1608                "root/page[{}]/font[{}]",
1609                model.page_ordinal,
1610                String::from_utf8_lossy(model.name.as_bytes())
1611            )),
1612            Self::Annotation(model) => BoundedText::unchecked(format!(
1613                "root/page[{}]/annotation[{}]",
1614                model.page_ordinal, model.ordinal
1615            )),
1616            Self::OutputIntent(model) => {
1617                BoundedText::unchecked(format!("root/catalog[0]/outputIntent[{}]", model.ordinal))
1618            }
1619            Self::ContentStream(model) => BoundedText::unchecked(format!(
1620                "root/page[{}]/contentStream[{}]",
1621                model.page_ordinal, model.ordinal
1622            )),
1623            Self::Stream(model) => {
1624                BoundedText::unchecked(format!("root/stream[{}]", model.key.number))
1625            }
1626            Self::Generic(model) => BoundedText::unchecked(model.context.clone()),
1627        }
1628    }
1629
1630    fn identity_key(&self) -> String {
1631        match self {
1632            Self::Document(_) => String::from("document"),
1633            Self::Catalog(model) => {
1634                format!("catalog:{}:{}", model.key.number, model.key.generation)
1635            }
1636            Self::Metadata(model) => {
1637                format!("metadata:{}:{}", model.key.number, model.key.generation)
1638            }
1639            Self::Page(model) => format!("page:{}:{}", model.key.number, model.key.generation),
1640            Self::Font(model) => format!(
1641                "font:{}:{}:{}",
1642                model.page_ordinal,
1643                model.key.map_or(0, |key| key.number.get()),
1644                String::from_utf8_lossy(model.name.as_bytes())
1645            ),
1646            Self::Annotation(model) => format!(
1647                "annotation:{}:{}:{}",
1648                model.page_ordinal,
1649                model.ordinal,
1650                model.key.map_or(0, |key| key.number.get())
1651            ),
1652            Self::OutputIntent(model) => format!(
1653                "outputIntent:{}:{}",
1654                model.ordinal,
1655                model.key.map_or(0, |key| key.number.get())
1656            ),
1657            Self::ContentStream(model) => format!(
1658                "contentStream:{}:{}:{}",
1659                model.page_ordinal, model.key.number, model.key.generation
1660            ),
1661            Self::Stream(model) => format!("stream:{}:{}", model.key.number, model.key.generation),
1662            Self::Generic(model) => format!(
1663                "{}:{}:{}",
1664                model.object_type.as_str(),
1665                model.ordinal,
1666                model.key.map_or(0, |key| key.number.get())
1667            ),
1668        }
1669    }
1670
1671    fn linked_objects(
1672        &self,
1673        graph: &ModelGraph<'a>,
1674        max_objects: usize,
1675    ) -> Result<Vec<ModelObjectRef<'a>>> {
1676        match self {
1677            Self::Document(model) => model.linked_objects(graph, max_objects),
1678            Self::Catalog(model) => model.linked_objects(graph, max_objects),
1679            Self::Metadata(model) => model.linked_objects(graph, max_objects),
1680            Self::Page(model) => model.linked_objects(graph, max_objects),
1681            Self::Font(model) => model.linked_objects(graph, max_objects),
1682            Self::Annotation(model) => model.linked_objects(graph, max_objects),
1683            Self::OutputIntent(model) => model.linked_objects(graph, max_objects),
1684            Self::ContentStream(model) => model.linked_objects(graph, max_objects),
1685            Self::Stream(model) => model.linked_objects(graph, max_objects),
1686            Self::Generic(model) => model.linked_objects(graph, max_objects),
1687        }
1688    }
1689}
1690
/// Document model wrapper.
///
/// Pairs a parsed document with the resource limits in force and the set of model families
/// that should be materialized from it.
#[derive(Debug)]
pub struct ModelGraph<'a> {
    /// Parsed document the model objects are built from.
    document: &'a ParsedDocument,
    /// Resource limits handed to model construction.
    limits: &'a ResourceLimits,
    /// Families to materialize: the object types of the supported rules (`for_rules`) or
    /// every registered family (`with_all_families`).
    materialized_families: BTreeSet<ObjectTypeName>,
}
1698
/// Parameter bundle describing one collection of resources to turn into model objects.
///
/// NOTE(review): the consumers of this struct are not visible here; the field notes below
/// are inferred from names and types and should be confirmed against the call sites.
#[derive(Clone, Copy, Debug)]
struct ResourceCollection<'a> {
    // Dictionary the resources are read from.
    resources: &'a crate::Dictionary,
    // Presumably the key inside `resources` that holds the collection — TODO confirm.
    resource_name: &'static str,
    // Presumably the model family assigned to the produced objects — TODO confirm.
    family: &'static str,
    // Presumably the prefix used when building each object's context path — TODO confirm.
    context_prefix: &'static str,
    // Ordinal of the page the resources belong to.
    page_ordinal: usize,
    // Object budget; presumably the same `max_objects` cap used elsewhere — TODO confirm.
    max_objects: usize,
}
1708
1709impl<'a> ModelGraph<'a> {
1710    fn for_rules(document: &'a ParsedDocument, limits: &'a ResourceLimits, rules: &[Rule]) -> Self {
1711        let materialized_families = rules
1712            .iter()
1713            .filter(|rule| !matches!(rule.test, crate::RuleExpr::Unsupported { .. }))
1714            .map(|rule| rule.object_type.clone())
1715            .collect();
1716        Self {
1717            document,
1718            limits,
1719            materialized_families,
1720        }
1721    }
1722
1723    fn with_all_families(document: &'a ParsedDocument, limits: &'a ResourceLimits) -> Self {
1724        Self {
1725            document,
1726            limits,
1727            materialized_families: ModelRegistry::default_registry()
1728                .family_names()
1729                .cloned()
1730                .collect(),
1731        }
1732    }
1733
1734    fn materializes(&self, family: &str) -> bool {
1735        self.materialized_families
1736            .iter()
1737            .any(|materialized| materialized.as_str() == family)
1738    }
1739
1740    fn materializes_generic_roots(&self) -> bool {
1741        self.materialized_families.iter().any(|family| {
1742            !matches!(
1743                family.as_str(),
1744                "document"
1745                    | "catalog"
1746                    | "metadata"
1747                    | "page"
1748                    | "font"
1749                    | "annotation"
1750                    | "outputIntent"
1751                    | "contentStream"
1752                    | "stream"
1753                    | "object"
1754            )
1755        })
1756    }
1757
1758    fn catalog(&self) -> Option<CatalogModel<'a>> {
1759        self.document
1760            .catalog
1761            .and_then(|key| CatalogModel::new(self.document, key))
1762    }
1763
    /// Builds the metadata model from the catalog's `metadata` field by delegating to
    /// `MetadataModel::new`; returns whatever that constructor yields.
    fn metadata(&self, catalog: &CatalogModel<'_>) -> Option<MetadataModel<'a>> {
        MetadataModel::new(self.document, catalog.metadata)
    }
1767
    /// Builds the page models reachable from `catalog` by delegating to
    /// `PageModel::from_catalog` with this graph's document and limits.
    ///
    /// Errors from that constructor are returned unchanged.
    fn pages(&self, catalog: &CatalogModel<'_>, max_objects: usize) -> Result<Vec<PageModel<'a>>> {
        PageModel::from_catalog(self.document, catalog, self.limits, max_objects)
    }
1771
    /// Builds the font models of `page` by delegating to `FontModel::from_page`.
    ///
    /// Errors from that constructor are returned unchanged.
    fn fonts(&self, page: &PageModel<'_>, max_objects: usize) -> Result<Vec<FontModel<'a>>> {
        FontModel::from_page(self.document, page, max_objects)
    }
1775
    /// Builds the annotation models of `page` by delegating to `AnnotationModel::from_page`.
    ///
    /// Errors from that constructor are returned unchanged.
    fn annotations(
        &self,
        page: &PageModel<'_>,
        max_objects: usize,
    ) -> Result<Vec<AnnotationModel<'a>>> {
        AnnotationModel::from_page(self.document, page, max_objects)
    }
1783
    /// Builds the output intent models of `catalog` by delegating to
    /// `OutputIntentModel::from_catalog`.
    ///
    /// Errors from that constructor are returned unchanged.
    fn output_intents(
        &self,
        catalog: &CatalogModel<'_>,
        max_objects: usize,
    ) -> Result<Vec<OutputIntentModel<'a>>> {
        OutputIntentModel::from_catalog(self.document, catalog, max_objects)
    }
1791
    /// Builds the content stream models of `page` by delegating to
    /// `ContentStreamModel::from_page`.
    ///
    /// Errors from that constructor are returned unchanged.
    fn content_streams(
        &self,
        page: &PageModel<'_>,
        max_objects: usize,
    ) -> Result<Vec<ContentStreamModel<'a>>> {
        ContentStreamModel::from_page(self.document, page, max_objects)
    }
1799
1800    fn push_streams(
1801        &self,
1802        objects: &mut Vec<ModelObjectRef<'a>>,
1803        max_objects: usize,
1804    ) -> Result<()> {
1805        for object in self.document.objects.values() {
1806            let Some(stream) = StreamModel::from_indirect_with_document(self.document, object)
1807            else {
1808                continue;
1809            };
1810            if Some(stream.key) != self.document.catalog {
1811                push_linked(objects, ModelObjectRef::Stream(stream), max_objects)?;
1812            }
1813        }
1814        Ok(())
1815    }
1816
1817    fn push_generic_roots(
1818        &self,
1819        objects: &mut Vec<ModelObjectRef<'a>>,
1820        max_objects: usize,
1821    ) -> Result<()> {
1822        for model in self.generic_models(max_objects)? {
1823            if !self.materialized_families.contains(&model.object_type) {
1824                continue;
1825            }
1826            push_linked(objects, ModelObjectRef::Generic(model), max_objects)?;
1827        }
1828        Ok(())
1829    }
1830
1831    fn generic_models(&self, max_objects: usize) -> Result<Vec<GenericModel<'a>>> {
1832        let mut models = Vec::new();
1833        if let Some(catalog) = self.catalog() {
1834            self.push_catalog_generic_models(&catalog, max_objects, &mut models)?;
1835            for page in self.pages(&catalog, max_objects)? {
1836                self.push_page_generic_models(&page, max_objects, &mut models)?;
1837            }
1838        }
1839        self.push_indirect_generic_models(max_objects, &mut models)?;
1840        Ok(models)
1841    }
1842
1843    fn push_catalog_generic_models(
1844        &self,
1845        catalog: &CatalogModel<'_>,
1846        max_objects: usize,
1847        models: &mut Vec<GenericModel<'a>>,
1848    ) -> Result<()> {
1849        let Some(catalog_object) = self.document.objects.get(&catalog.key) else {
1850            return Ok(());
1851        };
1852        let Some(dictionary) = catalog_object.object.as_dictionary() else {
1853            return Ok(());
1854        };
1855        for (family, key_name, context) in [
1856            ("acroForm", "AcroForm", "root/catalog[0]/acroForm[0]"),
1857            (
1858                "structureTreeRoot",
1859                "StructTreeRoot",
1860                "root/catalog[0]/structureTreeRoot[0]",
1861            ),
1862            (
1863                "optionalContentProperties",
1864                "OCProperties",
1865                "root/catalog[0]/optionalContentProperties[0]",
1866            ),
1867            ("names", "Names", "root/catalog[0]/names[0]"),
1868            ("outline", "Outlines", "root/catalog[0]/outline[0]"),
1869            ("permissions", "Perms", "root/catalog[0]/permissions[0]"),
1870        ] {
1871            if let Some((key, offset, dictionary)) =
1872                resolve_named_dictionary_from_option(self.document, dictionary.get(key_name))
1873            {
1874                push_generic_model(
1875                    models,
1876                    GenericModel::new(
1877                        self.document,
1878                        family,
1879                        key,
1880                        offset,
1881                        dictionary,
1882                        models.len(),
1883                        context,
1884                    ),
1885                    max_objects,
1886                )?;
1887            }
1888        }
1889        for (ordinal, value) in array_values(dictionary.get("Dests")).enumerate() {
1890            if let Some((key, offset, dictionary)) = resolve_named_dictionary(self.document, value)
1891            {
1892                push_generic_model(
1893                    models,
1894                    GenericModel::new(
1895                        self.document,
1896                        "destination",
1897                        key,
1898                        offset,
1899                        dictionary,
1900                        ordinal,
1901                        format!("root/catalog[0]/destination[{ordinal}]"),
1902                    ),
1903                    max_objects,
1904                )?;
1905            }
1906        }
1907        Ok(())
1908    }
1909
1910    fn push_page_generic_models(
1911        &self,
1912        page: &PageModel<'a>,
1913        max_objects: usize,
1914        models: &mut Vec<GenericModel<'a>>,
1915    ) -> Result<()> {
1916        if let Some(resources) =
1917            resolve_dictionary_value(self.document, page.dictionary.get("Resources"))
1918        {
1919            push_generic_model(
1920                models,
1921                GenericModel::new(
1922                    self.document,
1923                    "resource",
1924                    None,
1925                    None,
1926                    resources,
1927                    page.ordinal,
1928                    format!("root/page[{}]/resources[0]", page.ordinal),
1929                ),
1930                max_objects,
1931            )?;
1932            self.push_resource_collection(
1933                ResourceCollection {
1934                    resources,
1935                    resource_name: "XObject",
1936                    family: "xObject",
1937                    context_prefix: "root/page",
1938                    page_ordinal: page.ordinal,
1939                    max_objects,
1940                },
1941                models,
1942            )?;
1943            self.push_resource_collection(
1944                ResourceCollection {
1945                    resources,
1946                    resource_name: "ColorSpace",
1947                    family: "colorSpace",
1948                    context_prefix: "root/page",
1949                    page_ordinal: page.ordinal,
1950                    max_objects,
1951                },
1952                models,
1953            )?;
1954            self.push_resource_collection(
1955                ResourceCollection {
1956                    resources,
1957                    resource_name: "ExtGState",
1958                    family: "extGState",
1959                    context_prefix: "root/page",
1960                    page_ordinal: page.ordinal,
1961                    max_objects,
1962                },
1963                models,
1964            )?;
1965        }
1966        Ok(())
1967    }
1968
1969    fn push_resource_collection(
1970        &self,
1971        collection: ResourceCollection<'a>,
1972        models: &mut Vec<GenericModel<'a>>,
1973    ) -> Result<()> {
1974        let Some(crate::CosObject::Dictionary(resources)) =
1975            collection.resources.get(collection.resource_name)
1976        else {
1977            return Ok(());
1978        };
1979        for (ordinal, (name, value)) in resources.iter().enumerate() {
1980            if let Some((key, offset, dictionary)) = resolve_named_dictionary(self.document, value)
1981            {
1982                let object_family = if collection.family == "xObject" {
1983                    classify_xobject(dictionary).unwrap_or(collection.family)
1984                } else {
1985                    collection.family
1986                };
1987                push_generic_model(
1988                    models,
1989                    GenericModel::new(
1990                        self.document,
1991                        object_family,
1992                        key,
1993                        offset,
1994                        dictionary,
1995                        ordinal,
1996                        format!(
1997                            "{}[{}]/{}[{}]",
1998                            collection.context_prefix,
1999                            collection.page_ordinal,
2000                            collection.family,
2001                            String::from_utf8_lossy(name.as_bytes())
2002                        ),
2003                    ),
2004                    collection.max_objects,
2005                )?;
2006            }
2007        }
2008        Ok(())
2009    }
2010
2011    fn push_indirect_generic_models(
2012        &self,
2013        max_objects: usize,
2014        models: &mut Vec<GenericModel<'a>>,
2015    ) -> Result<()> {
2016        for object in self.document.objects.values() {
2017            let Some(dictionary) = object.object.as_dictionary() else {
2018                continue;
2019            };
2020            let Some(family) = classify_dictionary(dictionary) else {
2021                continue;
2022            };
2023            if matches!(
2024                family,
2025                "catalog" | "page" | "font" | "annotation" | "outputIntent" | "metadata"
2026            ) {
2027                continue;
2028            }
2029            push_generic_model(
2030                models,
2031                GenericModel::new(
2032                    self.document,
2033                    family,
2034                    Some(object.key),
2035                    Some(object.offset),
2036                    dictionary,
2037                    models.len(),
2038                    format!("root/{family}[{}]", object.key.number),
2039                ),
2040                max_objects,
2041            )?;
2042        }
2043        Ok(())
2044    }
2045}
2046
/// Document model wrapper.
///
/// Root node of the validation model graph; it exposes document-level
/// properties and links to the catalog.
#[derive(Clone, Debug)]
pub struct DocumentModel<'a> {
    /// Parsed document that every property is read from.
    document: &'a ParsedDocument,
    /// Model type name; `new` sets it to `document`.
    object_type: ObjectTypeName,
    /// Supertype names; `new` leaves this empty.
    supertypes: Vec<ObjectTypeName>,
    /// Declared link names; `new` registers the single `catalog` link.
    links: Vec<LinkName>,
}
2055
2056impl<'a> DocumentModel<'a> {
2057    /// Creates a document model wrapper.
2058    #[must_use]
2059    pub fn new(document: &'a ParsedDocument) -> Self {
2060        Self {
2061            document,
2062            object_type: ObjectTypeName::unchecked("document"),
2063            supertypes: Vec::new(),
2064            links: vec![LinkName(Identifier::unchecked("catalog"))],
2065        }
2066    }
2067}
2068
2069impl ModelObject for DocumentModel<'_> {
2070    fn id(&self) -> Option<ObjectIdentity> {
2071        Some(ObjectIdentity {
2072            key: String::from("document"),
2073        })
2074    }
2075
2076    fn object_type(&self) -> ObjectTypeName {
2077        self.object_type.clone()
2078    }
2079
2080    fn super_types(&self) -> &[ObjectTypeName] {
2081        &self.supertypes
2082    }
2083
2084    fn extra_context(&self) -> Option<&str> {
2085        Some("root")
2086    }
2087
2088    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
2089        match name.as_str() {
2090            "headerOffset" => Ok(ModelValue::Number(u64_to_f64(header_offset(
2091                self.document,
2092            ))?)),
2093            "postEOFDataSize" => Ok(ModelValue::Number(u64_to_f64(post_eof_data_size(
2094                self.document,
2095            ))?)),
2096            "header" => Ok(ModelValue::String(BoundedText::new(
2097                format!(
2098                    "%PDF-{}.{}",
2099                    self.document.version.major, self.document.version.minor
2100                ),
2101                32,
2102            )?)),
2103            "encrypted" | "isEncrypted" => Ok(ModelValue::Bool(self.document.is_encrypted())),
2104            "hasCatalog" => Ok(ModelValue::Bool(self.document.catalog.is_some())),
2105            "containsXRefStream" => Ok(ModelValue::Bool(contains_xref_stream(self.document))),
2106            "nrIndirects" => Ok(ModelValue::Number(usize_to_f64(
2107                self.document.objects.len(),
2108            )?)),
2109            "containsPDFUAIdentification" => Ok(ModelValue::Bool(contains_xmp_family(
2110                self.document,
2111                "pdfua",
2112            ))),
2113            "containsPDFAIdentification" => {
2114                Ok(ModelValue::Bool(contains_xmp_family(self.document, "pdfa")))
2115            }
2116            "part" => Ok(ModelValue::Number(0.0)),
2117            "partPrefix" | "rev" | "revPrefix" => Ok(ModelValue::Null),
2118            _ => Err(crate::ProfileError::UnknownProperty {
2119                property: BoundedText::unchecked(name.as_str()),
2120            }
2121            .into()),
2122        }
2123    }
2124
2125    fn links(&self) -> &[LinkName] {
2126        &self.links
2127    }
2128
2129    fn linked_objects<'a>(
2130        &self,
2131        graph: &ModelGraph<'a>,
2132        max_objects: usize,
2133    ) -> Result<Vec<ModelObjectRef<'a>>> {
2134        let mut objects = Vec::new();
2135        if let Some(catalog) = graph.catalog() {
2136            push_linked(&mut objects, ModelObjectRef::Catalog(catalog), max_objects)?;
2137        }
2138        if graph.materializes("stream") {
2139            graph.push_streams(&mut objects, max_objects)?;
2140        }
2141        if graph.materializes_generic_roots() {
2142            graph.push_generic_roots(&mut objects, max_objects)?;
2143        }
2144        Ok(objects)
2145    }
2146}
2147
/// Catalog model wrapper.
///
/// Built by `CatalogModel::new` from the indirect object stored under the
/// catalog key.
#[derive(Clone, Debug)]
pub struct CatalogModel<'a> {
    /// Parsed document the catalog belongs to.
    document: &'a ParsedDocument,
    /// Object key of the catalog object.
    key: ObjectKey,
    /// Offset copied from the catalog's indirect object.
    offset: u64,
    /// Target of the `Metadata` entry when it is an indirect reference.
    metadata: Option<ObjectKey>,
    /// Target of the `Pages` entry when it is an indirect reference.
    pages: Option<ObjectKey>,
    /// Model type name; `new` sets it to `catalog`.
    object_type: ObjectTypeName,
    /// Supertype names; `new` sets the single supertype `object`.
    supertypes: Vec<ObjectTypeName>,
    /// Declared link names; `new` registers the single `metadata` link.
    links: Vec<LinkName>,
}
2160
2161impl<'a> CatalogModel<'a> {
2162    fn new(document: &'a ParsedDocument, key: ObjectKey) -> Option<Self> {
2163        let object = document.objects.get(&key)?;
2164        let dictionary = object.object.as_dictionary()?;
2165        let metadata = match dictionary.get("Metadata") {
2166            Some(crate::CosObject::Reference(key)) => Some(*key),
2167            _ => None,
2168        };
2169        let pages = match dictionary.get("Pages") {
2170            Some(crate::CosObject::Reference(key)) => Some(*key),
2171            _ => None,
2172        };
2173        Some(Self {
2174            document,
2175            key,
2176            offset: object.offset,
2177            metadata,
2178            pages,
2179            object_type: ObjectTypeName::unchecked("catalog"),
2180            supertypes: vec![ObjectTypeName::unchecked("object")],
2181            links: vec![LinkName(Identifier::unchecked("metadata"))],
2182        })
2183    }
2184}
2185
2186impl ModelObject for CatalogModel<'_> {
2187    fn id(&self) -> Option<ObjectIdentity> {
2188        Some(ObjectIdentity {
2189            key: format!("catalog:{}:{}", self.key.number, self.key.generation),
2190        })
2191    }
2192
2193    fn object_type(&self) -> ObjectTypeName {
2194        self.object_type.clone()
2195    }
2196
2197    fn super_types(&self) -> &[ObjectTypeName] {
2198        &self.supertypes
2199    }
2200
2201    fn extra_context(&self) -> Option<&str> {
2202        Some("catalog")
2203    }
2204
2205    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
2206        match name.as_str() {
2207            "hasMetadata" => Ok(ModelValue::Bool(self.metadata.is_some())),
2208            "hasAcroForm" | "containsAcroForm" => Ok(ModelValue::Bool(
2209                self.document
2210                    .objects
2211                    .get(&self.key)
2212                    .and_then(|object| object.object.as_dictionary())
2213                    .and_then(|dictionary| dictionary.get("AcroForm"))
2214                    .is_some(),
2215            )),
2216            "hasStructTreeRoot" | "containsStructTreeRoot" => Ok(ModelValue::Bool(
2217                self.document
2218                    .objects
2219                    .get(&self.key)
2220                    .and_then(|object| object.object.as_dictionary())
2221                    .and_then(|dictionary| dictionary.get("StructTreeRoot"))
2222                    .is_some(),
2223            )),
2224            "hasOCProperties" | "containsOCProperties" => Ok(ModelValue::Bool(
2225                self.document
2226                    .objects
2227                    .get(&self.key)
2228                    .and_then(|object| object.object.as_dictionary())
2229                    .and_then(|dictionary| dictionary.get("OCProperties"))
2230                    .is_some(),
2231            )),
2232            "hasLang" => Ok(ModelValue::Bool(
2233                self.document
2234                    .objects
2235                    .get(&self.key)
2236                    .and_then(|object| object.object.as_dictionary())
2237                    .and_then(|dictionary| dictionary.get("Lang"))
2238                    .is_some(),
2239            )),
2240            "hasOutlines" => Ok(ModelValue::Bool(
2241                self.document
2242                    .objects
2243                    .get(&self.key)
2244                    .and_then(|object| object.object.as_dictionary())
2245                    .and_then(|dictionary| dictionary.get("Outlines"))
2246                    .is_some(),
2247            )),
2248            "hasNames" => Ok(ModelValue::Bool(
2249                self.document
2250                    .objects
2251                    .get(&self.key)
2252                    .and_then(|object| object.object.as_dictionary())
2253                    .and_then(|dictionary| dictionary.get("Names"))
2254                    .is_some(),
2255            )),
2256            "hasDests" => Ok(ModelValue::Bool(
2257                self.document
2258                    .objects
2259                    .get(&self.key)
2260                    .and_then(|object| object.object.as_dictionary())
2261                    .and_then(|dictionary| dictionary.get("Dests"))
2262                    .is_some(),
2263            )),
2264            "Marked" => Ok(ModelValue::Bool(false)),
2265            _ => self
2266                .document
2267                .objects
2268                .get(&self.key)
2269                .and_then(|object| object.object.as_dictionary())
2270                .map_or_else(
2271                    || unknown_property(name),
2272                    |dictionary| dictionary_property(dictionary, name, CATALOG_DIRECT_PROPERTIES),
2273                ),
2274        }
2275    }
2276
2277    fn links(&self) -> &[LinkName] {
2278        &self.links
2279    }
2280
2281    fn linked_objects<'a>(
2282        &self,
2283        graph: &ModelGraph<'a>,
2284        max_objects: usize,
2285    ) -> Result<Vec<ModelObjectRef<'a>>> {
2286        let mut objects = graph
2287            .metadata(self)
2288            .map(ModelObjectRef::Metadata)
2289            .into_iter()
2290            .collect::<Vec<_>>();
2291        if objects.len() > max_objects {
2292            return Err(ValidationError::LimitExceeded {
2293                limit: "max_objects",
2294            }
2295            .into());
2296        }
2297        let mut output_intents =
2298            graph.output_intents(self, max_objects.saturating_sub(objects.len()))?;
2299        output_intents.reverse();
2300        for output_intent in output_intents {
2301            push_linked(
2302                &mut objects,
2303                ModelObjectRef::OutputIntent(output_intent),
2304                max_objects,
2305            )?;
2306        }
2307        let mut pages = graph.pages(self, max_objects.saturating_sub(objects.len()))?;
2308        pages.reverse();
2309        for page in pages {
2310            push_linked(&mut objects, ModelObjectRef::Page(page), max_objects)?;
2311        }
2312        Ok(objects)
2313    }
2314}
2315
2316/// Metadata stream model wrapper.
#[derive(Clone, Debug)]
pub struct MetadataModel<'a> {
    /// Parsed document the metadata stream belongs to.
    document: &'a ParsedDocument,
    /// Object key of the metadata stream object.
    key: ObjectKey,
    /// Offset copied from the metadata stream's indirect object.
    offset: u64,
    /// Model type name; `new` sets it to `metadata`.
    object_type: ObjectTypeName,
    /// Supertype names; `new` sets `stream` and `object`.
    supertypes: Vec<ObjectTypeName>,
    /// Declared link names; `new` leaves this empty.
    links: Vec<LinkName>,
}
2326
2327impl<'a> MetadataModel<'a> {
2328    fn new(document: &'a ParsedDocument, key: Option<ObjectKey>) -> Option<Self> {
2329        let key = key?;
2330        let object = document.objects.get(&key)?;
2331        if !matches!(object.object, crate::CosObject::Stream(_)) {
2332            return None;
2333        }
2334        Some(Self {
2335            document,
2336            key,
2337            offset: object.offset,
2338            object_type: ObjectTypeName::unchecked("metadata"),
2339            supertypes: vec![
2340                ObjectTypeName::unchecked("stream"),
2341                ObjectTypeName::unchecked("object"),
2342            ],
2343            links: Vec::new(),
2344        })
2345    }
2346}
2347
2348impl ModelObject for MetadataModel<'_> {
2349    fn id(&self) -> Option<ObjectIdentity> {
2350        Some(ObjectIdentity {
2351            key: format!("metadata:{}:{}", self.key.number, self.key.generation),
2352        })
2353    }
2354
2355    fn object_type(&self) -> ObjectTypeName {
2356        self.object_type.clone()
2357    }
2358
2359    fn super_types(&self) -> &[ObjectTypeName] {
2360        &self.supertypes
2361    }
2362
2363    fn extra_context(&self) -> Option<&str> {
2364        Some("metadata")
2365    }
2366
2367    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
2368        match name.as_str() {
2369            "present" | "catalogMetadata" => Ok(ModelValue::Bool(true)),
2370            "containsPDFAIdentification" => {
2371                Ok(ModelValue::Bool(contains_xmp_family(self.document, "pdfa")))
2372            }
2373            "containsPDFUAIdentification" => Ok(ModelValue::Bool(contains_xmp_family(
2374                self.document,
2375                "pdfua",
2376            ))),
2377            "part" => Ok(ModelValue::Number(xmp_part(self.document).unwrap_or(0.0))),
2378            "partPrefix" => Ok(ModelValue::String(BoundedText::unchecked(
2379                xmp_prefix_for_claim(self.document).unwrap_or("pdfaid"),
2380            ))),
2381            "conformance" => Ok(
2382                xmp_conformance(self.document).map_or(ModelValue::Null, |value| {
2383                    ModelValue::String(BoundedText::unchecked(value))
2384                }),
2385            ),
2386            "conformancePrefix" | "revPrefix" | "amdPrefix" | "corrPrefix" => {
2387                Ok(ModelValue::String(BoundedText::unchecked("pdfaid")))
2388            }
2389            "rev" => Ok(ModelValue::Null),
2390            "declarations" => Ok(ModelValue::List(xmp_declarations(self.document))),
2391            _ => self.document.objects.get(&self.key).map_or_else(
2392                || unknown_property(name),
2393                |object| match &object.object {
2394                    crate::CosObject::Stream(stream) => {
2395                        dictionary_property(&stream.dictionary, name, METADATA_DIRECT_PROPERTIES)
2396                    }
2397                    _ => unknown_property(name),
2398                },
2399            ),
2400        }
2401    }
2402
2403    fn links(&self) -> &[LinkName] {
2404        &self.links
2405    }
2406
2407    fn linked_objects<'a>(
2408        &self,
2409        _graph: &ModelGraph<'a>,
2410        _max_objects: usize,
2411    ) -> Result<Vec<ModelObjectRef<'a>>> {
2412        Ok(Vec::new())
2413    }
2414}
2415
2416/// Page dictionary model wrapper.
#[derive(Clone, Debug)]
pub struct PageModel<'a> {
    /// Parsed document the page belongs to.
    document: &'a ParsedDocument,
    /// Object key of the page object.
    key: ObjectKey,
    /// Offset copied from the page's indirect object.
    offset: u64,
    /// Zero-based position of the page in the order `from_catalog` found it.
    ordinal: usize,
    /// The page object's dictionary.
    dictionary: &'a crate::Dictionary,
    /// Model type name; `from_catalog` sets it to `page`.
    object_type: ObjectTypeName,
    /// Supertype names; `from_catalog` sets the single supertype `object`.
    supertypes: Vec<ObjectTypeName>,
    /// Declared link names: `fonts`, `annotations` and `contentStreams`.
    links: Vec<LinkName>,
}
2428
2429impl<'a> PageModel<'a> {
2430    fn from_catalog(
2431        document: &'a ParsedDocument,
2432        catalog: &CatalogModel<'_>,
2433        limits: &ResourceLimits,
2434        max_objects: usize,
2435    ) -> Result<Vec<Self>> {
2436        let Some(pages_root) = catalog.pages else {
2437            return Ok(Vec::new());
2438        };
2439        let mut stack = vec![pages_root];
2440        let mut pages = Vec::new();
2441        let mut visited = HashSet::new();
2442        while let Some(key) = stack.pop() {
2443            if !visited.insert(key) {
2444                continue;
2445            }
2446            let Some(object) = document.objects.get(&key) else {
2447                continue;
2448            };
2449            let Some(dictionary) = object.object.as_dictionary() else {
2450                continue;
2451            };
2452            match dictionary.get("Type") {
2453                Some(crate::CosObject::Name(name)) if name.matches("Page") => {
2454                    if pages.len() >= max_objects {
2455                        return Err(ValidationError::LimitExceeded {
2456                            limit: "max_objects",
2457                        }
2458                        .into());
2459                    }
2460                    pages.push(Self {
2461                        document,
2462                        key,
2463                        offset: object.offset,
2464                        ordinal: pages.len(),
2465                        dictionary,
2466                        object_type: ObjectTypeName::unchecked("page"),
2467                        supertypes: vec![ObjectTypeName::unchecked("object")],
2468                        links: vec![
2469                            LinkName(Identifier::unchecked("fonts")),
2470                            LinkName(Identifier::unchecked("annotations")),
2471                            LinkName(Identifier::unchecked("contentStreams")),
2472                        ],
2473                    });
2474                }
2475                _ => {
2476                    for kid in object_refs_from_array(dictionary.get("Kids"))
2477                        .into_iter()
2478                        .rev()
2479                    {
2480                        stack.push(kid);
2481                    }
2482                }
2483            }
2484            if u64::try_from(visited.len()).map_err(|_| ValidationError::LimitExceeded {
2485                limit: "max_objects",
2486            })? > limits.max_objects
2487            {
2488                return Err(ValidationError::LimitExceeded {
2489                    limit: "max_objects",
2490                }
2491                .into());
2492            }
2493        }
2494        Ok(pages)
2495    }
2496}
2497
2498impl ModelObject for PageModel<'_> {
2499    fn id(&self) -> Option<ObjectIdentity> {
2500        Some(ObjectIdentity {
2501            key: format!("page:{}:{}", self.key.number, self.key.generation),
2502        })
2503    }
2504
2505    fn object_type(&self) -> ObjectTypeName {
2506        self.object_type.clone()
2507    }
2508
2509    fn super_types(&self) -> &[ObjectTypeName] {
2510        &self.supertypes
2511    }
2512
2513    fn extra_context(&self) -> Option<&str> {
2514        Some("page")
2515    }
2516
2517    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
2518        match name.as_str() {
2519            "hasContents" => Ok(ModelValue::Bool(self.dictionary.get("Contents").is_some())),
2520            "hasResources" => Ok(ModelValue::Bool(self.dictionary.get("Resources").is_some())),
2521            "annotationCount" => Ok(ModelValue::Number(usize_to_f64(
2522                object_refs_or_direct_count(self.dictionary.get("Annots")),
2523            )?)),
2524            _ => dictionary_property(self.dictionary, name, PAGE_DIRECT_PROPERTIES),
2525        }
2526    }
2527
2528    fn links(&self) -> &[LinkName] {
2529        &self.links
2530    }
2531
2532    fn linked_objects<'a>(
2533        &self,
2534        graph: &ModelGraph<'a>,
2535        max_objects: usize,
2536    ) -> Result<Vec<ModelObjectRef<'a>>> {
2537        let mut objects = Vec::new();
2538        let mut content_streams = graph.content_streams(self, max_objects)?;
2539        content_streams.reverse();
2540        for content_stream in content_streams {
2541            push_linked(
2542                &mut objects,
2543                ModelObjectRef::ContentStream(content_stream),
2544                max_objects,
2545            )?;
2546        }
2547        let mut annotations = graph.annotations(self, max_objects.saturating_sub(objects.len()))?;
2548        annotations.reverse();
2549        for annotation in annotations {
2550            push_linked(
2551                &mut objects,
2552                ModelObjectRef::Annotation(annotation),
2553                max_objects,
2554            )?;
2555        }
2556        let mut fonts = graph.fonts(self, max_objects.saturating_sub(objects.len()))?;
2557        fonts.reverse();
2558        for font in fonts {
2559            push_linked(&mut objects, ModelObjectRef::Font(font), max_objects)?;
2560        }
2561        Ok(objects)
2562    }
2563}
2564
2565/// Font dictionary model wrapper.
#[derive(Clone, Debug)]
pub struct FontModel<'a> {
    /// Parsed document the font belongs to.
    document: &'a ParsedDocument,
    /// Ordinal of the page whose resources list this font.
    page_ordinal: usize,
    /// Object key as reported by `resolve_named_dictionary`; may be `None`.
    key: Option<ObjectKey>,
    /// Offset as reported by `resolve_named_dictionary`; may be `None`.
    offset: Option<u64>,
    /// Name under which the font appears in the page's `Font` resource dictionary.
    name: PdfName,
    /// The font dictionary.
    dictionary: &'a crate::Dictionary,
    /// Model type name; `from_page` sets it to `font`.
    object_type: ObjectTypeName,
    /// Supertype names; `from_page` sets the single supertype `object`.
    supertypes: Vec<ObjectTypeName>,
    /// Declared link names; `from_page` leaves this empty.
    links: Vec<LinkName>,
}
2578
2579impl<'a> FontModel<'a> {
2580    fn from_page(
2581        document: &'a ParsedDocument,
2582        page: &PageModel<'_>,
2583        max_objects: usize,
2584    ) -> Result<Vec<Self>> {
2585        let mut fonts = Vec::new();
2586        let Some(page_dictionary) = page_dictionary(document, page.key) else {
2587            return Ok(fonts);
2588        };
2589        let Some(resources) = resolve_dictionary_value(document, page_dictionary.get("Resources"))
2590        else {
2591            return Ok(fonts);
2592        };
2593        let Some(crate::CosObject::Dictionary(fonts_dictionary)) = resources.get("Font") else {
2594            return Ok(fonts);
2595        };
2596        for (name, value) in fonts_dictionary.iter() {
2597            if let Some((key, offset, dictionary)) = resolve_named_dictionary(document, value) {
2598                if fonts.len() >= max_objects {
2599                    return Err(ValidationError::LimitExceeded {
2600                        limit: "max_objects",
2601                    }
2602                    .into());
2603                }
2604                fonts.push(Self {
2605                    document,
2606                    page_ordinal: page.ordinal,
2607                    key,
2608                    offset,
2609                    name: name.clone(),
2610                    dictionary,
2611                    object_type: ObjectTypeName::unchecked("font"),
2612                    supertypes: vec![ObjectTypeName::unchecked("object")],
2613                    links: Vec::new(),
2614                });
2615            }
2616        }
2617        Ok(fonts)
2618    }
2619}
2620
2621impl ModelObject for FontModel<'_> {
2622    fn id(&self) -> Option<ObjectIdentity> {
2623        Some(ObjectIdentity {
2624            key: format!(
2625                "font:{}:{}",
2626                self.page_ordinal,
2627                String::from_utf8_lossy(self.name.as_bytes())
2628            ),
2629        })
2630    }
2631
2632    fn object_type(&self) -> ObjectTypeName {
2633        self.object_type.clone()
2634    }
2635
2636    fn super_types(&self) -> &[ObjectTypeName] {
2637        &self.supertypes
2638    }
2639
2640    fn extra_context(&self) -> Option<&str> {
2641        Some("font")
2642    }
2643
2644    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
2645        match name.as_str() {
2646            "embedded" => Ok(ModelValue::Bool(
2647                self.dictionary.get("FontDescriptor").is_some(),
2648            )),
2649            "hasSubtype" => Ok(ModelValue::Bool(self.dictionary.get("Subtype").is_some())),
2650            _ => dictionary_property(self.dictionary, name, FONT_DIRECT_PROPERTIES),
2651        }
2652    }
2653
2654    fn links(&self) -> &[LinkName] {
2655        &self.links
2656    }
2657
2658    fn linked_objects<'a>(
2659        &self,
2660        _graph: &ModelGraph<'a>,
2661        _max_objects: usize,
2662    ) -> Result<Vec<ModelObjectRef<'a>>> {
2663        Ok(Vec::new())
2664    }
2665}
2666
2667/// Annotation dictionary model wrapper.
#[derive(Clone, Debug)]
pub struct AnnotationModel<'a> {
    /// Parsed document the annotation belongs to.
    document: &'a ParsedDocument,
    /// Ordinal of the page that lists this annotation.
    page_ordinal: usize,
    /// Position among the values yielded for the page's `Annots` entry.
    ordinal: usize,
    /// Object key as reported by `resolve_named_dictionary`; may be `None`.
    key: Option<ObjectKey>,
    /// Offset as reported by `resolve_named_dictionary`; may be `None`.
    offset: Option<u64>,
    /// The annotation dictionary.
    dictionary: &'a crate::Dictionary,
    /// Model type name; `from_page` sets it to `annotation`.
    object_type: ObjectTypeName,
    /// Supertype names; `from_page` sets the single supertype `object`.
    supertypes: Vec<ObjectTypeName>,
    /// Declared link names; `from_page` leaves this empty.
    links: Vec<LinkName>,
}
2680
2681impl<'a> AnnotationModel<'a> {
2682    fn from_page(
2683        document: &'a ParsedDocument,
2684        page: &PageModel<'_>,
2685        max_objects: usize,
2686    ) -> Result<Vec<Self>> {
2687        let mut annotations = Vec::new();
2688        let Some(page_dictionary) = page_dictionary(document, page.key) else {
2689            return Ok(annotations);
2690        };
2691        for (ordinal, value) in array_values(page_dictionary.get("Annots")).enumerate() {
2692            if let Some((key, offset, dictionary)) = resolve_named_dictionary(document, value) {
2693                if annotations.len() >= max_objects {
2694                    return Err(ValidationError::LimitExceeded {
2695                        limit: "max_objects",
2696                    }
2697                    .into());
2698                }
2699                annotations.push(Self {
2700                    document,
2701                    page_ordinal: page.ordinal,
2702                    ordinal,
2703                    key,
2704                    offset,
2705                    dictionary,
2706                    object_type: ObjectTypeName::unchecked("annotation"),
2707                    supertypes: vec![ObjectTypeName::unchecked("object")],
2708                    links: Vec::new(),
2709                });
2710            }
2711        }
2712        Ok(annotations)
2713    }
2714}
2715
2716impl ModelObject for AnnotationModel<'_> {
2717    fn id(&self) -> Option<ObjectIdentity> {
2718        Some(ObjectIdentity {
2719            key: format!("annotation:{}:{}", self.page_ordinal, self.ordinal),
2720        })
2721    }
2722
2723    fn object_type(&self) -> ObjectTypeName {
2724        self.object_type.clone()
2725    }
2726
2727    fn super_types(&self) -> &[ObjectTypeName] {
2728        &self.supertypes
2729    }
2730
2731    fn extra_context(&self) -> Option<&str> {
2732        Some("annotation")
2733    }
2734
2735    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
2736        match name.as_str() {
2737            "hasSubtype" => Ok(ModelValue::Bool(self.dictionary.get("Subtype").is_some())),
2738            _ => dictionary_property(self.dictionary, name, ANNOTATION_DIRECT_PROPERTIES),
2739        }
2740    }
2741
2742    fn links(&self) -> &[LinkName] {
2743        &self.links
2744    }
2745
2746    fn linked_objects<'a>(
2747        &self,
2748        _graph: &ModelGraph<'a>,
2749        _max_objects: usize,
2750    ) -> Result<Vec<ModelObjectRef<'a>>> {
2751        Ok(Vec::new())
2752    }
2753}
2754
/// Output intent dictionary model wrapper.
///
/// Instances are built by `OutputIntentModel::from_catalog`, one per entry of the
/// catalog's `OutputIntents` array that resolves to a dictionary.
#[derive(Clone, Debug)]
pub struct OutputIntentModel<'a> {
    /// Parsed document the output intent was resolved from.
    document: &'a ParsedDocument,
    /// Index of the entry inside the catalog's `OutputIntents` array.
    ordinal: usize,
    /// Object key when the entry was a reference; `None` for a direct dictionary.
    key: Option<ObjectKey>,
    /// Offset of the referenced object; `None` for a direct dictionary.
    offset: Option<u64>,
    /// The output intent dictionary itself.
    dictionary: &'a crate::Dictionary,
    /// Model type name; `from_catalog` sets it to `outputIntent`.
    object_type: ObjectTypeName,
    /// Supertype names; `from_catalog` sets a single `object` entry.
    supertypes: Vec<ObjectTypeName>,
    /// Link names; `from_catalog` leaves this empty.
    links: Vec<LinkName>,
}
2767
2768impl<'a> OutputIntentModel<'a> {
2769    fn from_catalog(
2770        document: &'a ParsedDocument,
2771        catalog: &CatalogModel<'_>,
2772        max_objects: usize,
2773    ) -> Result<Vec<Self>> {
2774        let Some(catalog_object) = document.objects.get(&catalog.key) else {
2775            return Ok(Vec::new());
2776        };
2777        let Some(catalog_dictionary) = catalog_object.object.as_dictionary() else {
2778            return Ok(Vec::new());
2779        };
2780        let mut output_intents = Vec::new();
2781        for (ordinal, value) in array_values(catalog_dictionary.get("OutputIntents")).enumerate() {
2782            if let Some((key, offset, dictionary)) = resolve_named_dictionary(document, value) {
2783                if output_intents.len() >= max_objects {
2784                    return Err(ValidationError::LimitExceeded {
2785                        limit: "max_objects",
2786                    }
2787                    .into());
2788                }
2789                output_intents.push(Self {
2790                    document,
2791                    ordinal,
2792                    key,
2793                    offset,
2794                    dictionary,
2795                    object_type: ObjectTypeName::unchecked("outputIntent"),
2796                    supertypes: vec![ObjectTypeName::unchecked("object")],
2797                    links: Vec::new(),
2798                });
2799            }
2800        }
2801        Ok(output_intents)
2802    }
2803}
2804
2805impl ModelObject for OutputIntentModel<'_> {
2806    fn id(&self) -> Option<ObjectIdentity> {
2807        Some(ObjectIdentity {
2808            key: format!("outputIntent:{}", self.ordinal),
2809        })
2810    }
2811
2812    fn object_type(&self) -> ObjectTypeName {
2813        self.object_type.clone()
2814    }
2815
2816    fn super_types(&self) -> &[ObjectTypeName] {
2817        &self.supertypes
2818    }
2819
2820    fn extra_context(&self) -> Option<&str> {
2821        Some("outputIntent")
2822    }
2823
2824    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
2825        match name.as_str() {
2826            "hasDestOutputProfile" => Ok(ModelValue::Bool(
2827                self.dictionary.get("DestOutputProfile").is_some(),
2828            )),
2829            _ => dictionary_property(self.dictionary, name, OUTPUT_INTENT_DIRECT_PROPERTIES),
2830        }
2831    }
2832
2833    fn links(&self) -> &[LinkName] {
2834        &self.links
2835    }
2836
2837    fn linked_objects<'a>(
2838        &self,
2839        _graph: &ModelGraph<'a>,
2840        _max_objects: usize,
2841    ) -> Result<Vec<ModelObjectRef<'a>>> {
2842        Ok(Vec::new())
2843    }
2844}
2845
/// Page content stream model wrapper.
///
/// Instances are created by `push_content_stream` for every `Contents` reference of a
/// page that resolves to a stream object.
#[derive(Clone, Debug)]
pub struct ContentStreamModel<'a> {
    /// Parsed document the stream was resolved from.
    document: &'a ParsedDocument,
    /// Ordinal of the owning page (copied from the page model).
    page_ordinal: usize,
    /// Position among the reference entries of the `Contents` array; `0` when `Contents`
    /// is a single reference.
    ordinal: usize,
    /// Key of the indirect stream object.
    key: ObjectKey,
    /// Offset of the indirect stream object.
    offset: u64,
    /// The stream object itself.
    stream: &'a crate::StreamObject,
    /// Model type name; set to `contentStream` on construction.
    object_type: ObjectTypeName,
    /// Supertype names; set to `stream` followed by `object` on construction.
    supertypes: Vec<ObjectTypeName>,
    /// Link names; left empty on construction.
    links: Vec<LinkName>,
}
2859
2860impl<'a> ContentStreamModel<'a> {
2861    fn from_page(
2862        document: &'a ParsedDocument,
2863        page: &PageModel<'_>,
2864        max_objects: usize,
2865    ) -> Result<Vec<Self>> {
2866        let mut streams = Vec::new();
2867        let Some(page_dictionary) = page_dictionary(document, page.key) else {
2868            return Ok(streams);
2869        };
2870        push_content_streams_from_value(
2871            document,
2872            page,
2873            page_dictionary.get("Contents"),
2874            max_objects,
2875            &mut streams,
2876        )?;
2877        Ok(streams)
2878    }
2879}
2880
2881impl ModelObject for ContentStreamModel<'_> {
2882    fn id(&self) -> Option<ObjectIdentity> {
2883        Some(ObjectIdentity {
2884            key: format!(
2885                "contentStream:{}:{}:{}",
2886                self.page_ordinal, self.key.number, self.key.generation
2887            ),
2888        })
2889    }
2890
2891    fn object_type(&self) -> ObjectTypeName {
2892        self.object_type.clone()
2893    }
2894
2895    fn super_types(&self) -> &[ObjectTypeName] {
2896        &self.supertypes
2897    }
2898
2899    fn extra_context(&self) -> Option<&str> {
2900        Some("contentStream")
2901    }
2902
2903    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
2904        match name.as_str() {
2905            "lengthMatches" => {
2906                Ok(ModelValue::Bool(self.stream.declared_length.is_none_or(
2907                    |declared| declared == self.stream.discovered_length,
2908                )))
2909            }
2910            "declaredLength" => Ok(ModelValue::Number(u64_to_f64(
2911                self.stream
2912                    .declared_length
2913                    .unwrap_or(self.stream.discovered_length),
2914            )?)),
2915            "discoveredLength" => Ok(ModelValue::Number(u64_to_f64(
2916                self.stream.discovered_length,
2917            )?)),
2918            _ => dictionary_property(&self.stream.dictionary, name, STREAM_DIRECT_PROPERTIES),
2919        }
2920    }
2921
2922    fn links(&self) -> &[LinkName] {
2923        &self.links
2924    }
2925
2926    fn linked_objects<'a>(
2927        &self,
2928        _graph: &ModelGraph<'a>,
2929        _max_objects: usize,
2930    ) -> Result<Vec<ModelObjectRef<'a>>> {
2931        Ok(Vec::new())
2932    }
2933}
2934
2935fn resolve_dictionary_value<'a>(
2936    document: &'a ParsedDocument,
2937    value: Option<&'a crate::CosObject>,
2938) -> Option<&'a crate::Dictionary> {
2939    match value {
2940        Some(crate::CosObject::Dictionary(dictionary)) => Some(dictionary),
2941        Some(crate::CosObject::Reference(key)) => document.objects.get(key)?.object.as_dictionary(),
2942        _ => None,
2943    }
2944}
2945
2946fn page_dictionary(document: &ParsedDocument, key: ObjectKey) -> Option<&crate::Dictionary> {
2947    document.objects.get(&key)?.object.as_dictionary()
2948}
2949
2950fn resolve_named_dictionary<'a>(
2951    document: &'a ParsedDocument,
2952    value: &'a crate::CosObject,
2953) -> Option<(Option<ObjectKey>, Option<u64>, &'a crate::Dictionary)> {
2954    match value {
2955        crate::CosObject::Dictionary(dictionary) => Some((None, None, dictionary)),
2956        crate::CosObject::Reference(key) => {
2957            let object = document.objects.get(key)?;
2958            let dictionary = object.object.as_dictionary()?;
2959            Some((Some(*key), Some(object.offset), dictionary))
2960        }
2961        _ => None,
2962    }
2963}
2964
2965fn object_refs_from_array(value: Option<&crate::CosObject>) -> Vec<ObjectKey> {
2966    match value {
2967        Some(crate::CosObject::Array(values)) => values
2968            .iter()
2969            .filter_map(|value| match value {
2970                crate::CosObject::Reference(key) => Some(*key),
2971                _ => None,
2972            })
2973            .collect(),
2974        _ => Vec::new(),
2975    }
2976}
2977
2978fn push_content_streams_from_value<'a>(
2979    document: &'a ParsedDocument,
2980    page: &PageModel<'_>,
2981    value: Option<&crate::CosObject>,
2982    max_objects: usize,
2983    streams: &mut Vec<ContentStreamModel<'a>>,
2984) -> Result<()> {
2985    match value {
2986        Some(crate::CosObject::Reference(key)) => {
2987            push_content_stream(document, page, *key, 0, max_objects, streams)?;
2988        }
2989        Some(crate::CosObject::Array(values)) => {
2990            let mut ordinal = 0_usize;
2991            for value in values {
2992                let crate::CosObject::Reference(key) = value else {
2993                    continue;
2994                };
2995                push_content_stream(document, page, *key, ordinal, max_objects, streams)?;
2996                ordinal = ordinal
2997                    .checked_add(1)
2998                    .ok_or(ValidationError::LimitExceeded {
2999                        limit: "max_objects",
3000                    })?;
3001            }
3002        }
3003        Some(_) | None => {}
3004    }
3005    Ok(())
3006}
3007
3008fn push_content_stream<'a>(
3009    document: &'a ParsedDocument,
3010    page: &PageModel<'_>,
3011    key: ObjectKey,
3012    ordinal: usize,
3013    max_objects: usize,
3014    streams: &mut Vec<ContentStreamModel<'a>>,
3015) -> Result<()> {
3016    let Some(object) = document.objects.get(&key) else {
3017        return Ok(());
3018    };
3019    let crate::CosObject::Stream(stream) = &object.object else {
3020        return Ok(());
3021    };
3022    if streams.len() >= max_objects {
3023        return Err(ValidationError::LimitExceeded {
3024            limit: "max_objects",
3025        }
3026        .into());
3027    }
3028    streams.push(ContentStreamModel {
3029        document,
3030        page_ordinal: page.ordinal,
3031        ordinal,
3032        key,
3033        offset: object.offset,
3034        stream,
3035        object_type: ObjectTypeName::unchecked("contentStream"),
3036        supertypes: vec![
3037            ObjectTypeName::unchecked("stream"),
3038            ObjectTypeName::unchecked("object"),
3039        ],
3040        links: Vec::new(),
3041    });
3042    Ok(())
3043}
3044
3045fn object_refs_or_direct_count(value: Option<&crate::CosObject>) -> usize {
3046    match value {
3047        Some(crate::CosObject::Array(values)) => values.len(),
3048        Some(_) => 1,
3049        None => 0,
3050    }
3051}
3052
3053fn array_values(value: Option<&crate::CosObject>) -> impl Iterator<Item = &crate::CosObject> {
3054    value
3055        .and_then(|value| match value {
3056            crate::CosObject::Array(values) => Some(values.as_slice()),
3057            _ => None,
3058        })
3059        .into_iter()
3060        .flatten()
3061}
3062
3063fn dictionary_property(
3064    dictionary: &crate::Dictionary,
3065    name: &PropertyName,
3066    allowed_names: &[&str],
3067) -> Result<ModelValue> {
3068    if !allowed_names.contains(&name.as_str()) {
3069        return unknown_property(name);
3070    }
3071    Ok(dictionary
3072        .get(name.as_str())
3073        .cloned()
3074        .map_or(ModelValue::Null, ModelValue::from))
3075}
3076
3077fn unknown_property(name: &PropertyName) -> Result<ModelValue> {
3078    Err(crate::ProfileError::UnknownProperty {
3079        property: BoundedText::unchecked(name.as_str()),
3080    }
3081    .into())
3082}
3083
/// Generic dictionary-backed model wrapper.
///
/// Used for object families that have no dedicated model type; the family name selects
/// both the object type and the set of directly readable dictionary entries.
#[derive(Clone, Debug)]
pub struct GenericModel<'a> {
    /// Parsed document the dictionary belongs to.
    document: &'a ParsedDocument,
    /// Object key, when one was supplied to `GenericModel::new`.
    key: Option<ObjectKey>,
    /// Object offset, when one was supplied to `GenericModel::new`.
    offset: Option<u64>,
    /// The wrapped dictionary.
    dictionary: &'a crate::Dictionary,
    /// Model type name, built from the family name passed to `GenericModel::new`.
    object_type: ObjectTypeName,
    /// Supertype names; `GenericModel::new` sets a single `object` entry.
    supertypes: Vec<ObjectTypeName>,
    /// Link names; `GenericModel::new` leaves this empty.
    links: Vec<LinkName>,
    /// Dictionary entry names that `property` may read directly, chosen by
    /// `family_direct_properties` for the family.
    allowed_properties: &'static [&'static str],
    /// Context label returned by `extra_context`.
    context: String,
    /// Ordinal used as the second component of the identity key.
    ordinal: usize,
}
3098
3099impl<'a> GenericModel<'a> {
3100    fn new(
3101        document: &'a ParsedDocument,
3102        family: &'static str,
3103        key: Option<ObjectKey>,
3104        offset: Option<u64>,
3105        dictionary: &'a crate::Dictionary,
3106        ordinal: usize,
3107        context: impl Into<String>,
3108    ) -> Self {
3109        Self {
3110            document,
3111            key,
3112            offset,
3113            dictionary,
3114            object_type: ObjectTypeName::unchecked(family),
3115            supertypes: vec![ObjectTypeName::unchecked("object")],
3116            links: Vec::new(),
3117            allowed_properties: family_direct_properties(family),
3118            context: context.into(),
3119            ordinal,
3120        }
3121    }
3122}
3123
3124impl ModelObject for GenericModel<'_> {
3125    fn id(&self) -> Option<ObjectIdentity> {
3126        Some(ObjectIdentity {
3127            key: format!("{}:{}", self.object_type.as_str(), self.ordinal),
3128        })
3129    }
3130
3131    fn object_type(&self) -> ObjectTypeName {
3132        self.object_type.clone()
3133    }
3134
3135    fn super_types(&self) -> &[ObjectTypeName] {
3136        &self.supertypes
3137    }
3138
3139    fn extra_context(&self) -> Option<&str> {
3140        Some(&self.context)
3141    }
3142
3143    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
3144        match (self.object_type.as_str(), name.as_str()) {
3145            ("image", "width") => dictionary_property(
3146                self.dictionary,
3147                &PropertyName::unchecked("Width"),
3148                IMAGE_DIRECT_PROPERTIES,
3149            ),
3150            ("image", "height") => dictionary_property(
3151                self.dictionary,
3152                &PropertyName::unchecked("Height"),
3153                IMAGE_DIRECT_PROPERTIES,
3154            ),
3155            ("contentStream", "operatorCount" | "markedContentCount") => {
3156                Ok(ModelValue::Number(0.0))
3157            }
3158            _ => dictionary_property(self.dictionary, name, self.allowed_properties),
3159        }
3160    }
3161
3162    fn links(&self) -> &[LinkName] {
3163        &self.links
3164    }
3165
3166    fn linked_objects<'a>(
3167        &self,
3168        _graph: &ModelGraph<'a>,
3169        _max_objects: usize,
3170    ) -> Result<Vec<ModelObjectRef<'a>>> {
3171        Ok(Vec::new())
3172    }
3173}
3174
3175fn push_generic_model<'a>(
3176    models: &mut Vec<GenericModel<'a>>,
3177    model: GenericModel<'a>,
3178    max_objects: usize,
3179) -> Result<()> {
3180    if models.len() >= max_objects {
3181        return Err(ValidationError::LimitExceeded {
3182            limit: "max_objects",
3183        }
3184        .into());
3185    }
3186    models.push(model);
3187    Ok(())
3188}
3189
3190fn family_direct_properties(family: &str) -> &'static [&'static str] {
3191    match family {
3192        "resource" => RESOURCE_DIRECT_PROPERTIES,
3193        "names" => NAMES_DIRECT_PROPERTIES,
3194        "outline" => OUTLINES_DIRECT_PROPERTIES,
3195        "destination" => DESTINATION_DIRECT_PROPERTIES,
3196        "acroForm" => ACRO_FORM_DIRECT_PROPERTIES,
3197        "optionalContentProperties" => OPTIONAL_CONTENT_DIRECT_PROPERTIES,
3198        "permissions" => PERMISSIONS_PROPERTIES,
3199        "cMap" => CMAP_DIRECT_PROPERTIES,
3200        "image" => IMAGE_DIRECT_PROPERTIES,
3201        "xObject" => XOBJECT_DIRECT_PROPERTIES,
3202        "action" => ACTION_DIRECT_PROPERTIES,
3203        "formField" => FORM_FIELD_DIRECT_PROPERTIES,
3204        "colorSpace" => COLOR_SPACE_DIRECT_PROPERTIES,
3205        "extGState" => EXT_GSTATE_DIRECT_PROPERTIES,
3206        "structureTreeRoot" => STRUCTURE_DIRECT_PROPERTIES,
3207        "structureElement" => STRUCTURE_ELEMENT_PROPERTIES,
3208        "signature" => SIGNATURE_DIRECT_PROPERTIES,
3209        "security" => SECURITY_DIRECT_PROPERTIES,
3210        "pageTree" => PAGE_TREE_PROPERTIES,
3211        _ => DIRECT_PROPERTY_NAMES,
3212    }
3213}
3214
3215fn resolve_named_dictionary_from_option<'a>(
3216    document: &'a ParsedDocument,
3217    value: Option<&'a crate::CosObject>,
3218) -> Option<(Option<ObjectKey>, Option<u64>, &'a crate::Dictionary)> {
3219    match value {
3220        Some(value) => resolve_named_dictionary(document, value),
3221        None => None,
3222    }
3223}
3224
3225fn classify_xobject(dictionary: &crate::Dictionary) -> Option<&'static str> {
3226    match dictionary.get("Subtype") {
3227        Some(crate::CosObject::Name(name)) if name.matches("Image") => Some("image"),
3228        Some(crate::CosObject::Name(name)) if name.matches("Form") => Some("xObject"),
3229        _ => None,
3230    }
3231}
3232
3233fn classify_dictionary(dictionary: &crate::Dictionary) -> Option<&'static str> {
3234    if let Some(crate::CosObject::Name(name)) = dictionary.get("Subtype") {
3235        if name.matches("Image") {
3236            return Some("image");
3237        }
3238        if name.matches("Form") {
3239            return Some("xObject");
3240        }
3241        if name.matches("Widget") {
3242            return Some("formField");
3243        }
3244    }
3245    if let Some(crate::CosObject::Name(name)) = dictionary.get("Type") {
3246        if name.matches("Pages") {
3247            return Some("pageTree");
3248        }
3249        if name.matches("Action") {
3250            return Some("action");
3251        }
3252        if name.matches("StructTreeRoot") {
3253            return Some("structureTreeRoot");
3254        }
3255        if name.matches("StructElem") {
3256            return Some("structureElement");
3257        }
3258        if name.matches("Sig") {
3259            return Some("signature");
3260        }
3261        if name.matches("EmbeddedFile") {
3262            return Some("embeddedFontFile");
3263        }
3264        if name.matches("OCProperties") {
3265            return Some("optionalContentProperties");
3266        }
3267        if name.matches("XObject") {
3268            return classify_xobject(dictionary).or(Some("xObject"));
3269        }
3270        if name.matches("Font") {
3271            return Some("font");
3272        }
3273        if name.matches("Annot") {
3274            return Some("annotation");
3275        }
3276        if name.matches("Metadata") {
3277            return Some("metadata");
3278        }
3279        if name.matches("OutputIntent") {
3280            return Some("outputIntent");
3281        }
3282        if name.matches("Filespec") {
3283            return Some("destination");
3284        }
3285    }
3286    if dictionary.get("Fields").is_some() {
3287        return Some("acroForm");
3288    }
3289    if dictionary.get("Filter").is_some() && dictionary.get("V").is_some() {
3290        return Some("security");
3291    }
3292    if dictionary.get("CMapName").is_some() {
3293        return Some("cMap");
3294    }
3295    if dictionary.get("ByteRange").is_some() {
3296        return Some("signature");
3297    }
3298    None
3299}
3300
/// Stream model wrapper.
///
/// Built by `StreamModel::from_indirect_with_document` from an indirect object whose
/// payload is a stream.
#[derive(Clone, Debug)]
pub struct StreamModel<'a> {
    /// Parsed document the stream belongs to.
    document: &'a ParsedDocument,
    /// Key of the indirect stream object.
    key: ObjectKey,
    /// Offset of the indirect stream object.
    offset: u64,
    /// The stream object itself.
    stream: &'a crate::StreamObject,
    /// Model type name; set to `stream` on construction.
    object_type: ObjectTypeName,
    /// Supertype names; a single `object` entry on construction.
    supertypes: Vec<ObjectTypeName>,
    /// Link names; left empty on construction.
    links: Vec<LinkName>,
}
3312
3313impl<'a> StreamModel<'a> {
3314    fn from_indirect_with_document(
3315        document: &'a ParsedDocument,
3316        object: &'a IndirectObject,
3317    ) -> Option<Self> {
3318        let crate::CosObject::Stream(stream) = &object.object else {
3319            return None;
3320        };
3321        Some(Self {
3322            document,
3323            key: object.key,
3324            offset: object.offset,
3325            stream,
3326            object_type: ObjectTypeName::unchecked("stream"),
3327            supertypes: vec![ObjectTypeName::unchecked("object")],
3328            links: Vec::new(),
3329        })
3330    }
3331}
3332
3333impl ModelObject for StreamModel<'_> {
3334    fn id(&self) -> Option<ObjectIdentity> {
3335        Some(ObjectIdentity {
3336            key: format!("stream:{}:{}", self.key.number, self.key.generation),
3337        })
3338    }
3339
3340    fn object_type(&self) -> ObjectTypeName {
3341        self.object_type.clone()
3342    }
3343
3344    fn super_types(&self) -> &[ObjectTypeName] {
3345        &self.supertypes
3346    }
3347
3348    fn extra_context(&self) -> Option<&str> {
3349        Some("stream")
3350    }
3351
3352    fn property(&self, name: &PropertyName) -> Result<ModelValue> {
3353        match name.as_str() {
3354            "lengthMatches" => {
3355                Ok(ModelValue::Bool(self.stream.declared_length.is_none_or(
3356                    |declared| declared == self.stream.discovered_length,
3357                )))
3358            }
3359            "declaredLength" => Ok(ModelValue::Number(u64_to_f64(
3360                self.stream
3361                    .declared_length
3362                    .unwrap_or(self.stream.discovered_length),
3363            )?)),
3364            "discoveredLength" => Ok(ModelValue::Number(u64_to_f64(
3365                self.stream.discovered_length,
3366            )?)),
3367            "streamKeywordCRLFCompliant" => {
3368                Ok(ModelValue::Bool(self.stream.stream_keyword_crlf_compliant))
3369            }
3370            "endstreamKeywordEOLCompliant" => Ok(ModelValue::Bool(
3371                self.stream.endstream_keyword_eol_compliant,
3372            )),
3373            "F" | "FFilter" | "FDecodeParms" => Ok(self
3374                .stream
3375                .dictionary
3376                .get(name.as_str())
3377                .cloned()
3378                .map_or(ModelValue::Null, ModelValue::from)),
3379            _ => Err(crate::ProfileError::UnknownProperty {
3380                property: BoundedText::unchecked(name.as_str()),
3381            }
3382            .into()),
3383        }
3384    }
3385
3386    fn links(&self) -> &[LinkName] {
3387        &self.links
3388    }
3389
3390    fn linked_objects<'a>(
3391        &self,
3392        _graph: &ModelGraph<'a>,
3393        _max_objects: usize,
3394    ) -> Result<Vec<ModelObjectRef<'a>>> {
3395        Ok(Vec::new())
3396    }
3397}
3398
/// Lookup table from object type name to the rules declared for that type.
struct RuleIndex<'a> {
    /// Rules grouped by their `object_type` string, each group in input order.
    by_type: BTreeMap<&'a str, Vec<&'a Rule>>,
}
3402
3403impl<'a> RuleIndex<'a> {
3404    fn new(rules: &'a [Rule]) -> Self {
3405        let mut by_type: BTreeMap<&'a str, Vec<&'a Rule>> = BTreeMap::new();
3406        for rule in rules {
3407            by_type
3408                .entry(rule.object_type.as_str())
3409                .or_default()
3410                .push(rule);
3411        }
3412        Self { by_type }
3413    }
3414
3415    fn rules_for(&self, object: &ModelObjectRef<'_>) -> Vec<&'a Rule> {
3416        let mut rules = self
3417            .by_type
3418            .get(object.object_type().as_str())
3419            .cloned()
3420            .unwrap_or_default();
3421        let supertypes = match object {
3422            ModelObjectRef::Document(model) => model.super_types(),
3423            ModelObjectRef::Catalog(model) => model.super_types(),
3424            ModelObjectRef::Metadata(model) => model.super_types(),
3425            ModelObjectRef::Page(model) => model.super_types(),
3426            ModelObjectRef::Font(model) => model.super_types(),
3427            ModelObjectRef::Annotation(model) => model.super_types(),
3428            ModelObjectRef::OutputIntent(model) => model.super_types(),
3429            ModelObjectRef::ContentStream(model) => model.super_types(),
3430            ModelObjectRef::Stream(model) => model.super_types(),
3431            ModelObjectRef::Generic(model) => model.super_types(),
3432        };
3433        for supertype in supertypes {
3434            if let Some(super_rules) = self.by_type.get(supertype.as_str()) {
3435                rules.extend(super_rules.iter().copied());
3436            }
3437        }
3438        rules
3439    }
3440}
3441
/// Mutable accumulator for the results of validating one profile.
///
/// Populated through `apply_rule` / `register_static_unsupported_rules` and turned into
/// a `ProfileReport` by `finish`.
struct ProfileState {
    /// Identity of the profile being evaluated; reported by `finish`.
    profile: crate::ProfileIdentity,
    /// Maximum number of failed assertions retained per rule id.
    max_failed_assertions_per_rule: u32,
    /// Whether passed outcomes are recorded as assertions.
    record_passed_assertions: bool,
    /// Incremented once per `apply_rule` call.
    checks_executed: u64,
    /// Incremented once per `apply_rule` call.
    rules_executed: u64,
    /// Number of failed outcomes, counted even when the assertion is not retained.
    failed_rules: u64,
    /// Retained failed assertions.
    failed_assertions: Vec<Assertion>,
    /// Retained passed assertions (only filled when `record_passed_assertions` is set).
    passed_assertions: Vec<Assertion>,
    /// Rules that could not be evaluated.
    unsupported_rules: Vec<UnsupportedRule>,
    /// Number of failed assertions already retained for each rule id.
    retained_failures_by_rule: HashMap<RuleId, u32>,
    /// Ordinal handed to the next assertion; starts at 1.
    next_ordinal: u64,
}
3455
3456impl ProfileState {
3457    fn new(
3458        profile: crate::ProfileIdentity,
3459        max_failed_assertions_per_rule: u32,
3460        record_passed_assertions: bool,
3461    ) -> Self {
3462        Self {
3463            profile,
3464            max_failed_assertions_per_rule,
3465            record_passed_assertions,
3466            checks_executed: 0,
3467            rules_executed: 0,
3468            failed_rules: 0,
3469            failed_assertions: Vec::new(),
3470            passed_assertions: Vec::new(),
3471            unsupported_rules: Vec::new(),
3472            retained_failures_by_rule: HashMap::new(),
3473            next_ordinal: 1,
3474        }
3475    }
3476
3477    fn apply_rule(
3478        &mut self,
3479        object: &ModelObjectRef<'_>,
3480        rule: &Rule,
3481        evaluator: &mut DefaultRuleEvaluator,
3482    ) -> Result<()> {
3483        self.rules_executed =
3484            self.rules_executed
3485                .checked_add(1)
3486                .ok_or(ValidationError::LimitExceeded {
3487                    limit: "rules_executed",
3488                })?;
3489        self.checks_executed =
3490            self.checks_executed
3491                .checked_add(1)
3492                .ok_or(ValidationError::LimitExceeded {
3493                    limit: "checks_executed",
3494                })?;
3495        let outcome = match evaluator.evaluate(object.clone(), rule) {
3496            Ok(outcome) => outcome,
3497            Err(PdfvError::Profile(error)) => {
3498                self.unsupported_rules.push(UnsupportedRule {
3499                    profile_id: self.profile.id.clone(),
3500                    rule_id: rule.id.clone(),
3501                    expression_fragment: Some(BoundedText::unchecked(format!("{:?}", rule.test))),
3502                    reason: BoundedText::new(error.to_string(), 512)?,
3503                    references: rule.references.clone(),
3504                });
3505                return Ok(());
3506            }
3507            Err(error) => return Err(error),
3508        };
3509        match outcome {
3510            RuleOutcome::Passed if self.record_passed_assertions => {
3511                let assertion = self.assertion(object, rule, outcome)?;
3512                self.passed_assertions.push(assertion);
3513            }
3514            RuleOutcome::Passed => {}
3515            RuleOutcome::Failed => {
3516                self.failed_rules =
3517                    self.failed_rules
3518                        .checked_add(1)
3519                        .ok_or(ValidationError::LimitExceeded {
3520                            limit: "failed_rules",
3521                        })?;
3522                let retained = self
3523                    .retained_failures_by_rule
3524                    .get(&rule.id)
3525                    .copied()
3526                    .unwrap_or(0);
3527                if retained < self.max_failed_assertions_per_rule {
3528                    let assertion = self.assertion(object, rule, outcome)?;
3529                    self.failed_assertions.push(assertion);
3530                    self.retained_failures_by_rule
3531                        .insert(rule.id.clone(), retained.saturating_add(1));
3532                }
3533            }
3534        }
3535        Ok(())
3536    }
3537
3538    fn register_static_unsupported_rules(&mut self, rules: &[Rule]) {
3539        for rule in rules {
3540            if let crate::RuleExpr::Unsupported { fragment, reason } = &rule.test {
3541                self.unsupported_rules.push(UnsupportedRule {
3542                    profile_id: self.profile.id.clone(),
3543                    rule_id: rule.id.clone(),
3544                    expression_fragment: Some(fragment.clone()),
3545                    reason: reason.clone(),
3546                    references: rule.references.clone(),
3547                });
3548            }
3549        }
3550    }
3551
3552    fn assertion(
3553        &mut self,
3554        object: &ModelObjectRef<'_>,
3555        rule: &Rule,
3556        outcome: RuleOutcome,
3557    ) -> Result<Assertion> {
3558        let ordinal = NonZeroU64::new(self.next_ordinal).ok_or(ValidationError::LimitExceeded {
3559            limit: "assertion_ordinal",
3560        })?;
3561        self.next_ordinal =
3562            self.next_ordinal
3563                .checked_add(1)
3564                .ok_or(ValidationError::LimitExceeded {
3565                    limit: "assertion_ordinal",
3566                })?;
3567        Ok(Assertion {
3568            ordinal,
3569            rule_id: rule.id.clone(),
3570            status: outcome.assertion_status(),
3571            description: rule.description.clone(),
3572            location: object.location(),
3573            object_context: Some(object.context()),
3574            message: Some(rule.error.message.clone()),
3575            error_arguments: Vec::<ErrorArgument>::new(),
3576        })
3577    }
3578
3579    fn finish(self) -> ProfileReport {
3580        ProfileReport::builder()
3581            .profile(self.profile)
3582            .is_compliant(self.failed_rules == 0 && self.unsupported_rules.is_empty())
3583            .checks_executed(self.checks_executed)
3584            .rules_executed(self.rules_executed)
3585            .failed_rules(self.failed_rules)
3586            .failed_assertions(self.failed_assertions)
3587            .passed_assertions(self.passed_assertions)
3588            .unsupported_rules(self.unsupported_rules)
3589            .build()
3590    }
3591}
3592
3593fn reader_len<R: Read + Seek>(reader: &mut R) -> Result<Option<u64>> {
3594    let current = reader
3595        .stream_position()
3596        .map_err(|source| PdfvError::Io { path: None, source })?;
3597    let end = reader
3598        .seek(SeekFrom::End(0))
3599        .map_err(|source| PdfvError::Io { path: None, source })?;
3600    reader
3601        .seek(SeekFrom::Start(current))
3602        .map_err(|source| PdfvError::Io { path: None, source })?;
3603    Ok(Some(end))
3604}
3605
3606fn parse_failed_report(
3607    source: InputSummary,
3608    error: &crate::ParseError,
3609    elapsed: std::time::Duration,
3610) -> Result<ValidationReport> {
3611    Ok(ValidationReport::builder()
3612        .engine_version(ENGINE_VERSION.to_owned())
3613        .source(source)
3614        .status(ValidationStatus::ParseFailed)
3615        .flavours(Vec::new())
3616        .profile_reports(Vec::new())
3617        .parse_facts(Vec::new())
3618        .warnings(vec![crate::ValidationWarning::General {
3619            message: BoundedText::new(error.to_string(), 512)?,
3620        }])
3621        .task_durations(vec![TaskDuration::from_duration(
3622            Identifier::new("parse")?,
3623            elapsed,
3624        )])
3625        .build())
3626}
3627
3628fn base_report(
3629    source: InputSummary,
3630    status: ValidationStatus,
3631    profile_reports: Vec<ProfileReport>,
3632    parsed: ParsedDocument,
3633    elapsed: std::time::Duration,
3634) -> Result<ValidationReport> {
3635    Ok(ValidationReport::builder()
3636        .engine_version(ENGINE_VERSION.to_owned())
3637        .source(source)
3638        .status(status)
3639        .flavours(Vec::new())
3640        .profile_reports(profile_reports)
3641        .parse_facts(parsed.parse_facts)
3642        .warnings(parsed.warnings)
3643        .task_durations(vec![TaskDuration::from_duration(
3644            Identifier::new("validate")?,
3645            elapsed,
3646        )])
3647        .build())
3648}
3649
3650fn header_offset(document: &ParsedDocument) -> u64 {
3651    document
3652        .parse_facts
3653        .iter()
3654        .find_map(|fact| match fact {
3655            crate::ParseFact::Header { offset, .. } => Some(*offset),
3656            _ => None,
3657        })
3658        .unwrap_or(0)
3659}
3660
3661fn post_eof_data_size(document: &ParsedDocument) -> u64 {
3662    document
3663        .parse_facts
3664        .iter()
3665        .find_map(|fact| match fact {
3666            crate::ParseFact::PostEofData { bytes } => Some(*bytes),
3667            _ => None,
3668        })
3669        .unwrap_or(0)
3670}
3671
3672fn contains_xref_stream(document: &ParsedDocument) -> bool {
3673    document.parse_facts.iter().any(|fact| {
3674        matches!(
3675            fact,
3676            crate::ParseFact::Xref {
3677                fact: crate::XrefFact::XrefStreamParsed { .. }
3678                    | crate::XrefFact::XrefStreamUnsupported,
3679                ..
3680            }
3681        )
3682    })
3683}
3684
3685fn contains_xmp_family(document: &ParsedDocument, family: &str) -> bool {
3686    document.parse_facts.iter().any(|fact| {
3687        matches!(
3688            fact,
3689            crate::ParseFact::Xmp {
3690                fact:
3691                    crate::XmpFact::FlavourClaim {
3692                        family: claim_family,
3693                        ..
3694                    },
3695                ..
3696            } if claim_family.as_str() == family
3697        )
3698    })
3699}
3700
3701fn xmp_part(document: &ParsedDocument) -> Option<f64> {
3702    document.parse_facts.iter().find_map(|fact| {
3703        let crate::ParseFact::Xmp {
3704            fact:
3705                crate::XmpFact::FlavourClaim {
3706                    family,
3707                    display_flavour,
3708                    ..
3709                },
3710            ..
3711        } = fact
3712        else {
3713            return None;
3714        };
3715        if family.as_str() == "pdfa" || family.as_str() == "pdfua" {
3716            display_flavour
3717                .as_str()
3718                .split('-')
3719                .nth(1)
3720                .and_then(|value| value.chars().next())
3721                .and_then(|character| character.to_digit(10))
3722                .map(f64::from)
3723        } else {
3724            None
3725        }
3726    })
3727}
3728
3729fn xmp_prefix_for_claim(document: &ParsedDocument) -> Option<&'static str> {
3730    document.parse_facts.iter().find_map(|fact| {
3731        let crate::ParseFact::Xmp {
3732            fact: crate::XmpFact::FlavourClaim { family, .. },
3733            ..
3734        } = fact
3735        else {
3736            return None;
3737        };
3738        match family.as_str() {
3739            "pdfa" => Some("pdfaid"),
3740            "pdfua" => Some("pdfuaid"),
3741            _ => None,
3742        }
3743    })
3744}
3745
3746fn xmp_conformance(document: &ParsedDocument) -> Option<String> {
3747    document.parse_facts.iter().find_map(|fact| {
3748        let crate::ParseFact::Xmp {
3749            fact:
3750                crate::XmpFact::FlavourClaim {
3751                    family,
3752                    display_flavour,
3753                    ..
3754                },
3755            ..
3756        } = fact
3757        else {
3758            return None;
3759        };
3760        if family.as_str() != "pdfa" {
3761            return None;
3762        }
3763        display_flavour
3764            .as_str()
3765            .chars()
3766            .last()
3767            .filter(char::is_ascii_alphabetic)
3768            .map(|character| character.to_ascii_uppercase().to_string())
3769    })
3770}
3771
3772fn xmp_declarations(document: &ParsedDocument) -> Vec<ModelValue> {
3773    document
3774        .parse_facts
3775        .iter()
3776        .filter_map(|fact| {
3777            let crate::ParseFact::Xmp {
3778                fact:
3779                    crate::XmpFact::FlavourClaim {
3780                        family,
3781                        display_flavour,
3782                        ..
3783                    },
3784                ..
3785            } = fact
3786            else {
3787                return None;
3788            };
3789            if family.as_str() != "wtpdf" {
3790                return None;
3791            }
3792            let declaration = match display_flavour.as_str() {
3793                "wtpdf-1-0-accessibility" => "http://pdfa.org/declarations/wtpdf#accessibility1.0",
3794                "wtpdf-1-0-reuse" => "http://pdfa.org/declarations/wtpdf#reuse1.0",
3795                _ => return None,
3796            };
3797            Some(ModelValue::String(BoundedText::unchecked(declaration)))
3798        })
3799        .collect()
3800}
3801
3802fn u64_to_f64(value: u64) -> Result<f64> {
3803    let bounded = u32::try_from(value).map_err(|_| ValidationError::LimitExceeded {
3804        limit: "numeric_property",
3805    })?;
3806    Ok(f64::from(bounded))
3807}
3808
3809fn usize_to_f64(value: usize) -> Result<f64> {
3810    let bounded = u32::try_from(value).map_err(|_| ValidationError::LimitExceeded {
3811        limit: "numeric_property",
3812    })?;
3813    Ok(f64::from(bounded))
3814}
3815
3816fn remaining_object_budget(
3817    limits: &ResourceLimits,
3818    visited_len: usize,
3819    stack_len: usize,
3820) -> Result<usize> {
3821    let visited = u64::try_from(visited_len).map_err(|_| ValidationError::LimitExceeded {
3822        limit: "max_objects",
3823    })?;
3824    let pending = u64::try_from(stack_len).map_err(|_| ValidationError::LimitExceeded {
3825        limit: "max_objects",
3826    })?;
3827    let consumed = visited
3828        .checked_add(pending)
3829        .ok_or(ValidationError::LimitExceeded {
3830            limit: "max_objects",
3831        })?;
3832    let remaining =
3833        limits
3834            .max_objects
3835            .checked_sub(consumed)
3836            .ok_or(ValidationError::LimitExceeded {
3837                limit: "max_objects",
3838            })?;
3839    usize::try_from(remaining).map_err(|_| {
3840        ValidationError::LimitExceeded {
3841            limit: "max_objects",
3842        }
3843        .into()
3844    })
3845}
3846
3847fn push_linked<'a>(
3848    objects: &mut Vec<ModelObjectRef<'a>>,
3849    object: ModelObjectRef<'a>,
3850    max_objects: usize,
3851) -> Result<()> {
3852    if objects.len() >= max_objects {
3853        return Err(ValidationError::LimitExceeded {
3854            limit: "max_objects",
3855        }
3856        .into());
3857    }
3858    objects.push(object);
3859    Ok(())
3860}
3861
3862#[cfg(test)]
3863mod tests {
3864    use std::{io::Cursor, sync::Arc};
3865
3866    use super::{
3867        AnnotationModel, CatalogModel, ContentStreamModel, FontModel, OutputIntentModel, PageModel,
3868    };
3869    use crate::{
3870        BinaryOp, BoundedText, ErrorTemplate, FlavourSelection, Identifier, ModelObject,
3871        ModelObjectRef, ModelValue, Parser, PdfvError, ProfileIdentity, ProfileRepository,
3872        PropertyName, ResourceLimits, Rule, RuleExpr, RuleId, ValidationFlavour, ValidationOptions,
3873        ValidationProfile, Validator,
3874    };
3875
3876    #[derive(Debug)]
3877    struct StaticRepo(ValidationProfile);
3878
3879    impl ProfileRepository for StaticRepo {
3880        fn profiles_for(
3881            &self,
3882            _selection: &FlavourSelection,
3883        ) -> crate::Result<Vec<ValidationProfile>> {
3884            Ok(vec![self.0.clone()])
3885        }
3886    }
3887
/// Fixture bytes: a PDF 1.7 document with a catalog, one page, a Type1 font, a text
/// annotation, a content stream and one output intent with an empty profile stream.
fn m1_model_pdf() -> &'static [u8] {
    const FIXTURE: &[u8] = br"%PDF-1.7
1 0 obj
<< /Type /Catalog /Pages 2 0 R /OutputIntents [8 0 R] >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /Annots [5 0 R] /Contents 6 0 R >>
endobj
4 0 obj
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
endobj
5 0 obj
<< /Type /Annot /Subtype /Text >>
endobj
6 0 obj
<< /Length 3 >>
stream
q Q
endstream
endobj
7 0 obj
<< /Length 0 >>
stream
endstream
endobj
8 0 obj
<< /Type /OutputIntent /S /GTS_PDFA1 /DestOutputProfile 7 0 R >>
endobj
trailer
<< /Root 1 0 R >>
%%EOF
";
    FIXTURE
}
3924
/// Fixture bytes: a PDF 1.7 document whose 23 objects cover a wide range of
/// dictionary kinds (form, structure tree, optional content, names, outlines,
/// permissions, destinations, XObjects, colour space, graphics state, CMap, action,
/// signature, embedded file and a `/Filter /Standard` dictionary).
fn m6_model_pdf() -> &'static [u8] {
    const FIXTURE: &[u8] = br"%PDF-1.7
1 0 obj
<< /Type /Catalog /Pages 2 0 R /AcroForm 7 0 R /StructTreeRoot 8 0 R /OCProperties 9 0 R /Names 10 0 R /Outlines 11 0 R /Perms 12 0 R /Dests [21 0 R] >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> /XObject << /Im1 5 0 R /Fm1 22 0 R >> /ColorSpace << /CS1 13 0 R >> /ExtGState << /GS1 14 0 R >> >> /Annots [6 0 R] /Contents 15 0 R >>
endobj
4 0 obj
<< /Type /Font /Subtype /Type0 /BaseFont /Faux /ToUnicode 16 0 R /FontDescriptor << /FontFile2 20 0 R >> >>
endobj
5 0 obj
<< /Type /XObject /Subtype /Image /Width 1 /Height 1 /ColorSpace /DeviceRGB /BitsPerComponent 8 /Length 0 >>
stream
endstream
endobj
6 0 obj
<< /Type /Annot /Subtype /Widget /FT /Sig /A 17 0 R >>
endobj
7 0 obj
<< /Fields [6 0 R] /SigFlags 3 >>
endobj
8 0 obj
<< /Type /StructTreeRoot /K 18 0 R /RoleMap << /H1 /H >> >>
endobj
9 0 obj
<< /OCGs [] /D << >> >>
endobj
10 0 obj
<< /Dests << /Names [] >> >>
endobj
11 0 obj
<< /Type /Outlines /Count 0 >>
endobj
12 0 obj
<< /DocMDP 19 0 R >>
endobj
13 0 obj
<< /N 3 /Alternate /DeviceRGB >>
endobj
14 0 obj
<< /Type /ExtGState /BM /Normal /CA 1 >>
endobj
15 0 obj
<< /Length 3 >>
stream
q Q
endstream
endobj
16 0 obj
<< /Type /CMap /CMapName /Identity-H >>
endobj
17 0 obj
<< /Type /Action /S /URI /URI (https://example.invalid) >>
endobj
18 0 obj
<< /Type /StructElem /S /Document /K [] >>
endobj
19 0 obj
<< /Type /Sig /Filter /Adobe.PPKLite /ByteRange [0 0 0 0] >>
endobj
20 0 obj
<< /Type /EmbeddedFile /Length 0 >>
stream
endstream
endobj
21 0 obj
<< /D [3 0 R /Fit] >>
endobj
22 0 obj
<< /Type /XObject /Subtype /Form /BBox [0 0 1 1] /Length 0 >>
stream
endstream
endobj
23 0 obj
<< /Filter /Standard /V 1 /R 2 /Length 40 /P -4 >>
endobj
trailer
<< /Root 1 0 R >>
%%EOF
";
    FIXTURE
}
4010
/// Builds each model wrapper directly from the m1 fixture and checks that exactly
/// one page, font, annotation, output intent and content stream is produced, and
/// that the page reports `hasContents` as true.
#[test]
fn test_should_materialize_m1_model_wrappers() -> crate::Result<()> {
    // Shared constructor for the errors raised when a fixture lookup comes back empty.
    let missing = |what: &'static str| crate::ParseError::MissingObject {
        message: crate::BoundedText::unchecked(what),
    };

    let document = Parser::default().parse(Cursor::new(m1_model_pdf()))?;
    let Some(catalog_key) = document.catalog else {
        return Err(missing("missing catalog").into());
    };
    let Some(catalog) = CatalogModel::new(&document, catalog_key) else {
        return Err(missing("missing catalog model").into());
    };

    let limits = crate::ResourceLimits::default();
    let pages = PageModel::from_catalog(&document, &catalog, &limits, 16)?;
    let Some(page) = pages.first() else {
        return Err(missing("missing page").into());
    };
    let fonts = FontModel::from_page(&document, page, 16)?;
    let annotations = AnnotationModel::from_page(&document, page, 16)?;
    let output_intents = OutputIntentModel::from_catalog(&document, &catalog, 16)?;
    let content_streams = ContentStreamModel::from_page(&document, page, 16)?;

    let counts = [
        pages.len(),
        fonts.len(),
        annotations.len(),
        output_intents.len(),
        content_streams.len(),
    ];
    assert_eq!(counts, [1, 1, 1, 1, 1]);

    let has_contents = PropertyName::new("hasContents")?;
    assert_eq!(page.property(&has_contents)?, ModelValue::Bool(true));
    Ok(())
}
4043
/// Walks the m1 model graph from the document root with an explicit stack and
/// checks that the page, font, annotation, output intent and content stream
/// contexts are all reached.
#[test]
fn test_should_resolve_m1_links_lazily_from_model_graph() -> crate::Result<()> {
    let document = Parser::default().parse(Cursor::new(m1_model_pdf()))?;
    let limits = crate::ResourceLimits::default();
    let graph = super::ModelGraph::with_all_families(&document, &limits);
    let root = ModelObjectRef::Document(super::DocumentModel::new(&document));

    let mut pending = vec![root];
    let mut seen_contexts = Vec::new();
    while let Some(current) = pending.pop() {
        seen_contexts.push(current.context().as_str().to_owned());
        pending.extend(current.linked_objects(&graph, 16)?);
    }

    for expected in [
        "root/page[0]",
        "root/page[0]/font[F1]",
        "root/page[0]/annotation[0]",
        "root/catalog[0]/outputIntent[0]",
        "root/page[0]/contentStream[0]",
    ] {
        assert!(
            seen_contexts
                .iter()
                .any(|context| context.as_str() == expected)
        );
    }
    Ok(())
}
4083
/// Extracts only the `action` family from the m6 fixture and checks that the
/// action's `URI` property is reported as a redacted string with a non-zero byte
/// count.
#[test]
fn test_should_redact_content_strings_from_feature_report() -> crate::Result<()> {
    let document = Parser::default().parse(Cursor::new(m6_model_pdf()))?;
    let session =
        super::ValidationSession::new(document, crate::ResourceLimits::default(), 100, false);

    let action_family = crate::ObjectTypeName::new("action")?;
    let selection = super::FeatureSelection::Families {
        families: vec![action_family.clone()],
    };
    let report = session.extract_features(&selection)?;

    let action = report
        .objects
        .iter()
        .find(|candidate| candidate.family == action_family)
        .ok_or_else(|| crate::ParseError::MissingObject {
            message: crate::BoundedText::unchecked("missing action feature"),
        })?;

    let uri_key = PropertyName::new("URI")?;
    let uri_is_redacted = match action.properties.get(&uri_key) {
        Some(crate::FeatureValue::RedactedString { bytes }) => *bytes > 0,
        _ => false,
    };
    assert!(uri_is_redacted);
    Ok(())
}
4109
/// With `max_objects` set to 1, extracting all features from the m6 fixture must
/// yield a report flagged as truncated with exactly one visited object.
#[test]
fn test_should_truncate_feature_report_on_object_cap() -> crate::Result<()> {
    let single_object_cap = crate::ResourceLimits {
        max_objects: 1,
        ..crate::ResourceLimits::default()
    };
    let document = Parser::default().parse(Cursor::new(m6_model_pdf()))?;

    let report = super::ValidationSession::new(document, single_object_cap, 100, false)
        .extract_features(&super::FeatureSelection::All)?;

    assert!(report.truncated);
    assert_eq!(report.visited_objects, 1);
    Ok(())
}
4124
/// Checks that the default model registry knows every listed family and that
/// `structureElement` exposes the `parentStandardType` property.
#[test]
fn test_should_register_model_family_schema_for_generated_profiles() -> crate::Result<()> {
    const EXPECTED_FAMILIES: &[&str] = &[
        "document",
        "catalog",
        "page",
        "resource",
        "font",
        "cMap",
        "image",
        "contentStream",
        "annotation",
        "action",
        "formField",
        "colorSpace",
        "extGState",
        "structureTreeRoot",
        "structureElement",
        "signature",
        "security",
    ];

    let registry = super::ModelRegistry::default_registry();
    for &family in EXPECTED_FAMILIES {
        let family_name = crate::ObjectTypeName::new(family)?;
        assert!(registry.has_family(&family_name));
    }

    let structure_element = crate::ObjectTypeName::new("structureElement")?;
    let parent_standard_type = PropertyName::new("parentStandardType")?;
    assert!(registry.has_family_property(&structure_element, &parent_standard_type));
    Ok(())
}
4156
/// Walks the m6 model graph from the document root, visiting each identity key
/// once, and checks that every listed object family is encountered.
#[test]
fn test_should_materialize_m6_broad_model_families_bounded_iteratively() -> crate::Result<()> {
    const EXPECTED_FAMILIES: &[&str] = &[
        "acroForm",
        "structureTreeRoot",
        "optionalContentProperties",
        "names",
        "outline",
        "destination",
        "permissions",
        "pageTree",
        "resource",
        "image",
        "xObject",
        "colorSpace",
        "extGState",
        "cMap",
        "embeddedFontFile",
        "action",
        "signature",
        "security",
    ];

    let document = Parser::default().parse(Cursor::new(m6_model_pdf()))?;
    let limits = crate::ResourceLimits {
        max_objects: 128,
        ..crate::ResourceLimits::default()
    };
    let graph = super::ModelGraph::with_all_families(&document, &limits);
    let root = ModelObjectRef::Document(super::DocumentModel::new(&document));

    let mut pending = vec![root];
    let mut visited = std::collections::HashSet::new();
    let mut families = std::collections::BTreeSet::new();
    while let Some(current) = pending.pop() {
        // Only expand an object the first time its identity key is seen.
        if visited.insert(current.identity_key()) {
            families.insert(current.object_type().as_str().to_owned());
            pending.extend(current.linked_objects(&graph, 128)?);
        }
    }

    for &family in EXPECTED_FAMILIES {
        assert!(families.contains(family), "missing {family}: {families:?}");
    }
    Ok(())
}
4205
/// Validates the m1 fixture against the always-failing linked-object profile and
/// checks that five rules ran and that a failed assertion was recorded for each of
/// the page, font, annotation, output intent and content stream contexts.
#[test]
fn test_should_validate_m1_linked_objects_through_lazy_traversal() -> crate::Result<()> {
    let repository = StaticRepo(linked_object_profile()?);
    let validator =
        Validator::with_profiles(ValidationOptions::default(), Arc::new(repository))?;
    let report =
        validator.validate_reader(Cursor::new(m1_model_pdf()), crate::InputName::memory())?;

    let Some(profile_report) = report.profile_reports.first() else {
        return Err(crate::ValidationError::LimitExceeded {
            limit: "profile_reports",
        }
        .into());
    };
    let failed_contexts: Vec<&str> = profile_report
        .failed_assertions
        .iter()
        .filter_map(|assertion| assertion.object_context.as_ref().map(BoundedText::as_str))
        .collect();

    assert_eq!(profile_report.rules_executed, 5);
    for expected in [
        "root/page[0]",
        "root/page[0]/font[F1]",
        "root/page[0]/annotation[0]",
        "root/catalog[0]/outputIntent[0]",
        "root/page[0]/contentStream[0]",
    ] {
        assert!(failed_contexts.contains(&expected));
    }
    Ok(())
}
4235
/// With `max_objects` set to 1, validating the single-catalog fixture must fail
/// with `LimitExceeded { limit: "max_objects" }`; a successful validation is turned
/// into an `"expected_error"` failure of the test itself.
#[test]
fn test_should_limit_lazy_link_expansion_before_enqueue() -> crate::Result<()> {
    let options = ValidationOptions::builder()
        .resource_limits(ResourceLimits {
            max_objects: 1,
            ..ResourceLimits::default()
        })
        .build();
    let validator = Validator::new(options)?;

    let outcome = validator.validate_reader(
        Cursor::new(simple_catalog_pdf()),
        crate::InputName::memory(),
    );
    match outcome {
        Err(error) => {
            assert!(matches!(
                error,
                PdfvError::Validation(crate::ValidationError::LimitExceeded {
                    limit: "max_objects"
                })
            ));
            Ok(())
        }
        Ok(_) => Err(crate::ValidationError::LimitExceeded {
            limit: "expected_error",
        }
        .into()),
    }
}
4261
/// Fixture bytes: a PDF 1.7 document containing only a catalog object and a trailer.
fn simple_catalog_pdf() -> &'static [u8] {
    const FIXTURE: &[u8] = br"%PDF-1.7
1 0 obj
<< /Type /Catalog >>
endobj
trailer
<< /Root 1 0 R >>
%%EOF
";
    FIXTURE
}
4272
4273    fn linked_object_profile() -> crate::Result<ValidationProfile> {
4274        Ok(ValidationProfile {
4275            identity: ProfileIdentity {
4276                id: Identifier::new("lazy-links")?,
4277                name: BoundedText::new("lazy links", 64)?,
4278                version: None,
4279            },
4280            flavour: ValidationFlavour::new("pdfa", std::num::NonZeroU32::MIN, "b")?,
4281            rules: vec![
4282                false_rule("page-rule", "page", false)?,
4283                false_rule("font-rule", "font", false)?,
4284                false_rule("annotation-rule", "annotation", false)?,
4285                false_rule("output-intent-rule", "outputIntent", false)?,
4286                false_rule("content-stream-deferred", "contentStream", true)?,
4287            ],
4288        })
4289    }
4290
4291    fn false_rule(id: &str, object_type: &str, deferred: bool) -> crate::Result<Rule> {
4292        Ok(Rule {
4293            id: RuleId(Identifier::new(id)?),
4294            object_type: crate::ObjectTypeName::new(object_type)?,
4295            deferred,
4296            tags: Vec::new(),
4297            description: BoundedText::new(id, 64)?,
4298            test: RuleExpr::Binary {
4299                op: BinaryOp::Eq,
4300                left: Box::new(RuleExpr::Bool { value: true }),
4301                right: Box::new(RuleExpr::Bool { value: false }),
4302            },
4303            error: ErrorTemplate {
4304                message: BoundedText::new(id, 64)?,
4305            },
4306            references: Vec::new(),
4307        })
4308    }
4309}