Skip to main content

pdfv_core/
profile.rs

1//! Built-in validation profiles and bounded rule expression evaluation.
2
3use std::num::NonZeroU32;
4
5use regex::RegexBuilder;
6use serde::{Deserialize, Serialize};
7
8use crate::{
9    AssertionStatus, BoundedText, CosObject, FlavourSelection, Identifier, ObjectKey, ParseFact,
10    ProfileError, ProfileIdentity, ResourceLimits, Result, RuleId, SpecReference, StreamFact,
11    ValidationFlavour,
12    generated_profiles::{GENERATED_PROFILE_SOURCES, GeneratedProfileSource, VERA_PDF_LIBRARY_PIN},
13};
14
// Budgets enforced by the rule evaluator in this file.

/// Maximum number of `eval` steps charged while evaluating a single rule.
const MAX_RULE_INSTRUCTIONS: u64 = 512;
/// Maximum expression nesting depth accepted by the evaluator.
const MAX_RULE_DEPTH: u32 = 32;
/// Maximum byte length of a pattern passed to the `matches` built-in.
const MAX_REGEX_PATTERN_BYTES: usize = 512;
/// Maximum byte length of a haystack passed to the `matches` built-in.
const MAX_REGEX_HAYSTACK_BYTES: usize = 4096;

// NOTE(review): the limits below are not referenced in the visible part of this file;
// presumably they bound XML profile import — confirm against the importer.

/// Byte budget for profile XML (8 MiB).
const MAX_PROFILE_XML_BYTES: u64 = 8 * 1024 * 1024;
/// Element-count budget for profile XML.
const MAX_PROFILE_XML_ELEMENTS: u64 = 100_000;
/// Nesting-depth budget for profile XML.
const MAX_PROFILE_XML_DEPTH: u32 = 32;
/// Attribute-count budget for profile XML.
const MAX_PROFILE_XML_ATTRIBUTES: usize = 16;
/// Rule-count budget for a profile.
const MAX_PROFILE_RULES: usize = 10_000;
/// Byte budget for a profile string.
const MAX_PROFILE_STRING_BYTES: usize = 4096;
25
26/// Repository that resolves validation profiles for a caller selection.
27pub trait ProfileRepository {
28    /// Returns immutable profiles matching a selection.
29    ///
30    /// # Errors
31    ///
32    /// Returns [`crate::PdfvError`] when the selection is unsupported or profile data is invalid.
33    fn profiles_for(&self, selection: &FlavourSelection) -> Result<Vec<ValidationProfile>>;
34}
35
36/// Rule evaluator interface.
37pub trait RuleEvaluator {
38    /// Evaluates one rule against one model object.
39    ///
40    /// # Errors
41    ///
42    /// Returns [`crate::PdfvError`] when rule evaluation fails or exceeds budgets.
43    fn evaluate(&mut self, object: crate::ModelObjectRef<'_>, rule: &Rule) -> Result<RuleOutcome>;
44}
45
46/// Built-in profile repository.
47#[derive(Clone, Debug, Default)]
48pub struct BuiltinProfileRepository;
49
50impl BuiltinProfileRepository {
51    /// Creates a built-in profile repository.
52    #[must_use]
53    pub fn new() -> Self {
54        Self
55    }
56
57    /// Lists built-in profile metadata.
58    ///
59    /// # Errors
60    ///
61    /// Returns [`crate::PdfvError`] when built-in generated profile data is invalid.
62    pub fn list_profiles(&self) -> Result<Vec<ProfileCatalogEntry>> {
63        let mut entries = Vec::with_capacity(GENERATED_PROFILE_SOURCES.len().saturating_add(1));
64        let m4 = m4_profile(pdfa_1b_flavour()?)?;
65        entries.push(ProfileCatalogEntry::from_profile(
66            &m4,
67            "pdfv-internal",
68            "built-in smoke profile",
69            "pdfa-1b",
70        )?);
71        for source in GENERATED_PROFILE_SOURCES {
72            let import = import_generated_profile(source)?;
73            entries.push(ProfileCatalogEntry::from_import(source, &import)?);
74        }
75        Ok(entries)
76    }
77}
78
79impl ProfileRepository for BuiltinProfileRepository {
80    fn profiles_for(&self, selection: &FlavourSelection) -> Result<Vec<ValidationProfile>> {
81        match selection {
82            FlavourSelection::Auto { default } => {
83                let Some(flavour) = default else {
84                    return Ok(Vec::new());
85                };
86                ensure_builtin_flavour(flavour)?;
87                Ok(vec![m4_profile(flavour.clone())?])
88            }
89            FlavourSelection::Explicit { flavour } => {
90                let source = builtin_source_for_flavour(flavour)?;
91                Ok(vec![import_generated_profile(source)?.profile])
92            }
93            FlavourSelection::CustomProfile { .. } => {
94                #[cfg(feature = "custom-profiles")]
95                {
96                    let repository = CustomProfileRepository;
97                    repository.profiles_for(selection)
98                }
99                #[cfg(not(feature = "custom-profiles"))]
100                {
101                    Err(ProfileError::UnsupportedSelection.into())
102                }
103            }
104        }
105    }
106}
107
/// Repository that loads a single bounded XML profile from disk.
#[cfg(feature = "custom-profiles")]
#[derive(Clone, Debug, Default)]
pub struct CustomProfileRepository;

#[cfg(feature = "custom-profiles")]
impl ProfileRepository for CustomProfileRepository {
    /// Loads the profile named by a `CustomProfile` selection.
    ///
    /// Any other selection kind is rejected with `ProfileError::UnsupportedSelection`.
    fn profiles_for(&self, selection: &FlavourSelection) -> Result<Vec<ValidationProfile>> {
        match selection {
            FlavourSelection::CustomProfile { profile_path } => {
                let imported = load_verapdf_profile_path(profile_path)?;
                Ok(vec![imported.profile])
            }
            _ => Err(ProfileError::UnsupportedSelection.into()),
        }
    }
}
122
123/// Summary produced by XML profile import.
124#[derive(Clone, Debug, Deserialize, Serialize)]
125#[non_exhaustive]
126#[serde(rename_all = "camelCase", deny_unknown_fields)]
127pub struct ProfileImportSummary {
128    /// Imported profile.
129    pub profile: ValidationProfile,
130    /// Number of rules imported with executable expressions.
131    pub supported_rules: u64,
132    /// Number of rules imported as unsupported placeholders.
133    pub unsupported_rules: u64,
134}
135
136/// Executable coverage metadata for a profile.
137#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
138#[non_exhaustive]
139#[serde(rename_all = "camelCase", deny_unknown_fields)]
140#[allow(
141    clippy::struct_field_names,
142    reason = "public report contract mirrors ExpressionCoverage terminology from the spec"
143)]
144pub struct ProfileCoverage {
145    /// Total official rules imported from the source profile.
146    pub total_rules: u64,
147    /// Rules that lowered to executable Rust IR.
148    pub executable_rules: u64,
149    /// Required rules retained as unsupported report data.
150    pub unsupported_rules: u64,
151}
152
153/// Profile metadata suitable for listing catalogs.
154#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
155#[non_exhaustive]
156#[serde(rename_all = "camelCase", deny_unknown_fields)]
157pub struct ProfileCatalogEntry {
158    /// Profile identity.
159    pub identity: ProfileIdentity,
160    /// Validation flavour.
161    pub flavour: ValidationFlavour,
162    /// CLI/catalog spelling for the flavour.
163    pub display_flavour: BoundedText,
164    /// Source pin that produced this profile entry.
165    pub source_pin: Identifier,
166    /// Vendored or internal source description.
167    pub source_file: BoundedText,
168    /// Executable rule coverage.
169    pub coverage: ProfileCoverage,
170}
171
172impl ProfileCatalogEntry {
173    fn from_import(source: &GeneratedProfileSource, import: &ProfileImportSummary) -> Result<Self> {
174        Self::from_profile(
175            &import.profile,
176            VERA_PDF_LIBRARY_PIN,
177            source.source_file,
178            source.display_flavour,
179        )
180        .map(|mut entry| {
181            entry.coverage = ProfileCoverage {
182                total_rules: import
183                    .supported_rules
184                    .saturating_add(import.unsupported_rules),
185                executable_rules: import.supported_rules,
186                unsupported_rules: import.unsupported_rules,
187            };
188            entry
189        })
190    }
191
192    fn from_profile(
193        profile: &ValidationProfile,
194        source_pin: &str,
195        source_file: &str,
196        display_flavour: &str,
197    ) -> Result<Self> {
198        Ok(Self {
199            identity: profile.identity.clone(),
200            flavour: profile.flavour.clone(),
201            display_flavour: BoundedText::new(display_flavour, 128)?,
202            source_pin: Identifier::new(source_pin)?,
203            source_file: BoundedText::new(source_file, 512)?,
204            coverage: ProfileCoverage {
205                total_rules: u64::try_from(profile.rules.len()).unwrap_or(u64::MAX),
206                executable_rules: u64::try_from(
207                    profile
208                        .rules
209                        .iter()
210                        .filter(|rule| !matches!(rule.test, RuleExpr::Unsupported { .. }))
211                        .count(),
212                )
213                .unwrap_or(u64::MAX),
214                unsupported_rules: u64::try_from(
215                    profile
216                        .rules
217                        .iter()
218                        .filter(|rule| matches!(rule.test, RuleExpr::Unsupported { .. }))
219                        .count(),
220                )
221                .unwrap_or(u64::MAX),
222            },
223        })
224    }
225}
226
227/// Immutable validation profile.
228#[derive(Clone, Debug, Deserialize, Serialize)]
229#[non_exhaustive]
230#[serde(rename_all = "camelCase", deny_unknown_fields)]
231pub struct ValidationProfile {
232    /// Profile identity.
233    pub identity: ProfileIdentity,
234    /// Validation flavour.
235    pub flavour: ValidationFlavour,
236    /// Rules in deterministic execution order.
237    pub rules: Vec<Rule>,
238}
239
240/// Validation rule.
241#[derive(Clone, Debug, Deserialize, Serialize)]
242#[non_exhaustive]
243#[serde(rename_all = "camelCase", deny_unknown_fields)]
244pub struct Rule {
245    /// Rule id.
246    pub id: RuleId,
247    /// Object type targeted by this rule.
248    pub object_type: ObjectTypeName,
249    /// Whether this rule runs after traversal.
250    pub deferred: bool,
251    /// Rule tags.
252    pub tags: Vec<Identifier>,
253    /// Human-readable rule description.
254    pub description: BoundedText,
255    /// Bounded rule expression.
256    pub test: RuleExpr,
257    /// Error template used for failed assertions.
258    pub error: ErrorTemplate,
259    /// Specification citations associated with this rule.
260    pub references: Vec<SpecReference>,
261}
262
263/// Error template for failed assertions.
264#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
265#[non_exhaustive]
266#[serde(rename_all = "camelCase", deny_unknown_fields)]
267pub struct ErrorTemplate {
268    /// Bounded failure message.
269    pub message: BoundedText,
270}
271
272/// Validation model object type name.
273#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
274#[serde(try_from = "String", into = "String")]
275pub struct ObjectTypeName(Identifier);
276
277impl ObjectTypeName {
278    /// Creates an object type name.
279    ///
280    /// # Errors
281    ///
282    /// Returns [`crate::ConfigError`] when the identifier violates policy.
283    pub fn new(value: impl Into<String>) -> std::result::Result<Self, crate::ConfigError> {
284        Ok(Self(Identifier::new(value)?))
285    }
286
287    /// Returns the type name text.
288    #[must_use]
289    pub fn as_str(&self) -> &str {
290        self.0.as_str()
291    }
292
293    pub(crate) fn unchecked(value: &'static str) -> Self {
294        Self(Identifier::unchecked(value))
295    }
296}
297
298impl TryFrom<String> for ObjectTypeName {
299    type Error = crate::ConfigError;
300
301    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
302        Self::new(value)
303    }
304}
305
306impl From<ObjectTypeName> for String {
307    fn from(value: ObjectTypeName) -> Self {
308        value.0.into()
309    }
310}
311
312/// Validation model property name.
313#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
314#[serde(try_from = "String", into = "String")]
315pub struct PropertyName(Identifier);
316
317impl PropertyName {
318    /// Creates a property name.
319    ///
320    /// # Errors
321    ///
322    /// Returns [`crate::ConfigError`] when the identifier violates policy.
323    pub fn new(value: impl Into<String>) -> std::result::Result<Self, crate::ConfigError> {
324        Ok(Self(Identifier::new(value)?))
325    }
326
327    /// Returns the property name text.
328    #[must_use]
329    pub fn as_str(&self) -> &str {
330        self.0.as_str()
331    }
332
333    pub(crate) fn unchecked(value: impl Into<String>) -> Self {
334        Self(Identifier::unchecked(value))
335    }
336}
337
338impl TryFrom<String> for PropertyName {
339    type Error = crate::ConfigError;
340
341    fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
342        Self::new(value)
343    }
344}
345
346impl From<PropertyName> for String {
347    fn from(value: PropertyName) -> Self {
348        value.0.into()
349    }
350}
351
352/// Dot-separated property path.
353#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
354#[serde(try_from = "Vec<String>", into = "Vec<String>")]
355pub struct PropertyPath(Vec<PropertyName>);
356
357impl PropertyPath {
358    /// Creates a property path from names.
359    #[must_use]
360    pub fn new(parts: Vec<PropertyName>) -> Self {
361        Self(parts)
362    }
363
364    /// Returns path parts.
365    #[must_use]
366    pub fn parts(&self) -> &[PropertyName] {
367        &self.0
368    }
369}
370
371impl TryFrom<Vec<String>> for PropertyPath {
372    type Error = crate::ConfigError;
373
374    fn try_from(value: Vec<String>) -> std::result::Result<Self, Self::Error> {
375        value
376            .into_iter()
377            .map(PropertyName::new)
378            .collect::<std::result::Result<Vec<_>, _>>()
379            .map(Self)
380    }
381}
382
383impl From<PropertyPath> for Vec<String> {
384    fn from(value: PropertyPath) -> Self {
385        value.0.into_iter().map(Into::into).collect()
386    }
387}
388
389/// Bounded rule expression.
390#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
391#[non_exhaustive]
392#[serde(rename_all = "camelCase", tag = "kind")]
393pub enum RuleExpr {
394    /// Boolean literal.
395    Bool {
396        /// Literal value.
397        value: bool,
398    },
399    /// Number literal.
400    Number {
401        /// Literal value.
402        value: f64,
403    },
404    /// String literal.
405    String {
406        /// Literal value.
407        value: BoundedText,
408    },
409    /// Null literal.
410    Null,
411    /// Property lookup.
412    Property {
413        /// Property path.
414        path: PropertyPath,
415    },
416    /// Unary operation.
417    Unary {
418        /// Operator.
419        op: UnaryOp,
420        /// Operand.
421        expr: Box<RuleExpr>,
422    },
423    /// Binary operation.
424    Binary {
425        /// Operator.
426        op: BinaryOp,
427        /// Left operand.
428        left: Box<RuleExpr>,
429        /// Right operand.
430        right: Box<RuleExpr>,
431    },
432    /// Ternary conditional expression.
433    Conditional {
434        /// Boolean condition.
435        condition: Box<RuleExpr>,
436        /// Expression evaluated when condition is true.
437        when_true: Box<RuleExpr>,
438        /// Expression evaluated when condition is false.
439        when_false: Box<RuleExpr>,
440    },
441    /// Built-in function call.
442    Call {
443        /// Built-in function.
444        function: BuiltinFunction,
445        /// Arguments.
446        args: Vec<RuleExpr>,
447    },
448    /// Unsupported source expression retained for report diagnostics.
449    Unsupported {
450        /// Original bounded expression fragment.
451        fragment: BoundedText,
452        /// Bounded reason.
453        reason: BoundedText,
454    },
455}
456
457/// Unary expression operator.
458#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
459#[non_exhaustive]
460#[serde(rename_all = "camelCase")]
461pub enum UnaryOp {
462    /// Boolean negation.
463    Not,
464}
465
466/// Binary expression operator.
467#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
468#[non_exhaustive]
469#[serde(rename_all = "camelCase")]
470pub enum BinaryOp {
471    /// Equality.
472    Eq,
473    /// Inequality.
474    Ne,
475    /// Numeric less-than-or-equal.
476    Le,
477    /// Numeric greater-than-or-equal.
478    Ge,
479    /// Numeric less-than.
480    Lt,
481    /// Numeric greater-than.
482    Gt,
483    /// Boolean conjunction.
484    And,
485    /// Boolean disjunction.
486    Or,
487    /// Numeric addition.
488    Add,
489    /// Numeric subtraction.
490    Sub,
491    /// Numeric multiplication.
492    Mul,
493    /// Numeric division.
494    Div,
495    /// Numeric modulo.
496    Rem,
497}
498
499/// Bounded built-in function.
500#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
501#[non_exhaustive]
502#[serde(rename_all = "camelCase")]
503pub enum BuiltinFunction {
504    /// Returns true when a named parse fact exists.
505    HasParseFact,
506    /// Returns collection or string size.
507    Size,
508    /// Returns true when a collection or string is empty.
509    IsEmpty,
510    /// Returns true when a collection or string contains a value.
511    Contains,
512    /// Returns true when every boolean argument is true.
513    All,
514    /// Returns true when any boolean argument is true.
515    Exists,
516    /// Returns true when a bounded regex matches a string.
517    Matches,
518}
519
520/// Model value used by rule evaluation.
521#[derive(Clone, Debug, PartialEq)]
522#[non_exhaustive]
523pub enum ModelValue {
524    /// Null value.
525    Null,
526    /// Boolean value.
527    Bool(bool),
528    /// Number value.
529    Number(f64),
530    /// String value.
531    String(BoundedText),
532    /// Object key value.
533    ObjectKey(ObjectKey),
534    /// Bounded list value.
535    List(Vec<ModelValue>),
536}
537
/// Result of evaluating one rule against one object.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum RuleOutcome {
    /// The rule's assertion held.
    Passed,
    /// The rule's assertion did not hold.
    Failed,
}
547
548impl RuleOutcome {
549    /// Converts the outcome to report assertion status.
550    #[must_use]
551    pub fn assertion_status(self) -> AssertionStatus {
552        match self {
553            Self::Passed => AssertionStatus::Passed,
554            Self::Failed => AssertionStatus::Failed,
555        }
556    }
557}
558
559/// Default bounded rule evaluator.
560#[derive(Clone, Debug)]
561pub struct DefaultRuleEvaluator {
562    limits: ResourceLimits,
563    instructions: u64,
564}
565
566impl DefaultRuleEvaluator {
567    /// Creates an evaluator.
568    #[must_use]
569    pub fn new(limits: ResourceLimits) -> Self {
570        Self {
571            limits,
572            instructions: 0,
573        }
574    }
575
576    fn eval(
577        &mut self,
578        object: &crate::ModelObjectRef<'_>,
579        expr: &RuleExpr,
580        depth: u32,
581    ) -> Result<ModelValue> {
582        self.instructions =
583            self.instructions
584                .checked_add(1)
585                .ok_or(ProfileError::BudgetExceeded {
586                    budget: "instructions",
587                })?;
588        if self.instructions > MAX_RULE_INSTRUCTIONS {
589            return Err(ProfileError::BudgetExceeded {
590                budget: "instructions",
591            }
592            .into());
593        }
594        if depth > MAX_RULE_DEPTH || depth > self.limits.max_object_depth {
595            return Err(ProfileError::BudgetExceeded { budget: "depth" }.into());
596        }
597
598        match expr {
599            RuleExpr::Bool { value } => Ok(ModelValue::Bool(*value)),
600            RuleExpr::Number { value } => Ok(ModelValue::Number(*value)),
601            RuleExpr::String { value } => Ok(ModelValue::String(value.clone())),
602            RuleExpr::Null => Ok(ModelValue::Null),
603            RuleExpr::Property { path } => property(object, path),
604            RuleExpr::Unary { op, expr } => {
605                let value = self.eval(object, expr, depth.saturating_add(1))?;
606                match (op, value) {
607                    (UnaryOp::Not, ModelValue::Bool(value)) => Ok(ModelValue::Bool(!value)),
608                    _ => Err(type_mismatch("unary operator requires boolean").into()),
609                }
610            }
611            RuleExpr::Binary { op, left, right } => {
612                self.eval_binary(object, *op, left, right, depth)
613            }
614            RuleExpr::Conditional {
615                condition,
616                when_true,
617                when_false,
618            } => {
619                if expect_bool(&self.eval(object, condition, depth.saturating_add(1))?)? {
620                    self.eval(object, when_true, depth.saturating_add(1))
621                } else {
622                    self.eval(object, when_false, depth.saturating_add(1))
623                }
624            }
625            RuleExpr::Call { function, args } => self.eval_call(object, *function, args, depth),
626            RuleExpr::Unsupported { reason, .. } => Err(ProfileError::UnsupportedRule {
627                reason: reason.clone(),
628            }
629            .into()),
630        }
631    }
632
633    fn eval_binary(
634        &mut self,
635        object: &crate::ModelObjectRef<'_>,
636        op: BinaryOp,
637        left: &RuleExpr,
638        right: &RuleExpr,
639        depth: u32,
640    ) -> Result<ModelValue> {
641        if op == BinaryOp::And {
642            let left = expect_bool(&self.eval(object, left, depth.saturating_add(1))?)?;
643            if !left {
644                return Ok(ModelValue::Bool(false));
645            }
646            let right = expect_bool(&self.eval(object, right, depth.saturating_add(1))?)?;
647            return Ok(ModelValue::Bool(right));
648        }
649        if op == BinaryOp::Or {
650            let left = expect_bool(&self.eval(object, left, depth.saturating_add(1))?)?;
651            if left {
652                return Ok(ModelValue::Bool(true));
653            }
654            let right = expect_bool(&self.eval(object, right, depth.saturating_add(1))?)?;
655            return Ok(ModelValue::Bool(right));
656        }
657
658        let left = self.eval(object, left, depth.saturating_add(1))?;
659        let right = self.eval(object, right, depth.saturating_add(1))?;
660        let result = match op {
661            BinaryOp::Eq => values_equal(&left, &right),
662            BinaryOp::Ne => !values_equal(&left, &right),
663            BinaryOp::Le => expect_number(&left)? <= expect_number(&right)?,
664            BinaryOp::Ge => expect_number(&left)? >= expect_number(&right)?,
665            BinaryOp::Lt => expect_number(&left)? < expect_number(&right)?,
666            BinaryOp::Gt => expect_number(&left)? > expect_number(&right)?,
667            BinaryOp::And | BinaryOp::Or => false,
668            BinaryOp::Add => {
669                return Ok(ModelValue::Number(
670                    expect_number(&left)? + expect_number(&right)?,
671                ));
672            }
673            BinaryOp::Sub => {
674                return Ok(ModelValue::Number(
675                    expect_number(&left)? - expect_number(&right)?,
676                ));
677            }
678            BinaryOp::Mul => {
679                return Ok(ModelValue::Number(
680                    expect_number(&left)? * expect_number(&right)?,
681                ));
682            }
683            BinaryOp::Div => {
684                let divisor = expect_number(&right)?;
685                if divisor.abs() < f64::EPSILON {
686                    return Err(type_mismatch("division by zero").into());
687                }
688                return Ok(ModelValue::Number(expect_number(&left)? / divisor));
689            }
690            BinaryOp::Rem => {
691                let divisor = expect_number(&right)?;
692                if divisor.abs() < f64::EPSILON {
693                    return Err(type_mismatch("modulo by zero").into());
694                }
695                return Ok(ModelValue::Number(expect_number(&left)? % divisor));
696            }
697        };
698        Ok(ModelValue::Bool(result))
699    }
700
701    fn eval_call(
702        &mut self,
703        object: &crate::ModelObjectRef<'_>,
704        function: BuiltinFunction,
705        args: &[RuleExpr],
706        depth: u32,
707    ) -> Result<ModelValue> {
708        match function {
709            BuiltinFunction::HasParseFact => {
710                let value = self.eval_single_arg(object, args, depth, "hasParseFact")?;
711                let ModelValue::String(name) = value else {
712                    return Err(type_mismatch("hasParseFact requires string").into());
713                };
714                Ok(ModelValue::Bool(has_parse_fact(
715                    object.document().parse_facts.as_slice(),
716                    name.as_str(),
717                )))
718            }
719            BuiltinFunction::Size => {
720                let value = self.eval_single_arg(object, args, depth, "size")?;
721                Ok(ModelValue::Number(usize_to_f64(collection_len(&value)?)?))
722            }
723            BuiltinFunction::IsEmpty => {
724                let value = self.eval_single_arg(object, args, depth, "isEmpty")?;
725                Ok(ModelValue::Bool(collection_len(&value)? == 0))
726            }
727            BuiltinFunction::Contains => {
728                if args.len() != 2 {
729                    return Err(type_mismatch("contains requires two arguments").into());
730                }
731                let haystack = self.eval(
732                    object,
733                    args.first()
734                        .ok_or_else(|| type_mismatch("contains requires haystack"))?,
735                    depth.saturating_add(1),
736                )?;
737                let needle = self.eval(
738                    object,
739                    args.get(1)
740                        .ok_or_else(|| type_mismatch("contains requires needle"))?,
741                    depth.saturating_add(1),
742                )?;
743                Ok(ModelValue::Bool(contains_value(&haystack, &needle)?))
744            }
745            BuiltinFunction::All => {
746                let mut result = true;
747                for arg in args {
748                    result &= expect_bool(&self.eval(object, arg, depth.saturating_add(1))?)?;
749                    if !result {
750                        break;
751                    }
752                }
753                Ok(ModelValue::Bool(result))
754            }
755            BuiltinFunction::Exists => {
756                let mut result = false;
757                for arg in args {
758                    result |= expect_bool(&self.eval(object, arg, depth.saturating_add(1))?)?;
759                    if result {
760                        break;
761                    }
762                }
763                Ok(ModelValue::Bool(result))
764            }
765            BuiltinFunction::Matches => {
766                if args.len() != 2 {
767                    return Err(type_mismatch("matches requires pattern and string").into());
768                }
769                let pattern = self.eval(
770                    object,
771                    args.first()
772                        .ok_or_else(|| type_mismatch("matches requires pattern"))?,
773                    depth.saturating_add(1),
774                )?;
775                let haystack = self.eval(
776                    object,
777                    args.get(1)
778                        .ok_or_else(|| type_mismatch("matches requires string"))?,
779                    depth.saturating_add(1),
780                )?;
781                let (ModelValue::String(pattern), ModelValue::String(haystack)) =
782                    (pattern, haystack)
783                else {
784                    return Err(type_mismatch("matches requires string arguments").into());
785                };
786                if pattern.as_str().len() > MAX_REGEX_PATTERN_BYTES
787                    || haystack.as_str().len() > MAX_REGEX_HAYSTACK_BYTES
788                {
789                    return Err(ProfileError::BudgetExceeded { budget: "regex" }.into());
790                }
791                let regex = RegexBuilder::new(pattern.as_str())
792                    .size_limit(1 << 20)
793                    .dfa_size_limit(1 << 20)
794                    .build()
795                    .map_err(|error| ProfileError::InvalidField {
796                        field: "regex",
797                        reason: BoundedText::new(error.to_string(), 512)
798                            .unwrap_or_else(|_| BoundedText::unchecked("invalid regex")),
799                    })?;
800                Ok(ModelValue::Bool(regex.is_match(haystack.as_str())))
801            }
802        }
803    }
804
805    fn eval_single_arg(
806        &mut self,
807        object: &crate::ModelObjectRef<'_>,
808        args: &[RuleExpr],
809        depth: u32,
810        name: &'static str,
811    ) -> Result<ModelValue> {
812        if args.len() != 1 {
813            return Err(type_mismatch("built-in requires exactly one argument").into());
814        }
815        let Some(first) = args.first() else {
816            return Err(type_mismatch(name).into());
817        };
818        self.eval(object, first, depth.saturating_add(1))
819    }
820}
821
822impl RuleEvaluator for DefaultRuleEvaluator {
823    fn evaluate(&mut self, object: crate::ModelObjectRef<'_>, rule: &Rule) -> Result<RuleOutcome> {
824        self.instructions = 0;
825        let value = self.eval(&object, &rule.test, 0)?;
826        if expect_bool(&value)? {
827            Ok(RuleOutcome::Passed)
828        } else {
829            Ok(RuleOutcome::Failed)
830        }
831    }
832}
833
834fn property(object: &crate::ModelObjectRef<'_>, path: &PropertyPath) -> Result<ModelValue> {
835    if path.parts().is_empty() {
836        return Err(ProfileError::UnknownProperty {
837            property: BoundedText::unchecked("empty"),
838        }
839        .into());
840    }
841    if path.parts().len() > 1 {
842        return Err(ProfileError::UnsupportedRule {
843            reason: BoundedText::unchecked("nested property path has no bound model link"),
844        }
845        .into());
846    }
847    let name = path
848        .parts()
849        .first()
850        .ok_or_else(|| ProfileError::UnknownProperty {
851            property: BoundedText::unchecked("empty"),
852        })?;
853    object.property(name)
854}
855
856fn expect_bool(value: &ModelValue) -> Result<bool> {
857    match value {
858        ModelValue::Bool(value) => Ok(*value),
859        _ => Err(type_mismatch("expected boolean").into()),
860    }
861}
862
863fn expect_number(value: &ModelValue) -> Result<f64> {
864    match value {
865        ModelValue::Number(value) => Ok(*value),
866        _ => Err(type_mismatch("expected number").into()),
867    }
868}
869
870fn values_equal(left: &ModelValue, right: &ModelValue) -> bool {
871    match (left, right) {
872        (ModelValue::Null, ModelValue::Null) => true,
873        (ModelValue::Bool(left), ModelValue::Bool(right)) => left == right,
874        (ModelValue::Number(left), ModelValue::Number(right)) => {
875            (left - right).abs() < f64::EPSILON
876        }
877        (ModelValue::String(left), ModelValue::String(right)) => left == right,
878        (ModelValue::ObjectKey(left), ModelValue::ObjectKey(right)) => left == right,
879        (ModelValue::List(left), ModelValue::List(right)) => left == right,
880        _ => false,
881    }
882}
883
884fn collection_len(value: &ModelValue) -> Result<usize> {
885    match value {
886        ModelValue::String(value) => Ok(value.as_str().len()),
887        ModelValue::List(value) => Ok(value.len()),
888        _ => Err(type_mismatch("expected collection or string").into()),
889    }
890}
891
892fn usize_to_f64(value: usize) -> Result<f64> {
893    let value = u32::try_from(value).map_err(|_| ProfileError::BudgetExceeded {
894        budget: "collection_size",
895    })?;
896    Ok(f64::from(value))
897}
898
899fn contains_value(haystack: &ModelValue, needle: &ModelValue) -> Result<bool> {
900    match (haystack, needle) {
901        (ModelValue::String(haystack), ModelValue::String(needle)) => {
902            Ok(haystack.as_str().contains(needle.as_str()))
903        }
904        (ModelValue::List(values), needle) => {
905            Ok(values.iter().any(|value| values_equal(value, needle)))
906        }
907        _ => Err(type_mismatch("contains requires compatible arguments").into()),
908    }
909}
910
911fn type_mismatch(message: &'static str) -> ProfileError {
912    ProfileError::TypeMismatch {
913        message: BoundedText::unchecked(message),
914    }
915}
916
917fn has_parse_fact(facts: &[ParseFact], name: &str) -> bool {
918    facts.iter().any(|fact| match (name, fact) {
919        ("header", ParseFact::Header { .. })
920        | (
921            "encryption",
922            ParseFact::Encryption {
923                encrypted: true, ..
924            },
925        ) => true,
926        (
927            "streamLengthMismatch",
928            ParseFact::Stream {
929                fact:
930                    StreamFact::Length {
931                        declared,
932                        discovered,
933                    },
934                ..
935            },
936        ) => declared != discovered,
937        _ => false,
938    })
939}
940
941fn pdfa_1b_flavour() -> Result<ValidationFlavour> {
942    Ok(ValidationFlavour::new("pdfa", NonZeroU32::MIN, "b")?)
943}
944
945fn ensure_builtin_flavour(flavour: &ValidationFlavour) -> Result<()> {
946    builtin_source_for_flavour(flavour).map(|_| ())
947}
948
949fn import_generated_profile(source: &GeneratedProfileSource) -> Result<ProfileImportSummary> {
950    let mut import = import_verapdf_profile_xml(source.xml)?;
951    import.profile.identity.id = Identifier::new(source.id)?;
952    import.profile.identity.version = Some(Identifier::new("verapdf-generated")?);
953    import.profile.flavour = parse_display_flavour(source.display_flavour)?;
954    apply_model_schema_checks(&mut import)?;
955    Ok(import)
956}
957
958fn apply_model_schema_checks(import: &mut ProfileImportSummary) -> Result<()> {
959    let registry = crate::validation::ModelRegistry::default_registry();
960    let mut supported_rules = 0_u64;
961    let mut unsupported_rules = 0_u64;
962    for rule in &mut import.profile.rules {
963        if matches!(rule.test, RuleExpr::Unsupported { .. }) {
964            unsupported_rules = unsupported_rules.saturating_add(1);
965            continue;
966        }
967        let unsupported_reason = if registry.has_family(&rule.object_type) {
968            unsupported_property_reason(&registry, &rule.object_type, &rule.test)?
969        } else {
970            Some(BoundedText::new(
971                format!(
972                    "unknown validation model family {}",
973                    rule.object_type.as_str()
974                ),
975                512,
976            )?)
977        };
978        if let Some(reason) = unsupported_reason {
979            let fragment = BoundedText::new(format!("{:?}", rule.test), MAX_PROFILE_STRING_BYTES)
980                .unwrap_or_else(|_| BoundedText::unchecked("rule expression exceeds limit"));
981            rule.test = RuleExpr::Unsupported { fragment, reason };
982            unsupported_rules = unsupported_rules.saturating_add(1);
983        } else {
984            supported_rules = supported_rules.saturating_add(1);
985        }
986    }
987    import.supported_rules = supported_rules;
988    import.unsupported_rules = unsupported_rules;
989    Ok(())
990}
991
992fn unsupported_property_reason(
993    registry: &crate::validation::ModelRegistry,
994    object_type: &ObjectTypeName,
995    expr: &RuleExpr,
996) -> Result<Option<BoundedText>> {
997    let mut properties = Vec::new();
998    collect_property_paths(expr, &mut properties);
999    for property in properties {
1000        let Some(first) = property.parts().first() else {
1001            return Ok(Some(BoundedText::unchecked("empty model property path")));
1002        };
1003        if property.parts().len() > 1 {
1004            return Ok(Some(BoundedText::unchecked(
1005                "nested property path has no bound model link",
1006            )));
1007        }
1008        if !registry.has_family_property(object_type, first) {
1009            return Ok(Some(BoundedText::new(
1010                format!(
1011                    "unknown validation model property {} on {}",
1012                    first.as_str(),
1013                    object_type.as_str()
1014                ),
1015                512,
1016            )?));
1017        }
1018    }
1019    Ok(None)
1020}
1021
1022fn collect_property_paths<'a>(expr: &'a RuleExpr, properties: &mut Vec<&'a PropertyPath>) {
1023    match expr {
1024        RuleExpr::Property { path } => properties.push(path),
1025        RuleExpr::Unary { expr, .. } => collect_property_paths(expr, properties),
1026        RuleExpr::Binary { left, right, .. } => {
1027            collect_property_paths(left, properties);
1028            collect_property_paths(right, properties);
1029        }
1030        RuleExpr::Conditional {
1031            condition,
1032            when_true,
1033            when_false,
1034        } => {
1035            collect_property_paths(condition, properties);
1036            collect_property_paths(when_true, properties);
1037            collect_property_paths(when_false, properties);
1038        }
1039        RuleExpr::Call { args, .. } => {
1040            for arg in args {
1041                collect_property_paths(arg, properties);
1042            }
1043        }
1044        RuleExpr::Bool { .. }
1045        | RuleExpr::Number { .. }
1046        | RuleExpr::String { .. }
1047        | RuleExpr::Null
1048        | RuleExpr::Unsupported { .. } => {}
1049    }
1050}
1051
1052fn builtin_source_for_flavour(
1053    flavour: &ValidationFlavour,
1054) -> Result<&'static GeneratedProfileSource> {
1055    let display = display_flavour(flavour)?;
1056    GENERATED_PROFILE_SOURCES
1057        .iter()
1058        .find(|source| source.display_flavour == display.as_str())
1059        .ok_or_else(|| ProfileError::UnsupportedSelection.into())
1060}
1061
1062/// Returns the stable CLI/catalog spelling for a validation flavour.
1063///
1064/// # Errors
1065///
1066/// Returns [`crate::PdfvError`] when the flavour cannot be represented by the
1067/// built-in profile catalog.
1068pub fn display_flavour(flavour: &ValidationFlavour) -> Result<BoundedText> {
1069    let family = flavour.family.as_str();
1070    let conformance = flavour.conformance.as_str();
1071    let value = match family {
1072        "pdfa" if conformance == "none" => format!("pdfa-{}", flavour.part),
1073        "pdfa" => format!("pdfa-{}{}", flavour.part, conformance),
1074        "pdfua" if flavour.part.get() == 1 && conformance == "none" => String::from("pdfua-1"),
1075        "pdfua" if flavour.part.get() == 2 && conformance == "iso32005" => {
1076            String::from("pdfua-2-iso32005")
1077        }
1078        "wtpdf" if matches!(conformance, "reuse" | "accessibility") => {
1079            format!("wtpdf-1-0-{conformance}")
1080        }
1081        _ => return Err(ProfileError::UnsupportedSelection.into()),
1082    };
1083    Ok(BoundedText::new(value, 128)?)
1084}
1085
1086fn parse_display_flavour(value: &str) -> Result<ValidationFlavour> {
1087    if let Some(rest) = value.strip_prefix("pdfa-") {
1088        return parse_display_pdfa_flavour(rest);
1089    }
1090    if let Some(rest) = value.strip_prefix("pdfua-") {
1091        return parse_display_pdfua_flavour(rest);
1092    }
1093    if let Some(level) = value.strip_prefix("wtpdf-1-0-")
1094        && matches!(level, "reuse" | "accessibility")
1095    {
1096        return ValidationFlavour::new("wtpdf", NonZeroU32::MIN, level).map_err(Into::into);
1097    }
1098    Err(ProfileError::UnsupportedSelection.into())
1099}
1100
1101fn parse_display_pdfa_flavour(rest: &str) -> Result<ValidationFlavour> {
1102    let split_at = rest
1103        .find(|character: char| !character.is_ascii_digit())
1104        .unwrap_or(rest.len());
1105    let (part, conformance) = rest.split_at(split_at);
1106    let part = part
1107        .parse::<u32>()
1108        .map_err(|_| ProfileError::InvalidField {
1109            field: "flavour",
1110            reason: BoundedText::unchecked("PDF/A part is not numeric"),
1111        })?;
1112    let part = NonZeroU32::new(part).ok_or(ProfileError::InvalidField {
1113        field: "flavour",
1114        reason: BoundedText::unchecked("PDF/A part is zero"),
1115    })?;
1116    let conformance = if conformance.is_empty() {
1117        "none"
1118    } else {
1119        conformance
1120    };
1121    ValidationFlavour::new("pdfa", part, conformance).map_err(Into::into)
1122}
1123
1124fn parse_display_pdfua_flavour(rest: &str) -> Result<ValidationFlavour> {
1125    match rest {
1126        "1" => ValidationFlavour::new("pdfua", NonZeroU32::MIN, "none").map_err(Into::into),
1127        "2-iso32005" => ValidationFlavour::new(
1128            "pdfua",
1129            NonZeroU32::new(2).ok_or(ProfileError::UnsupportedSelection)?,
1130            "iso32005",
1131        )
1132        .map_err(Into::into),
1133        _ => Err(ProfileError::UnsupportedSelection.into()),
1134    }
1135}
1136
1137fn m4_profile(flavour: ValidationFlavour) -> Result<ValidationProfile> {
1138    Ok(ValidationProfile {
1139        identity: ProfileIdentity {
1140            id: Identifier::new("pdfv-m4")?,
1141            name: BoundedText::new("pdfv M4 built-in profile", 128)?,
1142            version: Some(Identifier::new("0.1.0")?),
1143        },
1144        flavour,
1145        rules: vec![
1146            rule(
1147                "m0-header-offset-zero",
1148                "document",
1149                "PDF header must start at byte zero",
1150                property_expr("headerOffset")?,
1151                BinaryOp::Eq,
1152                RuleExpr::Number { value: 0.0 },
1153            )?,
1154            rule(
1155                "m0-document-not-encrypted",
1156                "document",
1157                "Encrypted documents are not validated in M0",
1158                property_expr("encrypted")?,
1159                BinaryOp::Eq,
1160                RuleExpr::Bool { value: false },
1161            )?,
1162            rule(
1163                "m0-catalog-present",
1164                "document",
1165                "Trailer must reference a catalog",
1166                property_expr("hasCatalog")?,
1167                BinaryOp::Eq,
1168                RuleExpr::Bool { value: true },
1169            )?,
1170            rule(
1171                "m4-page-contents-present",
1172                "page",
1173                "Page dictionaries must contain contents",
1174                property_expr("hasContents")?,
1175                BinaryOp::Eq,
1176                RuleExpr::Bool { value: true },
1177            )?,
1178            rule(
1179                "m4-page-resources-present",
1180                "page",
1181                "Page dictionaries must contain resources",
1182                property_expr("hasResources")?,
1183                BinaryOp::Eq,
1184                RuleExpr::Bool { value: true },
1185            )?,
1186            rule(
1187                "m4-font-subtype-present",
1188                "font",
1189                "Font dictionaries must contain a Subtype entry",
1190                property_expr("hasSubtype")?,
1191                BinaryOp::Eq,
1192                RuleExpr::Bool { value: true },
1193            )?,
1194            rule(
1195                "m4-annotation-subtype-present",
1196                "annotation",
1197                "Annotation dictionaries must contain a Subtype entry",
1198                property_expr("hasSubtype")?,
1199                BinaryOp::Eq,
1200                RuleExpr::Bool { value: true },
1201            )?,
1202            rule(
1203                "m4-output-intent-profile-present",
1204                "outputIntent",
1205                "Output intent dictionaries must contain a destination output profile",
1206                property_expr("hasDestOutputProfile")?,
1207                BinaryOp::Eq,
1208                RuleExpr::Bool { value: true },
1209            )?,
1210            rule(
1211                "m4-content-stream-length-non-negative",
1212                "contentStream",
1213                "Page content streams must expose a non-negative declared or discovered length",
1214                property_expr("declaredLength")?,
1215                BinaryOp::Ge,
1216                RuleExpr::Number { value: 0.0 },
1217            )?,
1218            rule(
1219                "m0-stream-length-matches",
1220                "stream",
1221                "Stream declared length must match discovered length",
1222                property_expr("lengthMatches")?,
1223                BinaryOp::Eq,
1224                RuleExpr::Bool { value: true },
1225            )?,
1226        ],
1227    })
1228}
1229
1230fn rule(
1231    id: &str,
1232    object_type: &str,
1233    description: &str,
1234    left: RuleExpr,
1235    op: BinaryOp,
1236    right: RuleExpr,
1237) -> Result<Rule> {
1238    Ok(Rule {
1239        id: RuleId(Identifier::new(id)?),
1240        object_type: ObjectTypeName::new(object_type)?,
1241        deferred: false,
1242        tags: Vec::new(),
1243        description: BoundedText::new(description, 256)?,
1244        test: RuleExpr::Binary {
1245            op,
1246            left: Box::new(left),
1247            right: Box::new(right),
1248        },
1249        error: ErrorTemplate {
1250            message: BoundedText::new(description, 256)?,
1251        },
1252        references: Vec::new(),
1253    })
1254}
1255
1256fn property_expr(name: &str) -> Result<RuleExpr> {
1257    Ok(RuleExpr::Property {
1258        path: PropertyPath::new(vec![PropertyName(Identifier::new(name)?)]),
1259    })
1260}
1261
/// Loads and imports a veraPDF profile XML file from disk.
///
/// The file is opened once and read through a hard cap of
/// `MAX_PROFILE_XML_BYTES`. The metadata length is still checked first so
/// ordinary oversized files are rejected without reading them, but it is not
/// trusted on its own: FIFOs, device nodes, and files that grow after the
/// check can report a small length while yielding unbounded data.
///
/// # Errors
///
/// Returns an I/O error (tagged with `path`) when the file cannot be opened,
/// inspected, read, or is not valid UTF-8, and an invalid-XML error when the
/// content exceeds the byte limit or fails to import.
#[cfg(feature = "custom-profiles")]
#[allow(
    clippy::disallowed_methods,
    reason = "custom profile loading is a synchronous library API matching validate_path"
)]
fn load_verapdf_profile_path(path: &std::path::Path) -> Result<ProfileImportSummary> {
    use std::io::Read as _;

    let io_error = |source: std::io::Error| crate::PdfvError::Io {
        path: Some(path.to_path_buf()),
        source,
    };
    let too_large = || ProfileError::InvalidXml {
        reason: BoundedText::unchecked("profile XML exceeds byte limit"),
    };

    let file = std::fs::File::open(path).map_err(io_error)?;
    if file.metadata().map_err(io_error)?.len() > MAX_PROFILE_XML_BYTES {
        return Err(too_large().into());
    }

    // Read at most one byte past the limit so "exactly at the limit" and
    // "over the limit" can be told apart without buffering unbounded input.
    let mut bytes = Vec::new();
    file.take(MAX_PROFILE_XML_BYTES.saturating_add(1))
        .read_to_end(&mut bytes)
        .map_err(io_error)?;
    if u64::try_from(bytes.len()).map_or(true, |len| len > MAX_PROFILE_XML_BYTES) {
        return Err(too_large().into());
    }

    // Decode only after the size check so a truncated multi-byte sequence at
    // the cap is reported as "too large" rather than as invalid UTF-8.
    let xml = String::from_utf8(bytes).map_err(|error| {
        io_error(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            error,
        ))
    })?;
    import_verapdf_profile_xml(&xml)
}
1284
/// Imports a veraPDF validation profile XML document into bounded profile data.
///
/// Unsupported expressions are retained as rules that report `unsupportedRules`
/// during validation instead of being silently skipped. The returned summary
/// carries the imported profile together with counts of supported and
/// unsupported rules.
///
/// The document is rejected when it exceeds the profile XML byte, element,
/// depth, attribute, or rule limits, when it uses an element or attribute
/// outside the accepted set, or when the `profile` element carries no
/// `flavour` attribute.
///
/// # Errors
///
/// Returns [`crate::PdfvError`] when XML, identifiers, or bounded strings are invalid.
pub fn import_verapdf_profile_xml(xml: &str) -> Result<ProfileImportSummary> {
    import_verapdf_profile_xml_impl(xml)
}
1296
/// Event-driven importer behind [`import_verapdf_profile_xml`].
///
/// Walks the XML once with `quick_xml::Reader`, tracking nesting depth, the
/// element count, the rule currently being built, and which text slot (if any)
/// the next text event belongs to. Start and empty elements are validated
/// against the accepted element/attribute set before they are interpreted.
/// After the walk, the profile id is derived from the flavour and rules are
/// counted as supported or unsupported by inspecting their parsed test.
#[allow(
    clippy::too_many_lines,
    reason = "event-driven XML import keeps parser state local and explicit"
)]
fn import_verapdf_profile_xml_impl(xml: &str) -> Result<ProfileImportSummary> {
    use quick_xml::{Reader, events::Event};

    // Reject oversized input before any parsing work is done.
    if u64::try_from(xml.len()).map_err(|_| ProfileError::InvalidXml {
        reason: BoundedText::unchecked("profile XML length overflow"),
    })? > MAX_PROFILE_XML_BYTES
    {
        return Err(ProfileError::InvalidXml {
            reason: BoundedText::unchecked("profile XML exceeds byte limit"),
        }
        .into());
    }

    let mut reader = Reader::from_str(xml);
    reader.config_mut().trim_text(true);
    // Parser state: budgets, collected profile fields, and the rule in progress.
    let mut elements = 0_u64;
    let mut profile_name: Option<BoundedText> = None;
    let mut flavour: Option<ValidationFlavour> = None;
    let mut rules = Vec::new();
    let mut current_rule: Option<XmlRuleBuilder> = None;
    let mut current_text = XmlTextTarget::None;
    let mut depth = 0_u32;

    loop {
        let event = reader
            .read_event()
            .map_err(|error| ProfileError::InvalidXml {
                reason: BoundedText::new(error.to_string(), 512)
                    .unwrap_or_else(|_| BoundedText::unchecked("XML parser error")),
            })?;
        match event {
            Event::Start(element) => {
                validate_element(&element)?;
                // Enforce the nesting and element-count budgets.
                depth = depth.checked_add(1).ok_or(ProfileError::InvalidXml {
                    reason: BoundedText::unchecked("profile XML depth overflow"),
                })?;
                if depth > MAX_PROFILE_XML_DEPTH {
                    return Err(ProfileError::InvalidXml {
                        reason: BoundedText::unchecked("profile XML exceeds depth limit"),
                    }
                    .into());
                }
                elements = elements.checked_add(1).ok_or(ProfileError::InvalidXml {
                    reason: BoundedText::unchecked("profile XML element count overflow"),
                })?;
                if elements > MAX_PROFILE_XML_ELEMENTS {
                    return Err(ProfileError::InvalidXml {
                        reason: BoundedText::unchecked("profile XML exceeds element limit"),
                    }
                    .into());
                }
                match element.name().as_ref() {
                    b"profile" => {
                        flavour = profile_flavour_attr(&element)?;
                    }
                    b"rule" => {
                        // The limit is checked against rules already finished.
                        if rules.len() >= MAX_PROFILE_RULES {
                            return Err(ProfileError::InvalidXml {
                                reason: BoundedText::unchecked("profile XML exceeds rule limit"),
                            }
                            .into());
                        }
                        current_rule = Some(XmlRuleBuilder::from_rule_start(&element)?);
                    }
                    // A `name` element only names the profile when no rule is open.
                    b"name" if current_rule.is_none() => current_text = XmlTextTarget::ProfileName,
                    b"description" if current_rule.is_some() => {
                        current_text = XmlTextTarget::RuleDescription;
                    }
                    b"test" if current_rule.is_some() => current_text = XmlTextTarget::RuleTest,
                    b"message" if current_rule.is_some() => {
                        current_text = XmlTextTarget::RuleMessage;
                    }
                    b"id" if current_rule.is_some() => {
                        if let Some(rule) = current_rule.as_mut() {
                            rule.id = Some(rule_id_from_attrs(&element)?);
                        }
                    }
                    b"reference" if current_rule.is_some() => {
                        if let Some(rule) = current_rule.as_mut() {
                            rule.references.push(reference_from_attrs(&element)?);
                        }
                    }
                    _ => {}
                }
            }
            Event::Text(text) => {
                let decoded = text.decode().map_err(|error| ProfileError::InvalidXml {
                    reason: BoundedText::new(error.to_string(), 512)
                        .unwrap_or_else(|_| BoundedText::unchecked("XML text decode error")),
                })?;
                // The bound applies to every text node, including ones that are
                // discarded below because no text slot is active.
                let bounded = BoundedText::new(decoded.into_owned(), MAX_PROFILE_STRING_BYTES)?;
                // Each text event replaces the slot's previous content rather
                // than appending to it.
                match current_text {
                    XmlTextTarget::ProfileName => profile_name = Some(bounded),
                    XmlTextTarget::RuleDescription => {
                        if let Some(rule) = current_rule.as_mut() {
                            rule.description = Some(bounded);
                        }
                    }
                    XmlTextTarget::RuleTest => {
                        if let Some(rule) = current_rule.as_mut() {
                            rule.test = Some(bounded);
                        }
                    }
                    XmlTextTarget::RuleMessage => {
                        if let Some(rule) = current_rule.as_mut() {
                            rule.message = Some(bounded);
                        }
                    }
                    XmlTextTarget::None => {}
                }
            }
            Event::End(element) => {
                match element.name().as_ref() {
                    // Closing any text-bearing element stops text capture.
                    b"name" | b"description" | b"test" | b"message" => {
                        current_text = XmlTextTarget::None;
                    }
                    b"rule" => {
                        let Some(builder) = current_rule.take() else {
                            return Err(ProfileError::InvalidXml {
                                reason: BoundedText::unchecked("closing rule without start"),
                            }
                            .into());
                        };
                        rules.push(builder.finish()?);
                    }
                    _ => {}
                }
                depth = depth.checked_sub(1).ok_or(ProfileError::InvalidXml {
                    reason: BoundedText::unchecked("profile XML depth underflow"),
                })?;
            }
            Event::Empty(element) => {
                // Self-closing elements count toward the element budget but do
                // not change depth; only `id` and `reference` are interpreted.
                validate_element(&element)?;
                elements = elements.checked_add(1).ok_or(ProfileError::InvalidXml {
                    reason: BoundedText::unchecked("profile XML element count overflow"),
                })?;
                if elements > MAX_PROFILE_XML_ELEMENTS {
                    return Err(ProfileError::InvalidXml {
                        reason: BoundedText::unchecked("profile XML exceeds element limit"),
                    }
                    .into());
                }
                if element.name().as_ref() == b"id"
                    && let Some(rule) = current_rule.as_mut()
                {
                    rule.id = Some(rule_id_from_attrs(&element)?);
                }
                if element.name().as_ref() == b"reference"
                    && let Some(rule) = current_rule.as_mut()
                {
                    rule.references.push(reference_from_attrs(&element)?);
                }
            }
            Event::Eof => break,
            // NOTE(review): every other event kind is skipped. If the quick-xml
            // version in use reports CDATA or entity references (e.g. `&lt;`,
            // `&amp;`) as separate events, that content is dropped and the
            // surrounding text segments overwrite each other in the Text arm —
            // confirm escaped operators in `<test>` text survive import.
            _ => {}
        }
    }

    let flavour = flavour.ok_or(ProfileError::InvalidXml {
        reason: BoundedText::unchecked("profile flavour is missing"),
    })?;
    let profile_id = profile_id_for_flavour(&flavour)?;
    // Tally rules by whether their test parsed into a supported expression.
    let mut supported_rules = 0_u64;
    let mut unsupported_rules = 0_u64;
    for rule in &rules {
        if matches!(rule.test, RuleExpr::Unsupported { .. }) {
            unsupported_rules = unsupported_rules.saturating_add(1);
        } else {
            supported_rules = supported_rules.saturating_add(1);
        }
    }

    Ok(ProfileImportSummary {
        profile: ValidationProfile {
            identity: ProfileIdentity {
                id: profile_id,
                name: profile_name.unwrap_or_else(|| BoundedText::unchecked("veraPDF profile")),
                version: Some(Identifier::new("verapdf-xml")?),
            },
            flavour,
            rules,
        },
        supported_rules,
        unsupported_rules,
    })
}
1487
/// Destination for the next XML text event seen by the profile importer.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum XmlTextTarget {
    /// Text is not captured.
    None,
    /// Text becomes the profile name (a `name` element outside any rule).
    ProfileName,
    /// Text becomes the current rule's description.
    RuleDescription,
    /// Text becomes the current rule's test expression source.
    RuleTest,
    /// Text becomes the current rule's error message.
    RuleMessage,
}
1496
/// Accumulates the pieces of one `rule` element while it is being parsed.
#[derive(Debug, Default)]
struct XmlRuleBuilder {
    /// Object type mapped from the rule's `object` attribute.
    object_type: Option<ObjectTypeName>,
    /// Reason reported by the object-type mapping when the rule cannot be
    /// supported; forces the finished rule's test to be unsupported.
    unsupported_reason: Option<BoundedText>,
    /// Value of the optional `deferred` attribute (false when absent).
    deferred: bool,
    /// Rule id built from the attributes of the nested `id` element.
    id: Option<RuleId>,
    /// Text of the nested `description` element.
    description: Option<BoundedText>,
    /// Raw source text of the nested `test` element.
    test: Option<BoundedText>,
    /// Text of the nested `message` element.
    message: Option<BoundedText>,
    /// References collected from nested `reference` elements.
    references: Vec<SpecReference>,
}
1508
1509impl XmlRuleBuilder {
1510    fn from_rule_start(element: &quick_xml::events::BytesStart<'_>) -> Result<Self> {
1511        let source_object_type = required_attr(element, b"object")?;
1512        let (object_type, unsupported_reason) = map_verapdf_object_type(&source_object_type)?;
1513        Ok(Self {
1514            object_type: Some(object_type),
1515            unsupported_reason,
1516            deferred: optional_bool_attr(element, b"deferred")?,
1517            ..Self::default()
1518        })
1519    }
1520
1521    fn finish(self) -> Result<Rule> {
1522        let id = self.id.ok_or(ProfileError::InvalidXml {
1523            reason: BoundedText::unchecked("rule id is missing"),
1524        })?;
1525        let object_type = self.object_type.ok_or(ProfileError::InvalidXml {
1526            reason: BoundedText::unchecked("rule object type is missing"),
1527        })?;
1528        let description = self
1529            .description
1530            .unwrap_or_else(|| BoundedText::unchecked("Imported veraPDF rule"));
1531        let source_test = self.test.ok_or(ProfileError::InvalidXml {
1532            reason: BoundedText::unchecked("rule test is missing"),
1533        })?;
1534        let test = if let Some(reason) = self.unsupported_reason {
1535            RuleExpr::Unsupported {
1536                fragment: source_test.clone(),
1537                reason,
1538            }
1539        } else {
1540            parse_imported_expr(source_test.as_str()).unwrap_or_else(|reason| {
1541                RuleExpr::Unsupported {
1542                    fragment: source_test.clone(),
1543                    reason,
1544                }
1545            })
1546        };
1547        let message = self.message.unwrap_or_else(|| description.clone());
1548        Ok(Rule {
1549            id,
1550            object_type,
1551            deferred: self.deferred,
1552            tags: Vec::new(),
1553            description,
1554            test,
1555            error: ErrorTemplate { message },
1556            references: self.references,
1557        })
1558    }
1559}
1560
1561fn validate_element(element: &quick_xml::events::BytesStart<'_>) -> Result<()> {
1562    let name = element.name();
1563    let name = name.as_ref();
1564    if !matches!(
1565        name,
1566        b"profile"
1567            | b"details"
1568            | b"name"
1569            | b"description"
1570            | b"hash"
1571            | b"rules"
1572            | b"rule"
1573            | b"id"
1574            | b"test"
1575            | b"error"
1576            | b"message"
1577            | b"arguments"
1578            | b"argument"
1579            | b"references"
1580            | b"reference"
1581            | b"variables"
1582            | b"variable"
1583            | b"defaultValue"
1584            | b"value"
1585    ) {
1586        return Err(ProfileError::InvalidXml {
1587            reason: BoundedText::new(
1588                format!(
1589                    "unknown profile XML element {}",
1590                    String::from_utf8_lossy(name)
1591                ),
1592                512,
1593            )
1594            .unwrap_or_else(|_| BoundedText::unchecked("unknown profile XML element")),
1595        }
1596        .into());
1597    }
1598    let mut attributes = 0_usize;
1599    for attr in element.attributes().with_checks(true) {
1600        let attr = attr.map_err(|error| ProfileError::InvalidXml {
1601            reason: BoundedText::new(error.to_string(), 512)
1602                .unwrap_or_else(|_| BoundedText::unchecked("XML attribute error")),
1603        })?;
1604        attributes = attributes.checked_add(1).ok_or(ProfileError::InvalidXml {
1605            reason: BoundedText::unchecked("profile XML attribute count overflow"),
1606        })?;
1607        if attributes > MAX_PROFILE_XML_ATTRIBUTES {
1608            return Err(ProfileError::InvalidXml {
1609                reason: BoundedText::unchecked("profile XML exceeds attribute limit"),
1610            }
1611            .into());
1612        }
1613        validate_attribute(name, attr.key.as_ref())?;
1614    }
1615    Ok(())
1616}
1617
1618fn validate_attribute(element: &[u8], attr: &[u8]) -> Result<()> {
1619    let allowed = match element {
1620        b"profile" => matches!(attr, b"flavour" | b"xmlns"),
1621        b"details" => matches!(attr, b"creator" | b"created"),
1622        b"rule" => matches!(attr, b"object" | b"deferred" | b"tags"),
1623        b"id" => matches!(attr, b"specification" | b"clause" | b"testNumber"),
1624        b"reference" => matches!(attr, b"specification" | b"clause"),
1625        b"variable" => matches!(attr, b"name" | b"object"),
1626        _ => false,
1627    };
1628    if allowed {
1629        Ok(())
1630    } else {
1631        Err(ProfileError::InvalidXml {
1632            reason: BoundedText::new(
1633                format!(
1634                    "unknown profile XML attribute {}",
1635                    String::from_utf8_lossy(attr)
1636                ),
1637                512,
1638            )
1639            .unwrap_or_else(|_| BoundedText::unchecked("unknown profile XML attribute")),
1640        }
1641        .into())
1642    }
1643}
1644
1645fn profile_flavour_attr(
1646    element: &quick_xml::events::BytesStart<'_>,
1647) -> Result<Option<ValidationFlavour>> {
1648    for attr in element.attributes().with_checks(true) {
1649        let attr = attr.map_err(|error| ProfileError::InvalidXml {
1650            reason: BoundedText::new(error.to_string(), 512)
1651                .unwrap_or_else(|_| BoundedText::unchecked("XML attribute error")),
1652        })?;
1653        if attr.key.as_ref() == b"flavour" {
1654            let value = String::from_utf8_lossy(attr.value.as_ref()).into_owned();
1655            return Ok(Some(parse_verapdf_flavour(&value)?));
1656        }
1657    }
1658    Ok(None)
1659}
1660
1661fn required_attr(element: &quick_xml::events::BytesStart<'_>, name: &[u8]) -> Result<String> {
1662    for attr in element.attributes().with_checks(true) {
1663        let attr = attr.map_err(|error| ProfileError::InvalidXml {
1664            reason: BoundedText::new(error.to_string(), 512)
1665                .unwrap_or_else(|_| BoundedText::unchecked("XML attribute error")),
1666        })?;
1667        if attr.key.as_ref() == name {
1668            return Ok(String::from_utf8_lossy(attr.value.as_ref()).into_owned());
1669        }
1670    }
1671    Err(ProfileError::InvalidXml {
1672        reason: BoundedText::unchecked("required XML attribute is missing"),
1673    }
1674    .into())
1675}
1676
1677fn optional_bool_attr(element: &quick_xml::events::BytesStart<'_>, name: &[u8]) -> Result<bool> {
1678    for attr in element.attributes().with_checks(true) {
1679        let attr = attr.map_err(|error| ProfileError::InvalidXml {
1680            reason: BoundedText::new(error.to_string(), 512)
1681                .unwrap_or_else(|_| BoundedText::unchecked("XML attribute error")),
1682        })?;
1683        if attr.key.as_ref() == name {
1684            return match attr.value.as_ref() {
1685                b"true" => Ok(true),
1686                b"false" => Ok(false),
1687                _ => Err(ProfileError::InvalidField {
1688                    field: "deferred",
1689                    reason: BoundedText::unchecked("expected true or false"),
1690                }
1691                .into()),
1692            };
1693        }
1694    }
1695    Ok(false)
1696}
1697
1698fn rule_id_from_attrs(element: &quick_xml::events::BytesStart<'_>) -> Result<RuleId> {
1699    let specification = required_attr(element, b"specification")?;
1700    let clause = required_attr(element, b"clause")?;
1701    let test_number = required_attr(element, b"testNumber")?;
1702    let text = format!(
1703        "{}-{}-{}",
1704        identifier_fragment(&specification),
1705        identifier_fragment(&clause),
1706        identifier_fragment(&test_number)
1707    );
1708    Ok(RuleId(Identifier::new(text)?))
1709}
1710
1711fn reference_from_attrs(element: &quick_xml::events::BytesStart<'_>) -> Result<SpecReference> {
1712    Ok(SpecReference {
1713        specification: BoundedText::new(required_attr(element, b"specification")?, 512)?,
1714        clause: BoundedText::new(required_attr(element, b"clause")?, 512)?,
1715    })
1716}
1717
/// Normalizes `value` into a lowercase identifier fragment.
///
/// ASCII letters and digits are kept (lowercased); every other character becomes `-`.
/// Leading and trailing dashes are removed; runs of dashes inside the text are kept as they are.
fn identifier_fragment(value: &str) -> String {
    let mut normalized = String::with_capacity(value.len());
    for character in value.chars() {
        normalized.push(if character.is_ascii_alphanumeric() {
            character.to_ascii_lowercase()
        } else {
            '-'
        });
    }
    normalized.trim_matches('-').to_owned()
}
1732
1733fn parse_verapdf_flavour(value: &str) -> Result<ValidationFlavour> {
1734    let parts = value.split('_').collect::<Vec<_>>();
1735    let Some(family) = parts.first().copied() else {
1736        return Err(ProfileError::InvalidField {
1737            field: "flavour",
1738            reason: BoundedText::unchecked("profile flavour is empty"),
1739        }
1740        .into());
1741    };
1742    match family {
1743        "PDFA" => parse_numbered_flavour("pdfa", &parts, "none"),
1744        "PDFUA" => parse_pdfua_xml_flavour(&parts),
1745        "WTPDF" => parse_wtpdf_flavour(&parts),
1746        _ => Err(ProfileError::InvalidField {
1747            field: "flavour",
1748            reason: BoundedText::unchecked("unsupported profile flavour family"),
1749        }
1750        .into()),
1751    }
1752}
1753
1754fn parse_pdfua_xml_flavour(parts: &[&str]) -> Result<ValidationFlavour> {
1755    let part = parts
1756        .get(1)
1757        .ok_or(ProfileError::InvalidField {
1758            field: "flavour",
1759            reason: BoundedText::unchecked("missing PDF/UA part"),
1760        })?
1761        .parse::<u32>()
1762        .map_err(|_| ProfileError::InvalidField {
1763            field: "flavour",
1764            reason: BoundedText::unchecked("PDF/UA part is not numeric"),
1765        })?;
1766    let part = NonZeroU32::new(part).ok_or(ProfileError::InvalidField {
1767        field: "flavour",
1768        reason: BoundedText::unchecked("PDF/UA part is zero"),
1769    })?;
1770    let conformance = if part.get() == 2 { "iso32005" } else { "none" };
1771    ValidationFlavour::new("pdfua", part, conformance).map_err(Into::into)
1772}
1773
1774fn parse_numbered_flavour(
1775    family: &str,
1776    parts: &[&str],
1777    default_conformance: &str,
1778) -> Result<ValidationFlavour> {
1779    let part = parts
1780        .get(1)
1781        .ok_or(ProfileError::InvalidField {
1782            field: "flavour",
1783            reason: BoundedText::unchecked("missing flavour part"),
1784        })?
1785        .parse::<u32>()
1786        .map_err(|_| ProfileError::InvalidField {
1787            field: "flavour",
1788            reason: BoundedText::unchecked("flavour part is not numeric"),
1789        })?;
1790    let part = NonZeroU32::new(part).ok_or(ProfileError::InvalidField {
1791        field: "flavour",
1792        reason: BoundedText::unchecked("flavour part is zero"),
1793    })?;
1794    let conformance = parts
1795        .get(2)
1796        .copied()
1797        .unwrap_or(default_conformance)
1798        .to_ascii_lowercase();
1799    ValidationFlavour::new(family, part, conformance).map_err(Into::into)
1800}
1801
1802fn parse_wtpdf_flavour(parts: &[&str]) -> Result<ValidationFlavour> {
1803    if parts.len() != 4 || parts.get(1).copied() != Some("1") || parts.get(2).copied() != Some("0")
1804    {
1805        return Err(ProfileError::InvalidField {
1806            field: "flavour",
1807            reason: BoundedText::unchecked("expected WTPDF_1_0_<level>"),
1808        }
1809        .into());
1810    }
1811    let conformance = parts
1812        .get(3)
1813        .ok_or(ProfileError::InvalidField {
1814            field: "flavour",
1815            reason: BoundedText::unchecked("missing WTPDF level"),
1816        })?
1817        .to_ascii_lowercase();
1818    ValidationFlavour::new("wtpdf", NonZeroU32::MIN, conformance).map_err(Into::into)
1819}
1820
1821fn profile_id_for_flavour(flavour: &ValidationFlavour) -> Result<Identifier> {
1822    let display = display_flavour(flavour)?;
1823    Identifier::new(format!("verapdf-{}", display.as_str())).map_err(Into::into)
1824}
1825
/// Maps a veraPDF object type name onto the object family name used by imported rules.
///
/// Returns the mapped [`ObjectTypeName`] together with an optional note. For a known type the
/// note is `None`. For an unknown type the function falls back to the `document` family and
/// returns a note of the form `unsupported veraPDF object type <value>`.
///
/// # Errors
///
/// Fails when [`ObjectTypeName::new`] rejects the family name or when the note is rejected by
/// [`BoundedText::new`] with a limit of 512.
#[allow(
    clippy::too_many_lines,
    reason = "veraPDF object taxonomy mapping is intentionally centralized for schema checks"
)]
fn map_verapdf_object_type(value: &str) -> Result<(ObjectTypeName, Option<BoundedText>)> {
    let mapped = match value {
        // Document-level and low-level COS container types.
        "CosDocument" | "PDDocument" | "CosXRef" | "CosTrailer" | "CosIndirect" | "CosInfo" => {
            Some("document")
        }
        "CosStream" => Some("stream"),
        // Remaining COS value types share the generic `object` family.
        "CosArray"
        | "CosDict"
        | "CosInteger"
        | "CosName"
        | "CosReal"
        | "CosString"
        | "CosTextString"
        | "CosUnicodeName"
        | "CosLang"
        | "CosBBox"
        | "CosActualText"
        | "CosAlt"
        | "CosBM"
        | "CosRenderingIntent"
        | "CosFileSpecification"
        | "CosFilter"
        | "CosIIFilter" => Some("object"),
        // Metadata, XMP and extension-schema types.
        "GFCosMetadata"
        | "PDMetadata"
        | "Metadata"
        | "PDFAIdentification"
        | "PDFUAIdentification"
        | "XMPPackage"
        | "MainXMPPackage"
        | "XMPProperty"
        | "XMPLangAlt"
        | "ExtensionSchemaValueType"
        | "ExtensionSchemaDefinition"
        | "ExtensionSchemaProperty"
        | "ExtensionSchemaField"
        | "ExtensionSchemasContainer"
        | "ExtensionSchemaObject" => Some("metadata"),
        "PDCatalog" | "Catalog" => Some("catalog"),
        "PDPage" | "Page" => Some("page"),
        // Font dictionaries, font programs and glyphs.
        "PDFont"
        | "Font"
        | "PDSimpleFont"
        | "PDTrueTypeFont"
        | "PDType0Font"
        | "PDType1Font"
        | "PDCIDFont"
        | "TrueTypeFontProgram"
        | "Glyph" => Some("font"),
        "PDCMap" | "PDReferencedCMap" | "CMapFile" => Some("cMap"),
        "EmbeddedFile" => Some("embeddedFontFile"),
        // Annotation subtypes.
        "PDAnnotation"
        | "Annotation"
        | "PDAnnot"
        | "PDWidgetAnnot"
        | "PDLinkAnnot"
        | "PDMarkupAnnot"
        | "PDTrapNetAnnot"
        | "PDPrinterMarkAnnot"
        | "PDWatermarkAnnot"
        | "PDSoundAnnot"
        | "PDScreenAnnot"
        | "PDPopupAnnot"
        | "PDMovieAnnot"
        | "PDFileAttachmentAnnot"
        | "PDRubberStampAnnot"
        | "PDRichMediaAnnot"
        | "PD3DAnnot"
        | "PDInkAnnot" => Some("annotation"),
        "PDAction" | "PDNamedAction" | "PDGoToAction" | "PDAdditionalActions" => Some("action"),
        "PDAcroForm" => Some("acroForm"),
        "PDFormField" | "PDTextField" => Some("formField"),
        "OutputIntents" | "OutputIntent" | "PDOutputIntent" => Some("outputIntent"),
        "PDXObject" | "PDXForm" | "PD3DStream" | "PDMediaClip" | "PDRichMedia" => Some("xObject"),
        "PDXImage" | "JPEG2000" | "PDMaskImage" => Some("image"),
        "PDContentStream" | "Op_q_gsave" => Some("contentStream"),
        "Op_Undefined" => Some("undefinedOperator"),
        "PDOCConfig" => Some("optionalContentProperties"),
        "PDPerms" => Some("permissions"),
        "PDOutline" => Some("outline"),
        "PDDestination" => Some("destination"),
        "PDExtGState" => Some("extGState"),
        // Colour spaces, ICC profiles and the other types grouped under `colorSpace`.
        "PDDeviceN" | "PDICCBasedCMYK" | "PDDeviceRGB" | "PDDeviceGray" | "PDDeviceCMYK"
        | "PDSeparation" | "PDHalftone" | "PDGroup" | "ICCProfile" | "ICCOutputProfile"
        | "ICCInputProfile" => Some("colorSpace"),
        "PDStructTreeRoot" => Some("structureTreeRoot"),
        // Structure element types.
        "PDStructElem"
        | "SEDocument"
        | "SEDocumentFragment"
        | "SEPart"
        | "SEArt"
        | "SESect"
        | "SEDiv"
        | "SEBlockQuote"
        | "SECaption"
        | "SETOC"
        | "SETOCI"
        | "SEIndex"
        | "SENonStruct"
        | "SEPrivate"
        | "SEP"
        | "SEH"
        | "SEHn"
        | "SEH1"
        | "SEH2"
        | "SEH3"
        | "SEH4"
        | "SEH5"
        | "SEH6"
        | "SEL"
        | "SELI"
        | "SELbl"
        | "SELBody"
        | "SETable"
        | "SETR"
        | "SETH"
        | "SETD"
        | "SETHead"
        | "SETBody"
        | "SETFoot"
        | "SESpan"
        | "SEQuote"
        | "SENote"
        | "SEReference"
        | "SEBibEntry"
        | "SECode"
        | "SELink"
        | "SEAnnot"
        | "SERuby"
        | "SEWarichu"
        | "SEFigure"
        | "SEFormula"
        | "SEForm"
        | "SEArtifact"
        | "SEStrong"
        | "SEEm"
        | "SETitle"
        | "SEFENote"
        | "SEAside"
        | "SESub"
        | "SEMathMLStructElem"
        | "SEMarkedContent"
        | "SESimpleContentItem"
        | "SEGraphicContentItem"
        | "SETableCell"
        | "SENonStandard"
        | "SETextItem"
        | "SEWT"
        | "SEWP"
        | "SERT"
        | "SERP"
        | "SERB" => Some("structureElement"),
        "PDSignature" | "PDSigRef" | "PKCSDataObject" => Some("signature"),
        "PDEncryption" => Some("security"),
        _ => None,
    };
    if let Some(mapped) = mapped {
        Ok((ObjectTypeName::new(mapped)?, None))
    } else {
        // Unknown types are not an error: they fall back to `document` and carry a note so the
        // caller can tell that the mapping is unsupported.
        Ok((
            ObjectTypeName::new("document")?,
            Some(BoundedText::new(
                format!("unsupported veraPDF object type {value}"),
                512,
            )?),
        ))
    }
}
1998
1999fn parse_imported_expr(input: &str) -> std::result::Result<RuleExpr, BoundedText> {
2000    let mut parser = ExprParser::new(input);
2001    let expr = parser.parse_conditional()?;
2002    if matches!(expr, RuleExpr::Unsupported { .. }) {
2003        return Ok(expr);
2004    }
2005    parser.skip_ws();
2006    if parser.remaining().is_empty() {
2007        Ok(expr)
2008    } else {
2009        Err(BoundedText::unchecked("trailing expression input"))
2010    }
2011}
2012
/// Recursive-descent parser over an imported rule expression.
#[derive(Debug)]
struct ExprParser<'a> {
    /// Complete expression text being parsed.
    input: &'a str,
    /// Byte offset into `input` of the next unread byte.
    offset: usize,
}
2018
2019impl<'a> ExprParser<'a> {
2020    fn new(input: &'a str) -> Self {
2021        Self { input, offset: 0 }
2022    }
2023
2024    fn remaining(&self) -> &'a str {
2025        &self.input[self.offset..]
2026    }
2027
2028    fn skip_ws(&mut self) {
2029        while self
2030            .remaining()
2031            .as_bytes()
2032            .first()
2033            .is_some_and(u8::is_ascii_whitespace)
2034        {
2035            self.offset = self.offset.saturating_add(1);
2036        }
2037    }
2038
2039    fn consume(&mut self, token: &str) -> bool {
2040        self.skip_ws();
2041        if self.remaining().starts_with(token) {
2042            self.offset = self.offset.saturating_add(token.len());
2043            true
2044        } else {
2045            false
2046        }
2047    }
2048
2049    fn parse_conditional(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2050        let condition = self.parse_or()?;
2051        if self.consume("?") {
2052            let when_true = self.parse_conditional()?;
2053            if !self.consume(":") {
2054                return Err(BoundedText::unchecked("missing ternary separator"));
2055            }
2056            let when_false = self.parse_conditional()?;
2057            Ok(RuleExpr::Conditional {
2058                condition: Box::new(condition),
2059                when_true: Box::new(when_true),
2060                when_false: Box::new(when_false),
2061            })
2062        } else {
2063            Ok(condition)
2064        }
2065    }
2066
2067    fn parse_or(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2068        let mut expr = self.parse_and()?;
2069        while self.consume("||") {
2070            let right = self.parse_and()?;
2071            expr = RuleExpr::Binary {
2072                op: BinaryOp::Or,
2073                left: Box::new(expr),
2074                right: Box::new(right),
2075            };
2076        }
2077        Ok(expr)
2078    }
2079
2080    fn parse_and(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2081        let mut expr = self.parse_comparison()?;
2082        while self.consume("&&") {
2083            let right = self.parse_comparison()?;
2084            expr = RuleExpr::Binary {
2085                op: BinaryOp::And,
2086                left: Box::new(expr),
2087                right: Box::new(right),
2088            };
2089        }
2090        Ok(expr)
2091    }
2092
2093    fn parse_comparison(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2094        let left = self.parse_additive()?;
2095        let op = if self.consume("==") {
2096            Some(BinaryOp::Eq)
2097        } else if self.consume("!=") {
2098            Some(BinaryOp::Ne)
2099        } else if self.consume("<=") {
2100            Some(BinaryOp::Le)
2101        } else if self.consume(">=") {
2102            Some(BinaryOp::Ge)
2103        } else if self.consume("<") {
2104            Some(BinaryOp::Lt)
2105        } else if self.consume(">") {
2106            Some(BinaryOp::Gt)
2107        } else {
2108            None
2109        };
2110        if let Some(op) = op {
2111            let right = self.parse_additive()?;
2112            Ok(RuleExpr::Binary {
2113                op,
2114                left: Box::new(left),
2115                right: Box::new(right),
2116            })
2117        } else {
2118            Ok(left)
2119        }
2120    }
2121
2122    fn parse_additive(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2123        let mut expr = self.parse_multiplicative()?;
2124        loop {
2125            let op = if self.consume("+") {
2126                Some(BinaryOp::Add)
2127            } else if self.consume("-") {
2128                Some(BinaryOp::Sub)
2129            } else {
2130                None
2131            };
2132            let Some(op) = op else {
2133                return Ok(expr);
2134            };
2135            expr = RuleExpr::Binary {
2136                op,
2137                left: Box::new(expr),
2138                right: Box::new(self.parse_multiplicative()?),
2139            };
2140        }
2141    }
2142
2143    fn parse_multiplicative(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2144        let mut expr = self.parse_unary()?;
2145        loop {
2146            let op = if self.consume("*") {
2147                Some(BinaryOp::Mul)
2148            } else if self.consume("/") {
2149                Some(BinaryOp::Div)
2150            } else if self.consume("%") {
2151                Some(BinaryOp::Rem)
2152            } else {
2153                None
2154            };
2155            let Some(op) = op else {
2156                return Ok(expr);
2157            };
2158            expr = RuleExpr::Binary {
2159                op,
2160                left: Box::new(expr),
2161                right: Box::new(self.parse_unary()?),
2162            };
2163        }
2164    }
2165
2166    fn parse_unary(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2167        self.skip_ws();
2168        if self.consume("!") {
2169            return Ok(RuleExpr::Unary {
2170                op: UnaryOp::Not,
2171                expr: Box::new(self.parse_unary()?),
2172            });
2173        }
2174        self.parse_postfix()
2175    }
2176
2177    fn parse_postfix(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2178        let expr = self.parse_primary()?;
2179        self.skip_ws();
2180        if self.consume(".") {
2181            if self.consume("length") && self.consume("(") && self.consume(")") {
2182                return Ok(RuleExpr::Call {
2183                    function: BuiltinFunction::Size,
2184                    args: vec![expr],
2185                });
2186            }
2187            if self.consume("test") && self.consume("(") {
2188                let arg = self.parse_conditional()?;
2189                if !self.consume(")") {
2190                    return Err(BoundedText::unchecked("missing call closing parenthesis"));
2191                }
2192                return Ok(RuleExpr::Call {
2193                    function: BuiltinFunction::Matches,
2194                    args: vec![expr, arg],
2195                });
2196            }
2197            return Ok(RuleExpr::Unsupported {
2198                fragment: BoundedText::new(self.input, MAX_PROFILE_STRING_BYTES)
2199                    .map_err(|_| BoundedText::unchecked("expression exceeds limit"))?,
2200                reason: BoundedText::unchecked(
2201                    "nested property path has no bound model link in this phase",
2202                ),
2203            });
2204        }
2205        Ok(expr)
2206    }
2207
2208    fn parse_primary(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2209        self.skip_ws();
2210        if self.consume("(") {
2211            let expr = self.parse_conditional()?;
2212            if !self.consume(")") {
2213                return Err(BoundedText::unchecked("missing closing parenthesis"));
2214            }
2215            return Ok(expr);
2216        }
2217        if self.remaining().starts_with('/') {
2218            return self.parse_regex_literal();
2219        }
2220        if self.remaining().starts_with('"') || self.remaining().starts_with('\'') {
2221            return self.parse_string();
2222        }
2223        if self.remaining().starts_with("true") {
2224            self.offset = self.offset.saturating_add(4);
2225            return Ok(RuleExpr::Bool { value: true });
2226        }
2227        if self.remaining().starts_with("false") {
2228            self.offset = self.offset.saturating_add(5);
2229            return Ok(RuleExpr::Bool { value: false });
2230        }
2231        if self.remaining().starts_with("null") {
2232            self.offset = self.offset.saturating_add(4);
2233            return Ok(RuleExpr::Null);
2234        }
2235        if self
2236            .remaining()
2237            .as_bytes()
2238            .first()
2239            .is_some_and(|byte| byte.is_ascii_digit() || *byte == b'-')
2240        {
2241            return self.parse_number();
2242        }
2243        self.parse_property()
2244    }
2245
2246    fn parse_string(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2247        let quote = *self
2248            .remaining()
2249            .as_bytes()
2250            .first()
2251            .ok_or_else(|| BoundedText::unchecked("expected string quote"))?;
2252        self.offset = self.offset.saturating_add(1);
2253        let start = self.offset;
2254        while let Some(byte) = self.remaining().as_bytes().first() {
2255            if *byte == quote {
2256                let value = &self.input[start..self.offset];
2257                self.offset = self.offset.saturating_add(1);
2258                return Ok(RuleExpr::String {
2259                    value: BoundedText::new(value, MAX_PROFILE_STRING_BYTES)
2260                        .map_err(|_| BoundedText::unchecked("string literal exceeds limit"))?,
2261                });
2262            }
2263            self.offset = self.offset.saturating_add(1);
2264        }
2265        Err(BoundedText::unchecked("unterminated string literal"))
2266    }
2267
2268    fn parse_regex_literal(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2269        self.offset = self.offset.saturating_add(1);
2270        let start = self.offset;
2271        let mut escaped = false;
2272        while let Some(byte) = self.remaining().as_bytes().first() {
2273            if *byte == b'/' && !escaped {
2274                let value = &self.input[start..self.offset];
2275                self.offset = self.offset.saturating_add(1);
2276                return Ok(RuleExpr::String {
2277                    value: BoundedText::new(value, MAX_REGEX_PATTERN_BYTES)
2278                        .map_err(|_| BoundedText::unchecked("regex literal exceeds limit"))?,
2279                });
2280            }
2281            escaped = *byte == b'\\' && !escaped;
2282            if *byte != b'\\' {
2283                escaped = false;
2284            }
2285            self.offset = self.offset.saturating_add(1);
2286        }
2287        Err(BoundedText::unchecked("unterminated regex literal"))
2288    }
2289
2290    fn parse_number(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2291        let start = self.offset;
2292        while let Some(byte) = self.remaining().as_bytes().first() {
2293            if byte.is_ascii_digit() || matches!(*byte, b'-' | b'.') {
2294                self.offset = self.offset.saturating_add(1);
2295            } else {
2296                break;
2297            }
2298        }
2299        let value = self.input[start..self.offset]
2300            .parse::<f64>()
2301            .map_err(|_| BoundedText::unchecked("invalid number literal"))?;
2302        Ok(RuleExpr::Number { value })
2303    }
2304
2305    fn parse_property(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2306        let first = self.parse_identifier()?;
2307        if self.consume("(") {
2308            let mut args = Vec::new();
2309            if !self.consume(")") {
2310                loop {
2311                    args.push(self.parse_conditional()?);
2312                    if self.consume(")") {
2313                        break;
2314                    }
2315                    if !self.consume(",") {
2316                        return Err(BoundedText::unchecked(
2317                            "missing function argument separator",
2318                        ));
2319                    }
2320                }
2321            }
2322            return Ok(RuleExpr::Call {
2323                function: builtin_function(&first)?,
2324                args,
2325            });
2326        }
2327        let parts = vec![property_name_from_source(&first)?];
2328        if self.consume(".") {
2329            let _member = self.parse_identifier()?;
2330            return Ok(RuleExpr::Unsupported {
2331                fragment: BoundedText::new(self.input, MAX_PROFILE_STRING_BYTES)
2332                    .map_err(|_| BoundedText::unchecked("expression exceeds limit"))?,
2333                reason: BoundedText::unchecked(
2334                    "nested property path has no bound model link in this phase",
2335                ),
2336            });
2337        }
2338        Ok(RuleExpr::Property {
2339            path: PropertyPath::new(parts),
2340        })
2341    }
2342
2343    fn parse_identifier(&mut self) -> std::result::Result<String, BoundedText> {
2344        let start = self.offset;
2345        while let Some(byte) = self.remaining().as_bytes().first() {
2346            if byte.is_ascii_alphanumeric() || *byte == b'_' {
2347                self.offset = self.offset.saturating_add(1);
2348            } else {
2349                break;
2350            }
2351        }
2352        if start == self.offset {
2353            return Err(BoundedText::unchecked("expected expression"));
2354        }
2355        Ok(self.input[start..self.offset].to_owned())
2356    }
2357}
2358
2359fn property_name_from_source(value: &str) -> std::result::Result<PropertyName, BoundedText> {
2360    PropertyName::new(map_verapdf_property(value))
2361        .map_err(|_| BoundedText::unchecked("invalid property"))
2362}
2363
2364fn builtin_function(value: &str) -> std::result::Result<BuiltinFunction, BoundedText> {
2365    match value {
2366        "hasParseFact" => Ok(BuiltinFunction::HasParseFact),
2367        "size" => Ok(BuiltinFunction::Size),
2368        "isEmpty" => Ok(BuiltinFunction::IsEmpty),
2369        "contains" => Ok(BuiltinFunction::Contains),
2370        "all" => Ok(BuiltinFunction::All),
2371        "exists" => Ok(BuiltinFunction::Exists),
2372        "matches" => Ok(BuiltinFunction::Matches),
2373        _ => Err(BoundedText::unchecked("unsupported built-in function")),
2374    }
2375}
2376
/// Translates a veraPDF property name into the name used by this crate's model.
///
/// Names without a dedicated translation are returned unchanged.
fn map_verapdf_property(value: &str) -> &str {
    const RENAMES: [(&str, &str); 5] = [
        ("Length", "declaredLength"),
        ("realLength", "discoveredLength"),
        ("isEncrypted", "encrypted"),
        ("containsMetadata", "hasMetadata"),
        ("isCatalogMetadata", "catalogMetadata"),
    ];
    for &(source, target) in RENAMES.iter() {
        if source == value {
            return target;
        }
    }
    value
}
2387
2388impl From<CosObject> for ModelValue {
2389    fn from(value: CosObject) -> Self {
2390        match value {
2391            CosObject::Boolean(value) => Self::Bool(value),
2392            CosObject::Real(value) => Self::Number(value),
2393            CosObject::Name(name) => Self::String(BoundedText::unchecked(
2394                String::from_utf8_lossy(name.as_bytes()).into_owned(),
2395            )),
2396            CosObject::String(value) => Self::String(BoundedText::unchecked(
2397                String::from_utf8_lossy(value.as_bytes()).into_owned(),
2398            )),
2399            CosObject::Reference(value) => Self::ObjectKey(value),
2400            CosObject::Null
2401            | CosObject::Integer(_)
2402            | CosObject::Array(_)
2403            | CosObject::Dictionary(_)
2404            | CosObject::Stream(_) => Self::Null,
2405        }
2406    }
2407}
2408
2409#[cfg(test)]
2410mod tests {
2411    use std::{io::Cursor, sync::Arc};
2412
2413    use super::{BuiltinProfileRepository, DefaultRuleEvaluator, ProfileRepository, RuleEvaluator};
2414    use crate::{FlavourSelection, Parser, Validator};
2415
2416    const MINIMAL_PDF: &[u8] = br"%PDF-1.7
24171 0 obj
2418<< /Type /Catalog >>
2419endobj
2420trailer
2421<< /Root 1 0 R >>
2422%%EOF
2423";
2424
    /// Test repository wrapping one fixed validation profile.
    #[derive(Debug)]
    struct StaticRepo(super::ValidationProfile);
2427
2428    impl super::ProfileRepository for StaticRepo {
2429        fn profiles_for(
2430            &self,
2431            _selection: &crate::FlavourSelection,
2432        ) -> crate::Result<Vec<super::ValidationProfile>> {
2433            Ok(vec![self.0.clone()])
2434        }
2435    }
2436
2437    #[test]
2438    fn test_should_return_builtin_profile_for_default_auto_selection() -> crate::Result<()> {
2439        let profiles = BuiltinProfileRepository::new().profiles_for(&FlavourSelection::default())?;
2440
2441        assert_eq!(profiles.len(), 1);
2442        assert_eq!(
2443            profiles.first().map(|profile| profile.rules.len()),
2444            Some(10)
2445        );
2446        assert_eq!(
2447            profiles.first().map(|profile| profile.identity.id.as_str()),
2448            Some("pdfv-m4")
2449        );
2450        Ok(())
2451    }
2452
2453    #[test]
2454    fn test_should_return_no_builtin_profile_for_auto_without_default() -> crate::Result<()> {
2455        let profiles = BuiltinProfileRepository::new()
2456            .profiles_for(&FlavourSelection::Auto { default: None })?;
2457
2458        assert!(profiles.is_empty());
2459        Ok(())
2460    }
2461
2462    #[cfg(feature = "custom-profiles")]
2463    #[test]
2464    fn test_should_import_representative_verapdf_xml_rules() -> crate::Result<()> {
2465        let import = super::import_verapdf_profile_xml(
2466            crate::generated_profiles::GENERATED_PROFILE_SOURCES
2467                .iter()
2468                .find(|source| source.display_flavour == "pdfa-1b")
2469                .ok_or(crate::ProfileError::UnsupportedSelection)?
2470                .xml,
2471        )?;
2472
2473        assert!(import.profile.rules.len() > 100);
2474        assert!(import.supported_rules > 0);
2475        assert!(import.unsupported_rules > 0);
2476        assert_eq!(import.profile.identity.id.as_str(), "verapdf-pdfa-1b");
2477        assert!(
2478            import
2479                .profile
2480                .rules
2481                .iter()
2482                .any(|rule| !rule.references.is_empty())
2483        );
2484        Ok(())
2485    }
2486
2487    #[cfg(feature = "custom-profiles")]
2488    #[test]
2489    fn test_should_map_verapdf_undefined_operator_to_sparse_family() -> crate::Result<()> {
2490        let import = super::import_verapdf_profile_xml(
2491            crate::generated_profiles::GENERATED_PROFILE_SOURCES
2492                .iter()
2493                .find(|source| source.display_flavour == "pdfa-1b")
2494                .ok_or(crate::ProfileError::UnsupportedSelection)?
2495                .xml,
2496        )?;
2497
2498        let rule = import
2499            .profile
2500            .rules
2501            .iter()
2502            .find(|rule| rule.id.0.as_str() == "iso-19005-1-6-2-10-1")
2503            .ok_or(crate::ProfileError::UnsupportedSelection)?;
2504
2505        assert_eq!(rule.object_type.as_str(), "undefinedOperator");
2506        Ok(())
2507    }
2508
2509    #[test]
2510    fn test_should_list_every_generated_builtin_profile_with_coverage() -> crate::Result<()> {
2511        let profiles = BuiltinProfileRepository::new().list_profiles()?;
2512
2513        assert_eq!(
2514            profiles.len(),
2515            crate::generated_profiles::GENERATED_PROFILE_SOURCES.len() + 1
2516        );
2517        assert!(profiles.iter().any(|profile| {
2518            profile.identity.id.as_str() == "verapdf-pdfua-2-iso32005"
2519                && profile.display_flavour.as_str() == "pdfua-2-iso32005"
2520                && profile.coverage.total_rules > 0
2521        }));
2522        assert!(profiles.iter().any(|profile| {
2523            profile.identity.id.as_str() == "verapdf-wtpdf-1-0-reuse"
2524                && profile.source_pin.as_str() == crate::generated_profiles::VERA_PDF_LIBRARY_PIN
2525        }));
2526        Ok(())
2527    }
2528
2529    #[test]
2530    fn test_should_improve_m6_official_profile_coverage_for_accessibility_profiles()
2531    -> crate::Result<()> {
2532        let profiles = BuiltinProfileRepository::new().list_profiles()?;
2533
2534        for display_flavour in [
2535            "pdfua-2-iso32005",
2536            "wtpdf-1-0-accessibility",
2537            "wtpdf-1-0-reuse",
2538        ] {
2539            let profile = profiles
2540                .iter()
2541                .find(|profile| profile.display_flavour.as_str() == display_flavour)
2542                .ok_or(crate::ProfileError::UnsupportedSelection)?;
2543            assert!(
2544                profile.coverage.executable_rules.saturating_mul(100)
2545                    >= profile.coverage.total_rules.saturating_mul(90),
2546                "{display_flavour} coverage is {:?}",
2547                profile.coverage
2548            );
2549        }
2550        Ok(())
2551    }
2552
2553    #[test]
2554    fn test_should_reject_inexact_pdfua_2_flavour_selection() -> crate::Result<()> {
2555        let flavour = crate::ValidationFlavour::new(
2556            "pdfua",
2557            std::num::NonZeroU32::new(2).ok_or(crate::ProfileError::UnsupportedSelection)?,
2558            "wrong",
2559        )?;
2560        let result =
2561            BuiltinProfileRepository::new().profiles_for(&FlavourSelection::Explicit { flavour });
2562
2563        assert!(matches!(
2564            result,
2565            Err(crate::PdfvError::Profile(
2566                crate::ProfileError::UnsupportedSelection
2567            ))
2568        ));
2569        Ok(())
2570    }
2571
2572    #[test]
2573    fn test_should_load_and_validate_every_generated_builtin_profile() -> crate::Result<()> {
2574        for source in crate::generated_profiles::GENERATED_PROFILE_SOURCES {
2575            let flavour = super::parse_display_flavour(source.display_flavour)?;
2576            let report = Validator::new(
2577                crate::ValidationOptions::builder()
2578                    .flavour(FlavourSelection::Explicit { flavour })
2579                    .build(),
2580            )?
2581            .validate_reader(Cursor::new(MINIMAL_PDF), crate::InputName::memory())?;
2582
2583            assert_eq!(report.status, crate::ValidationStatus::Incomplete);
2584            assert_eq!(
2585                report
2586                    .profile_reports
2587                    .first()
2588                    .map(|profile| profile.profile.id.as_str()),
2589                Some(source.id)
2590            );
2591            assert!(
2592                report
2593                    .profile_reports
2594                    .first()
2595                    .is_some_and(|profile| !profile.unsupported_rules.is_empty())
2596            );
2597            assert!(report.profile_reports.first().is_some_and(|profile| {
2598                profile
2599                    .unsupported_rules
2600                    .iter()
2601                    .any(|rule| !rule.references.is_empty())
2602            }));
2603        }
2604        Ok(())
2605    }
2606
2607    #[cfg(feature = "custom-profiles")]
2608    #[test]
2609    fn test_should_load_custom_xml_profile() -> crate::Result<()> {
2610        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2611<profile flavour="PDFA_1_B">
2612  <details><name>Custom smoke profile</name></details>
2613  <rules>
2614    <rule object="CosDocument">
2615      <id specification="LOCAL" clause="1" testNumber="1"/>
2616      <description>Catalog must be present</description>
2617      <test>hasCatalog == true</test>
2618      <error><message>Catalog is missing</message></error>
2619    </rule>
2620  </rules>
2621</profile>"#;
2622        let import = super::import_verapdf_profile_xml(xml)?;
2623
2624        assert_eq!(import.profile.rules.len(), 1);
2625        assert_eq!(import.supported_rules, 1);
2626        assert_eq!(import.unsupported_rules, 0);
2627        Ok(())
2628    }
2629
2630    #[test]
2631    fn test_should_evaluate_m0_document_rules() -> crate::Result<()> {
2632        let bytes = br"%PDF-1.7
26331 0 obj
2634<< /Type /Catalog >>
2635endobj
2636trailer
2637<< /Root 1 0 R >>
2638%%EOF
2639";
2640        let document = Parser::default().parse(Cursor::new(bytes))?;
2641        let model = crate::validation::DocumentModel::new(&document);
2642        let object = crate::ModelObjectRef::Document(model);
2643        let profile = BuiltinProfileRepository::new()
2644            .profiles_for(&FlavourSelection::default())?
2645            .remove(0);
2646        let mut evaluator = DefaultRuleEvaluator::new(crate::ResourceLimits::default());
2647
2648        for rule in profile
2649            .rules
2650            .iter()
2651            .filter(|rule| rule.object_type.as_str() == "document")
2652        {
2653            let outcome = evaluator.evaluate(object.clone(), rule)?;
2654            assert_eq!(outcome, super::RuleOutcome::Passed);
2655        }
2656        Ok(())
2657    }
2658
2659    #[test]
2660    fn test_should_validate_reader_end_to_end() -> crate::Result<()> {
2661        let bytes = br"%PDF-1.7
26621 0 obj
2663<< /Type /Catalog >>
2664endobj
2665trailer
2666<< /Root 1 0 R >>
2667%%EOF
2668";
2669        let report = Validator::new(crate::ValidationOptions::default())?
2670            .validate_reader(Cursor::new(bytes), crate::InputName::memory())?;
2671
2672        assert_eq!(report.status, crate::ValidationStatus::Valid);
2673        Ok(())
2674    }
2675
2676    #[test]
2677    fn test_should_validate_stream_with_declared_length_and_eol() -> crate::Result<()> {
2678        let bytes = br"%PDF-1.7
26791 0 obj
2680<< /Type /Catalog >>
2681endobj
26822 0 obj
2683<< /Length 4 >>
2684stream
2685abc
2686endstream
2687endobj
2688trailer
2689<< /Root 1 0 R >>
2690%%EOF
2691";
2692        let report = Validator::new(crate::ValidationOptions::default())?
2693            .validate_reader(Cursor::new(bytes), crate::InputName::memory())?;
2694
2695        assert_eq!(report.status, crate::ValidationStatus::Valid);
2696        Ok(())
2697    }
2698
2699    #[test]
2700    fn test_should_apply_m4_feature_fact_rules_to_linked_objects() -> crate::Result<()> {
2701        let report = Validator::new(crate::ValidationOptions::default())?
2702            .validate_reader(Cursor::new(m4_feature_pdf()), crate::InputName::memory())?;
2703        let profile =
2704            report
2705                .profile_reports
2706                .first()
2707                .ok_or(crate::ValidationError::LimitExceeded {
2708                    limit: "profile_reports",
2709                })?;
2710
2711        assert_eq!(
2712            report.status,
2713            crate::ValidationStatus::Valid,
2714            "{profile:#?}"
2715        );
2716        assert_eq!(profile.rules_executed, 12);
2717        Ok(())
2718    }
2719
2720    #[test]
2721    fn test_should_report_imported_derived_property_as_unsupported() -> crate::Result<()> {
2722        let rule = super::Rule {
2723            id: crate::RuleId(crate::Identifier::new("derived-font-name")?),
2724            object_type: super::ObjectTypeName::new("font")?,
2725            deferred: false,
2726            tags: Vec::new(),
2727            description: crate::BoundedText::new("derived font name", 64)?,
2728            test: super::RuleExpr::Binary {
2729                op: super::BinaryOp::Eq,
2730                left: Box::new(super::property_expr("fontName")?),
2731                right: Box::new(super::RuleExpr::Null),
2732            },
2733            error: super::ErrorTemplate {
2734                message: crate::BoundedText::new("derived font name", 64)?,
2735            },
2736            references: Vec::new(),
2737        };
2738        let profile = super::ValidationProfile {
2739            identity: crate::ProfileIdentity {
2740                id: crate::Identifier::new("derived-property")?,
2741                name: crate::BoundedText::new("derived property", 64)?,
2742                version: None,
2743            },
2744            flavour: super::pdfa_1b_flavour()?,
2745            rules: vec![rule],
2746        };
2747        let validator = Validator::with_profiles(
2748            crate::ValidationOptions::default(),
2749            Arc::new(StaticRepo(profile)),
2750        )?;
2751        let report =
2752            validator.validate_reader(Cursor::new(m4_feature_pdf()), crate::InputName::memory())?;
2753        let profile =
2754            report
2755                .profile_reports
2756                .first()
2757                .ok_or(crate::ValidationError::LimitExceeded {
2758                    limit: "profile_reports",
2759                })?;
2760
2761        assert_eq!(report.status, crate::ValidationStatus::Incomplete);
2762        assert_eq!(profile.unsupported_rules.len(), 1);
2763        Ok(())
2764    }
2765
2766    #[test]
2767    fn test_should_fail_m4_feature_fact_rule_on_invalid_font() -> crate::Result<()> {
2768        let bytes = br"%PDF-1.7
27691 0 obj
2770<< /Type /Catalog /Pages 2 0 R >>
2771endobj
27722 0 obj
2773<< /Type /Pages /Kids [3 0 R] /Count 1 >>
2774endobj
27753 0 obj
2776<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>
2777endobj
27784 0 obj
2779<< /Type /Font /BaseFont /Helvetica >>
2780endobj
27815 0 obj
2782<< /Length 4 >>
2783stream
2784q Q
2785endstream
2786endobj
2787trailer
2788<< /Root 1 0 R >>
2789%%EOF
2790";
2791        let report = Validator::new(crate::ValidationOptions::default())?
2792            .validate_reader(Cursor::new(bytes), crate::InputName::memory())?;
2793        let profile =
2794            report
2795                .profile_reports
2796                .first()
2797                .ok_or(crate::ValidationError::LimitExceeded {
2798                    limit: "profile_reports",
2799                })?;
2800
2801        assert_eq!(report.status, crate::ValidationStatus::Invalid);
2802        assert!(profile.failed_assertions.iter().any(|assertion| {
2803            assertion.rule_id.0.as_str() == "m4-font-subtype-present"
2804                && assertion
2805                    .object_context
2806                    .as_ref()
2807                    .is_some_and(|context| context.as_str() == "root/page[0]/font[F1]")
2808        }));
2809        Ok(())
2810    }
2811
2812    fn m4_feature_pdf() -> &'static [u8] {
2813        br"%PDF-1.7
28141 0 obj
2815<< /Type /Catalog /Pages 2 0 R /OutputIntents [8 0 R] >>
2816endobj
28172 0 obj
2818<< /Type /Pages /Kids [3 0 R] /Count 1 >>
2819endobj
28203 0 obj
2821<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /Annots [5 0 R] /Contents 6 0 R >>
2822endobj
28234 0 obj
2824<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
2825endobj
28265 0 obj
2827<< /Type /Annot /Subtype /Text >>
2828endobj
28296 0 obj
2830<< /Length 0 >>
2831stream
2832endstream
2833endobj
28347 0 obj
2835<< /Length 0 >>
2836stream
2837endstream
2838endobj
28398 0 obj
2840<< /Type /OutputIntent /S /GTS_PDFA1 /DestOutputProfile 7 0 R >>
2841endobj
2842trailer
2843<< /Root 1 0 R >>
2844%%EOF
2845"
2846    }
2847
2848    #[test]
2849    fn test_should_reject_unsupported_rule_ir_silently_fallbacks() -> crate::Result<()> {
2850        let bytes = br"%PDF-1.7
28511 0 obj
2852<< /Type /Catalog >>
2853endobj
2854trailer
2855<< /Root 1 0 R >>
2856%%EOF
2857";
2858        let document = Parser::default().parse(Cursor::new(bytes))?;
2859        let model = crate::validation::DocumentModel::new(&document);
2860        let object = crate::ModelObjectRef::Document(model);
2861        let mut evaluator = DefaultRuleEvaluator::new(crate::ResourceLimits::default());
2862        let nested_rule = super::Rule {
2863            id: crate::RuleId(crate::Identifier::new("bad-nested")?),
2864            object_type: super::ObjectTypeName::new("document")?,
2865            deferred: false,
2866            tags: Vec::new(),
2867            description: crate::BoundedText::new("nested", 32)?,
2868            test: super::RuleExpr::Property {
2869                path: super::PropertyPath::new(vec![
2870                    super::PropertyName::new("headerOffset")?,
2871                    super::PropertyName::new("extra")?,
2872                ]),
2873            },
2874            error: super::ErrorTemplate {
2875                message: crate::BoundedText::new("nested", 32)?,
2876            },
2877            references: Vec::new(),
2878        };
2879        let arity_rule = super::Rule {
2880            id: crate::RuleId(crate::Identifier::new("bad-arity")?),
2881            object_type: super::ObjectTypeName::new("document")?,
2882            deferred: false,
2883            tags: Vec::new(),
2884            description: crate::BoundedText::new("arity", 32)?,
2885            test: super::RuleExpr::Call {
2886                function: super::BuiltinFunction::HasParseFact,
2887                args: vec![
2888                    super::RuleExpr::String {
2889                        value: crate::BoundedText::new("header", 32)?,
2890                    },
2891                    super::RuleExpr::String {
2892                        value: crate::BoundedText::new("extra", 32)?,
2893                    },
2894                ],
2895            },
2896            error: super::ErrorTemplate {
2897                message: crate::BoundedText::new("arity", 32)?,
2898            },
2899            references: Vec::new(),
2900        };
2901
2902        assert!(evaluator.evaluate(object.clone(), &nested_rule).is_err());
2903        assert!(evaluator.evaluate(object, &arity_rule).is_err());
2904        Ok(())
2905    }
2906
2907    #[test]
2908    fn test_should_report_unsupported_rule_as_incomplete() -> crate::Result<()> {
2909        let bytes = br"%PDF-1.7
29101 0 obj
2911<< /Type /Catalog >>
2912endobj
2913trailer
2914<< /Root 1 0 R >>
2915%%EOF
2916";
2917        let rule = super::Rule {
2918            id: crate::RuleId(crate::Identifier::new("unsupported")?),
2919            object_type: super::ObjectTypeName::new("document")?,
2920            deferred: false,
2921            tags: Vec::new(),
2922            description: crate::BoundedText::new("unsupported", 64)?,
2923            test: super::RuleExpr::Property {
2924                path: super::PropertyPath::new(vec![
2925                    super::PropertyName::new("headerOffset")?,
2926                    super::PropertyName::new("extra")?,
2927                ]),
2928            },
2929            error: super::ErrorTemplate {
2930                message: crate::BoundedText::new("unsupported", 64)?,
2931            },
2932            references: Vec::new(),
2933        };
2934        let profile = super::ValidationProfile {
2935            identity: crate::ProfileIdentity {
2936                id: crate::Identifier::new("test")?,
2937                name: crate::BoundedText::new("test", 64)?,
2938                version: None,
2939            },
2940            flavour: super::pdfa_1b_flavour()?,
2941            rules: vec![rule],
2942        };
2943        let validator = Validator::with_profiles(
2944            crate::ValidationOptions::default(),
2945            Arc::new(StaticRepo(profile)),
2946        )?;
2947        let report = validator.validate_reader(Cursor::new(bytes), crate::InputName::memory())?;
2948
2949        assert_eq!(report.status, crate::ValidationStatus::Incomplete);
2950        assert_eq!(
2951            report
2952                .profile_reports
2953                .first()
2954                .map(|profile| profile.unsupported_rules.len()),
2955            Some(1)
2956        );
2957        Ok(())
2958    }
2959
2960    #[test]
2961    fn test_should_parse_phase_13_expression_surface() -> crate::Result<()> {
2962        let modulo = super::parse_imported_expr("hexCount % 2 == 0")
2963            .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?;
2964        let ternary = super::parse_imported_expr(
2965            "gPageOutputCS == null ? gDocumentOutputCS == 'RGB ' : gPageOutputCS == 'RGB '",
2966        )
2967        .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?;
2968        let regex = super::parse_imported_expr(r"/^%PDF-2\.[0-9]$/.test(header)")
2969            .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?;
2970        let call = super::parse_imported_expr("contains(entries, 'UR3') == false")
2971            .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?;
2972
2973        assert!(matches!(modulo, super::RuleExpr::Binary { .. }));
2974        assert!(matches!(ternary, super::RuleExpr::Conditional { .. }));
2975        assert!(matches!(regex, super::RuleExpr::Call { .. }));
2976        assert!(matches!(call, super::RuleExpr::Binary { .. }));
2977        Ok(())
2978    }
2979
2980    #[test]
2981    fn test_should_import_nested_property_paths_as_static_unsupported() -> crate::Result<()> {
2982        let expr = super::parse_imported_expr("metadata.schema.part == 1")
2983            .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?;
2984
2985        assert!(matches!(expr, super::RuleExpr::Unsupported { .. }));
2986        Ok(())
2987    }
2988
2989    #[test]
2990    fn test_should_evaluate_arithmetic_ternary_and_regex_builtins() -> crate::Result<()> {
2991        let bytes = br"%PDF-2.0
29921 0 obj
2993<< /Type /Catalog >>
2994endobj
2995trailer
2996<< /Root 1 0 R >>
2997%%EOF
2998";
2999        let document = Parser::default().parse(Cursor::new(bytes))?;
3000        let model = crate::validation::DocumentModel::new(&document);
3001        let object = crate::ModelObjectRef::Document(model);
3002        let mut evaluator = DefaultRuleEvaluator::new(crate::ResourceLimits::default());
3003        let rule = super::Rule {
3004            id: crate::RuleId(crate::Identifier::new("expr-surface")?),
3005            object_type: super::ObjectTypeName::new("document")?,
3006            deferred: false,
3007            tags: Vec::new(),
3008            description: crate::BoundedText::new("expr", 32)?,
3009            test: super::RuleExpr::Binary {
3010                op: super::BinaryOp::And,
3011                left: Box::new(
3012                    super::parse_imported_expr("5 % 2 == 1")
3013                        .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?,
3014                ),
3015                right: Box::new(
3016                    super::parse_imported_expr(r"/^%PDF-2\.[0-9]$/.test(header)")
3017                        .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?,
3018                ),
3019            },
3020            error: super::ErrorTemplate {
3021                message: crate::BoundedText::new("expr", 32)?,
3022            },
3023            references: Vec::new(),
3024        };
3025
3026        assert_eq!(
3027            evaluator.evaluate(object, &rule)?,
3028            super::RuleOutcome::Passed
3029        );
3030        Ok(())
3031    }
3032
3033    #[test]
3034    fn test_should_apply_failed_assertion_cap_per_rule() -> crate::Result<()> {
3035        let bytes = br"%PDF-1.7
30361 0 obj
3037<< /Type /Catalog >>
3038endobj
3039trailer
3040<< /Root 1 0 R >>
3041%%EOF
3042";
3043        let mut rules = Vec::new();
3044        for id in ["fail-a", "fail-b"] {
3045            rules.push(super::Rule {
3046                id: crate::RuleId(crate::Identifier::new(id)?),
3047                object_type: super::ObjectTypeName::new("document")?,
3048                deferred: false,
3049                tags: Vec::new(),
3050                description: crate::BoundedText::new(id, 64)?,
3051                test: super::RuleExpr::Bool { value: false },
3052                error: super::ErrorTemplate {
3053                    message: crate::BoundedText::new(id, 64)?,
3054                },
3055                references: Vec::new(),
3056            });
3057        }
3058        let profile = super::ValidationProfile {
3059            identity: crate::ProfileIdentity {
3060                id: crate::Identifier::new("test")?,
3061                name: crate::BoundedText::new("test", 64)?,
3062                version: None,
3063            },
3064            flavour: super::pdfa_1b_flavour()?,
3065            rules,
3066        };
3067        let validator = Validator::with_profiles(
3068            crate::ValidationOptions::default(),
3069            Arc::new(StaticRepo(profile)),
3070        )?;
3071        let report = validator.validate_reader(Cursor::new(bytes), crate::InputName::memory())?;
3072
3073        assert_eq!(
3074            report
3075                .profile_reports
3076                .first()
3077                .map(|profile| profile.failed_assertions.len()),
3078            Some(2)
3079        );
3080        Ok(())
3081    }
3082}