1use std::num::NonZeroU32;
4
5use regex::RegexBuilder;
6use serde::{Deserialize, Serialize};
7
8use crate::{
9 AssertionStatus, BoundedText, CosObject, FlavourSelection, Identifier, ObjectKey, ParseFact,
10 ProfileError, ProfileIdentity, ResourceLimits, Result, RuleId, SpecReference, StreamFact,
11 ValidationFlavour,
12 generated_profiles::{GENERATED_PROFILE_SOURCES, GeneratedProfileSource, VERA_PDF_LIBRARY_PIN},
13};
14
/// Upper bound on the number of `eval` steps a single rule evaluation may consume.
const MAX_RULE_INSTRUCTIONS: u64 = 512;
/// Upper bound on expression nesting depth accepted by the rule evaluator.
const MAX_RULE_DEPTH: u32 = 32;
/// Longest regex pattern, in bytes, accepted by the `matches` built-in.
const MAX_REGEX_PATTERN_BYTES: usize = 512;
/// Longest haystack, in bytes, accepted by the `matches` built-in.
const MAX_REGEX_HAYSTACK_BYTES: usize = 4096;
/// Largest profile XML size, in bytes, accepted when loading a custom profile from disk.
const MAX_PROFILE_XML_BYTES: u64 = 8 * 1024 * 1024;
// NOTE(review): the next four limits are presumably enforced by the XML importer further
// down the file — confirm against `import_verapdf_profile_xml_impl`.
const MAX_PROFILE_XML_ELEMENTS: u64 = 100_000;
const MAX_PROFILE_XML_DEPTH: u32 = 32;
const MAX_PROFILE_XML_ATTRIBUTES: usize = 16;
const MAX_PROFILE_RULES: usize = 10_000;
/// Byte cap applied when a rule expression is captured as an `Unsupported` fragment.
const MAX_PROFILE_STRING_BYTES: usize = 4096;
25
/// Source of validation profiles for a requested flavour selection.
pub trait ProfileRepository {
    /// Returns the profiles that apply to `selection`.
    ///
    /// # Errors
    ///
    /// Implementations return an error when the selection cannot be served.
    fn profiles_for(&self, selection: &FlavourSelection) -> Result<Vec<ValidationProfile>>;
}
35
/// Strategy for checking a single rule against a single model object.
pub trait RuleEvaluator {
    /// Evaluates `rule` against `object` and reports whether it passed or failed.
    ///
    /// # Errors
    ///
    /// Implementations return an error when the rule cannot be evaluated.
    fn evaluate(&mut self, object: crate::ModelObjectRef<'_>, rule: &Rule) -> Result<RuleOutcome>;
}
45
46#[derive(Clone, Debug, Default)]
48pub struct BuiltinProfileRepository;
49
50impl BuiltinProfileRepository {
51 #[must_use]
53 pub fn new() -> Self {
54 Self
55 }
56
57 pub fn list_profiles(&self) -> Result<Vec<ProfileCatalogEntry>> {
63 let mut entries = Vec::with_capacity(GENERATED_PROFILE_SOURCES.len().saturating_add(1));
64 let m4 = m4_profile(pdfa_1b_flavour()?)?;
65 entries.push(ProfileCatalogEntry::from_profile(
66 &m4,
67 "pdfv-internal",
68 "built-in smoke profile",
69 "pdfa-1b",
70 )?);
71 for source in GENERATED_PROFILE_SOURCES {
72 let import = import_generated_profile(source)?;
73 entries.push(ProfileCatalogEntry::from_import(source, &import)?);
74 }
75 Ok(entries)
76 }
77}
78
79impl ProfileRepository for BuiltinProfileRepository {
80 fn profiles_for(&self, selection: &FlavourSelection) -> Result<Vec<ValidationProfile>> {
81 match selection {
82 FlavourSelection::Auto { default } => {
83 let Some(flavour) = default else {
84 return Ok(Vec::new());
85 };
86 ensure_builtin_flavour(flavour)?;
87 Ok(vec![m4_profile(flavour.clone())?])
88 }
89 FlavourSelection::Explicit { flavour } => {
90 let source = builtin_source_for_flavour(flavour)?;
91 Ok(vec![import_generated_profile(source)?.profile])
92 }
93 FlavourSelection::CustomProfile { .. } => {
94 #[cfg(feature = "custom-profiles")]
95 {
96 let repository = CustomProfileRepository;
97 repository.profiles_for(selection)
98 }
99 #[cfg(not(feature = "custom-profiles"))]
100 {
101 Err(ProfileError::UnsupportedSelection.into())
102 }
103 }
104 }
105 }
106}
107
/// Repository that loads a veraPDF profile from a caller-supplied path.
#[cfg(feature = "custom-profiles")]
#[derive(Clone, Debug, Default)]
pub struct CustomProfileRepository;

#[cfg(feature = "custom-profiles")]
impl ProfileRepository for CustomProfileRepository {
    /// Loads the profile referenced by a `CustomProfile` selection; every other
    /// selection is rejected with `ProfileError::UnsupportedSelection`.
    fn profiles_for(&self, selection: &FlavourSelection) -> Result<Vec<ValidationProfile>> {
        if let FlavourSelection::CustomProfile { profile_path } = selection {
            let imported = load_verapdf_profile_path(profile_path)?;
            Ok(vec![imported.profile])
        } else {
            Err(ProfileError::UnsupportedSelection.into())
        }
    }
}
122
/// Result of importing a profile: the profile itself plus rule support counters.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ProfileImportSummary {
    /// The imported profile.
    pub profile: ValidationProfile,
    /// Number of rules whose test expression is executable.
    pub supported_rules: u64,
    /// Number of rules whose test expression is `RuleExpr::Unsupported`.
    pub unsupported_rules: u64,
}
135
/// Rule coverage counters reported for a catalogued profile.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
#[allow(
    clippy::struct_field_names,
    reason = "public report contract mirrors ExpressionCoverage terminology from the spec"
)]
pub struct ProfileCoverage {
    /// Total number of rules in the profile.
    pub total_rules: u64,
    /// Number of rules that are not marked unsupported.
    pub executable_rules: u64,
    /// Number of rules marked unsupported.
    pub unsupported_rules: u64,
}
152
/// One row of the profile catalog returned by `BuiltinProfileRepository::list_profiles`.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ProfileCatalogEntry {
    /// Identity copied from the profile.
    pub identity: ProfileIdentity,
    /// Flavour copied from the profile.
    pub flavour: ValidationFlavour,
    /// Display form of the flavour (bounded to 128 bytes by the constructors in this file).
    pub display_flavour: BoundedText,
    /// Identifier of the source the profile was taken from.
    pub source_pin: Identifier,
    /// Name of the source file (bounded to 512 bytes by the constructors in this file).
    pub source_file: BoundedText,
    /// Rule coverage counters for the profile.
    pub coverage: ProfileCoverage,
}
171
172impl ProfileCatalogEntry {
173 fn from_import(source: &GeneratedProfileSource, import: &ProfileImportSummary) -> Result<Self> {
174 Self::from_profile(
175 &import.profile,
176 VERA_PDF_LIBRARY_PIN,
177 source.source_file,
178 source.display_flavour,
179 )
180 .map(|mut entry| {
181 entry.coverage = ProfileCoverage {
182 total_rules: import
183 .supported_rules
184 .saturating_add(import.unsupported_rules),
185 executable_rules: import.supported_rules,
186 unsupported_rules: import.unsupported_rules,
187 };
188 entry
189 })
190 }
191
192 fn from_profile(
193 profile: &ValidationProfile,
194 source_pin: &str,
195 source_file: &str,
196 display_flavour: &str,
197 ) -> Result<Self> {
198 Ok(Self {
199 identity: profile.identity.clone(),
200 flavour: profile.flavour.clone(),
201 display_flavour: BoundedText::new(display_flavour, 128)?,
202 source_pin: Identifier::new(source_pin)?,
203 source_file: BoundedText::new(source_file, 512)?,
204 coverage: ProfileCoverage {
205 total_rules: u64::try_from(profile.rules.len()).unwrap_or(u64::MAX),
206 executable_rules: u64::try_from(
207 profile
208 .rules
209 .iter()
210 .filter(|rule| !matches!(rule.test, RuleExpr::Unsupported { .. }))
211 .count(),
212 )
213 .unwrap_or(u64::MAX),
214 unsupported_rules: u64::try_from(
215 profile
216 .rules
217 .iter()
218 .filter(|rule| matches!(rule.test, RuleExpr::Unsupported { .. }))
219 .count(),
220 )
221 .unwrap_or(u64::MAX),
222 },
223 })
224 }
225}
226
/// A named set of rules targeting one validation flavour.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ValidationProfile {
    /// Identity of the profile.
    pub identity: ProfileIdentity,
    /// Flavour the profile validates against.
    pub flavour: ValidationFlavour,
    /// Rules contained in the profile.
    pub rules: Vec<Rule>,
}
239
/// A single validation rule: a test expression bound to a model object type.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct Rule {
    /// Identifier of the rule.
    pub id: RuleId,
    /// Model object type (family) the rule is checked against.
    pub object_type: ObjectTypeName,
    // NOTE(review): the meaning of `deferred` is not visible in this part of the file;
    // built-in rules always set it to `false` — confirm against the validator.
    pub deferred: bool,
    /// Tags attached to the rule.
    pub tags: Vec<Identifier>,
    /// Description of the rule.
    pub description: BoundedText,
    /// Expression that must evaluate to `true` for the rule to pass.
    pub test: RuleExpr,
    /// Error template associated with the rule.
    pub error: ErrorTemplate,
    /// Specification references attached to the rule.
    pub references: Vec<SpecReference>,
}
262
/// Error text attached to a rule.
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", deny_unknown_fields)]
pub struct ErrorTemplate {
    /// Message text of the template.
    pub message: BoundedText,
}
271
272#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
274#[serde(try_from = "String", into = "String")]
275pub struct ObjectTypeName(Identifier);
276
277impl ObjectTypeName {
278 pub fn new(value: impl Into<String>) -> std::result::Result<Self, crate::ConfigError> {
284 Ok(Self(Identifier::new(value)?))
285 }
286
287 #[must_use]
289 pub fn as_str(&self) -> &str {
290 self.0.as_str()
291 }
292
293 pub(crate) fn unchecked(value: &'static str) -> Self {
294 Self(Identifier::unchecked(value))
295 }
296}
297
298impl TryFrom<String> for ObjectTypeName {
299 type Error = crate::ConfigError;
300
301 fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
302 Self::new(value)
303 }
304}
305
306impl From<ObjectTypeName> for String {
307 fn from(value: ObjectTypeName) -> Self {
308 value.0.into()
309 }
310}
311
312#[derive(Clone, Debug, Deserialize, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize)]
314#[serde(try_from = "String", into = "String")]
315pub struct PropertyName(Identifier);
316
317impl PropertyName {
318 pub fn new(value: impl Into<String>) -> std::result::Result<Self, crate::ConfigError> {
324 Ok(Self(Identifier::new(value)?))
325 }
326
327 #[must_use]
329 pub fn as_str(&self) -> &str {
330 self.0.as_str()
331 }
332
333 pub(crate) fn unchecked(value: impl Into<String>) -> Self {
334 Self(Identifier::unchecked(value))
335 }
336}
337
338impl TryFrom<String> for PropertyName {
339 type Error = crate::ConfigError;
340
341 fn try_from(value: String) -> std::result::Result<Self, Self::Error> {
342 Self::new(value)
343 }
344}
345
346impl From<PropertyName> for String {
347 fn from(value: PropertyName) -> Self {
348 value.0.into()
349 }
350}
351
352#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
354#[serde(try_from = "Vec<String>", into = "Vec<String>")]
355pub struct PropertyPath(Vec<PropertyName>);
356
357impl PropertyPath {
358 #[must_use]
360 pub fn new(parts: Vec<PropertyName>) -> Self {
361 Self(parts)
362 }
363
364 #[must_use]
366 pub fn parts(&self) -> &[PropertyName] {
367 &self.0
368 }
369}
370
371impl TryFrom<Vec<String>> for PropertyPath {
372 type Error = crate::ConfigError;
373
374 fn try_from(value: Vec<String>) -> std::result::Result<Self, Self::Error> {
375 value
376 .into_iter()
377 .map(PropertyName::new)
378 .collect::<std::result::Result<Vec<_>, _>>()
379 .map(Self)
380 }
381}
382
383impl From<PropertyPath> for Vec<String> {
384 fn from(value: PropertyPath) -> Self {
385 value.0.into_iter().map(Into::into).collect()
386 }
387}
388
/// Expression tree evaluated for a rule; serialized with a `kind` tag in camelCase.
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase", tag = "kind")]
pub enum RuleExpr {
    /// Boolean literal.
    Bool {
        /// Literal value.
        value: bool,
    },
    /// Numeric literal.
    Number {
        /// Literal value.
        value: f64,
    },
    /// String literal.
    String {
        /// Literal value.
        value: BoundedText,
    },
    /// Null literal.
    Null,
    /// Reads a property of the object under evaluation.
    Property {
        /// Path of the property; the evaluator in this file only resolves single-segment paths.
        path: PropertyPath,
    },
    /// Unary operator applied to one operand.
    Unary {
        /// Operator to apply.
        op: UnaryOp,
        /// Operand expression.
        expr: Box<RuleExpr>,
    },
    /// Binary operator applied to two operands.
    Binary {
        /// Operator to apply.
        op: BinaryOp,
        /// Left operand.
        left: Box<RuleExpr>,
        /// Right operand.
        right: Box<RuleExpr>,
    },
    /// Chooses between two expressions based on a boolean condition.
    Conditional {
        /// Condition; must evaluate to a boolean.
        condition: Box<RuleExpr>,
        /// Expression evaluated when the condition is `true`.
        when_true: Box<RuleExpr>,
        /// Expression evaluated when the condition is `false`.
        when_false: Box<RuleExpr>,
    },
    /// Call of a built-in function.
    Call {
        /// Function to call.
        function: BuiltinFunction,
        /// Argument expressions.
        args: Vec<RuleExpr>,
    },
    /// Placeholder for an expression that cannot be executed; evaluating it is an error.
    Unsupported {
        /// Text of the expression that could not be supported.
        fragment: BoundedText,
        /// Why the expression is unsupported.
        reason: BoundedText,
    },
}
456
/// Unary operators available in rule expressions.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum UnaryOp {
    /// Boolean negation; the operand must be a boolean.
    Not,
}
465
/// Binary operators available in rule expressions.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum BinaryOp {
    /// Equality of two values.
    Eq,
    /// Inequality of two values.
    Ne,
    /// Numeric less-than-or-equal.
    Le,
    /// Numeric greater-than-or-equal.
    Ge,
    /// Numeric less-than.
    Lt,
    /// Numeric greater-than.
    Gt,
    /// Boolean conjunction; the right operand is skipped when the left is `false`.
    And,
    /// Boolean disjunction; the right operand is skipped when the left is `true`.
    Or,
    /// Numeric addition.
    Add,
    /// Numeric subtraction.
    Sub,
    /// Numeric multiplication.
    Mul,
    /// Numeric division; a divisor smaller in magnitude than `f64::EPSILON` is an error.
    Div,
    /// Numeric remainder; a divisor smaller in magnitude than `f64::EPSILON` is an error.
    Rem,
}
498
/// Built-in functions callable from rule expressions.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[non_exhaustive]
#[serde(rename_all = "camelCase")]
pub enum BuiltinFunction {
    /// Tests whether the document's parse facts include the fact named by the string argument.
    HasParseFact,
    /// Length of a string (in bytes) or of a list.
    Size,
    /// Whether a string or list has length zero.
    IsEmpty,
    /// Substring test for two strings, or membership test for a list and a value.
    Contains,
    /// `true` when every argument evaluates to `true`; stops at the first `false`.
    All,
    /// `true` when any argument evaluates to `true`; stops at the first `true`.
    Exists,
    /// Regex match of the first argument (pattern) against the second argument (string).
    Matches,
}
519
/// Runtime value produced while evaluating a rule expression.
#[derive(Clone, Debug, PartialEq)]
#[non_exhaustive]
pub enum ModelValue {
    /// Absence of a value.
    Null,
    /// Boolean value.
    Bool(bool),
    /// Numeric value.
    Number(f64),
    /// String value.
    String(BoundedText),
    /// Object key value.
    ObjectKey(ObjectKey),
    /// List of values.
    List(Vec<ModelValue>),
}
537
/// Result of evaluating one rule against one object.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum RuleOutcome {
    /// The rule's test evaluated to `true`.
    Passed,
    /// The rule's test evaluated to `false`.
    Failed,
}

impl RuleOutcome {
    /// Maps the outcome onto the corresponding `AssertionStatus` variant.
    #[must_use]
    pub fn assertion_status(self) -> AssertionStatus {
        match self {
            Self::Passed => AssertionStatus::Passed,
            Self::Failed => AssertionStatus::Failed,
        }
    }
}
558
/// Rule evaluator that interprets `RuleExpr` trees under instruction and depth budgets.
#[derive(Clone, Debug)]
pub struct DefaultRuleEvaluator {
    // Resource limits supplied by the caller; `max_object_depth` also caps expression depth.
    limits: ResourceLimits,
    // Number of `eval` steps consumed so far; reset at the start of every `evaluate` call.
    instructions: u64,
}
565
566impl DefaultRuleEvaluator {
567 #[must_use]
569 pub fn new(limits: ResourceLimits) -> Self {
570 Self {
571 limits,
572 instructions: 0,
573 }
574 }
575
576 fn eval(
577 &mut self,
578 object: &crate::ModelObjectRef<'_>,
579 expr: &RuleExpr,
580 depth: u32,
581 ) -> Result<ModelValue> {
582 self.instructions =
583 self.instructions
584 .checked_add(1)
585 .ok_or(ProfileError::BudgetExceeded {
586 budget: "instructions",
587 })?;
588 if self.instructions > MAX_RULE_INSTRUCTIONS {
589 return Err(ProfileError::BudgetExceeded {
590 budget: "instructions",
591 }
592 .into());
593 }
594 if depth > MAX_RULE_DEPTH || depth > self.limits.max_object_depth {
595 return Err(ProfileError::BudgetExceeded { budget: "depth" }.into());
596 }
597
598 match expr {
599 RuleExpr::Bool { value } => Ok(ModelValue::Bool(*value)),
600 RuleExpr::Number { value } => Ok(ModelValue::Number(*value)),
601 RuleExpr::String { value } => Ok(ModelValue::String(value.clone())),
602 RuleExpr::Null => Ok(ModelValue::Null),
603 RuleExpr::Property { path } => property(object, path),
604 RuleExpr::Unary { op, expr } => {
605 let value = self.eval(object, expr, depth.saturating_add(1))?;
606 match (op, value) {
607 (UnaryOp::Not, ModelValue::Bool(value)) => Ok(ModelValue::Bool(!value)),
608 _ => Err(type_mismatch("unary operator requires boolean").into()),
609 }
610 }
611 RuleExpr::Binary { op, left, right } => {
612 self.eval_binary(object, *op, left, right, depth)
613 }
614 RuleExpr::Conditional {
615 condition,
616 when_true,
617 when_false,
618 } => {
619 if expect_bool(&self.eval(object, condition, depth.saturating_add(1))?)? {
620 self.eval(object, when_true, depth.saturating_add(1))
621 } else {
622 self.eval(object, when_false, depth.saturating_add(1))
623 }
624 }
625 RuleExpr::Call { function, args } => self.eval_call(object, *function, args, depth),
626 RuleExpr::Unsupported { reason, .. } => Err(ProfileError::UnsupportedRule {
627 reason: reason.clone(),
628 }
629 .into()),
630 }
631 }
632
633 fn eval_binary(
634 &mut self,
635 object: &crate::ModelObjectRef<'_>,
636 op: BinaryOp,
637 left: &RuleExpr,
638 right: &RuleExpr,
639 depth: u32,
640 ) -> Result<ModelValue> {
641 if op == BinaryOp::And {
642 let left = expect_bool(&self.eval(object, left, depth.saturating_add(1))?)?;
643 if !left {
644 return Ok(ModelValue::Bool(false));
645 }
646 let right = expect_bool(&self.eval(object, right, depth.saturating_add(1))?)?;
647 return Ok(ModelValue::Bool(right));
648 }
649 if op == BinaryOp::Or {
650 let left = expect_bool(&self.eval(object, left, depth.saturating_add(1))?)?;
651 if left {
652 return Ok(ModelValue::Bool(true));
653 }
654 let right = expect_bool(&self.eval(object, right, depth.saturating_add(1))?)?;
655 return Ok(ModelValue::Bool(right));
656 }
657
658 let left = self.eval(object, left, depth.saturating_add(1))?;
659 let right = self.eval(object, right, depth.saturating_add(1))?;
660 let result = match op {
661 BinaryOp::Eq => values_equal(&left, &right),
662 BinaryOp::Ne => !values_equal(&left, &right),
663 BinaryOp::Le => expect_number(&left)? <= expect_number(&right)?,
664 BinaryOp::Ge => expect_number(&left)? >= expect_number(&right)?,
665 BinaryOp::Lt => expect_number(&left)? < expect_number(&right)?,
666 BinaryOp::Gt => expect_number(&left)? > expect_number(&right)?,
667 BinaryOp::And | BinaryOp::Or => false,
668 BinaryOp::Add => {
669 return Ok(ModelValue::Number(
670 expect_number(&left)? + expect_number(&right)?,
671 ));
672 }
673 BinaryOp::Sub => {
674 return Ok(ModelValue::Number(
675 expect_number(&left)? - expect_number(&right)?,
676 ));
677 }
678 BinaryOp::Mul => {
679 return Ok(ModelValue::Number(
680 expect_number(&left)? * expect_number(&right)?,
681 ));
682 }
683 BinaryOp::Div => {
684 let divisor = expect_number(&right)?;
685 if divisor.abs() < f64::EPSILON {
686 return Err(type_mismatch("division by zero").into());
687 }
688 return Ok(ModelValue::Number(expect_number(&left)? / divisor));
689 }
690 BinaryOp::Rem => {
691 let divisor = expect_number(&right)?;
692 if divisor.abs() < f64::EPSILON {
693 return Err(type_mismatch("modulo by zero").into());
694 }
695 return Ok(ModelValue::Number(expect_number(&left)? % divisor));
696 }
697 };
698 Ok(ModelValue::Bool(result))
699 }
700
701 fn eval_call(
702 &mut self,
703 object: &crate::ModelObjectRef<'_>,
704 function: BuiltinFunction,
705 args: &[RuleExpr],
706 depth: u32,
707 ) -> Result<ModelValue> {
708 match function {
709 BuiltinFunction::HasParseFact => {
710 let value = self.eval_single_arg(object, args, depth, "hasParseFact")?;
711 let ModelValue::String(name) = value else {
712 return Err(type_mismatch("hasParseFact requires string").into());
713 };
714 Ok(ModelValue::Bool(has_parse_fact(
715 object.document().parse_facts.as_slice(),
716 name.as_str(),
717 )))
718 }
719 BuiltinFunction::Size => {
720 let value = self.eval_single_arg(object, args, depth, "size")?;
721 Ok(ModelValue::Number(usize_to_f64(collection_len(&value)?)?))
722 }
723 BuiltinFunction::IsEmpty => {
724 let value = self.eval_single_arg(object, args, depth, "isEmpty")?;
725 Ok(ModelValue::Bool(collection_len(&value)? == 0))
726 }
727 BuiltinFunction::Contains => {
728 if args.len() != 2 {
729 return Err(type_mismatch("contains requires two arguments").into());
730 }
731 let haystack = self.eval(
732 object,
733 args.first()
734 .ok_or_else(|| type_mismatch("contains requires haystack"))?,
735 depth.saturating_add(1),
736 )?;
737 let needle = self.eval(
738 object,
739 args.get(1)
740 .ok_or_else(|| type_mismatch("contains requires needle"))?,
741 depth.saturating_add(1),
742 )?;
743 Ok(ModelValue::Bool(contains_value(&haystack, &needle)?))
744 }
745 BuiltinFunction::All => {
746 let mut result = true;
747 for arg in args {
748 result &= expect_bool(&self.eval(object, arg, depth.saturating_add(1))?)?;
749 if !result {
750 break;
751 }
752 }
753 Ok(ModelValue::Bool(result))
754 }
755 BuiltinFunction::Exists => {
756 let mut result = false;
757 for arg in args {
758 result |= expect_bool(&self.eval(object, arg, depth.saturating_add(1))?)?;
759 if result {
760 break;
761 }
762 }
763 Ok(ModelValue::Bool(result))
764 }
765 BuiltinFunction::Matches => {
766 if args.len() != 2 {
767 return Err(type_mismatch("matches requires pattern and string").into());
768 }
769 let pattern = self.eval(
770 object,
771 args.first()
772 .ok_or_else(|| type_mismatch("matches requires pattern"))?,
773 depth.saturating_add(1),
774 )?;
775 let haystack = self.eval(
776 object,
777 args.get(1)
778 .ok_or_else(|| type_mismatch("matches requires string"))?,
779 depth.saturating_add(1),
780 )?;
781 let (ModelValue::String(pattern), ModelValue::String(haystack)) =
782 (pattern, haystack)
783 else {
784 return Err(type_mismatch("matches requires string arguments").into());
785 };
786 if pattern.as_str().len() > MAX_REGEX_PATTERN_BYTES
787 || haystack.as_str().len() > MAX_REGEX_HAYSTACK_BYTES
788 {
789 return Err(ProfileError::BudgetExceeded { budget: "regex" }.into());
790 }
791 let regex = RegexBuilder::new(pattern.as_str())
792 .size_limit(1 << 20)
793 .dfa_size_limit(1 << 20)
794 .build()
795 .map_err(|error| ProfileError::InvalidField {
796 field: "regex",
797 reason: BoundedText::new(error.to_string(), 512)
798 .unwrap_or_else(|_| BoundedText::unchecked("invalid regex")),
799 })?;
800 Ok(ModelValue::Bool(regex.is_match(haystack.as_str())))
801 }
802 }
803 }
804
805 fn eval_single_arg(
806 &mut self,
807 object: &crate::ModelObjectRef<'_>,
808 args: &[RuleExpr],
809 depth: u32,
810 name: &'static str,
811 ) -> Result<ModelValue> {
812 if args.len() != 1 {
813 return Err(type_mismatch("built-in requires exactly one argument").into());
814 }
815 let Some(first) = args.first() else {
816 return Err(type_mismatch(name).into());
817 };
818 self.eval(object, first, depth.saturating_add(1))
819 }
820}
821
822impl RuleEvaluator for DefaultRuleEvaluator {
823 fn evaluate(&mut self, object: crate::ModelObjectRef<'_>, rule: &Rule) -> Result<RuleOutcome> {
824 self.instructions = 0;
825 let value = self.eval(&object, &rule.test, 0)?;
826 if expect_bool(&value)? {
827 Ok(RuleOutcome::Passed)
828 } else {
829 Ok(RuleOutcome::Failed)
830 }
831 }
832}
833
834fn property(object: &crate::ModelObjectRef<'_>, path: &PropertyPath) -> Result<ModelValue> {
835 if path.parts().is_empty() {
836 return Err(ProfileError::UnknownProperty {
837 property: BoundedText::unchecked("empty"),
838 }
839 .into());
840 }
841 if path.parts().len() > 1 {
842 return Err(ProfileError::UnsupportedRule {
843 reason: BoundedText::unchecked("nested property path has no bound model link"),
844 }
845 .into());
846 }
847 let name = path
848 .parts()
849 .first()
850 .ok_or_else(|| ProfileError::UnknownProperty {
851 property: BoundedText::unchecked("empty"),
852 })?;
853 object.property(name)
854}
855
856fn expect_bool(value: &ModelValue) -> Result<bool> {
857 match value {
858 ModelValue::Bool(value) => Ok(*value),
859 _ => Err(type_mismatch("expected boolean").into()),
860 }
861}
862
863fn expect_number(value: &ModelValue) -> Result<f64> {
864 match value {
865 ModelValue::Number(value) => Ok(*value),
866 _ => Err(type_mismatch("expected number").into()),
867 }
868}
869
870fn values_equal(left: &ModelValue, right: &ModelValue) -> bool {
871 match (left, right) {
872 (ModelValue::Null, ModelValue::Null) => true,
873 (ModelValue::Bool(left), ModelValue::Bool(right)) => left == right,
874 (ModelValue::Number(left), ModelValue::Number(right)) => {
875 (left - right).abs() < f64::EPSILON
876 }
877 (ModelValue::String(left), ModelValue::String(right)) => left == right,
878 (ModelValue::ObjectKey(left), ModelValue::ObjectKey(right)) => left == right,
879 (ModelValue::List(left), ModelValue::List(right)) => left == right,
880 _ => false,
881 }
882}
883
884fn collection_len(value: &ModelValue) -> Result<usize> {
885 match value {
886 ModelValue::String(value) => Ok(value.as_str().len()),
887 ModelValue::List(value) => Ok(value.len()),
888 _ => Err(type_mismatch("expected collection or string").into()),
889 }
890}
891
892fn usize_to_f64(value: usize) -> Result<f64> {
893 let value = u32::try_from(value).map_err(|_| ProfileError::BudgetExceeded {
894 budget: "collection_size",
895 })?;
896 Ok(f64::from(value))
897}
898
899fn contains_value(haystack: &ModelValue, needle: &ModelValue) -> Result<bool> {
900 match (haystack, needle) {
901 (ModelValue::String(haystack), ModelValue::String(needle)) => {
902 Ok(haystack.as_str().contains(needle.as_str()))
903 }
904 (ModelValue::List(values), needle) => {
905 Ok(values.iter().any(|value| values_equal(value, needle)))
906 }
907 _ => Err(type_mismatch("contains requires compatible arguments").into()),
908 }
909}
910
911fn type_mismatch(message: &'static str) -> ProfileError {
912 ProfileError::TypeMismatch {
913 message: BoundedText::unchecked(message),
914 }
915}
916
917fn has_parse_fact(facts: &[ParseFact], name: &str) -> bool {
918 facts.iter().any(|fact| match (name, fact) {
919 ("header", ParseFact::Header { .. })
920 | (
921 "encryption",
922 ParseFact::Encryption {
923 encrypted: true, ..
924 },
925 ) => true,
926 (
927 "streamLengthMismatch",
928 ParseFact::Stream {
929 fact:
930 StreamFact::Length {
931 declared,
932 discovered,
933 },
934 ..
935 },
936 ) => declared != discovered,
937 _ => false,
938 })
939}
940
941fn pdfa_1b_flavour() -> Result<ValidationFlavour> {
942 Ok(ValidationFlavour::new("pdfa", NonZeroU32::MIN, "b")?)
943}
944
945fn ensure_builtin_flavour(flavour: &ValidationFlavour) -> Result<()> {
946 builtin_source_for_flavour(flavour).map(|_| ())
947}
948
949fn import_generated_profile(source: &GeneratedProfileSource) -> Result<ProfileImportSummary> {
950 let mut import = import_verapdf_profile_xml(source.xml)?;
951 import.profile.identity.id = Identifier::new(source.id)?;
952 import.profile.identity.version = Some(Identifier::new("verapdf-generated")?);
953 import.profile.flavour = parse_display_flavour(source.display_flavour)?;
954 apply_model_schema_checks(&mut import)?;
955 Ok(import)
956}
957
958fn apply_model_schema_checks(import: &mut ProfileImportSummary) -> Result<()> {
959 let registry = crate::validation::ModelRegistry::default_registry();
960 let mut supported_rules = 0_u64;
961 let mut unsupported_rules = 0_u64;
962 for rule in &mut import.profile.rules {
963 if matches!(rule.test, RuleExpr::Unsupported { .. }) {
964 unsupported_rules = unsupported_rules.saturating_add(1);
965 continue;
966 }
967 let unsupported_reason = if registry.has_family(&rule.object_type) {
968 unsupported_property_reason(®istry, &rule.object_type, &rule.test)?
969 } else {
970 Some(BoundedText::new(
971 format!(
972 "unknown validation model family {}",
973 rule.object_type.as_str()
974 ),
975 512,
976 )?)
977 };
978 if let Some(reason) = unsupported_reason {
979 let fragment = BoundedText::new(format!("{:?}", rule.test), MAX_PROFILE_STRING_BYTES)
980 .unwrap_or_else(|_| BoundedText::unchecked("rule expression exceeds limit"));
981 rule.test = RuleExpr::Unsupported { fragment, reason };
982 unsupported_rules = unsupported_rules.saturating_add(1);
983 } else {
984 supported_rules = supported_rules.saturating_add(1);
985 }
986 }
987 import.supported_rules = supported_rules;
988 import.unsupported_rules = unsupported_rules;
989 Ok(())
990}
991
992fn unsupported_property_reason(
993 registry: &crate::validation::ModelRegistry,
994 object_type: &ObjectTypeName,
995 expr: &RuleExpr,
996) -> Result<Option<BoundedText>> {
997 let mut properties = Vec::new();
998 collect_property_paths(expr, &mut properties);
999 for property in properties {
1000 let Some(first) = property.parts().first() else {
1001 return Ok(Some(BoundedText::unchecked("empty model property path")));
1002 };
1003 if property.parts().len() > 1 {
1004 return Ok(Some(BoundedText::unchecked(
1005 "nested property path has no bound model link",
1006 )));
1007 }
1008 if !registry.has_family_property(object_type, first) {
1009 return Ok(Some(BoundedText::new(
1010 format!(
1011 "unknown validation model property {} on {}",
1012 first.as_str(),
1013 object_type.as_str()
1014 ),
1015 512,
1016 )?));
1017 }
1018 }
1019 Ok(None)
1020}
1021
1022fn collect_property_paths<'a>(expr: &'a RuleExpr, properties: &mut Vec<&'a PropertyPath>) {
1023 match expr {
1024 RuleExpr::Property { path } => properties.push(path),
1025 RuleExpr::Unary { expr, .. } => collect_property_paths(expr, properties),
1026 RuleExpr::Binary { left, right, .. } => {
1027 collect_property_paths(left, properties);
1028 collect_property_paths(right, properties);
1029 }
1030 RuleExpr::Conditional {
1031 condition,
1032 when_true,
1033 when_false,
1034 } => {
1035 collect_property_paths(condition, properties);
1036 collect_property_paths(when_true, properties);
1037 collect_property_paths(when_false, properties);
1038 }
1039 RuleExpr::Call { args, .. } => {
1040 for arg in args {
1041 collect_property_paths(arg, properties);
1042 }
1043 }
1044 RuleExpr::Bool { .. }
1045 | RuleExpr::Number { .. }
1046 | RuleExpr::String { .. }
1047 | RuleExpr::Null
1048 | RuleExpr::Unsupported { .. } => {}
1049 }
1050}
1051
1052fn builtin_source_for_flavour(
1053 flavour: &ValidationFlavour,
1054) -> Result<&'static GeneratedProfileSource> {
1055 let display = display_flavour(flavour)?;
1056 GENERATED_PROFILE_SOURCES
1057 .iter()
1058 .find(|source| source.display_flavour == display.as_str())
1059 .ok_or_else(|| ProfileError::UnsupportedSelection.into())
1060}
1061
1062pub fn display_flavour(flavour: &ValidationFlavour) -> Result<BoundedText> {
1069 let family = flavour.family.as_str();
1070 let conformance = flavour.conformance.as_str();
1071 let value = match family {
1072 "pdfa" if conformance == "none" => format!("pdfa-{}", flavour.part),
1073 "pdfa" => format!("pdfa-{}{}", flavour.part, conformance),
1074 "pdfua" if flavour.part.get() == 1 && conformance == "none" => String::from("pdfua-1"),
1075 "pdfua" if flavour.part.get() == 2 && conformance == "iso32005" => {
1076 String::from("pdfua-2-iso32005")
1077 }
1078 "wtpdf" if matches!(conformance, "reuse" | "accessibility") => {
1079 format!("wtpdf-1-0-{conformance}")
1080 }
1081 _ => return Err(ProfileError::UnsupportedSelection.into()),
1082 };
1083 Ok(BoundedText::new(value, 128)?)
1084}
1085
1086fn parse_display_flavour(value: &str) -> Result<ValidationFlavour> {
1087 if let Some(rest) = value.strip_prefix("pdfa-") {
1088 return parse_display_pdfa_flavour(rest);
1089 }
1090 if let Some(rest) = value.strip_prefix("pdfua-") {
1091 return parse_display_pdfua_flavour(rest);
1092 }
1093 if let Some(level) = value.strip_prefix("wtpdf-1-0-")
1094 && matches!(level, "reuse" | "accessibility")
1095 {
1096 return ValidationFlavour::new("wtpdf", NonZeroU32::MIN, level).map_err(Into::into);
1097 }
1098 Err(ProfileError::UnsupportedSelection.into())
1099}
1100
1101fn parse_display_pdfa_flavour(rest: &str) -> Result<ValidationFlavour> {
1102 let split_at = rest
1103 .find(|character: char| !character.is_ascii_digit())
1104 .unwrap_or(rest.len());
1105 let (part, conformance) = rest.split_at(split_at);
1106 let part = part
1107 .parse::<u32>()
1108 .map_err(|_| ProfileError::InvalidField {
1109 field: "flavour",
1110 reason: BoundedText::unchecked("PDF/A part is not numeric"),
1111 })?;
1112 let part = NonZeroU32::new(part).ok_or(ProfileError::InvalidField {
1113 field: "flavour",
1114 reason: BoundedText::unchecked("PDF/A part is zero"),
1115 })?;
1116 let conformance = if conformance.is_empty() {
1117 "none"
1118 } else {
1119 conformance
1120 };
1121 ValidationFlavour::new("pdfa", part, conformance).map_err(Into::into)
1122}
1123
1124fn parse_display_pdfua_flavour(rest: &str) -> Result<ValidationFlavour> {
1125 match rest {
1126 "1" => ValidationFlavour::new("pdfua", NonZeroU32::MIN, "none").map_err(Into::into),
1127 "2-iso32005" => ValidationFlavour::new(
1128 "pdfua",
1129 NonZeroU32::new(2).ok_or(ProfileError::UnsupportedSelection)?,
1130 "iso32005",
1131 )
1132 .map_err(Into::into),
1133 _ => Err(ProfileError::UnsupportedSelection.into()),
1134 }
1135}
1136
1137fn m4_profile(flavour: ValidationFlavour) -> Result<ValidationProfile> {
1138 Ok(ValidationProfile {
1139 identity: ProfileIdentity {
1140 id: Identifier::new("pdfv-m4")?,
1141 name: BoundedText::new("pdfv M4 built-in profile", 128)?,
1142 version: Some(Identifier::new("0.1.0")?),
1143 },
1144 flavour,
1145 rules: vec![
1146 rule(
1147 "m0-header-offset-zero",
1148 "document",
1149 "PDF header must start at byte zero",
1150 property_expr("headerOffset")?,
1151 BinaryOp::Eq,
1152 RuleExpr::Number { value: 0.0 },
1153 )?,
1154 rule(
1155 "m0-document-not-encrypted",
1156 "document",
1157 "Encrypted documents are not validated in M0",
1158 property_expr("encrypted")?,
1159 BinaryOp::Eq,
1160 RuleExpr::Bool { value: false },
1161 )?,
1162 rule(
1163 "m0-catalog-present",
1164 "document",
1165 "Trailer must reference a catalog",
1166 property_expr("hasCatalog")?,
1167 BinaryOp::Eq,
1168 RuleExpr::Bool { value: true },
1169 )?,
1170 rule(
1171 "m4-page-contents-present",
1172 "page",
1173 "Page dictionaries must contain contents",
1174 property_expr("hasContents")?,
1175 BinaryOp::Eq,
1176 RuleExpr::Bool { value: true },
1177 )?,
1178 rule(
1179 "m4-page-resources-present",
1180 "page",
1181 "Page dictionaries must contain resources",
1182 property_expr("hasResources")?,
1183 BinaryOp::Eq,
1184 RuleExpr::Bool { value: true },
1185 )?,
1186 rule(
1187 "m4-font-subtype-present",
1188 "font",
1189 "Font dictionaries must contain a Subtype entry",
1190 property_expr("hasSubtype")?,
1191 BinaryOp::Eq,
1192 RuleExpr::Bool { value: true },
1193 )?,
1194 rule(
1195 "m4-annotation-subtype-present",
1196 "annotation",
1197 "Annotation dictionaries must contain a Subtype entry",
1198 property_expr("hasSubtype")?,
1199 BinaryOp::Eq,
1200 RuleExpr::Bool { value: true },
1201 )?,
1202 rule(
1203 "m4-output-intent-profile-present",
1204 "outputIntent",
1205 "Output intent dictionaries must contain a destination output profile",
1206 property_expr("hasDestOutputProfile")?,
1207 BinaryOp::Eq,
1208 RuleExpr::Bool { value: true },
1209 )?,
1210 rule(
1211 "m4-content-stream-length-non-negative",
1212 "contentStream",
1213 "Page content streams must expose a non-negative declared or discovered length",
1214 property_expr("declaredLength")?,
1215 BinaryOp::Ge,
1216 RuleExpr::Number { value: 0.0 },
1217 )?,
1218 rule(
1219 "m0-stream-length-matches",
1220 "stream",
1221 "Stream declared length must match discovered length",
1222 property_expr("lengthMatches")?,
1223 BinaryOp::Eq,
1224 RuleExpr::Bool { value: true },
1225 )?,
1226 ],
1227 })
1228}
1229
1230fn rule(
1231 id: &str,
1232 object_type: &str,
1233 description: &str,
1234 left: RuleExpr,
1235 op: BinaryOp,
1236 right: RuleExpr,
1237) -> Result<Rule> {
1238 Ok(Rule {
1239 id: RuleId(Identifier::new(id)?),
1240 object_type: ObjectTypeName::new(object_type)?,
1241 deferred: false,
1242 tags: Vec::new(),
1243 description: BoundedText::new(description, 256)?,
1244 test: RuleExpr::Binary {
1245 op,
1246 left: Box::new(left),
1247 right: Box::new(right),
1248 },
1249 error: ErrorTemplate {
1250 message: BoundedText::new(description, 256)?,
1251 },
1252 references: Vec::new(),
1253 })
1254}
1255
1256fn property_expr(name: &str) -> Result<RuleExpr> {
1257 Ok(RuleExpr::Property {
1258 path: PropertyPath::new(vec![PropertyName(Identifier::new(name)?)]),
1259 })
1260}
1261
#[cfg(feature = "custom-profiles")]
#[allow(
    clippy::disallowed_methods,
    reason = "custom profile loading is a synchronous library API matching validate_path"
)]
/// Reads a veraPDF profile XML file from `path` and imports it.
///
/// The file is opened once and read through a reader capped at
/// `MAX_PROFILE_XML_BYTES + 1` bytes. The reported metadata length is only a
/// fast-path rejection: it can be zero for special files (pipes, devices) and
/// can change between the size check and the read, so the cap on the reader is
/// what actually bounds memory use.
///
/// # Errors
///
/// Returns `PdfvError::Io` when the file cannot be opened, inspected, or read,
/// or when its content is not valid UTF-8; returns `ProfileError::InvalidXml`
/// when the content exceeds the byte limit; otherwise propagates the error of
/// `import_verapdf_profile_xml`.
fn load_verapdf_profile_path(path: &std::path::Path) -> Result<ProfileImportSummary> {
    use std::io::Read as _;

    let io_error = |source: std::io::Error| crate::PdfvError::Io {
        path: Some(path.to_path_buf()),
        source,
    };
    let too_large = || ProfileError::InvalidXml {
        reason: BoundedText::unchecked("profile XML exceeds byte limit"),
    };

    let file = std::fs::File::open(path).map_err(io_error)?;
    let metadata = file.metadata().map_err(io_error)?;
    if metadata.len() > MAX_PROFILE_XML_BYTES {
        return Err(too_large().into());
    }

    // Read one byte past the limit so an oversized stream is detectable
    // without ever buffering more than the limit allows.
    let mut bytes = Vec::new();
    file.take(MAX_PROFILE_XML_BYTES.saturating_add(1))
        .read_to_end(&mut bytes)
        .map_err(io_error)?;
    let within_limit =
        u64::try_from(bytes.len()).is_ok_and(|length| length <= MAX_PROFILE_XML_BYTES);
    if !within_limit {
        return Err(too_large().into());
    }

    // Invalid UTF-8 is surfaced as an I/O error of kind `InvalidData`, the same
    // kind `read_to_string` reports.
    let xml = String::from_utf8(bytes).map_err(|error| {
        io_error(std::io::Error::new(std::io::ErrorKind::InvalidData, error))
    })?;
    import_verapdf_profile_xml(&xml)
}
1284
/// Imports a veraPDF validation profile from its XML text.
///
/// This is the public entry point; it forwards `xml` unchanged to
/// `import_verapdf_profile_xml_impl`.
///
/// # Errors
///
/// Returns whatever error `import_verapdf_profile_xml_impl` reports.
pub fn import_verapdf_profile_xml(xml: &str) -> Result<ProfileImportSummary> {
    import_verapdf_profile_xml_impl(xml)
}
1296
1297#[allow(
1298 clippy::too_many_lines,
1299 reason = "event-driven XML import keeps parser state local and explicit"
1300)]
1301fn import_verapdf_profile_xml_impl(xml: &str) -> Result<ProfileImportSummary> {
1302 use quick_xml::{Reader, events::Event};
1303
1304 if u64::try_from(xml.len()).map_err(|_| ProfileError::InvalidXml {
1305 reason: BoundedText::unchecked("profile XML length overflow"),
1306 })? > MAX_PROFILE_XML_BYTES
1307 {
1308 return Err(ProfileError::InvalidXml {
1309 reason: BoundedText::unchecked("profile XML exceeds byte limit"),
1310 }
1311 .into());
1312 }
1313
1314 let mut reader = Reader::from_str(xml);
1315 reader.config_mut().trim_text(true);
1316 let mut elements = 0_u64;
1317 let mut profile_name: Option<BoundedText> = None;
1318 let mut flavour: Option<ValidationFlavour> = None;
1319 let mut rules = Vec::new();
1320 let mut current_rule: Option<XmlRuleBuilder> = None;
1321 let mut current_text = XmlTextTarget::None;
1322 let mut depth = 0_u32;
1323
1324 loop {
1325 let event = reader
1326 .read_event()
1327 .map_err(|error| ProfileError::InvalidXml {
1328 reason: BoundedText::new(error.to_string(), 512)
1329 .unwrap_or_else(|_| BoundedText::unchecked("XML parser error")),
1330 })?;
1331 match event {
1332 Event::Start(element) => {
1333 validate_element(&element)?;
1334 depth = depth.checked_add(1).ok_or(ProfileError::InvalidXml {
1335 reason: BoundedText::unchecked("profile XML depth overflow"),
1336 })?;
1337 if depth > MAX_PROFILE_XML_DEPTH {
1338 return Err(ProfileError::InvalidXml {
1339 reason: BoundedText::unchecked("profile XML exceeds depth limit"),
1340 }
1341 .into());
1342 }
1343 elements = elements.checked_add(1).ok_or(ProfileError::InvalidXml {
1344 reason: BoundedText::unchecked("profile XML element count overflow"),
1345 })?;
1346 if elements > MAX_PROFILE_XML_ELEMENTS {
1347 return Err(ProfileError::InvalidXml {
1348 reason: BoundedText::unchecked("profile XML exceeds element limit"),
1349 }
1350 .into());
1351 }
1352 match element.name().as_ref() {
1353 b"profile" => {
1354 flavour = profile_flavour_attr(&element)?;
1355 }
1356 b"rule" => {
1357 if rules.len() >= MAX_PROFILE_RULES {
1358 return Err(ProfileError::InvalidXml {
1359 reason: BoundedText::unchecked("profile XML exceeds rule limit"),
1360 }
1361 .into());
1362 }
1363 current_rule = Some(XmlRuleBuilder::from_rule_start(&element)?);
1364 }
1365 b"name" if current_rule.is_none() => current_text = XmlTextTarget::ProfileName,
1366 b"description" if current_rule.is_some() => {
1367 current_text = XmlTextTarget::RuleDescription;
1368 }
1369 b"test" if current_rule.is_some() => current_text = XmlTextTarget::RuleTest,
1370 b"message" if current_rule.is_some() => {
1371 current_text = XmlTextTarget::RuleMessage;
1372 }
1373 b"id" if current_rule.is_some() => {
1374 if let Some(rule) = current_rule.as_mut() {
1375 rule.id = Some(rule_id_from_attrs(&element)?);
1376 }
1377 }
1378 b"reference" if current_rule.is_some() => {
1379 if let Some(rule) = current_rule.as_mut() {
1380 rule.references.push(reference_from_attrs(&element)?);
1381 }
1382 }
1383 _ => {}
1384 }
1385 }
1386 Event::Text(text) => {
1387 let decoded = text.decode().map_err(|error| ProfileError::InvalidXml {
1388 reason: BoundedText::new(error.to_string(), 512)
1389 .unwrap_or_else(|_| BoundedText::unchecked("XML text decode error")),
1390 })?;
1391 let bounded = BoundedText::new(decoded.into_owned(), MAX_PROFILE_STRING_BYTES)?;
1392 match current_text {
1393 XmlTextTarget::ProfileName => profile_name = Some(bounded),
1394 XmlTextTarget::RuleDescription => {
1395 if let Some(rule) = current_rule.as_mut() {
1396 rule.description = Some(bounded);
1397 }
1398 }
1399 XmlTextTarget::RuleTest => {
1400 if let Some(rule) = current_rule.as_mut() {
1401 rule.test = Some(bounded);
1402 }
1403 }
1404 XmlTextTarget::RuleMessage => {
1405 if let Some(rule) = current_rule.as_mut() {
1406 rule.message = Some(bounded);
1407 }
1408 }
1409 XmlTextTarget::None => {}
1410 }
1411 }
1412 Event::End(element) => {
1413 match element.name().as_ref() {
1414 b"name" | b"description" | b"test" | b"message" => {
1415 current_text = XmlTextTarget::None;
1416 }
1417 b"rule" => {
1418 let Some(builder) = current_rule.take() else {
1419 return Err(ProfileError::InvalidXml {
1420 reason: BoundedText::unchecked("closing rule without start"),
1421 }
1422 .into());
1423 };
1424 rules.push(builder.finish()?);
1425 }
1426 _ => {}
1427 }
1428 depth = depth.checked_sub(1).ok_or(ProfileError::InvalidXml {
1429 reason: BoundedText::unchecked("profile XML depth underflow"),
1430 })?;
1431 }
1432 Event::Empty(element) => {
1433 validate_element(&element)?;
1434 elements = elements.checked_add(1).ok_or(ProfileError::InvalidXml {
1435 reason: BoundedText::unchecked("profile XML element count overflow"),
1436 })?;
1437 if elements > MAX_PROFILE_XML_ELEMENTS {
1438 return Err(ProfileError::InvalidXml {
1439 reason: BoundedText::unchecked("profile XML exceeds element limit"),
1440 }
1441 .into());
1442 }
1443 if element.name().as_ref() == b"id"
1444 && let Some(rule) = current_rule.as_mut()
1445 {
1446 rule.id = Some(rule_id_from_attrs(&element)?);
1447 }
1448 if element.name().as_ref() == b"reference"
1449 && let Some(rule) = current_rule.as_mut()
1450 {
1451 rule.references.push(reference_from_attrs(&element)?);
1452 }
1453 }
1454 Event::Eof => break,
1455 _ => {}
1456 }
1457 }
1458
1459 let flavour = flavour.ok_or(ProfileError::InvalidXml {
1460 reason: BoundedText::unchecked("profile flavour is missing"),
1461 })?;
1462 let profile_id = profile_id_for_flavour(&flavour)?;
1463 let mut supported_rules = 0_u64;
1464 let mut unsupported_rules = 0_u64;
1465 for rule in &rules {
1466 if matches!(rule.test, RuleExpr::Unsupported { .. }) {
1467 unsupported_rules = unsupported_rules.saturating_add(1);
1468 } else {
1469 supported_rules = supported_rules.saturating_add(1);
1470 }
1471 }
1472
1473 Ok(ProfileImportSummary {
1474 profile: ValidationProfile {
1475 identity: ProfileIdentity {
1476 id: profile_id,
1477 name: profile_name.unwrap_or_else(|| BoundedText::unchecked("veraPDF profile")),
1478 version: Some(Identifier::new("verapdf-xml")?),
1479 },
1480 flavour,
1481 rules,
1482 },
1483 supported_rules,
1484 unsupported_rules,
1485 })
1486}
1487
/// Identifies which piece of import state the text of the currently open XML
/// element belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum XmlTextTarget {
    /// Text is not captured.
    None,
    /// Text is the profile name.
    ProfileName,
    /// Text is the description of the rule being built.
    RuleDescription,
    /// Text is the test expression of the rule being built.
    RuleTest,
    /// Text is the error message of the rule being built.
    RuleMessage,
}
1496
1497#[derive(Debug, Default)]
1498struct XmlRuleBuilder {
1499 object_type: Option<ObjectTypeName>,
1500 unsupported_reason: Option<BoundedText>,
1501 deferred: bool,
1502 id: Option<RuleId>,
1503 description: Option<BoundedText>,
1504 test: Option<BoundedText>,
1505 message: Option<BoundedText>,
1506 references: Vec<SpecReference>,
1507}
1508
1509impl XmlRuleBuilder {
1510 fn from_rule_start(element: &quick_xml::events::BytesStart<'_>) -> Result<Self> {
1511 let source_object_type = required_attr(element, b"object")?;
1512 let (object_type, unsupported_reason) = map_verapdf_object_type(&source_object_type)?;
1513 Ok(Self {
1514 object_type: Some(object_type),
1515 unsupported_reason,
1516 deferred: optional_bool_attr(element, b"deferred")?,
1517 ..Self::default()
1518 })
1519 }
1520
1521 fn finish(self) -> Result<Rule> {
1522 let id = self.id.ok_or(ProfileError::InvalidXml {
1523 reason: BoundedText::unchecked("rule id is missing"),
1524 })?;
1525 let object_type = self.object_type.ok_or(ProfileError::InvalidXml {
1526 reason: BoundedText::unchecked("rule object type is missing"),
1527 })?;
1528 let description = self
1529 .description
1530 .unwrap_or_else(|| BoundedText::unchecked("Imported veraPDF rule"));
1531 let source_test = self.test.ok_or(ProfileError::InvalidXml {
1532 reason: BoundedText::unchecked("rule test is missing"),
1533 })?;
1534 let test = if let Some(reason) = self.unsupported_reason {
1535 RuleExpr::Unsupported {
1536 fragment: source_test.clone(),
1537 reason,
1538 }
1539 } else {
1540 parse_imported_expr(source_test.as_str()).unwrap_or_else(|reason| {
1541 RuleExpr::Unsupported {
1542 fragment: source_test.clone(),
1543 reason,
1544 }
1545 })
1546 };
1547 let message = self.message.unwrap_or_else(|| description.clone());
1548 Ok(Rule {
1549 id,
1550 object_type,
1551 deferred: self.deferred,
1552 tags: Vec::new(),
1553 description,
1554 test,
1555 error: ErrorTemplate { message },
1556 references: self.references,
1557 })
1558 }
1559}
1560
1561fn validate_element(element: &quick_xml::events::BytesStart<'_>) -> Result<()> {
1562 let name = element.name();
1563 let name = name.as_ref();
1564 if !matches!(
1565 name,
1566 b"profile"
1567 | b"details"
1568 | b"name"
1569 | b"description"
1570 | b"hash"
1571 | b"rules"
1572 | b"rule"
1573 | b"id"
1574 | b"test"
1575 | b"error"
1576 | b"message"
1577 | b"arguments"
1578 | b"argument"
1579 | b"references"
1580 | b"reference"
1581 | b"variables"
1582 | b"variable"
1583 | b"defaultValue"
1584 | b"value"
1585 ) {
1586 return Err(ProfileError::InvalidXml {
1587 reason: BoundedText::new(
1588 format!(
1589 "unknown profile XML element {}",
1590 String::from_utf8_lossy(name)
1591 ),
1592 512,
1593 )
1594 .unwrap_or_else(|_| BoundedText::unchecked("unknown profile XML element")),
1595 }
1596 .into());
1597 }
1598 let mut attributes = 0_usize;
1599 for attr in element.attributes().with_checks(true) {
1600 let attr = attr.map_err(|error| ProfileError::InvalidXml {
1601 reason: BoundedText::new(error.to_string(), 512)
1602 .unwrap_or_else(|_| BoundedText::unchecked("XML attribute error")),
1603 })?;
1604 attributes = attributes.checked_add(1).ok_or(ProfileError::InvalidXml {
1605 reason: BoundedText::unchecked("profile XML attribute count overflow"),
1606 })?;
1607 if attributes > MAX_PROFILE_XML_ATTRIBUTES {
1608 return Err(ProfileError::InvalidXml {
1609 reason: BoundedText::unchecked("profile XML exceeds attribute limit"),
1610 }
1611 .into());
1612 }
1613 validate_attribute(name, attr.key.as_ref())?;
1614 }
1615 Ok(())
1616}
1617
1618fn validate_attribute(element: &[u8], attr: &[u8]) -> Result<()> {
1619 let allowed = match element {
1620 b"profile" => matches!(attr, b"flavour" | b"xmlns"),
1621 b"details" => matches!(attr, b"creator" | b"created"),
1622 b"rule" => matches!(attr, b"object" | b"deferred" | b"tags"),
1623 b"id" => matches!(attr, b"specification" | b"clause" | b"testNumber"),
1624 b"reference" => matches!(attr, b"specification" | b"clause"),
1625 b"variable" => matches!(attr, b"name" | b"object"),
1626 _ => false,
1627 };
1628 if allowed {
1629 Ok(())
1630 } else {
1631 Err(ProfileError::InvalidXml {
1632 reason: BoundedText::new(
1633 format!(
1634 "unknown profile XML attribute {}",
1635 String::from_utf8_lossy(attr)
1636 ),
1637 512,
1638 )
1639 .unwrap_or_else(|_| BoundedText::unchecked("unknown profile XML attribute")),
1640 }
1641 .into())
1642 }
1643}
1644
1645fn profile_flavour_attr(
1646 element: &quick_xml::events::BytesStart<'_>,
1647) -> Result<Option<ValidationFlavour>> {
1648 for attr in element.attributes().with_checks(true) {
1649 let attr = attr.map_err(|error| ProfileError::InvalidXml {
1650 reason: BoundedText::new(error.to_string(), 512)
1651 .unwrap_or_else(|_| BoundedText::unchecked("XML attribute error")),
1652 })?;
1653 if attr.key.as_ref() == b"flavour" {
1654 let value = String::from_utf8_lossy(attr.value.as_ref()).into_owned();
1655 return Ok(Some(parse_verapdf_flavour(&value)?));
1656 }
1657 }
1658 Ok(None)
1659}
1660
1661fn required_attr(element: &quick_xml::events::BytesStart<'_>, name: &[u8]) -> Result<String> {
1662 for attr in element.attributes().with_checks(true) {
1663 let attr = attr.map_err(|error| ProfileError::InvalidXml {
1664 reason: BoundedText::new(error.to_string(), 512)
1665 .unwrap_or_else(|_| BoundedText::unchecked("XML attribute error")),
1666 })?;
1667 if attr.key.as_ref() == name {
1668 return Ok(String::from_utf8_lossy(attr.value.as_ref()).into_owned());
1669 }
1670 }
1671 Err(ProfileError::InvalidXml {
1672 reason: BoundedText::unchecked("required XML attribute is missing"),
1673 }
1674 .into())
1675}
1676
1677fn optional_bool_attr(element: &quick_xml::events::BytesStart<'_>, name: &[u8]) -> Result<bool> {
1678 for attr in element.attributes().with_checks(true) {
1679 let attr = attr.map_err(|error| ProfileError::InvalidXml {
1680 reason: BoundedText::new(error.to_string(), 512)
1681 .unwrap_or_else(|_| BoundedText::unchecked("XML attribute error")),
1682 })?;
1683 if attr.key.as_ref() == name {
1684 return match attr.value.as_ref() {
1685 b"true" => Ok(true),
1686 b"false" => Ok(false),
1687 _ => Err(ProfileError::InvalidField {
1688 field: "deferred",
1689 reason: BoundedText::unchecked("expected true or false"),
1690 }
1691 .into()),
1692 };
1693 }
1694 }
1695 Ok(false)
1696}
1697
1698fn rule_id_from_attrs(element: &quick_xml::events::BytesStart<'_>) -> Result<RuleId> {
1699 let specification = required_attr(element, b"specification")?;
1700 let clause = required_attr(element, b"clause")?;
1701 let test_number = required_attr(element, b"testNumber")?;
1702 let text = format!(
1703 "{}-{}-{}",
1704 identifier_fragment(&specification),
1705 identifier_fragment(&clause),
1706 identifier_fragment(&test_number)
1707 );
1708 Ok(RuleId(Identifier::new(text)?))
1709}
1710
1711fn reference_from_attrs(element: &quick_xml::events::BytesStart<'_>) -> Result<SpecReference> {
1712 Ok(SpecReference {
1713 specification: BoundedText::new(required_attr(element, b"specification")?, 512)?,
1714 clause: BoundedText::new(required_attr(element, b"clause")?, 512)?,
1715 })
1716}
1717
/// Normalises `value` into an identifier fragment.
///
/// ASCII letters are lower-cased, ASCII digits are kept, and every other
/// character is replaced by `-`. Leading and trailing dashes are removed;
/// dashes in the middle are kept as they are, including runs of them.
fn identifier_fragment(value: &str) -> String {
    let mut slug = String::with_capacity(value.len());
    for character in value.chars() {
        let mapped = if character.is_ascii_alphanumeric() {
            character.to_ascii_lowercase()
        } else {
            '-'
        };
        slug.push(mapped);
    }
    slug.trim_matches('-').to_owned()
}
1732
1733fn parse_verapdf_flavour(value: &str) -> Result<ValidationFlavour> {
1734 let parts = value.split('_').collect::<Vec<_>>();
1735 let Some(family) = parts.first().copied() else {
1736 return Err(ProfileError::InvalidField {
1737 field: "flavour",
1738 reason: BoundedText::unchecked("profile flavour is empty"),
1739 }
1740 .into());
1741 };
1742 match family {
1743 "PDFA" => parse_numbered_flavour("pdfa", &parts, "none"),
1744 "PDFUA" => parse_pdfua_xml_flavour(&parts),
1745 "WTPDF" => parse_wtpdf_flavour(&parts),
1746 _ => Err(ProfileError::InvalidField {
1747 field: "flavour",
1748 reason: BoundedText::unchecked("unsupported profile flavour family"),
1749 }
1750 .into()),
1751 }
1752}
1753
1754fn parse_pdfua_xml_flavour(parts: &[&str]) -> Result<ValidationFlavour> {
1755 let part = parts
1756 .get(1)
1757 .ok_or(ProfileError::InvalidField {
1758 field: "flavour",
1759 reason: BoundedText::unchecked("missing PDF/UA part"),
1760 })?
1761 .parse::<u32>()
1762 .map_err(|_| ProfileError::InvalidField {
1763 field: "flavour",
1764 reason: BoundedText::unchecked("PDF/UA part is not numeric"),
1765 })?;
1766 let part = NonZeroU32::new(part).ok_or(ProfileError::InvalidField {
1767 field: "flavour",
1768 reason: BoundedText::unchecked("PDF/UA part is zero"),
1769 })?;
1770 let conformance = if part.get() == 2 { "iso32005" } else { "none" };
1771 ValidationFlavour::new("pdfua", part, conformance).map_err(Into::into)
1772}
1773
1774fn parse_numbered_flavour(
1775 family: &str,
1776 parts: &[&str],
1777 default_conformance: &str,
1778) -> Result<ValidationFlavour> {
1779 let part = parts
1780 .get(1)
1781 .ok_or(ProfileError::InvalidField {
1782 field: "flavour",
1783 reason: BoundedText::unchecked("missing flavour part"),
1784 })?
1785 .parse::<u32>()
1786 .map_err(|_| ProfileError::InvalidField {
1787 field: "flavour",
1788 reason: BoundedText::unchecked("flavour part is not numeric"),
1789 })?;
1790 let part = NonZeroU32::new(part).ok_or(ProfileError::InvalidField {
1791 field: "flavour",
1792 reason: BoundedText::unchecked("flavour part is zero"),
1793 })?;
1794 let conformance = parts
1795 .get(2)
1796 .copied()
1797 .unwrap_or(default_conformance)
1798 .to_ascii_lowercase();
1799 ValidationFlavour::new(family, part, conformance).map_err(Into::into)
1800}
1801
1802fn parse_wtpdf_flavour(parts: &[&str]) -> Result<ValidationFlavour> {
1803 if parts.len() != 4 || parts.get(1).copied() != Some("1") || parts.get(2).copied() != Some("0")
1804 {
1805 return Err(ProfileError::InvalidField {
1806 field: "flavour",
1807 reason: BoundedText::unchecked("expected WTPDF_1_0_<level>"),
1808 }
1809 .into());
1810 }
1811 let conformance = parts
1812 .get(3)
1813 .ok_or(ProfileError::InvalidField {
1814 field: "flavour",
1815 reason: BoundedText::unchecked("missing WTPDF level"),
1816 })?
1817 .to_ascii_lowercase();
1818 ValidationFlavour::new("wtpdf", NonZeroU32::MIN, conformance).map_err(Into::into)
1819}
1820
1821fn profile_id_for_flavour(flavour: &ValidationFlavour) -> Result<Identifier> {
1822 let display = display_flavour(flavour)?;
1823 Identifier::new(format!("verapdf-{}", display.as_str())).map_err(Into::into)
1824}
1825
1826#[allow(
1827 clippy::too_many_lines,
1828 reason = "veraPDF object taxonomy mapping is intentionally centralized for schema checks"
1829)]
1830fn map_verapdf_object_type(value: &str) -> Result<(ObjectTypeName, Option<BoundedText>)> {
1831 let mapped = match value {
1832 "CosDocument" | "PDDocument" | "CosXRef" | "CosTrailer" | "CosIndirect" | "CosInfo" => {
1833 Some("document")
1834 }
1835 "CosStream" => Some("stream"),
1836 "CosArray"
1837 | "CosDict"
1838 | "CosInteger"
1839 | "CosName"
1840 | "CosReal"
1841 | "CosString"
1842 | "CosTextString"
1843 | "CosUnicodeName"
1844 | "CosLang"
1845 | "CosBBox"
1846 | "CosActualText"
1847 | "CosAlt"
1848 | "CosBM"
1849 | "CosRenderingIntent"
1850 | "CosFileSpecification"
1851 | "CosFilter"
1852 | "CosIIFilter" => Some("object"),
1853 "GFCosMetadata"
1854 | "PDMetadata"
1855 | "Metadata"
1856 | "PDFAIdentification"
1857 | "PDFUAIdentification"
1858 | "XMPPackage"
1859 | "MainXMPPackage"
1860 | "XMPProperty"
1861 | "XMPLangAlt"
1862 | "ExtensionSchemaValueType"
1863 | "ExtensionSchemaDefinition"
1864 | "ExtensionSchemaProperty"
1865 | "ExtensionSchemaField"
1866 | "ExtensionSchemasContainer"
1867 | "ExtensionSchemaObject" => Some("metadata"),
1868 "PDCatalog" | "Catalog" => Some("catalog"),
1869 "PDPage" | "Page" => Some("page"),
1870 "PDFont"
1871 | "Font"
1872 | "PDSimpleFont"
1873 | "PDTrueTypeFont"
1874 | "PDType0Font"
1875 | "PDType1Font"
1876 | "PDCIDFont"
1877 | "TrueTypeFontProgram"
1878 | "Glyph" => Some("font"),
1879 "PDCMap" | "PDReferencedCMap" | "CMapFile" => Some("cMap"),
1880 "EmbeddedFile" => Some("embeddedFontFile"),
1881 "PDAnnotation"
1882 | "Annotation"
1883 | "PDAnnot"
1884 | "PDWidgetAnnot"
1885 | "PDLinkAnnot"
1886 | "PDMarkupAnnot"
1887 | "PDTrapNetAnnot"
1888 | "PDPrinterMarkAnnot"
1889 | "PDWatermarkAnnot"
1890 | "PDSoundAnnot"
1891 | "PDScreenAnnot"
1892 | "PDPopupAnnot"
1893 | "PDMovieAnnot"
1894 | "PDFileAttachmentAnnot"
1895 | "PDRubberStampAnnot"
1896 | "PDRichMediaAnnot"
1897 | "PD3DAnnot"
1898 | "PDInkAnnot" => Some("annotation"),
1899 "PDAction" | "PDNamedAction" | "PDGoToAction" | "PDAdditionalActions" => Some("action"),
1900 "PDAcroForm" => Some("acroForm"),
1901 "PDFormField" | "PDTextField" => Some("formField"),
1902 "OutputIntents" | "OutputIntent" | "PDOutputIntent" => Some("outputIntent"),
1903 "PDXObject" | "PDXForm" | "PD3DStream" | "PDMediaClip" | "PDRichMedia" => Some("xObject"),
1904 "PDXImage" | "JPEG2000" | "PDMaskImage" => Some("image"),
1905 "PDContentStream" | "Op_q_gsave" => Some("contentStream"),
1906 "Op_Undefined" => Some("undefinedOperator"),
1907 "PDOCConfig" => Some("optionalContentProperties"),
1908 "PDPerms" => Some("permissions"),
1909 "PDOutline" => Some("outline"),
1910 "PDDestination" => Some("destination"),
1911 "PDExtGState" => Some("extGState"),
1912 "PDDeviceN" | "PDICCBasedCMYK" | "PDDeviceRGB" | "PDDeviceGray" | "PDDeviceCMYK"
1913 | "PDSeparation" | "PDHalftone" | "PDGroup" | "ICCProfile" | "ICCOutputProfile"
1914 | "ICCInputProfile" => Some("colorSpace"),
1915 "PDStructTreeRoot" => Some("structureTreeRoot"),
1916 "PDStructElem"
1917 | "SEDocument"
1918 | "SEDocumentFragment"
1919 | "SEPart"
1920 | "SEArt"
1921 | "SESect"
1922 | "SEDiv"
1923 | "SEBlockQuote"
1924 | "SECaption"
1925 | "SETOC"
1926 | "SETOCI"
1927 | "SEIndex"
1928 | "SENonStruct"
1929 | "SEPrivate"
1930 | "SEP"
1931 | "SEH"
1932 | "SEHn"
1933 | "SEH1"
1934 | "SEH2"
1935 | "SEH3"
1936 | "SEH4"
1937 | "SEH5"
1938 | "SEH6"
1939 | "SEL"
1940 | "SELI"
1941 | "SELbl"
1942 | "SELBody"
1943 | "SETable"
1944 | "SETR"
1945 | "SETH"
1946 | "SETD"
1947 | "SETHead"
1948 | "SETBody"
1949 | "SETFoot"
1950 | "SESpan"
1951 | "SEQuote"
1952 | "SENote"
1953 | "SEReference"
1954 | "SEBibEntry"
1955 | "SECode"
1956 | "SELink"
1957 | "SEAnnot"
1958 | "SERuby"
1959 | "SEWarichu"
1960 | "SEFigure"
1961 | "SEFormula"
1962 | "SEForm"
1963 | "SEArtifact"
1964 | "SEStrong"
1965 | "SEEm"
1966 | "SETitle"
1967 | "SEFENote"
1968 | "SEAside"
1969 | "SESub"
1970 | "SEMathMLStructElem"
1971 | "SEMarkedContent"
1972 | "SESimpleContentItem"
1973 | "SEGraphicContentItem"
1974 | "SETableCell"
1975 | "SENonStandard"
1976 | "SETextItem"
1977 | "SEWT"
1978 | "SEWP"
1979 | "SERT"
1980 | "SERP"
1981 | "SERB" => Some("structureElement"),
1982 "PDSignature" | "PDSigRef" | "PKCSDataObject" => Some("signature"),
1983 "PDEncryption" => Some("security"),
1984 _ => None,
1985 };
1986 if let Some(mapped) = mapped {
1987 Ok((ObjectTypeName::new(mapped)?, None))
1988 } else {
1989 Ok((
1990 ObjectTypeName::new("document")?,
1991 Some(BoundedText::new(
1992 format!("unsupported veraPDF object type {value}"),
1993 512,
1994 )?),
1995 ))
1996 }
1997}
1998
1999fn parse_imported_expr(input: &str) -> std::result::Result<RuleExpr, BoundedText> {
2000 let mut parser = ExprParser::new(input);
2001 let expr = parser.parse_conditional()?;
2002 if matches!(expr, RuleExpr::Unsupported { .. }) {
2003 return Ok(expr);
2004 }
2005 parser.skip_ws();
2006 if parser.remaining().is_empty() {
2007 Ok(expr)
2008 } else {
2009 Err(BoundedText::unchecked("trailing expression input"))
2010 }
2011}
2012
2013#[derive(Debug)]
2014struct ExprParser<'a> {
2015 input: &'a str,
2016 offset: usize,
2017}
2018
2019impl<'a> ExprParser<'a> {
2020 fn new(input: &'a str) -> Self {
2021 Self { input, offset: 0 }
2022 }
2023
2024 fn remaining(&self) -> &'a str {
2025 &self.input[self.offset..]
2026 }
2027
2028 fn skip_ws(&mut self) {
2029 while self
2030 .remaining()
2031 .as_bytes()
2032 .first()
2033 .is_some_and(u8::is_ascii_whitespace)
2034 {
2035 self.offset = self.offset.saturating_add(1);
2036 }
2037 }
2038
2039 fn consume(&mut self, token: &str) -> bool {
2040 self.skip_ws();
2041 if self.remaining().starts_with(token) {
2042 self.offset = self.offset.saturating_add(token.len());
2043 true
2044 } else {
2045 false
2046 }
2047 }
2048
2049 fn parse_conditional(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2050 let condition = self.parse_or()?;
2051 if self.consume("?") {
2052 let when_true = self.parse_conditional()?;
2053 if !self.consume(":") {
2054 return Err(BoundedText::unchecked("missing ternary separator"));
2055 }
2056 let when_false = self.parse_conditional()?;
2057 Ok(RuleExpr::Conditional {
2058 condition: Box::new(condition),
2059 when_true: Box::new(when_true),
2060 when_false: Box::new(when_false),
2061 })
2062 } else {
2063 Ok(condition)
2064 }
2065 }
2066
2067 fn parse_or(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2068 let mut expr = self.parse_and()?;
2069 while self.consume("||") {
2070 let right = self.parse_and()?;
2071 expr = RuleExpr::Binary {
2072 op: BinaryOp::Or,
2073 left: Box::new(expr),
2074 right: Box::new(right),
2075 };
2076 }
2077 Ok(expr)
2078 }
2079
2080 fn parse_and(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2081 let mut expr = self.parse_comparison()?;
2082 while self.consume("&&") {
2083 let right = self.parse_comparison()?;
2084 expr = RuleExpr::Binary {
2085 op: BinaryOp::And,
2086 left: Box::new(expr),
2087 right: Box::new(right),
2088 };
2089 }
2090 Ok(expr)
2091 }
2092
2093 fn parse_comparison(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2094 let left = self.parse_additive()?;
2095 let op = if self.consume("==") {
2096 Some(BinaryOp::Eq)
2097 } else if self.consume("!=") {
2098 Some(BinaryOp::Ne)
2099 } else if self.consume("<=") {
2100 Some(BinaryOp::Le)
2101 } else if self.consume(">=") {
2102 Some(BinaryOp::Ge)
2103 } else if self.consume("<") {
2104 Some(BinaryOp::Lt)
2105 } else if self.consume(">") {
2106 Some(BinaryOp::Gt)
2107 } else {
2108 None
2109 };
2110 if let Some(op) = op {
2111 let right = self.parse_additive()?;
2112 Ok(RuleExpr::Binary {
2113 op,
2114 left: Box::new(left),
2115 right: Box::new(right),
2116 })
2117 } else {
2118 Ok(left)
2119 }
2120 }
2121
2122 fn parse_additive(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2123 let mut expr = self.parse_multiplicative()?;
2124 loop {
2125 let op = if self.consume("+") {
2126 Some(BinaryOp::Add)
2127 } else if self.consume("-") {
2128 Some(BinaryOp::Sub)
2129 } else {
2130 None
2131 };
2132 let Some(op) = op else {
2133 return Ok(expr);
2134 };
2135 expr = RuleExpr::Binary {
2136 op,
2137 left: Box::new(expr),
2138 right: Box::new(self.parse_multiplicative()?),
2139 };
2140 }
2141 }
2142
2143 fn parse_multiplicative(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2144 let mut expr = self.parse_unary()?;
2145 loop {
2146 let op = if self.consume("*") {
2147 Some(BinaryOp::Mul)
2148 } else if self.consume("/") {
2149 Some(BinaryOp::Div)
2150 } else if self.consume("%") {
2151 Some(BinaryOp::Rem)
2152 } else {
2153 None
2154 };
2155 let Some(op) = op else {
2156 return Ok(expr);
2157 };
2158 expr = RuleExpr::Binary {
2159 op,
2160 left: Box::new(expr),
2161 right: Box::new(self.parse_unary()?),
2162 };
2163 }
2164 }
2165
2166 fn parse_unary(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2167 self.skip_ws();
2168 if self.consume("!") {
2169 return Ok(RuleExpr::Unary {
2170 op: UnaryOp::Not,
2171 expr: Box::new(self.parse_unary()?),
2172 });
2173 }
2174 self.parse_postfix()
2175 }
2176
2177 fn parse_postfix(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2178 let expr = self.parse_primary()?;
2179 self.skip_ws();
2180 if self.consume(".") {
2181 if self.consume("length") && self.consume("(") && self.consume(")") {
2182 return Ok(RuleExpr::Call {
2183 function: BuiltinFunction::Size,
2184 args: vec![expr],
2185 });
2186 }
2187 if self.consume("test") && self.consume("(") {
2188 let arg = self.parse_conditional()?;
2189 if !self.consume(")") {
2190 return Err(BoundedText::unchecked("missing call closing parenthesis"));
2191 }
2192 return Ok(RuleExpr::Call {
2193 function: BuiltinFunction::Matches,
2194 args: vec![expr, arg],
2195 });
2196 }
2197 return Ok(RuleExpr::Unsupported {
2198 fragment: BoundedText::new(self.input, MAX_PROFILE_STRING_BYTES)
2199 .map_err(|_| BoundedText::unchecked("expression exceeds limit"))?,
2200 reason: BoundedText::unchecked(
2201 "nested property path has no bound model link in this phase",
2202 ),
2203 });
2204 }
2205 Ok(expr)
2206 }
2207
2208 fn parse_primary(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2209 self.skip_ws();
2210 if self.consume("(") {
2211 let expr = self.parse_conditional()?;
2212 if !self.consume(")") {
2213 return Err(BoundedText::unchecked("missing closing parenthesis"));
2214 }
2215 return Ok(expr);
2216 }
2217 if self.remaining().starts_with('/') {
2218 return self.parse_regex_literal();
2219 }
2220 if self.remaining().starts_with('"') || self.remaining().starts_with('\'') {
2221 return self.parse_string();
2222 }
2223 if self.remaining().starts_with("true") {
2224 self.offset = self.offset.saturating_add(4);
2225 return Ok(RuleExpr::Bool { value: true });
2226 }
2227 if self.remaining().starts_with("false") {
2228 self.offset = self.offset.saturating_add(5);
2229 return Ok(RuleExpr::Bool { value: false });
2230 }
2231 if self.remaining().starts_with("null") {
2232 self.offset = self.offset.saturating_add(4);
2233 return Ok(RuleExpr::Null);
2234 }
2235 if self
2236 .remaining()
2237 .as_bytes()
2238 .first()
2239 .is_some_and(|byte| byte.is_ascii_digit() || *byte == b'-')
2240 {
2241 return self.parse_number();
2242 }
2243 self.parse_property()
2244 }
2245
2246 fn parse_string(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2247 let quote = *self
2248 .remaining()
2249 .as_bytes()
2250 .first()
2251 .ok_or_else(|| BoundedText::unchecked("expected string quote"))?;
2252 self.offset = self.offset.saturating_add(1);
2253 let start = self.offset;
2254 while let Some(byte) = self.remaining().as_bytes().first() {
2255 if *byte == quote {
2256 let value = &self.input[start..self.offset];
2257 self.offset = self.offset.saturating_add(1);
2258 return Ok(RuleExpr::String {
2259 value: BoundedText::new(value, MAX_PROFILE_STRING_BYTES)
2260 .map_err(|_| BoundedText::unchecked("string literal exceeds limit"))?,
2261 });
2262 }
2263 self.offset = self.offset.saturating_add(1);
2264 }
2265 Err(BoundedText::unchecked("unterminated string literal"))
2266 }
2267
2268 fn parse_regex_literal(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2269 self.offset = self.offset.saturating_add(1);
2270 let start = self.offset;
2271 let mut escaped = false;
2272 while let Some(byte) = self.remaining().as_bytes().first() {
2273 if *byte == b'/' && !escaped {
2274 let value = &self.input[start..self.offset];
2275 self.offset = self.offset.saturating_add(1);
2276 return Ok(RuleExpr::String {
2277 value: BoundedText::new(value, MAX_REGEX_PATTERN_BYTES)
2278 .map_err(|_| BoundedText::unchecked("regex literal exceeds limit"))?,
2279 });
2280 }
2281 escaped = *byte == b'\\' && !escaped;
2282 if *byte != b'\\' {
2283 escaped = false;
2284 }
2285 self.offset = self.offset.saturating_add(1);
2286 }
2287 Err(BoundedText::unchecked("unterminated regex literal"))
2288 }
2289
2290 fn parse_number(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2291 let start = self.offset;
2292 while let Some(byte) = self.remaining().as_bytes().first() {
2293 if byte.is_ascii_digit() || matches!(*byte, b'-' | b'.') {
2294 self.offset = self.offset.saturating_add(1);
2295 } else {
2296 break;
2297 }
2298 }
2299 let value = self.input[start..self.offset]
2300 .parse::<f64>()
2301 .map_err(|_| BoundedText::unchecked("invalid number literal"))?;
2302 Ok(RuleExpr::Number { value })
2303 }
2304
2305 fn parse_property(&mut self) -> std::result::Result<RuleExpr, BoundedText> {
2306 let first = self.parse_identifier()?;
2307 if self.consume("(") {
2308 let mut args = Vec::new();
2309 if !self.consume(")") {
2310 loop {
2311 args.push(self.parse_conditional()?);
2312 if self.consume(")") {
2313 break;
2314 }
2315 if !self.consume(",") {
2316 return Err(BoundedText::unchecked(
2317 "missing function argument separator",
2318 ));
2319 }
2320 }
2321 }
2322 return Ok(RuleExpr::Call {
2323 function: builtin_function(&first)?,
2324 args,
2325 });
2326 }
2327 let parts = vec![property_name_from_source(&first)?];
2328 if self.consume(".") {
2329 let _member = self.parse_identifier()?;
2330 return Ok(RuleExpr::Unsupported {
2331 fragment: BoundedText::new(self.input, MAX_PROFILE_STRING_BYTES)
2332 .map_err(|_| BoundedText::unchecked("expression exceeds limit"))?,
2333 reason: BoundedText::unchecked(
2334 "nested property path has no bound model link in this phase",
2335 ),
2336 });
2337 }
2338 Ok(RuleExpr::Property {
2339 path: PropertyPath::new(parts),
2340 })
2341 }
2342
2343 fn parse_identifier(&mut self) -> std::result::Result<String, BoundedText> {
2344 let start = self.offset;
2345 while let Some(byte) = self.remaining().as_bytes().first() {
2346 if byte.is_ascii_alphanumeric() || *byte == b'_' {
2347 self.offset = self.offset.saturating_add(1);
2348 } else {
2349 break;
2350 }
2351 }
2352 if start == self.offset {
2353 return Err(BoundedText::unchecked("expected expression"));
2354 }
2355 Ok(self.input[start..self.offset].to_owned())
2356 }
2357}
2358
2359fn property_name_from_source(value: &str) -> std::result::Result<PropertyName, BoundedText> {
2360 PropertyName::new(map_verapdf_property(value))
2361 .map_err(|_| BoundedText::unchecked("invalid property"))
2362}
2363
2364fn builtin_function(value: &str) -> std::result::Result<BuiltinFunction, BoundedText> {
2365 match value {
2366 "hasParseFact" => Ok(BuiltinFunction::HasParseFact),
2367 "size" => Ok(BuiltinFunction::Size),
2368 "isEmpty" => Ok(BuiltinFunction::IsEmpty),
2369 "contains" => Ok(BuiltinFunction::Contains),
2370 "all" => Ok(BuiltinFunction::All),
2371 "exists" => Ok(BuiltinFunction::Exists),
2372 "matches" => Ok(BuiltinFunction::Matches),
2373 _ => Err(BoundedText::unchecked("unsupported built-in function")),
2374 }
2375}
2376
/// Translates a property name from the imported profile vocabulary to the name used here.
///
/// Names without an entry in the alias table are returned unchanged. Matching is exact and
/// case-sensitive.
fn map_verapdf_property(value: &str) -> &str {
    /// `(source name, mapped name)` pairs.
    const ALIASES: [(&str, &str); 5] = [
        ("Length", "declaredLength"),
        ("realLength", "discoveredLength"),
        ("isEncrypted", "encrypted"),
        ("containsMetadata", "hasMetadata"),
        ("isCatalogMetadata", "catalogMetadata"),
    ];

    ALIASES
        .iter()
        .find(|(source, _)| *source == value)
        .map_or(value, |(_, target)| *target)
}
2387
2388impl From<CosObject> for ModelValue {
2389 fn from(value: CosObject) -> Self {
2390 match value {
2391 CosObject::Boolean(value) => Self::Bool(value),
2392 CosObject::Real(value) => Self::Number(value),
2393 CosObject::Name(name) => Self::String(BoundedText::unchecked(
2394 String::from_utf8_lossy(name.as_bytes()).into_owned(),
2395 )),
2396 CosObject::String(value) => Self::String(BoundedText::unchecked(
2397 String::from_utf8_lossy(value.as_bytes()).into_owned(),
2398 )),
2399 CosObject::Reference(value) => Self::ObjectKey(value),
2400 CosObject::Null
2401 | CosObject::Integer(_)
2402 | CosObject::Array(_)
2403 | CosObject::Dictionary(_)
2404 | CosObject::Stream(_) => Self::Null,
2405 }
2406 }
2407}
2408
2409#[cfg(test)]
2410mod tests {
2411 use std::{io::Cursor, sync::Arc};
2412
2413 use super::{BuiltinProfileRepository, DefaultRuleEvaluator, ProfileRepository, RuleEvaluator};
2414 use crate::{FlavourSelection, Parser, Validator};
2415
/// Smallest PDF fixture shared by these tests: a `%PDF-1.7` header, a single catalog object
/// and a trailer whose `/Root` points at it. It contains no cross-reference table and no
/// `startxref` entry.
const MINIMAL_PDF: &[u8] = br"%PDF-1.7
1 0 obj
<< /Type /Catalog >>
endobj
trailer
<< /Root 1 0 R >>
%%EOF
";
2424
2425 #[derive(Debug)]
2426 struct StaticRepo(super::ValidationProfile);
2427
2428 impl super::ProfileRepository for StaticRepo {
2429 fn profiles_for(
2430 &self,
2431 _selection: &crate::FlavourSelection,
2432 ) -> crate::Result<Vec<super::ValidationProfile>> {
2433 Ok(vec![self.0.clone()])
2434 }
2435 }
2436
/// The default selection resolves to exactly one built-in profile, `pdfv-m4`, with 10 rules.
#[test]
fn test_should_return_builtin_profile_for_default_auto_selection() -> crate::Result<()> {
    let repository = BuiltinProfileRepository::new();
    let selected = repository.profiles_for(&FlavourSelection::default())?;

    assert_eq!(selected.len(), 1);
    let summary = selected
        .first()
        .map(|profile| (profile.identity.id.as_str(), profile.rules.len()));
    assert_eq!(summary, Some(("pdfv-m4", 10)));
    Ok(())
}
2452
/// Auto selection without a default flavour yields no built-in profile.
#[test]
fn test_should_return_no_builtin_profile_for_auto_without_default() -> crate::Result<()> {
    let selection = FlavourSelection::Auto { default: None };
    let repository = BuiltinProfileRepository::new();

    let selected = repository.profiles_for(&selection)?;

    assert!(selected.is_empty());
    Ok(())
}
2461
/// Importing the generated `pdfa-1b` XML yields a large rule set with both supported and
/// unsupported rules, the expected profile id, and at least one rule carrying references.
#[cfg(feature = "custom-profiles")]
#[test]
fn test_should_import_representative_verapdf_xml_rules() -> crate::Result<()> {
    let pdfa_1b = crate::generated_profiles::GENERATED_PROFILE_SOURCES
        .iter()
        .find(|source| source.display_flavour == "pdfa-1b")
        .ok_or(crate::ProfileError::UnsupportedSelection)?;
    let import = super::import_verapdf_profile_xml(pdfa_1b.xml)?;
    let rules = &import.profile.rules;

    assert!(rules.len() > 100);
    assert!(import.supported_rules > 0);
    assert!(import.unsupported_rules > 0);
    assert_eq!(import.profile.identity.id.as_str(), "verapdf-pdfa-1b");
    let any_rule_has_references = rules.iter().any(|rule| !rule.references.is_empty());
    assert!(any_rule_has_references);
    Ok(())
}
2486
/// Rule `iso-19005-1-6-2-10-1` of the imported `pdfa-1b` profile targets the
/// `undefinedOperator` object type.
#[cfg(feature = "custom-profiles")]
#[test]
fn test_should_map_verapdf_undefined_operator_to_sparse_family() -> crate::Result<()> {
    let pdfa_1b = crate::generated_profiles::GENERATED_PROFILE_SOURCES
        .iter()
        .find(|source| source.display_flavour == "pdfa-1b")
        .ok_or(crate::ProfileError::UnsupportedSelection)?;
    let import = super::import_verapdf_profile_xml(pdfa_1b.xml)?;

    let target_rule = import
        .profile
        .rules
        .iter()
        .find(|candidate| candidate.id.0.as_str() == "iso-19005-1-6-2-10-1")
        .ok_or(crate::ProfileError::UnsupportedSelection)?;

    assert_eq!(target_rule.object_type.as_str(), "undefinedOperator");
    Ok(())
}
2508
/// The catalog lists every generated profile plus one extra entry, and includes the PDF/UA-2
/// and WTPDF reuse profiles with the expected flavour, coverage and source pin.
#[test]
fn test_should_list_every_generated_builtin_profile_with_coverage() -> crate::Result<()> {
    let catalog = BuiltinProfileRepository::new().list_profiles()?;
    let expected_len = crate::generated_profiles::GENERATED_PROFILE_SOURCES.len() + 1;

    assert_eq!(catalog.len(), expected_len);

    let lists_pdfua_2 = catalog.iter().any(|entry| {
        entry.identity.id.as_str() == "verapdf-pdfua-2-iso32005"
            && entry.display_flavour.as_str() == "pdfua-2-iso32005"
            && entry.coverage.total_rules > 0
    });
    assert!(lists_pdfua_2);

    let lists_pinned_wtpdf_reuse = catalog.iter().any(|entry| {
        entry.identity.id.as_str() == "verapdf-wtpdf-1-0-reuse"
            && entry.source_pin.as_str() == crate::generated_profiles::VERA_PDF_LIBRARY_PIN
    });
    assert!(lists_pinned_wtpdf_reuse);
    Ok(())
}
2528
/// At least 90% of the rules in each listed accessibility profile must be executable.
#[test]
fn test_should_improve_m6_official_profile_coverage_for_accessibility_profiles()
-> crate::Result<()> {
    const ACCESSIBILITY_FLAVOURS: [&str; 3] = [
        "pdfua-2-iso32005",
        "wtpdf-1-0-accessibility",
        "wtpdf-1-0-reuse",
    ];
    let catalog = BuiltinProfileRepository::new().list_profiles()?;

    for display_flavour in ACCESSIBILITY_FLAVOURS {
        let entry = catalog
            .iter()
            .find(|entry| entry.display_flavour.as_str() == display_flavour)
            .ok_or(crate::ProfileError::UnsupportedSelection)?;
        let coverage = &entry.coverage;
        // Cross-multiplied form of executable / total >= 90 / 100.
        let executable_scaled = coverage.executable_rules.saturating_mul(100);
        let required_scaled = coverage.total_rules.saturating_mul(90);
        assert!(
            executable_scaled >= required_scaled,
            "{display_flavour} coverage is {:?}",
            coverage
        );
    }
    Ok(())
}
2552
/// An explicit `pdfua` part 2 flavour with the conformance `wrong` is rejected as an
/// unsupported selection.
#[test]
fn test_should_reject_inexact_pdfua_2_flavour_selection() -> crate::Result<()> {
    let part = std::num::NonZeroU32::new(2).ok_or(crate::ProfileError::UnsupportedSelection)?;
    let flavour = crate::ValidationFlavour::new("pdfua", part, "wrong")?;
    let selection = FlavourSelection::Explicit { flavour };

    let outcome = BuiltinProfileRepository::new().profiles_for(&selection);

    let rejected = matches!(
        outcome,
        Err(crate::PdfvError::Profile(
            crate::ProfileError::UnsupportedSelection
        ))
    );
    assert!(rejected);
    Ok(())
}
2571
/// Every generated profile can be selected explicitly; validating the minimal PDF against it
/// yields an incomplete report whose first profile report carries the source id and at least
/// one unsupported rule with references.
#[test]
fn test_should_load_and_validate_every_generated_builtin_profile() -> crate::Result<()> {
    for source in crate::generated_profiles::GENERATED_PROFILE_SOURCES {
        let flavour = super::parse_display_flavour(source.display_flavour)?;
        let options = crate::ValidationOptions::builder()
            .flavour(FlavourSelection::Explicit { flavour })
            .build();
        let report = Validator::new(options)?
            .validate_reader(Cursor::new(MINIMAL_PDF), crate::InputName::memory())?;
        let first_profile = report.profile_reports.first();

        assert_eq!(report.status, crate::ValidationStatus::Incomplete);
        assert_eq!(
            first_profile.map(|profile| profile.profile.id.as_str()),
            Some(source.id)
        );
        assert!(first_profile.is_some_and(|profile| !profile.unsupported_rules.is_empty()));
        assert!(first_profile.is_some_and(|profile| {
            profile
                .unsupported_rules
                .iter()
                .any(|rule| !rule.references.is_empty())
        }));
    }
    Ok(())
}
2606
2607 #[cfg(feature = "custom-profiles")]
2608 #[test]
2609 fn test_should_load_custom_xml_profile() -> crate::Result<()> {
2610 let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
2611<profile flavour="PDFA_1_B">
2612 <details><name>Custom smoke profile</name></details>
2613 <rules>
2614 <rule object="CosDocument">
2615 <id specification="LOCAL" clause="1" testNumber="1"/>
2616 <description>Catalog must be present</description>
2617 <test>hasCatalog == true</test>
2618 <error><message>Catalog is missing</message></error>
2619 </rule>
2620 </rules>
2621</profile>"#;
2622 let import = super::import_verapdf_profile_xml(xml)?;
2623
2624 assert_eq!(import.profile.rules.len(), 1);
2625 assert_eq!(import.supported_rules, 1);
2626 assert_eq!(import.unsupported_rules, 0);
2627 Ok(())
2628 }
2629
/// Every `document` rule of the default built-in profile passes on the minimal PDF.
///
/// Uses the shared `MINIMAL_PDF` fixture instead of an inline copy, and reports an empty
/// profile list as an error rather than panicking in `Vec::remove`.
#[test]
fn test_should_evaluate_m0_document_rules() -> crate::Result<()> {
    let document = Parser::default().parse(Cursor::new(MINIMAL_PDF))?;
    let model = crate::validation::DocumentModel::new(&document);
    let object = crate::ModelObjectRef::Document(model);
    let profile = BuiltinProfileRepository::new()
        .profiles_for(&FlavourSelection::default())?
        .into_iter()
        .next()
        .ok_or(crate::ProfileError::UnsupportedSelection)?;
    let mut evaluator = DefaultRuleEvaluator::new(crate::ResourceLimits::default());

    for rule in profile
        .rules
        .iter()
        .filter(|rule| rule.object_type.as_str() == "document")
    {
        let outcome = evaluator.evaluate(object.clone(), rule)?;
        assert_eq!(outcome, super::RuleOutcome::Passed);
    }
    Ok(())
}
2658
/// Validating the minimal PDF with default options reports it as valid.
///
/// Uses the shared `MINIMAL_PDF` fixture instead of an inline copy of the same bytes.
#[test]
fn test_should_validate_reader_end_to_end() -> crate::Result<()> {
    let report = Validator::new(crate::ValidationOptions::default())?
        .validate_reader(Cursor::new(MINIMAL_PDF), crate::InputName::memory())?;

    assert_eq!(report.status, crate::ValidationStatus::Valid);
    Ok(())
}
2675
/// A stream whose `/Length 4` covers `abc` plus the end-of-line byte validates as valid.
#[test]
fn test_should_validate_stream_with_declared_length_and_eol() -> crate::Result<()> {
    let pdf_with_stream = br"%PDF-1.7
1 0 obj
<< /Type /Catalog >>
endobj
2 0 obj
<< /Length 4 >>
stream
abc
endstream
endobj
trailer
<< /Root 1 0 R >>
%%EOF
";
    let validator = Validator::new(crate::ValidationOptions::default())?;

    let report =
        validator.validate_reader(Cursor::new(pdf_with_stream), crate::InputName::memory())?;

    assert_eq!(report.status, crate::ValidationStatus::Valid);
    Ok(())
}
2698
/// The feature fixture validates as valid under default options, with 12 rule executions
/// recorded on the first profile report.
#[test]
fn test_should_apply_m4_feature_fact_rules_to_linked_objects() -> crate::Result<()> {
    let validator = Validator::new(crate::ValidationOptions::default())?;
    let report =
        validator.validate_reader(Cursor::new(m4_feature_pdf()), crate::InputName::memory())?;
    let Some(profile) = report.profile_reports.first() else {
        return Err(crate::ValidationError::LimitExceeded {
            limit: "profile_reports",
        }
        .into());
    };

    // Dump the whole profile report when the status is unexpected.
    assert_eq!(
        report.status,
        crate::ValidationStatus::Valid,
        "{profile:#?}"
    );
    assert_eq!(profile.rules_executed, 12);
    Ok(())
}
2719
/// A `font` rule comparing the `fontName` property with null is reported as unsupported,
/// which makes the overall report incomplete.
#[test]
fn test_should_report_imported_derived_property_as_unsupported() -> crate::Result<()> {
    // Rule body: `fontName == null`.
    let font_name_is_null = super::RuleExpr::Binary {
        op: super::BinaryOp::Eq,
        left: Box::new(super::property_expr("fontName")?),
        right: Box::new(super::RuleExpr::Null),
    };
    let rule = super::Rule {
        id: crate::RuleId(crate::Identifier::new("derived-font-name")?),
        object_type: super::ObjectTypeName::new("font")?,
        deferred: false,
        tags: Vec::new(),
        description: crate::BoundedText::new("derived font name", 64)?,
        test: font_name_is_null,
        error: super::ErrorTemplate {
            message: crate::BoundedText::new("derived font name", 64)?,
        },
        references: Vec::new(),
    };
    let identity = crate::ProfileIdentity {
        id: crate::Identifier::new("derived-property")?,
        name: crate::BoundedText::new("derived property", 64)?,
        version: None,
    };
    let repository = StaticRepo(super::ValidationProfile {
        identity,
        flavour: super::pdfa_1b_flavour()?,
        rules: vec![rule],
    });
    let validator =
        Validator::with_profiles(crate::ValidationOptions::default(), Arc::new(repository))?;

    let report =
        validator.validate_reader(Cursor::new(m4_feature_pdf()), crate::InputName::memory())?;
    let profile_report =
        report
            .profile_reports
            .first()
            .ok_or(crate::ValidationError::LimitExceeded {
                limit: "profile_reports",
            })?;

    assert_eq!(report.status, crate::ValidationStatus::Incomplete);
    assert_eq!(profile_report.unsupported_rules.len(), 1);
    Ok(())
}
2765
/// A page font without `/Subtype` makes the document invalid, and the failure of rule
/// `m4-font-subtype-present` is reported against `root/page[0]/font[F1]`.
#[test]
fn test_should_fail_m4_feature_fact_rule_on_invalid_font() -> crate::Result<()> {
    // Object 4 is the font dictionary; it deliberately has no /Subtype entry.
    let pdf_with_untyped_font = br"%PDF-1.7
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /Contents 5 0 R >>
endobj
4 0 obj
<< /Type /Font /BaseFont /Helvetica >>
endobj
5 0 obj
<< /Length 4 >>
stream
q Q
endstream
endobj
trailer
<< /Root 1 0 R >>
%%EOF
";
    let validator = Validator::new(crate::ValidationOptions::default())?;
    let report = validator.validate_reader(
        Cursor::new(pdf_with_untyped_font),
        crate::InputName::memory(),
    )?;
    let profile_report =
        report
            .profile_reports
            .first()
            .ok_or(crate::ValidationError::LimitExceeded {
                limit: "profile_reports",
            })?;

    assert_eq!(report.status, crate::ValidationStatus::Invalid);
    let font_failure_reported = profile_report
        .failed_assertions
        .iter()
        .filter(|assertion| assertion.rule_id.0.as_str() == "m4-font-subtype-present")
        .any(|assertion| {
            assertion
                .object_context
                .as_ref()
                .is_some_and(|context| context.as_str() == "root/page[0]/font[F1]")
        });
    assert!(font_failure_reported);
    Ok(())
}
2811
/// Returns a PDF fixture exercising several object kinds in one document.
///
/// Contents, by object number:
/// 1. catalog with `/Pages` and an `/OutputIntents` array,
/// 2. page tree with a single kid,
/// 3. page with font resource `F1`, one annotation and a content stream,
/// 4. Type1 Helvetica font dictionary,
/// 5. text annotation,
/// 6. zero-length stream referenced as the page `/Contents`,
/// 7. zero-length stream referenced as the output intent `/DestOutputProfile`,
/// 8. output intent with `/S /GTS_PDFA1`.
fn m4_feature_pdf() -> &'static [u8] {
    br"%PDF-1.7
1 0 obj
<< /Type /Catalog /Pages 2 0 R /OutputIntents [8 0 R] >>
endobj
2 0 obj
<< /Type /Pages /Kids [3 0 R] /Count 1 >>
endobj
3 0 obj
<< /Type /Page /Parent 2 0 R /Resources << /Font << /F1 4 0 R >> >> /Annots [5 0 R] /Contents 6 0 R >>
endobj
4 0 obj
<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>
endobj
5 0 obj
<< /Type /Annot /Subtype /Text >>
endobj
6 0 obj
<< /Length 0 >>
stream
endstream
endobj
7 0 obj
<< /Length 0 >>
stream
endstream
endobj
8 0 obj
<< /Type /OutputIntent /S /GTS_PDFA1 /DestOutputProfile 7 0 R >>
endobj
trailer
<< /Root 1 0 R >>
%%EOF
"
}
2847
/// The evaluator returns an error, rather than silently falling back, for a two-segment
/// property path and for `hasParseFact` called with two arguments.
///
/// Uses the shared `MINIMAL_PDF` fixture instead of an inline copy of the same bytes.
#[test]
fn test_should_reject_unsupported_rule_ir_silently_fallbacks() -> crate::Result<()> {
    let document = Parser::default().parse(Cursor::new(MINIMAL_PDF))?;
    let model = crate::validation::DocumentModel::new(&document);
    let object = crate::ModelObjectRef::Document(model);
    let mut evaluator = DefaultRuleEvaluator::new(crate::ResourceLimits::default());

    // Property path with two segments: `headerOffset.extra`.
    let nested_rule = super::Rule {
        id: crate::RuleId(crate::Identifier::new("bad-nested")?),
        object_type: super::ObjectTypeName::new("document")?,
        deferred: false,
        tags: Vec::new(),
        description: crate::BoundedText::new("nested", 32)?,
        test: super::RuleExpr::Property {
            path: super::PropertyPath::new(vec![
                super::PropertyName::new("headerOffset")?,
                super::PropertyName::new("extra")?,
            ]),
        },
        error: super::ErrorTemplate {
            message: crate::BoundedText::new("nested", 32)?,
        },
        references: Vec::new(),
    };

    // `hasParseFact` invoked with two string arguments.
    let arity_rule = super::Rule {
        id: crate::RuleId(crate::Identifier::new("bad-arity")?),
        object_type: super::ObjectTypeName::new("document")?,
        deferred: false,
        tags: Vec::new(),
        description: crate::BoundedText::new("arity", 32)?,
        test: super::RuleExpr::Call {
            function: super::BuiltinFunction::HasParseFact,
            args: vec![
                super::RuleExpr::String {
                    value: crate::BoundedText::new("header", 32)?,
                },
                super::RuleExpr::String {
                    value: crate::BoundedText::new("extra", 32)?,
                },
            ],
        },
        error: super::ErrorTemplate {
            message: crate::BoundedText::new("arity", 32)?,
        },
        references: Vec::new(),
    };

    assert!(evaluator.evaluate(object.clone(), &nested_rule).is_err());
    assert!(evaluator.evaluate(object, &arity_rule).is_err());
    Ok(())
}
2906
/// A profile whose only rule uses a two-segment property path produces an incomplete report
/// listing exactly one unsupported rule.
///
/// Uses the shared `MINIMAL_PDF` fixture instead of an inline copy of the same bytes.
#[test]
fn test_should_report_unsupported_rule_as_incomplete() -> crate::Result<()> {
    let rule = super::Rule {
        id: crate::RuleId(crate::Identifier::new("unsupported")?),
        object_type: super::ObjectTypeName::new("document")?,
        deferred: false,
        tags: Vec::new(),
        description: crate::BoundedText::new("unsupported", 64)?,
        test: super::RuleExpr::Property {
            path: super::PropertyPath::new(vec![
                super::PropertyName::new("headerOffset")?,
                super::PropertyName::new("extra")?,
            ]),
        },
        error: super::ErrorTemplate {
            message: crate::BoundedText::new("unsupported", 64)?,
        },
        references: Vec::new(),
    };
    let profile = super::ValidationProfile {
        identity: crate::ProfileIdentity {
            id: crate::Identifier::new("test")?,
            name: crate::BoundedText::new("test", 64)?,
            version: None,
        },
        flavour: super::pdfa_1b_flavour()?,
        rules: vec![rule],
    };
    let validator = Validator::with_profiles(
        crate::ValidationOptions::default(),
        Arc::new(StaticRepo(profile)),
    )?;
    let report =
        validator.validate_reader(Cursor::new(MINIMAL_PDF), crate::InputName::memory())?;

    assert_eq!(report.status, crate::ValidationStatus::Incomplete);
    assert_eq!(
        report
            .profile_reports
            .first()
            .map(|profile| profile.unsupported_rules.len()),
        Some(1)
    );
    Ok(())
}
2959
/// Modulo, ternary, regex `.test(...)` and built-in call expressions all parse, each into the
/// expected top-level `RuleExpr` variant.
#[test]
fn test_should_parse_phase_13_expression_surface() -> crate::Result<()> {
    // Parse failures surface as `ProfileError::UnsupportedRule` carrying the parser's reason.
    let parse = |source: &str| {
        super::parse_imported_expr(source)
            .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })
    };

    let modulo = parse("hexCount % 2 == 0")?;
    let ternary = parse(
        "gPageOutputCS == null ? gDocumentOutputCS == 'RGB ' : gPageOutputCS == 'RGB '",
    )?;
    let regex = parse(r"/^%PDF-2\.[0-9]$/.test(header)")?;
    let call = parse("contains(entries, 'UR3') == false")?;

    assert!(matches!(modulo, super::RuleExpr::Binary { .. }));
    assert!(matches!(ternary, super::RuleExpr::Conditional { .. }));
    assert!(matches!(regex, super::RuleExpr::Call { .. }));
    assert!(matches!(call, super::RuleExpr::Binary { .. }));
    Ok(())
}
2979
/// An expression using a dotted property path parses to `RuleExpr::Unsupported`.
#[test]
fn test_should_import_nested_property_paths_as_static_unsupported() -> crate::Result<()> {
    let parsed = super::parse_imported_expr("metadata.schema.part == 1");
    let expr = parsed.map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?;

    let is_unsupported = matches!(expr, super::RuleExpr::Unsupported { .. });
    assert!(is_unsupported);
    Ok(())
}
2988
/// `5 % 2 == 1` combined with a regex test of the header passes on a `%PDF-2.0` document.
#[test]
fn test_should_evaluate_arithmetic_ternary_and_regex_builtins() -> crate::Result<()> {
    let pdf_2_0 = br"%PDF-2.0
1 0 obj
<< /Type /Catalog >>
endobj
trailer
<< /Root 1 0 R >>
%%EOF
";
    let document = Parser::default().parse(Cursor::new(pdf_2_0))?;
    let model = crate::validation::DocumentModel::new(&document);
    let object = crate::ModelObjectRef::Document(model);
    let mut evaluator = DefaultRuleEvaluator::new(crate::ResourceLimits::default());

    let id = crate::RuleId(crate::Identifier::new("expr-surface")?);
    let object_type = super::ObjectTypeName::new("document")?;
    let description = crate::BoundedText::new("expr", 32)?;
    let arithmetic = super::parse_imported_expr("5 % 2 == 1")
        .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?;
    let header_regex = super::parse_imported_expr(r"/^%PDF-2\.[0-9]$/.test(header)")
        .map_err(|reason| crate::ProfileError::UnsupportedRule { reason })?;
    let message = crate::BoundedText::new("expr", 32)?;
    let rule = super::Rule {
        id,
        object_type,
        deferred: false,
        tags: Vec::new(),
        description,
        test: super::RuleExpr::Binary {
            op: super::BinaryOp::And,
            left: Box::new(arithmetic),
            right: Box::new(header_regex),
        },
        error: super::ErrorTemplate { message },
        references: Vec::new(),
    };

    let outcome = evaluator.evaluate(object, &rule)?;

    assert_eq!(outcome, super::RuleOutcome::Passed);
    Ok(())
}
3032
/// Two always-false document rules each contribute a failed assertion, so the first profile
/// report lists two failures under default options.
///
/// Uses the shared `MINIMAL_PDF` fixture instead of an inline copy of the same bytes.
#[test]
fn test_should_apply_failed_assertion_cap_per_rule() -> crate::Result<()> {
    let mut rules = Vec::new();
    for id in ["fail-a", "fail-b"] {
        rules.push(super::Rule {
            id: crate::RuleId(crate::Identifier::new(id)?),
            object_type: super::ObjectTypeName::new("document")?,
            deferred: false,
            tags: Vec::new(),
            description: crate::BoundedText::new(id, 64)?,
            // A constant `false` test fails on every object it is applied to.
            test: super::RuleExpr::Bool { value: false },
            error: super::ErrorTemplate {
                message: crate::BoundedText::new(id, 64)?,
            },
            references: Vec::new(),
        });
    }
    let profile = super::ValidationProfile {
        identity: crate::ProfileIdentity {
            id: crate::Identifier::new("test")?,
            name: crate::BoundedText::new("test", 64)?,
            version: None,
        },
        flavour: super::pdfa_1b_flavour()?,
        rules,
    };
    let validator = Validator::with_profiles(
        crate::ValidationOptions::default(),
        Arc::new(StaticRepo(profile)),
    )?;
    let report =
        validator.validate_reader(Cursor::new(MINIMAL_PDF), crate::InputName::memory())?;

    assert_eq!(
        report
            .profile_reports
            .first()
            .map(|profile| profile.failed_assertions.len()),
        Some(2)
    );
    Ok(())
}
3082}