Skip to main content

vastlint_core/
lib.rs

1//! # vastlint-core
2//!
3//! A zero-I/O VAST XML validation library. Takes a VAST XML
4//! string and returns a structured [`ValidationResult`] listing every issue
5//! found, the detected VAST version, and a summary of error/warning/info counts.
6//!
//! The entire public surface is five functions and a handful of types:
8//!
9//! - [`validate`] -- validate with default settings (most callers want this)
10//! - [`validate_with_context`] -- validate with rule overrides or wrapper depth
11//! - [`fix`] -- fix deterministic issues and return repaired XML
12//! - [`fix_with_context`] -- fix with rule overrides or wrapper depth
13//! - [`all_rules`] -- list the full 118-rule catalog
14//!
15//! # Performance — allocator recommendation
16//!
17//! `vastlint-core` builds an owned document tree on every call (one heap
18//! allocation per XML element, attribute, and text node). Under concurrent
19//! load the system allocator becomes a bottleneck because all threads compete
20//! for a shared free-list lock.
21//!
22//! Switching to [`mimalloc`](https://docs.rs/mimalloc) in your **binary**
23//! crate eliminates this contention and gives dramatically better throughput
24//! at high concurrency, especially for larger documents:
25//!
26//! ```toml
27//! # Cargo.toml (your binary, not a library crate)
28//! [dependencies]
29//! mimalloc = { version = "0.1", default-features = false }
30//! ```
31//!
32//! ```rust,ignore
33//! // src/main.rs
34//! use mimalloc::MiMalloc;
35//! #[global_allocator]
36//! static GLOBAL: MiMalloc = MiMalloc;
37//! ```
38//!
39//! Measured on Apple M4 (10 threads, production-realistic VAST tags):
40//!
41//! | Allocator | 17 KB tag | 44 KB tag |
42//! |---|---|---|
43//! | system (default) | 1,847 tags/s · 541 µs | 328 tags/s · 3,048 µs |
44//! | mimalloc | 15,760 tags/s · 63 µs | 2,635 tags/s · 380 µs |
45//!
46//! **mimalloc: ~8× throughput improvement on multi-threaded workloads.**
47//!
//! > ⚠️ Do **not** set a global allocator in a library crate — it would
//! > override the allocator for any host process that links the library (Go,
//! > Python, Ruby runtimes, etc.), which can cause heap corruption.
51//!
52//! # Quick start
53//!
54//! ```rust
55//! let xml = r#"<VAST version="2.0">
56//!   <Ad><InLine>
57//!     <AdSystem>Demo</AdSystem>
58//!     <AdTitle>Ad</AdTitle>
59//!     <Impression>https://t.example.com/imp</Impression>
60//!     <Creatives>
61//!       <Creative>
62//!         <Linear>
63//!           <Duration>00:00:15</Duration>
64//!           <MediaFiles>
65//!             <MediaFile delivery="progressive" type="video/mp4"
66//!                        width="640" height="360">
67//!               https://cdn.example.com/ad.mp4
68//!             </MediaFile>
69//!           </MediaFiles>
70//!         </Linear>
71//!       </Creative>
72//!     </Creatives>
73//!   </InLine></Ad>
74//! </VAST>"#;
75//!
76//! let result = vastlint_core::validate(xml);
77//! assert_eq!(result.summary.errors, 0);
78//! ```
79//!
80//! # Design constraints
81//!
82//! The library has no I/O, no logging, no global state, and no async runtime.
83//! It can be embedded in a CLI, HTTP server, WASM module, or FFI binding
84//! without pulling in any platform-specific dependencies.
85//!
86//! Three crate dependencies: `quick-xml` (XML parsing), `url` (RFC 3986),
87//! and `phf` (compile-time hash maps).
88
89mod detect;
90mod fix;
91mod parse;
92mod rules;
93mod summarize;
94
95pub use fix::{fix, fix_with_context, AppliedFix, FixResult};
96
97use std::collections::HashMap;
98
99// ── Public types ─────────────────────────────────────────────────────────────
100
/// The VAST version as declared in the `version` attribute or inferred from
/// document structure.
///
/// Covers all versions published by IAB Tech Lab: 2.0 through 4.3.
///
/// Variants are declared oldest-to-newest, so the derived `PartialOrd`/`Ord`
/// ordering follows release order (`V2_0 < V3_0 < V4_0 < V4_1 < V4_2 < V4_3`).
/// The enum carries no data, so it is `Copy` and can be used directly as a
/// `HashMap`/`HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum VastVersion {
    /// VAST 2.0.
    V2_0,
    /// VAST 3.0.
    V3_0,
    /// VAST 4.0.
    V4_0,
    /// VAST 4.1.
    V4_1,
    /// VAST 4.2.
    V4_2,
    /// VAST 4.3.
    V4_3,
}
114
115impl VastVersion {
116    pub fn as_str(&self) -> &'static str {
117        match self {
118            VastVersion::V2_0 => "2.0",
119            VastVersion::V3_0 => "3.0",
120            VastVersion::V4_0 => "4.0",
121            VastVersion::V4_1 => "4.1",
122            VastVersion::V4_2 => "4.2",
123            VastVersion::V4_3 => "4.3",
124        }
125    }
126
127    /// Returns true if this version is 4.x or later.
128    pub fn is_v4(&self) -> bool {
129        matches!(
130            self,
131            VastVersion::V4_0 | VastVersion::V4_1 | VastVersion::V4_2 | VastVersion::V4_3
132        )
133    }
134
135    /// Returns true if this version is at least the given version.
136    pub fn at_least(&self, other: &VastVersion) -> bool {
137        self.ordinal() >= other.ordinal()
138    }
139
140    fn ordinal(&self) -> u8 {
141        match self {
142            VastVersion::V2_0 => 0,
143            VastVersion::V3_0 => 1,
144            VastVersion::V4_0 => 2,
145            VastVersion::V4_1 => 3,
146            VastVersion::V4_2 => 4,
147            VastVersion::V4_3 => 5,
148        }
149    }
150}
151
/// How the version was determined.
///
/// Version detection is a two-pass process: first the `version` attribute on
/// the root `<VAST>` element is read (declared), then the document structure
/// is scanned for version-specific elements (inferred). When both are
/// available, consistency is checked and a mismatch produces a warning.
///
/// Use [`DetectedVersion::best`] to collapse this into a single optional
/// version.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DetectedVersion {
    /// Version attribute was present and recognised.
    Declared(VastVersion),
    /// Version attribute was absent or unrecognised; version inferred from
    /// document structure.
    Inferred(VastVersion),
    /// Both declared and inferred — may or may not agree.
    DeclaredAndInferred {
        /// Version read from the `version` attribute.
        declared: VastVersion,
        /// Version inferred from document structure.
        inferred: VastVersion,
        /// Whether `declared` and `inferred` agree.
        /// NOTE(review): presumably set by the detection pass — confirm in
        /// the `detect` module.
        consistent: bool,
    },
    /// Could not determine version.
    Unknown,
}
174
175impl DetectedVersion {
176    /// Returns the best available version, preferring the declared value.
177    pub fn best(&self) -> Option<&VastVersion> {
178        match self {
179            DetectedVersion::Declared(v) => Some(v),
180            DetectedVersion::Inferred(v) => Some(v),
181            DetectedVersion::DeclaredAndInferred { declared, .. } => Some(declared),
182            DetectedVersion::Unknown => None,
183        }
184    }
185}
186
/// Issue severity, based strictly on spec language.
///
/// - **Error** — spec says "must" or "required": the tag will likely fail to serve.
/// - **Warning** — spec says "should" or "recommended", or the feature is deprecated.
/// - **Info** — advisory; not a spec violation but a known interoperability risk.
///
/// Variants are declared in ascending order of severity, so the derived
/// ordering is `Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    /// Advisory only.
    Info,
    /// "Should"/"recommended" language, or a deprecated feature.
    Warning,
    /// "Must"/"required" language.
    Error,
}
198
199impl Severity {
200    pub fn as_str(&self) -> &'static str {
201        match self {
202            Severity::Error => "error",
203            Severity::Warning => "warning",
204            Severity::Info => "info",
205        }
206    }
207}
208
/// A single validation finding.
///
/// The rule metadata fields (`id`, `message`, `spec_ref`) are `'static`
/// strings; only `path` is an owned allocation.
#[derive(Debug, Clone)]
pub struct Issue {
    /// Stable rule identifier, e.g. "VAST-2.0-root-version".
    pub id: &'static str,
    /// Effective severity after applying any caller overrides.
    pub severity: Severity,
    /// Human-readable message. Static string; no allocation on the hot path.
    pub message: &'static str,
    /// XPath-like location in the document, e.g. `/VAST/Ad[0]/InLine/AdSystem`.
    /// None when the issue applies to the document as a whole.
    pub path: Option<String>,
    /// Short spec reference, e.g. "IAB VAST 4.1 §3.4.1".
    pub spec_ref: &'static str,
    /// 1-based line number of the element that triggered this issue.
    /// None for document-level issues (e.g. parse errors, missing root).
    pub line: Option<u32>,
    /// 1-based column number (byte offset within the line) of the element.
    /// None for document-level issues.
    pub col: Option<u32>,
}
230
/// Counts of issues by severity.
///
/// Use [`Summary::is_valid`] to check whether the document passes validation.
/// A document is valid when `errors == 0`, regardless of warning or info count.
///
/// The derived `Default` is the all-zero summary.
#[derive(Debug, Clone, Default)]
pub struct Summary {
    /// Number of error-level issues.
    pub errors: usize,
    /// Number of warning-level issues.
    pub warnings: usize,
    /// Number of info-level issues.
    pub infos: usize,
}
241
242impl Summary {
243    pub fn is_valid(&self) -> bool {
244        self.errors == 0
245    }
246}
247
/// The full result of validating a VAST document.
///
/// Contains the detected version, all issues found, and a summary with counts.
/// The `issues` vector is ordered by document position (depth-first traversal).
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// How (and whether) the document's VAST version was determined.
    pub version: DetectedVersion,
    /// Every issue reported for the document.
    pub issues: Vec<Issue>,
    /// Per-severity counts computed from `issues`.
    pub summary: Summary,
}
258
259// ── Rule configuration ────────────────────────────────────────────────────────
260
/// Per-rule severity override. Mirrors Severity but adds Off.
///
/// Used as the value type of `ValidationContext::rule_overrides`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleLevel {
    /// Report the rule's findings as `Severity::Error`.
    Error,
    /// Report the rule's findings as `Severity::Warning`.
    Warning,
    /// Report the rule's findings as `Severity::Info`.
    Info,
    /// Rule does not run. Produces no Issue.
    Off,
}
270
/// Context passed to validate_with_context. All fields have safe defaults.
///
/// The `Default` implementation yields depth 0, a maximum depth of 5, and no
/// overrides, so struct-update syntax (`..Default::default()`) can be used to
/// set only the fields of interest.
#[derive(Debug, Clone)]
pub struct ValidationContext {
    /// Current wrapper chain depth. 0 = this document is the root.
    pub wrapper_depth: u8,
    /// Maximum allowed wrapper depth. IAB VAST 4.x recommends 5.
    pub max_wrapper_depth: u8,
    /// Per-rule severity overrides keyed by rule ID.
    /// None means "use all recommended defaults".
    ///
    /// Keys are `&'static str`; rule IDs obtained at runtime (e.g. from a
    /// config file) need to be mapped to a static ID first, such as the `id`
    /// of the matching entry returned by `all_rules()`.
    pub rule_overrides: Option<HashMap<&'static str, RuleLevel>>,
}
282
283impl Default for ValidationContext {
284    fn default() -> Self {
285        Self {
286            wrapper_depth: 0,
287            max_wrapper_depth: 5,
288            rule_overrides: None,
289        }
290    }
291}
292
293impl ValidationContext {
294    /// Resolve the effective level for a rule, applying any override.
295    /// Returns None when the rule should be silenced (Off).
296    pub(crate) fn resolve(&self, rule_id: &'static str, default: Severity) -> Option<Severity> {
297        match &self.rule_overrides {
298            None => Some(default),
299            Some(map) => match map.get(rule_id) {
300                None => Some(default),
301                Some(RuleLevel::Off) => None,
302                Some(RuleLevel::Error) => Some(Severity::Error),
303                Some(RuleLevel::Warning) => Some(Severity::Warning),
304                Some(RuleLevel::Info) => Some(Severity::Info),
305            },
306        }
307    }
308}
309
310// ── Entry points ──────────────────────────────────────────────────────────────
311
/// Validate a VAST XML string using default settings.
///
/// This is the main entry point for most callers. It runs the full rule set
/// against the document and returns a [`ValidationResult`] containing every
/// issue found, a detected version, and a summary.
///
/// Equivalent to calling [`validate_with_context`] with
/// `ValidationContext::default()`.
///
/// # Example
///
/// ```rust
/// let xml = r#"<VAST version="4.1">
///   <Ad id="1">
///     <InLine>
///       <AdSystem>Example</AdSystem>
///       <AdTitle>Test Ad</AdTitle>
///       <AdServingId>abc123</AdServingId>
///       <Impression>https://track.example.com/imp</Impression>
///       <Creatives>
///         <Creative>
///           <UniversalAdId idRegistry="ad-id.org">UID-001</UniversalAdId>
///           <Linear>
///             <Duration>00:00:30</Duration>
///             <MediaFiles>
///               <MediaFile delivery="progressive" type="video/mp4"
///                          width="1920" height="1080">
///                 https://cdn.example.com/ad.mp4
///               </MediaFile>
///             </MediaFiles>
///           </Linear>
///         </Creative>
///       </Creatives>
///     </InLine>
///   </Ad>
/// </VAST>"#;
///
/// let result = vastlint_core::validate(xml);
/// assert!(result.summary.is_valid());
/// // Info-level advisories (e.g. missing Mezzanine for CTV) may be present;
/// // validity depends only on the error count.
/// assert_eq!(result.summary.errors, 0);
/// ```
pub fn validate(input: &str) -> ValidationResult {
    validate_with_context(input, ValidationContext::default())
}
355
356/// Validate a VAST XML string with caller-supplied context.
357///
358/// Use this when you need to declare wrapper chain depth or override the
359/// severity of specific rules. For simple validation, prefer [`validate`].
360///
361/// # Wrapper chain depth
362///
363/// When following a wrapper chain, pass the current depth so the
364/// [`crate::Severity::Error`] rule for `VAST-2.0-wrapper-depth` fires at the
365/// right level:
366///
367/// ```rust
368/// use vastlint_core::{ValidationContext, validate_with_context};
369///
370/// let ctx = ValidationContext {
371///     wrapper_depth: 3,
372///     max_wrapper_depth: 5,
373///     ..Default::default()
374/// };
375/// let result = validate_with_context("<VAST/>", ctx);
376/// ```
377///
378/// # Rule overrides
379///
380/// Suppress or downgrade individual rules by passing a rule override map.
381/// Rule IDs are the stable identifiers from the [`all_rules`] catalog.
382///
383/// ```rust
384/// use std::collections::HashMap;
385/// use vastlint_core::{RuleLevel, ValidationContext, validate_with_context};
386///
387/// let mut overrides = HashMap::new();
388/// // Silence the HTTP-vs-HTTPS advisory for internal tooling.
389/// overrides.insert("VAST-2.0-mediafile-https", RuleLevel::Off);
390/// // Treat a missing version attribute as a hard error.
391/// overrides.insert("VAST-2.0-root-version", RuleLevel::Error);
392///
393/// let ctx = ValidationContext {
394///     rule_overrides: Some(overrides),
395///     ..Default::default()
396/// };
397/// let result = validate_with_context("<VAST/>", ctx);
398/// ```
399pub fn validate_with_context(input: &str, context: ValidationContext) -> ValidationResult {
400    let doc = parse::parse(input);
401    let version = detect::detect_version(&doc);
402    let mut issues = Vec::new();
403    rules::run(&doc, &version, &context, &mut issues);
404    let summary = summarize::summarize(&issues);
405    ValidationResult {
406        version,
407        issues,
408        summary,
409    }
410}
411
412// ── Test helpers (integration tests only) ────────────────────────────────────
413
/// Re-exports the internal parser for integration tests that need to verify
/// the repaired XML round-trips without parse errors.
///
/// Hidden from the rendered documentation via `#[doc(hidden)]`; it simply
/// forwards `xml` to the crate-internal `parse::parse` and returns the parsed
/// document.
#[doc(hidden)]
pub fn _test_parse(xml: &str) -> parse::VastDocument {
    parse::parse(xml)
}
420
/// The external standard or authority that a rule is derived from.
///
/// Mirrors the standards listed in the README. Use this to filter the catalog
/// by authority level — e.g. alert hard on [`RuleSource::VastSpec`] violations
/// while only logging [`RuleSource::Inferred`] advisories.
///
/// [`RuleSource::as_str`] returns the display label for each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleSource {
    /// IAB Tech Lab VAST spec normative prose (explicit §-references)
    VastSpec,
    /// IAB Tech Lab VAST published XSD schemas (structural and enum constraints)
    VastXsd,
    /// W3C XML 1.0 well-formedness
    Xml,
    /// RFC 3986 URI syntax
    Rfc3986,
    /// IANA Media Types registry
    IanaMediaTypes,
    /// ISO 4217 currency codes
    Iso4217,
    /// Ad-ID registry format
    AdId,
    /// vastlint heuristic — no single external spec authority
    Inferred,
    /// IAB Tech Lab SIMID spec normative prose
    SimidSpec,
    /// Industry best practice derived from real-world ad serving patterns;
    /// violation has a direct revenue or measurement impact.
    IndustryBestPractice,
}
450
451impl RuleSource {
452    /// Short stable string identifier, suitable for JSON output and display.
453    pub fn as_str(self) -> &'static str {
454        match self {
455            RuleSource::VastSpec            => "VAST spec",
456            RuleSource::VastXsd             => "VAST XSD",
457            RuleSource::Xml                 => "W3C XML 1.0",
458            RuleSource::Rfc3986             => "RFC 3986",
459            RuleSource::IanaMediaTypes      => "IANA Media Types",
460            RuleSource::Iso4217             => "ISO 4217",
461            RuleSource::AdId                => "Ad-ID",
462            RuleSource::Inferred            => "inferred",
463            RuleSource::SimidSpec           => "IAB SIMID",
464            RuleSource::IndustryBestPractice => "revenue impact",
465        }
466    }
467}
468
469/// Metadata about a single rule, as exposed by the public catalog.
470///
471/// Marked `#[non_exhaustive]` so that adding fields in future minor releases
472/// does not break downstream code that reads (but never constructs) `RuleMeta`.
473#[non_exhaustive]
474pub struct RuleMeta {
475    pub id: &'static str,
476    pub default_severity: Severity,
477    pub description: &'static str,
478    /// The external standard this rule is derived from.
479    pub source: RuleSource,
480}
481
482impl RuleMeta {
483    /// Returns `true` when violating this rule has a direct revenue or
484    /// measurement impact — lost impressions, broken tracking, zero fill.
485    ///
486    /// This covers both rules whose [`source`](RuleMeta::source) is
487    /// [`RuleSource::IndustryBestPractice`] and rules whose source is an IAB
488    /// spec standard but whose real-world consequence is measurable revenue
489    /// loss (missing `<Impression>`, dead wrapper redirect, etc.).
490    pub fn revenue_impact(&self) -> bool {
491        matches!(
492            self.id,
493            // IndustryBestPractice-sourced rules
494            "VAST-2.0-mediafile-https"
495            | "VAST-2.0-tracking-https"
496            | "VAST-2.0-duplicate-impression"
497            | "VAST-4.1-mezzanine-recommended"
498            | "VAST-4.1-vpaid-in-interactive-context"
499            | "VAST-2.0-linear-tracking-quartiles"
500            // VastSpec-sourced rules with direct revenue consequence
501            | "VAST-2.0-inline-impression"
502            | "VAST-2.0-wrapper-impression"
503            | "VAST-2.0-wrapper-vastadtaguri"
504            | "VAST-2.0-url-empty"
505            | "VAST-4.1-vpaid-apiframework"
506            | "VAST-2.0-flash-mediafile"
507        )
508    }
509}
510
/// Returns the full catalog of known rules in definition order.
///
/// Use this to power `vastlint rules` output or to validate config-file rule
/// IDs before passing them into `ValidationContext.rule_overrides`.
///
/// The returned slice is the crate's static `rules::CATALOG`; calling this
/// function does not allocate.
pub fn all_rules() -> &'static [RuleMeta] {
    rules::CATALOG
}