Skip to main content

vastlint_core/
lib.rs

1//! # vastlint-core
2//!
3//! A zero-I/O VAST XML validation library. Takes a VAST XML
4//! string and returns a structured [`ValidationResult`] listing every issue
5//! found, the detected VAST version, and a summary of error/warning/info counts.
6//!
//! The entire public surface is six functions and a handful of types:
8//!
9//! - [`validate`] -- validate with default settings (most callers want this)
10//! - [`validate_with_context`] -- validate with rule overrides or wrapper depth
11//! - [`fix`] -- fix deterministic issues and return repaired XML
12//! - [`fix_with_context`] -- fix with rule overrides or wrapper depth
13//! - [`inspect_document`] -- extract creative and wrapper metadata from one VAST XML document
14//! - [`all_rules`] -- list the full 121-rule catalog
15//!
16//! # Performance — allocator recommendation
17//!
18//! `vastlint-core` builds an owned document tree on every call (one heap
19//! allocation per XML element, attribute, and text node). Under concurrent
20//! load the system allocator becomes a bottleneck because all threads compete
21//! for a shared free-list lock.
22//!
23//! Switching to [`mimalloc`](https://docs.rs/mimalloc) in your **binary**
24//! crate eliminates this contention and gives dramatically better throughput
25//! at high concurrency, especially for larger documents:
26//!
27//! ```toml
28//! # Cargo.toml (your binary, not a library crate)
29//! [dependencies]
30//! mimalloc = { version = "0.1", default-features = false }
31//! ```
32//!
33//! ```rust,ignore
34//! // src/main.rs
35//! use mimalloc::MiMalloc;
36//! #[global_allocator]
37//! static GLOBAL: MiMalloc = MiMalloc;
38//! ```
39//!
40//! Measured on Apple M4 (10 threads, production-realistic VAST tags):
41//!
42//! | Allocator | 17 KB tag | 44 KB tag |
43//! |---|---|---|
44//! | system (default) | 1,847 tags/s · 541 µs | 328 tags/s · 3,048 µs |
45//! | mimalloc | 15,760 tags/s · 63 µs | 2,635 tags/s · 380 µs |
46//!
47//! **mimalloc: ~8× throughput improvement on multi-threaded workloads.**
48//!
49//! > ⚠️ Do **not** set a global allocator in a library crate — it would
50//! > override the allocator for any host process that links you (Go, Python,
51//! > Ruby runtimes, etc.), which can cause heap corruption.
52//!
53//! # Quick start
54//!
55//! ```rust
56//! let xml = r#"<VAST version="2.0">
57//!   <Ad><InLine>
58//!     <AdSystem>Demo</AdSystem>
59//!     <AdTitle>Ad</AdTitle>
60//!     <Impression>https://t.example.com/imp</Impression>
61//!     <Creatives>
62//!       <Creative>
63//!         <Linear>
64//!           <Duration>00:00:15</Duration>
65//!           <MediaFiles>
66//!             <MediaFile delivery="progressive" type="video/mp4"
67//!                        width="640" height="360">
68//!               https://cdn.example.com/ad.mp4
69//!             </MediaFile>
70//!           </MediaFiles>
71//!         </Linear>
72//!       </Creative>
73//!     </Creatives>
74//!   </InLine></Ad>
75//! </VAST>"#;
76//!
77//! let result = vastlint_core::validate(xml);
78//! assert_eq!(result.summary.errors, 0);
79//! ```
80//!
81//! # Design constraints
82//!
83//! The library has no I/O, no logging, no global state, and no async runtime.
84//! It can be embedded in a CLI, HTTP server, WASM module, or FFI binding
85//! without pulling in any platform-specific dependencies.
86//!
87//! Three crate dependencies: `quick-xml` (XML parsing), `url` (RFC 3986),
88//! and `phf` (compile-time hash maps).
89
90mod detect;
91mod fix;
92mod inspect;
93mod parse;
94mod rules;
95mod summarize;
96
97pub use fix::{fix, fix_with_context, AppliedFix, FixResult};
98pub use inspect::{inspect_document, InspectAdType, InspectDocumentMeta, InspectMediaFile};
99
100use std::collections::HashMap;
101
102// ── Public types ─────────────────────────────────────────────────────────────
103
/// A VAST specification version.
///
/// The value is either taken from the `version` attribute of the document or
/// inferred from the document structure.
///
/// There is one variant per version published by IAB Tech Lab, from 2.0
/// through 4.3.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VastVersion {
    V2_0,
    V3_0,
    V4_0,
    V4_1,
    V4_2,
    V4_3,
}

impl VastVersion {
    /// Returns the dotted version string, e.g. `"4.1"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::V2_0 => "2.0",
            Self::V3_0 => "3.0",
            Self::V4_0 => "4.0",
            Self::V4_1 => "4.1",
            Self::V4_2 => "4.2",
            Self::V4_3 => "4.3",
        }
    }

    /// Returns true if this version is 4.x or later.
    pub fn is_v4(&self) -> bool {
        // Every 4.x variant ranks at or above 4.0 in release order.
        self.at_least(&Self::V4_0)
    }

    /// Returns true if this version is at least the given version.
    pub fn at_least(&self, other: &VastVersion) -> bool {
        other.ordinal() <= self.ordinal()
    }

    /// Rank of the version in release order; only used for comparisons.
    fn ordinal(&self) -> u8 {
        match *self {
            Self::V2_0 => 0,
            Self::V3_0 => 1,
            Self::V4_0 => 2,
            Self::V4_1 => 3,
            Self::V4_2 => 4,
            Self::V4_3 => 5,
        }
    }
}
154
155/// How the version was determined.
156///
157/// Version detection is a two-pass process: first the `version` attribute on
158/// the root `<VAST>` element is read (declared), then the document structure
159/// is scanned for version-specific elements (inferred). When both are
160/// available, consistency is checked and a mismatch produces a warning.
161#[derive(Debug, Clone, PartialEq, Eq)]
162pub enum DetectedVersion {
163    /// Version attribute was present and recognised.
164    Declared(VastVersion),
165    /// Version attribute was absent or unrecognised; version inferred from
166    /// document structure.
167    Inferred(VastVersion),
168    /// Both declared and inferred — may or may not agree.
169    DeclaredAndInferred {
170        declared: VastVersion,
171        inferred: VastVersion,
172        consistent: bool,
173    },
174    /// Could not determine version.
175    Unknown,
176}
177
178impl DetectedVersion {
179    /// Returns the best available version, preferring the declared value.
180    pub fn best(&self) -> Option<&VastVersion> {
181        match self {
182            DetectedVersion::Declared(v) => Some(v),
183            DetectedVersion::Inferred(v) => Some(v),
184            DetectedVersion::DeclaredAndInferred { declared, .. } => Some(declared),
185            DetectedVersion::Unknown => None,
186        }
187    }
188}
189
/// How serious an issue is, based strictly on the wording of the spec.
///
/// - `Error`: the spec says "must" or "required"; the tag will likely fail
///   to serve.
/// - `Warning`: the spec says "should" or "recommended", or the feature is
///   deprecated.
/// - `Info`: advisory; not a spec violation but a known interoperability
///   risk.
///
/// The variants are declared from least to most severe, so the derived
/// ordering gives `Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    Info,
    Warning,
    Error,
}

impl Severity {
    /// Returns the lowercase name of the severity level.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Info => "info",
            Self::Warning => "warning",
            Self::Error => "error",
        }
    }
}
211
212/// A single validation finding.
213#[derive(Debug, Clone)]
214pub struct Issue {
215    /// Stable rule identifier, e.g. "VAST-2.0-root-version".
216    pub id: &'static str,
217    /// Effective severity after applying any caller overrides.
218    pub severity: Severity,
219    /// Human-readable message. Static string; no allocation on the hot path.
220    pub message: &'static str,
221    /// XPath-like location in the document, e.g. `/VAST/Ad\[0\]/InLine/AdSystem`.
222    /// None when the issue applies to the document as a whole.
223    pub path: Option<String>,
224    /// Short spec reference, e.g. "IAB VAST 4.1 §3.4.1".
225    pub spec_ref: &'static str,
226    /// 1-based line number of the element that triggered this issue.
227    /// None for document-level issues (e.g. parse errors, missing root).
228    pub line: Option<u32>,
229    /// 1-based column number (byte offset within the line) of the element.
230    /// None for document-level issues.
231    pub col: Option<u32>,
232}
233
/// Per-severity issue counts.
///
/// Call [`Summary::is_valid`] to find out whether the document passes
/// validation. Only `errors` matters for that: any number of warnings or
/// infos still counts as valid.
#[derive(Debug, Clone, Default)]
pub struct Summary {
    pub errors: usize,
    pub warnings: usize,
    pub infos: usize,
}

impl Summary {
    /// Returns true when the error count is zero.
    pub fn is_valid(&self) -> bool {
        let error_count = self.errors;
        error_count == 0
    }
}
250
251/// The full result of validating a VAST document.
252///
253/// Contains the detected version, all issues found, and a summary with counts.
254/// The `issues` vector is ordered by document position (depth-first traversal).
255#[derive(Debug, Clone)]
256pub struct ValidationResult {
257    pub version: DetectedVersion,
258    pub issues: Vec<Issue>,
259    pub summary: Summary,
260}
261
262// ── Rule configuration ────────────────────────────────────────────────────────
263
/// Severity override for an individual rule.
///
/// Has the same levels as `Severity`, plus `Off` for disabling a rule.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleLevel {
    Error,
    Warning,
    Info,
    /// The rule does not run and produces no `Issue`.
    Off,
}
273
274/// Context passed to validate_with_context. All fields have safe defaults.
275#[derive(Debug, Clone)]
276pub struct ValidationContext {
277    /// Current wrapper chain depth. 0 = this document is the root.
278    pub wrapper_depth: u8,
279    /// Maximum allowed wrapper depth. IAB VAST 4.x recommends 5.
280    pub max_wrapper_depth: u8,
281    /// Per-rule severity overrides keyed by rule ID.
282    /// None means "use all recommended defaults".
283    pub rule_overrides: Option<HashMap<&'static str, RuleLevel>>,
284    /// Override the VAST version used for validation, ignoring the version
285    /// attribute declared in the XML. None = auto-detect from the document
286    /// (default). Useful for validating templates or tags where the version
287    /// attribute is absent or incorrect.
288    pub forced_version: Option<VastVersion>,
289}
290
291impl Default for ValidationContext {
292    fn default() -> Self {
293        Self {
294            wrapper_depth: 0,
295            max_wrapper_depth: 5,
296            rule_overrides: None,
297            forced_version: None,
298        }
299    }
300}
301
302impl ValidationContext {
303    /// Resolve the effective level for a rule, applying any override.
304    /// Returns None when the rule should be silenced (Off).
305    pub(crate) fn resolve(&self, rule_id: &'static str, default: Severity) -> Option<Severity> {
306        match &self.rule_overrides {
307            None => Some(default),
308            Some(map) => match map.get(rule_id) {
309                None => Some(default),
310                Some(RuleLevel::Off) => None,
311                Some(RuleLevel::Error) => Some(Severity::Error),
312                Some(RuleLevel::Warning) => Some(Severity::Warning),
313                Some(RuleLevel::Info) => Some(Severity::Info),
314            },
315        }
316    }
317}
318
319// ── Entry points ──────────────────────────────────────────────────────────────
320
321/// Validate a VAST XML string using default settings.
322///
323/// This is the main entry point for most callers. It runs the full rule set
324/// against the document and returns a [`ValidationResult`] containing every
325/// issue found, a detected version, and a summary.
326///
327/// # Example
328///
329/// ```rust
330/// let xml = r#"<VAST version="4.1">
331///   <Ad id="1">
332///     <InLine>
333///       <AdSystem>Example</AdSystem>
334///       <AdTitle>Test Ad</AdTitle>
335///       <AdServingId>abc123</AdServingId>
336///       <Impression>https://track.example.com/imp</Impression>
337///       <Creatives>
338///         <Creative>
339///           <UniversalAdId idRegistry="ad-id.org">UID-001</UniversalAdId>
340///           <Linear>
341///             <Duration>00:00:30</Duration>
342///             <MediaFiles>
343///               <MediaFile delivery="progressive" type="video/mp4"
344///                          width="1920" height="1080">
345///                 https://cdn.example.com/ad.mp4
346///               </MediaFile>
347///             </MediaFiles>
348///           </Linear>
349///         </Creative>
350///       </Creatives>
351///     </InLine>
352///   </Ad>
353/// </VAST>"#;
354///
355/// let result = vastlint_core::validate(xml);
356/// assert!(result.summary.is_valid());
357/// // Info-level advisories (e.g. missing Mezzanine for CTV) may be present
358/// // but the document has no errors or warnings that affect validity.
359/// assert_eq!(result.summary.errors, 0);
360/// ```
361pub fn validate(input: &str) -> ValidationResult {
362    validate_with_context(input, ValidationContext::default())
363}
364
365/// Validate a VAST XML string with caller-supplied context.
366///
367/// Use this when you need to declare wrapper chain depth or override the
368/// severity of specific rules. For simple validation, prefer [`validate`].
369///
370/// # Wrapper chain depth
371///
372/// When following a wrapper chain, pass the current depth so the
373/// [`crate::Severity::Error`] rule for `VAST-2.0-wrapper-depth` fires at the
374/// right level:
375///
376/// ```rust
377/// use vastlint_core::{ValidationContext, validate_with_context};
378///
379/// let ctx = ValidationContext {
380///     wrapper_depth: 3,
381///     max_wrapper_depth: 5,
382///     ..Default::default()
383/// };
384/// let result = validate_with_context("<VAST/>", ctx);
385/// ```
386///
387/// # Rule overrides
388///
389/// Suppress or downgrade individual rules by passing a rule override map.
390/// Rule IDs are the stable identifiers from the [`all_rules`] catalog.
391///
392/// ```rust
393/// use std::collections::HashMap;
394/// use vastlint_core::{RuleLevel, ValidationContext, validate_with_context};
395///
396/// let mut overrides = HashMap::new();
397/// // Silence the HTTP-vs-HTTPS advisory for internal tooling.
398/// overrides.insert("VAST-2.0-mediafile-https", RuleLevel::Off);
399/// // Treat a missing version attribute as a hard error.
400/// overrides.insert("VAST-2.0-root-version", RuleLevel::Error);
401///
402/// let ctx = ValidationContext {
403///     rule_overrides: Some(overrides),
404///     ..Default::default()
405/// };
406/// let result = validate_with_context("<VAST/>", ctx);
407/// ```
408pub fn validate_with_context(input: &str, context: ValidationContext) -> ValidationResult {
409    let doc = parse::parse(input);
410    let version = match context.forced_version {
411        Some(v) => DetectedVersion::Declared(v),
412        None => detect::detect_version(&doc),
413    };
414    let mut issues = Vec::new();
415    rules::run(&doc, &version, &context, &mut issues);
416    let summary = summarize::summarize(&issues);
417    ValidationResult {
418        version,
419        issues,
420        summary,
421    }
422}
423
424// ── Test helpers (integration tests only) ────────────────────────────────────
425
426/// Re-exports the internal parser for integration tests that need to verify
427/// the repaired XML round-trips without parse errors.
428#[doc(hidden)]
429pub fn _test_parse(xml: &str) -> parse::VastDocument {
430    parse::parse(xml)
431}
432
// ── Rule catalog ─────────────────────────────────────────────────────────────

/// The external standard or authority a rule is derived from.
///
/// Mirrors the standards listed in the README. Use it to filter the catalog
/// by authority level: for example, alert hard on [`RuleSource::VastSpec`]
/// violations while only logging [`RuleSource::Inferred`] advisories.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleSource {
    /// IAB Tech Lab VAST spec normative prose (explicit §-references)
    VastSpec,
    /// IAB Tech Lab VAST published XSD schemas (structural and enum constraints)
    VastXsd,
    /// W3C XML 1.0 well-formedness
    Xml,
    /// RFC 3986 URI syntax
    Rfc3986,
    /// IANA Media Types registry
    IanaMediaTypes,
    /// ISO 4217 currency codes
    Iso4217,
    /// Ad-ID registry format
    AdId,
    /// vastlint heuristic with no single external spec authority
    Inferred,
    /// IAB Tech Lab SIMID spec normative prose
    SimidSpec,
    /// Industry best practice derived from real-world ad serving patterns;
    /// violation has a direct revenue or measurement impact.
    IndustryBestPractice,
}

impl RuleSource {
    /// Short, stable label for the source, suitable for JSON output and
    /// display.
    pub fn as_str(self) -> &'static str {
        use RuleSource::*;

        match self {
            VastSpec => "VAST spec",
            VastXsd => "VAST XSD",
            Xml => "W3C XML 1.0",
            Rfc3986 => "RFC 3986",
            IanaMediaTypes => "IANA Media Types",
            Iso4217 => "ISO 4217",
            AdId => "Ad-ID",
            Inferred => "inferred",
            SimidSpec => "IAB SIMID",
            IndustryBestPractice => "revenue impact",
        }
    }
}
480
481/// Metadata about a single rule, as exposed by the public catalog.
482///
483/// Marked `#[non_exhaustive]` so that adding fields in future minor releases
484/// does not break downstream code that reads (but never constructs) `RuleMeta`.
485#[non_exhaustive]
486pub struct RuleMeta {
487    pub id: &'static str,
488    pub default_severity: Severity,
489    pub description: &'static str,
490    /// The external standard this rule is derived from.
491    pub source: RuleSource,
492}
493
494impl RuleMeta {
495    /// Returns `true` when violating this rule has a direct revenue or
496    /// measurement impact — lost impressions, broken tracking, zero fill.
497    ///
498    /// This covers both rules whose [`source`](RuleMeta::source) is
499    /// [`RuleSource::IndustryBestPractice`] and rules whose source is an IAB
500    /// spec standard but whose real-world consequence is measurable revenue
501    /// loss (missing `<Impression>`, dead wrapper redirect, etc.).
502    pub fn revenue_impact(&self) -> bool {
503        matches!(
504            self.id,
505            // IndustryBestPractice-sourced rules
506            "VAST-2.0-mediafile-https"
507            | "VAST-2.0-tracking-https"
508            | "VAST-2.0-duplicate-impression"
509            | "VAST-4.1-mezzanine-recommended"
510            | "VAST-4.1-vpaid-in-interactive-context"
511            | "VAST-2.0-linear-tracking-quartiles"
512            // VastSpec-sourced rules with direct revenue consequence
513            | "VAST-2.0-inline-impression"
514            | "VAST-2.0-wrapper-impression"
515            | "VAST-2.0-wrapper-vastadtaguri"
516            | "VAST-2.0-url-empty"
517            | "VAST-4.1-vpaid-apiframework"
518            | "VAST-2.0-flash-mediafile"
519        )
520    }
521}
522
523/// Returns the full catalog of known rules in definition order.
524///
525/// Use this to power `vastlint rules` output or to validate config-file rule
526/// IDs before passing them into `ValidationContext.rule_overrides`.
527pub fn all_rules() -> &'static [RuleMeta] {
528    rules::CATALOG
529}