vastlint_core/lib.rs
1//! # vastlint-core
2//!
3//! A zero-I/O VAST XML validation library. Takes a VAST XML
4//! string and returns a structured [`ValidationResult`] listing every issue
5//! found, the detected VAST version, and a summary of error/warning/info counts.
6//!
//! The entire public surface is five functions and a handful of types:
8//!
9//! - [`validate`] -- validate with default settings (most callers want this)
10//! - [`validate_with_context`] -- validate with rule overrides or wrapper depth
11//! - [`fix`] -- fix deterministic issues and return repaired XML
12//! - [`fix_with_context`] -- fix with rule overrides or wrapper depth
13//! - [`all_rules`] -- list the full 118-rule catalog
14//!
15//! # Performance — allocator recommendation
16//!
17//! `vastlint-core` builds an owned document tree on every call (one heap
18//! allocation per XML element, attribute, and text node). Under concurrent
19//! load the system allocator becomes a bottleneck because all threads compete
20//! for a shared free-list lock.
21//!
22//! Switching to [`mimalloc`](https://docs.rs/mimalloc) in your **binary**
23//! crate eliminates this contention and gives dramatically better throughput
24//! at high concurrency, especially for larger documents:
25//!
26//! ```toml
27//! # Cargo.toml (your binary, not a library crate)
28//! [dependencies]
29//! mimalloc = { version = "0.1", default-features = false }
30//! ```
31//!
32//! ```rust,ignore
33//! // src/main.rs
34//! use mimalloc::MiMalloc;
35//! #[global_allocator]
36//! static GLOBAL: MiMalloc = MiMalloc;
37//! ```
38//!
39//! Measured on Apple M4 (10 threads, production-realistic VAST tags):
40//!
41//! | Allocator | 17 KB tag | 44 KB tag |
42//! |---|---|---|
43//! | system (default) | 1,847 tags/s · 541 µs | 328 tags/s · 3,048 µs |
44//! | mimalloc | 15,760 tags/s · 63 µs | 2,635 tags/s · 380 µs |
45//!
46//! **mimalloc: ~8× throughput improvement on multi-threaded workloads.**
47//!
48//! > ⚠️ Do **not** set a global allocator in a library crate — it would
49//! > override the allocator for any host process that links you (Go, Python,
50//! > Ruby runtimes, etc.), which can cause heap corruption.
51//!
52//! # Quick start
53//!
54//! ```rust
55//! let xml = r#"<VAST version="2.0">
56//! <Ad><InLine>
57//! <AdSystem>Demo</AdSystem>
58//! <AdTitle>Ad</AdTitle>
59//! <Impression>https://t.example.com/imp</Impression>
60//! <Creatives>
61//! <Creative>
62//! <Linear>
63//! <Duration>00:00:15</Duration>
64//! <MediaFiles>
65//! <MediaFile delivery="progressive" type="video/mp4"
66//! width="640" height="360">
67//! https://cdn.example.com/ad.mp4
68//! </MediaFile>
69//! </MediaFiles>
70//! </Linear>
71//! </Creative>
72//! </Creatives>
73//! </InLine></Ad>
74//! </VAST>"#;
75//!
76//! let result = vastlint_core::validate(xml);
77//! assert_eq!(result.summary.errors, 0);
78//! ```
79//!
80//! # Design constraints
81//!
82//! The library has no I/O, no logging, no global state, and no async runtime.
83//! It can be embedded in a CLI, HTTP server, WASM module, or FFI binding
84//! without pulling in any platform-specific dependencies.
85//!
86//! Three crate dependencies: `quick-xml` (XML parsing), `url` (RFC 3986),
87//! and `phf` (compile-time hash maps).
88
89mod detect;
90mod fix;
91mod parse;
92mod rules;
93mod summarize;
94
95pub use fix::{fix, fix_with_context, AppliedFix, FixResult};
96
97use std::collections::HashMap;
98
99// ── Public types ─────────────────────────────────────────────────────────────
100
/// A VAST specification version, either read from the `version` attribute
/// or inferred from the structure of the document.
///
/// Every release published by IAB Tech Lab is represented: 2.0 through 4.3.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VastVersion {
    V2_0,
    V3_0,
    V4_0,
    V4_1,
    V4_2,
    V4_3,
}

impl VastVersion {
    /// Returns the dotted version string for this release, e.g. `"4.1"`.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::V2_0 => "2.0",
            Self::V3_0 => "3.0",
            Self::V4_0 => "4.0",
            Self::V4_1 => "4.1",
            Self::V4_2 => "4.2",
            Self::V4_3 => "4.3",
        }
    }

    /// Returns true if this version belongs to the 4.x family.
    pub fn is_v4(&self) -> bool {
        // Written as an exhaustive match so that adding a variant forces a
        // conscious decision here instead of silently falling through.
        match self {
            Self::V4_0 | Self::V4_1 | Self::V4_2 | Self::V4_3 => true,
            Self::V2_0 | Self::V3_0 => false,
        }
    }

    /// Returns true when `self` is the same release as `other` or a newer one.
    pub fn at_least(&self, other: &VastVersion) -> bool {
        other.ordinal() <= self.ordinal()
    }

    /// Position of this release in publication order (older = smaller).
    fn ordinal(&self) -> u8 {
        match self {
            Self::V2_0 => 0,
            Self::V3_0 => 1,
            Self::V4_0 => 2,
            Self::V4_1 => 3,
            Self::V4_2 => 4,
            Self::V4_3 => 5,
        }
    }
}
151
152/// How the version was determined.
153///
154/// Version detection is a two-pass process: first the `version` attribute on
155/// the root `<VAST>` element is read (declared), then the document structure
156/// is scanned for version-specific elements (inferred). When both are
157/// available, consistency is checked and a mismatch produces a warning.
158#[derive(Debug, Clone, PartialEq, Eq)]
159pub enum DetectedVersion {
160 /// Version attribute was present and recognised.
161 Declared(VastVersion),
162 /// Version attribute was absent or unrecognised; version inferred from
163 /// document structure.
164 Inferred(VastVersion),
165 /// Both declared and inferred — may or may not agree.
166 DeclaredAndInferred {
167 declared: VastVersion,
168 inferred: VastVersion,
169 consistent: bool,
170 },
171 /// Could not determine version.
172 Unknown,
173}
174
175impl DetectedVersion {
176 /// Returns the best available version, preferring the declared value.
177 pub fn best(&self) -> Option<&VastVersion> {
178 match self {
179 DetectedVersion::Declared(v) => Some(v),
180 DetectedVersion::Inferred(v) => Some(v),
181 DetectedVersion::DeclaredAndInferred { declared, .. } => Some(declared),
182 DetectedVersion::Unknown => None,
183 }
184 }
185}
186
/// How serious an issue is, derived strictly from the wording of the spec.
///
/// * `Error` — the spec says "must" or "required"; the tag will likely fail
///   to serve.
/// * `Warning` — the spec says "should" or "recommended", or the feature is
///   deprecated.
/// * `Info` — advisory; not a spec violation but a known interoperability
///   risk.
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    Info,
    Warning,
    Error,
}

impl Severity {
    /// Returns the lowercase name of this severity.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Info => "info",
            Self::Warning => "warning",
            Self::Error => "error",
        }
    }
}
208
209/// A single validation finding.
210#[derive(Debug, Clone)]
211pub struct Issue {
212 /// Stable rule identifier, e.g. "VAST-2.0-root-version".
213 pub id: &'static str,
214 /// Effective severity after applying any caller overrides.
215 pub severity: Severity,
216 /// Human-readable message. Static string; no allocation on the hot path.
217 pub message: &'static str,
218 /// XPath-like location in the document, e.g. `/VAST/Ad\[0\]/InLine/AdSystem`.
219 /// None when the issue applies to the document as a whole.
220 pub path: Option<String>,
221 /// Short spec reference, e.g. "IAB VAST 4.1 §3.4.1".
222 pub spec_ref: &'static str,
223 /// 1-based line number of the element that triggered this issue.
224 /// None for document-level issues (e.g. parse errors, missing root).
225 pub line: Option<u32>,
226 /// 1-based column number (byte offset within the line) of the element.
227 /// None for document-level issues.
228 pub col: Option<u32>,
229}
230
/// Counts of issues by severity.
///
/// Use [`Summary::is_valid`] to check whether the document passes validation.
/// A document is valid when `errors == 0`, regardless of warning or info count.
///
/// Implements `PartialEq`/`Eq` so callers (and tests) can compare two
/// summaries directly instead of comparing each counter by hand.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Summary {
    /// Number of issues with error severity.
    pub errors: usize,
    /// Number of issues with warning severity.
    pub warnings: usize,
    /// Number of issues with info severity.
    pub infos: usize,
}

impl Summary {
    /// Returns `true` when no error-level issue was recorded.
    ///
    /// Warnings and infos never affect validity.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.errors == 0
    }
}
247
248/// The full result of validating a VAST document.
249///
250/// Contains the detected version, all issues found, and a summary with counts.
251/// The `issues` vector is ordered by document position (depth-first traversal).
252#[derive(Debug, Clone)]
253pub struct ValidationResult {
254 pub version: DetectedVersion,
255 pub issues: Vec<Issue>,
256 pub summary: Summary,
257}
258
259// ── Rule configuration ────────────────────────────────────────────────────────
260
/// Severity a caller can assign to an individual rule.
///
/// Has the same levels as `Severity`, plus `Off` to disable a rule entirely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleLevel {
    /// Report the rule's findings as errors.
    Error,
    /// Report the rule's findings as warnings.
    Warning,
    /// Report the rule's findings as informational.
    Info,
    /// Rule does not run. Produces no Issue.
    Off,
}
270
271/// Context passed to validate_with_context. All fields have safe defaults.
272#[derive(Debug, Clone)]
273pub struct ValidationContext {
274 /// Current wrapper chain depth. 0 = this document is the root.
275 pub wrapper_depth: u8,
276 /// Maximum allowed wrapper depth. IAB VAST 4.x recommends 5.
277 pub max_wrapper_depth: u8,
278 /// Per-rule severity overrides keyed by rule ID.
279 /// None means "use all recommended defaults".
280 pub rule_overrides: Option<HashMap<&'static str, RuleLevel>>,
281}
282
283impl Default for ValidationContext {
284 fn default() -> Self {
285 Self {
286 wrapper_depth: 0,
287 max_wrapper_depth: 5,
288 rule_overrides: None,
289 }
290 }
291}
292
293impl ValidationContext {
294 /// Resolve the effective level for a rule, applying any override.
295 /// Returns None when the rule should be silenced (Off).
296 pub(crate) fn resolve(&self, rule_id: &'static str, default: Severity) -> Option<Severity> {
297 match &self.rule_overrides {
298 None => Some(default),
299 Some(map) => match map.get(rule_id) {
300 None => Some(default),
301 Some(RuleLevel::Off) => None,
302 Some(RuleLevel::Error) => Some(Severity::Error),
303 Some(RuleLevel::Warning) => Some(Severity::Warning),
304 Some(RuleLevel::Info) => Some(Severity::Info),
305 },
306 }
307 }
308}
309
310// ── Entry points ──────────────────────────────────────────────────────────────
311
312/// Validate a VAST XML string using default settings.
313///
314/// This is the main entry point for most callers. It runs the full rule set
315/// against the document and returns a [`ValidationResult`] containing every
316/// issue found, a detected version, and a summary.
317///
318/// # Example
319///
320/// ```rust
321/// let xml = r#"<VAST version="4.1">
322/// <Ad id="1">
323/// <InLine>
324/// <AdSystem>Example</AdSystem>
325/// <AdTitle>Test Ad</AdTitle>
326/// <AdServingId>abc123</AdServingId>
327/// <Impression>https://track.example.com/imp</Impression>
328/// <Creatives>
329/// <Creative>
330/// <UniversalAdId idRegistry="ad-id.org">UID-001</UniversalAdId>
331/// <Linear>
332/// <Duration>00:00:30</Duration>
333/// <MediaFiles>
334/// <MediaFile delivery="progressive" type="video/mp4"
335/// width="1920" height="1080">
336/// https://cdn.example.com/ad.mp4
337/// </MediaFile>
338/// </MediaFiles>
339/// </Linear>
340/// </Creative>
341/// </Creatives>
342/// </InLine>
343/// </Ad>
344/// </VAST>"#;
345///
346/// let result = vastlint_core::validate(xml);
347/// assert!(result.summary.is_valid());
348/// // Info-level advisories (e.g. missing Mezzanine for CTV) may be present
349/// // but the document has no errors or warnings that affect validity.
350/// assert_eq!(result.summary.errors, 0);
351/// ```
352pub fn validate(input: &str) -> ValidationResult {
353 validate_with_context(input, ValidationContext::default())
354}
355
356/// Validate a VAST XML string with caller-supplied context.
357///
358/// Use this when you need to declare wrapper chain depth or override the
359/// severity of specific rules. For simple validation, prefer [`validate`].
360///
361/// # Wrapper chain depth
362///
363/// When following a wrapper chain, pass the current depth so the
364/// [`crate::Severity::Error`] rule for `VAST-2.0-wrapper-depth` fires at the
365/// right level:
366///
367/// ```rust
368/// use vastlint_core::{ValidationContext, validate_with_context};
369///
370/// let ctx = ValidationContext {
371/// wrapper_depth: 3,
372/// max_wrapper_depth: 5,
373/// ..Default::default()
374/// };
375/// let result = validate_with_context("<VAST/>", ctx);
376/// ```
377///
378/// # Rule overrides
379///
380/// Suppress or downgrade individual rules by passing a rule override map.
381/// Rule IDs are the stable identifiers from the [`all_rules`] catalog.
382///
383/// ```rust
384/// use std::collections::HashMap;
385/// use vastlint_core::{RuleLevel, ValidationContext, validate_with_context};
386///
387/// let mut overrides = HashMap::new();
388/// // Silence the HTTP-vs-HTTPS advisory for internal tooling.
389/// overrides.insert("VAST-2.0-mediafile-https", RuleLevel::Off);
390/// // Treat a missing version attribute as a hard error.
391/// overrides.insert("VAST-2.0-root-version", RuleLevel::Error);
392///
393/// let ctx = ValidationContext {
394/// rule_overrides: Some(overrides),
395/// ..Default::default()
396/// };
397/// let result = validate_with_context("<VAST/>", ctx);
398/// ```
399pub fn validate_with_context(input: &str, context: ValidationContext) -> ValidationResult {
400 let doc = parse::parse(input);
401 let version = detect::detect_version(&doc);
402 let mut issues = Vec::new();
403 rules::run(&doc, &version, &context, &mut issues);
404 let summary = summarize::summarize(&issues);
405 ValidationResult {
406 version,
407 issues,
408 summary,
409 }
410}
411
412// ── Test helpers (integration tests only) ────────────────────────────────────
413
414/// Re-exports the internal parser for integration tests that need to verify
415/// the repaired XML round-trips without parse errors.
416#[doc(hidden)]
417pub fn _test_parse(xml: &str) -> parse::VastDocument {
418 parse::parse(xml)
419}
420
/// The external standard or authority a rule is derived from.
///
/// Mirrors the standards listed in the README. Use it to filter the catalog
/// by authority level — e.g. alert hard on [`RuleSource::VastSpec`]
/// violations while only logging [`RuleSource::Inferred`] advisories.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleSource {
    /// IAB Tech Lab VAST spec normative prose (explicit §-references)
    VastSpec,
    /// IAB Tech Lab VAST published XSD schemas (structural and enum constraints)
    VastXsd,
    /// W3C XML 1.0 well-formedness
    Xml,
    /// RFC 3986 URI syntax
    Rfc3986,
    /// IANA Media Types registry
    IanaMediaTypes,
    /// ISO 4217 currency codes
    Iso4217,
    /// Ad-ID registry format
    AdId,
    /// vastlint heuristic — no single external spec authority
    Inferred,
    /// IAB Tech Lab SIMID spec normative prose
    SimidSpec,
    /// Industry best practice derived from real-world ad serving patterns;
    /// violation has a direct revenue or measurement impact.
    IndustryBestPractice,
}

impl RuleSource {
    /// Returns a short, stable label for this source, suitable for JSON
    /// output and display.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::VastSpec => "VAST spec",
            Self::VastXsd => "VAST XSD",
            Self::Xml => "W3C XML 1.0",
            Self::Rfc3986 => "RFC 3986",
            Self::IanaMediaTypes => "IANA Media Types",
            Self::Iso4217 => "ISO 4217",
            Self::AdId => "Ad-ID",
            Self::Inferred => "inferred",
            Self::SimidSpec => "IAB SIMID",
            Self::IndustryBestPractice => "revenue impact",
        }
    }
}
468
469/// Metadata about a single rule, as exposed by the public catalog.
470///
471/// Marked `#[non_exhaustive]` so that adding fields in future minor releases
472/// does not break downstream code that reads (but never constructs) `RuleMeta`.
473#[non_exhaustive]
474pub struct RuleMeta {
475 pub id: &'static str,
476 pub default_severity: Severity,
477 pub description: &'static str,
478 /// The external standard this rule is derived from.
479 pub source: RuleSource,
480}
481
482impl RuleMeta {
483 /// Returns `true` when violating this rule has a direct revenue or
484 /// measurement impact — lost impressions, broken tracking, zero fill.
485 ///
486 /// This covers both rules whose [`source`](RuleMeta::source) is
487 /// [`RuleSource::IndustryBestPractice`] and rules whose source is an IAB
488 /// spec standard but whose real-world consequence is measurable revenue
489 /// loss (missing `<Impression>`, dead wrapper redirect, etc.).
490 pub fn revenue_impact(&self) -> bool {
491 matches!(
492 self.id,
493 // IndustryBestPractice-sourced rules
494 "VAST-2.0-mediafile-https"
495 | "VAST-2.0-tracking-https"
496 | "VAST-2.0-duplicate-impression"
497 | "VAST-4.1-mezzanine-recommended"
498 | "VAST-4.1-vpaid-in-interactive-context"
499 | "VAST-2.0-linear-tracking-quartiles"
500 // VastSpec-sourced rules with direct revenue consequence
501 | "VAST-2.0-inline-impression"
502 | "VAST-2.0-wrapper-impression"
503 | "VAST-2.0-wrapper-vastadtaguri"
504 | "VAST-2.0-url-empty"
505 | "VAST-4.1-vpaid-apiframework"
506 | "VAST-2.0-flash-mediafile"
507 )
508 }
509}
510
511/// Returns the full catalog of known rules in definition order.
512///
513/// Use this to power `vastlint rules` output or to validate config-file rule
514/// IDs before passing them into `ValidationContext.rule_overrides`.
515pub fn all_rules() -> &'static [RuleMeta] {
516 rules::CATALOG
517}