vastlint_core/lib.rs
1//! # vastlint-core
2//!
3//! A zero-I/O VAST XML validation library. Takes a VAST XML
4//! string and returns a structured [`ValidationResult`] listing every issue
5//! found, the detected VAST version, and a summary of error/warning/info counts.
6//!
//! The entire public surface is six functions and a handful of types:
8//!
9//! - [`validate`] -- validate with default settings (most callers want this)
10//! - [`validate_with_context`] -- validate with rule overrides or wrapper depth
11//! - [`fix`] -- fix deterministic issues and return repaired XML
12//! - [`fix_with_context`] -- fix with rule overrides or wrapper depth
13//! - [`inspect_document`] -- extract creative and wrapper metadata from one VAST XML document
14//! - [`all_rules`] -- list the full 121-rule catalog
15//!
16//! # Performance — allocator recommendation
17//!
18//! `vastlint-core` builds an owned document tree on every call (one heap
19//! allocation per XML element, attribute, and text node). Under concurrent
20//! load the system allocator becomes a bottleneck because all threads compete
21//! for a shared free-list lock.
22//!
23//! Switching to [`mimalloc`](https://docs.rs/mimalloc) in your **binary**
24//! crate eliminates this contention and gives dramatically better throughput
25//! at high concurrency, especially for larger documents:
26//!
27//! ```toml
28//! # Cargo.toml (your binary, not a library crate)
29//! [dependencies]
30//! mimalloc = { version = "0.1", default-features = false }
31//! ```
32//!
33//! ```rust,ignore
34//! // src/main.rs
35//! use mimalloc::MiMalloc;
36//! #[global_allocator]
37//! static GLOBAL: MiMalloc = MiMalloc;
38//! ```
39//!
40//! Measured on Apple M4 (10 threads, production-realistic VAST tags):
41//!
42//! | Allocator | 17 KB tag | 44 KB tag |
43//! |---|---|---|
44//! | system (default) | 1,847 tags/s · 541 µs | 328 tags/s · 3,048 µs |
45//! | mimalloc | 15,760 tags/s · 63 µs | 2,635 tags/s · 380 µs |
46//!
47//! **mimalloc: ~8× throughput improvement on multi-threaded workloads.**
48//!
49//! > ⚠️ Do **not** set a global allocator in a library crate — it would
50//! > override the allocator for any host process that links you (Go, Python,
51//! > Ruby runtimes, etc.), which can cause heap corruption.
52//!
53//! # Quick start
54//!
55//! ```rust
56//! let xml = r#"<VAST version="2.0">
57//! <Ad><InLine>
58//! <AdSystem>Demo</AdSystem>
59//! <AdTitle>Ad</AdTitle>
60//! <Impression>https://t.example.com/imp</Impression>
61//! <Creatives>
62//! <Creative>
63//! <Linear>
64//! <Duration>00:00:15</Duration>
65//! <MediaFiles>
66//! <MediaFile delivery="progressive" type="video/mp4"
67//! width="640" height="360">
68//! https://cdn.example.com/ad.mp4
69//! </MediaFile>
70//! </MediaFiles>
71//! </Linear>
72//! </Creative>
73//! </Creatives>
74//! </InLine></Ad>
75//! </VAST>"#;
76//!
77//! let result = vastlint_core::validate(xml);
78//! assert_eq!(result.summary.errors, 0);
79//! ```
80//!
81//! # Design constraints
82//!
83//! The library has no I/O, no logging, no global state, and no async runtime.
84//! It can be embedded in a CLI, HTTP server, WASM module, or FFI binding
85//! without pulling in any platform-specific dependencies.
86//!
87//! Three crate dependencies: `quick-xml` (XML parsing), `url` (RFC 3986),
88//! and `phf` (compile-time hash maps).
89
90mod detect;
91mod fix;
92mod inspect;
93mod parse;
94mod rules;
95mod summarize;
96
97pub use fix::{fix, fix_with_context, AppliedFix, FixResult};
98pub use inspect::{inspect_document, InspectAdType, InspectDocumentMeta, InspectMediaFile};
99
100use std::collections::HashMap;
101
102// ── Public types ─────────────────────────────────────────────────────────────
103
/// A VAST specification version, either read from the root `version`
/// attribute or inferred from the structure of the document.
///
/// Every version published by IAB Tech Lab is represented: 2.0 through 4.3.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VastVersion {
    V2_0,
    V3_0,
    V4_0,
    V4_1,
    V4_2,
    V4_3,
}

impl VastVersion {
    /// Returns the dotted version string, e.g. `"4.1"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::V2_0 => "2.0",
            Self::V3_0 => "3.0",
            Self::V4_0 => "4.0",
            Self::V4_1 => "4.1",
            Self::V4_2 => "4.2",
            Self::V4_3 => "4.3",
        }
    }

    /// Returns true if this version is 4.0 or any later release.
    pub fn is_v4(&self) -> bool {
        self.at_least(&Self::V4_0)
    }

    /// Returns true if this version is the same as, or newer than, `other`.
    pub fn at_least(&self, other: &VastVersion) -> bool {
        other.ordinal() <= self.ordinal()
    }

    /// Position of this version in release order (2.0 is 0, 4.3 is 5).
    fn ordinal(&self) -> u8 {
        // Variants are declared oldest-first without explicit discriminants,
        // so the discriminant itself is the release-order index.
        *self as u8
    }
}
154
155/// How the version was determined.
156///
157/// Version detection is a two-pass process: first the `version` attribute on
158/// the root `<VAST>` element is read (declared), then the document structure
159/// is scanned for version-specific elements (inferred). When both are
160/// available, consistency is checked and a mismatch produces a warning.
161#[derive(Debug, Clone, PartialEq, Eq)]
162pub enum DetectedVersion {
163 /// Version attribute was present and recognised.
164 Declared(VastVersion),
165 /// Version attribute was absent or unrecognised; version inferred from
166 /// document structure.
167 Inferred(VastVersion),
168 /// Both declared and inferred — may or may not agree.
169 DeclaredAndInferred {
170 declared: VastVersion,
171 inferred: VastVersion,
172 consistent: bool,
173 },
174 /// Could not determine version.
175 Unknown,
176}
177
178impl DetectedVersion {
179 /// Returns the best available version, preferring the declared value.
180 pub fn best(&self) -> Option<&VastVersion> {
181 match self {
182 DetectedVersion::Declared(v) => Some(v),
183 DetectedVersion::Inferred(v) => Some(v),
184 DetectedVersion::DeclaredAndInferred { declared, .. } => Some(declared),
185 DetectedVersion::Unknown => None,
186 }
187 }
188}
189
/// How serious an issue is, derived strictly from the wording of the spec.
///
/// * `Error` — the spec says "must" or "required": the tag will likely fail
///   to serve.
/// * `Warning` — the spec says "should" or "recommended", or the feature is
///   deprecated.
/// * `Info` — advisory; not a spec violation but a known interoperability
///   risk.
///
/// Variants are declared least-severe first, so the derived ordering gives
/// `Info < Warning < Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Severity {
    Info,
    Warning,
    Error,
}

impl Severity {
    /// Returns the lowercase name of the severity: `"info"`, `"warning"` or
    /// `"error"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Info => "info",
            Self::Warning => "warning",
            Self::Error => "error",
        }
    }
}
211
212/// A single validation finding.
213#[derive(Debug, Clone)]
214pub struct Issue {
215 /// Stable rule identifier, e.g. "VAST-2.0-root-version".
216 pub id: &'static str,
217 /// Effective severity after applying any caller overrides.
218 pub severity: Severity,
219 /// Human-readable message. Static string; no allocation on the hot path.
220 pub message: &'static str,
221 /// XPath-like location in the document, e.g. `/VAST/Ad\[0\]/InLine/AdSystem`.
222 /// None when the issue applies to the document as a whole.
223 pub path: Option<String>,
224 /// Short spec reference, e.g. "IAB VAST 4.1 §3.4.1".
225 pub spec_ref: &'static str,
226 /// 1-based line number of the element that triggered this issue.
227 /// None for document-level issues (e.g. parse errors, missing root).
228 pub line: Option<u32>,
229 /// 1-based column number (byte offset within the line) of the element.
230 /// None for document-level issues.
231 pub col: Option<u32>,
232}
233
/// Number of issues found at each severity level.
///
/// Call [`Summary::is_valid`] to decide whether the document passes
/// validation: only errors count against validity, so any number of warnings
/// or infos is acceptable.
#[derive(Debug, Clone, Default)]
pub struct Summary {
    pub errors: usize,
    pub warnings: usize,
    pub infos: usize,
}

impl Summary {
    /// Returns true when no error-level issues were recorded.
    pub fn is_valid(&self) -> bool {
        let Self { errors, .. } = self;
        *errors == 0
    }
}
250
251/// The full result of validating a VAST document.
252///
253/// Contains the detected version, all issues found, and a summary with counts.
254/// The `issues` vector is ordered by document position (depth-first traversal).
255#[derive(Debug, Clone)]
256pub struct ValidationResult {
257 pub version: DetectedVersion,
258 pub issues: Vec<Issue>,
259 pub summary: Summary,
260}
261
262// ── Rule configuration ────────────────────────────────────────────────────────
263
/// Severity a caller can assign to an individual rule.
///
/// Has the same three levels as [`Severity`], plus `Off` to disable the rule.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleLevel {
    Error,
    Warning,
    Info,
    /// The rule does not run and produces no [`Issue`].
    Off,
}
273
274/// Context passed to validate_with_context. All fields have safe defaults.
275#[derive(Debug, Clone)]
276pub struct ValidationContext {
277 /// Current wrapper chain depth. 0 = this document is the root.
278 pub wrapper_depth: u8,
279 /// Maximum allowed wrapper depth. IAB VAST 4.x recommends 5.
280 pub max_wrapper_depth: u8,
281 /// Per-rule severity overrides keyed by rule ID.
282 /// None means "use all recommended defaults".
283 pub rule_overrides: Option<HashMap<&'static str, RuleLevel>>,
284 /// Override the VAST version used for validation, ignoring the version
285 /// attribute declared in the XML. None = auto-detect from the document
286 /// (default). Useful for validating templates or tags where the version
287 /// attribute is absent or incorrect.
288 pub forced_version: Option<VastVersion>,
289}
290
291impl Default for ValidationContext {
292 fn default() -> Self {
293 Self {
294 wrapper_depth: 0,
295 max_wrapper_depth: 5,
296 rule_overrides: None,
297 forced_version: None,
298 }
299 }
300}
301
302impl ValidationContext {
303 /// Resolve the effective level for a rule, applying any override.
304 /// Returns None when the rule should be silenced (Off).
305 pub(crate) fn resolve(&self, rule_id: &'static str, default: Severity) -> Option<Severity> {
306 match &self.rule_overrides {
307 None => Some(default),
308 Some(map) => match map.get(rule_id) {
309 None => Some(default),
310 Some(RuleLevel::Off) => None,
311 Some(RuleLevel::Error) => Some(Severity::Error),
312 Some(RuleLevel::Warning) => Some(Severity::Warning),
313 Some(RuleLevel::Info) => Some(Severity::Info),
314 },
315 }
316 }
317}
318
319// ── Entry points ──────────────────────────────────────────────────────────────
320
321/// Validate a VAST XML string using default settings.
322///
323/// This is the main entry point for most callers. It runs the full rule set
324/// against the document and returns a [`ValidationResult`] containing every
325/// issue found, a detected version, and a summary.
326///
327/// # Example
328///
329/// ```rust
330/// let xml = r#"<VAST version="4.1">
331/// <Ad id="1">
332/// <InLine>
333/// <AdSystem>Example</AdSystem>
334/// <AdTitle>Test Ad</AdTitle>
335/// <AdServingId>abc123</AdServingId>
336/// <Impression>https://track.example.com/imp</Impression>
337/// <Creatives>
338/// <Creative>
339/// <UniversalAdId idRegistry="ad-id.org">UID-001</UniversalAdId>
340/// <Linear>
341/// <Duration>00:00:30</Duration>
342/// <MediaFiles>
343/// <MediaFile delivery="progressive" type="video/mp4"
344/// width="1920" height="1080">
345/// https://cdn.example.com/ad.mp4
346/// </MediaFile>
347/// </MediaFiles>
348/// </Linear>
349/// </Creative>
350/// </Creatives>
351/// </InLine>
352/// </Ad>
353/// </VAST>"#;
354///
355/// let result = vastlint_core::validate(xml);
356/// assert!(result.summary.is_valid());
357/// // Info-level advisories (e.g. missing Mezzanine for CTV) may be present
358/// // but the document has no errors or warnings that affect validity.
359/// assert_eq!(result.summary.errors, 0);
360/// ```
361pub fn validate(input: &str) -> ValidationResult {
362 validate_with_context(input, ValidationContext::default())
363}
364
365/// Validate a VAST XML string with caller-supplied context.
366///
367/// Use this when you need to declare wrapper chain depth or override the
368/// severity of specific rules. For simple validation, prefer [`validate`].
369///
370/// # Wrapper chain depth
371///
372/// When following a wrapper chain, pass the current depth so the
373/// [`crate::Severity::Error`] rule for `VAST-2.0-wrapper-depth` fires at the
374/// right level:
375///
376/// ```rust
377/// use vastlint_core::{ValidationContext, validate_with_context};
378///
379/// let ctx = ValidationContext {
380/// wrapper_depth: 3,
381/// max_wrapper_depth: 5,
382/// ..Default::default()
383/// };
384/// let result = validate_with_context("<VAST/>", ctx);
385/// ```
386///
387/// # Rule overrides
388///
389/// Suppress or downgrade individual rules by passing a rule override map.
390/// Rule IDs are the stable identifiers from the [`all_rules`] catalog.
391///
392/// ```rust
393/// use std::collections::HashMap;
394/// use vastlint_core::{RuleLevel, ValidationContext, validate_with_context};
395///
396/// let mut overrides = HashMap::new();
397/// // Silence the HTTP-vs-HTTPS advisory for internal tooling.
398/// overrides.insert("VAST-2.0-mediafile-https", RuleLevel::Off);
399/// // Treat a missing version attribute as a hard error.
400/// overrides.insert("VAST-2.0-root-version", RuleLevel::Error);
401///
402/// let ctx = ValidationContext {
403/// rule_overrides: Some(overrides),
404/// ..Default::default()
405/// };
406/// let result = validate_with_context("<VAST/>", ctx);
407/// ```
408pub fn validate_with_context(input: &str, context: ValidationContext) -> ValidationResult {
409 let doc = parse::parse(input);
410 let version = match context.forced_version {
411 Some(v) => DetectedVersion::Declared(v),
412 None => detect::detect_version(&doc),
413 };
414 let mut issues = Vec::new();
415 rules::run(&doc, &version, &context, &mut issues);
416 let summary = summarize::summarize(&issues);
417 ValidationResult {
418 version,
419 issues,
420 summary,
421 }
422}
423
424// ── Test helpers (integration tests only) ────────────────────────────────────
425
426/// Re-exports the internal parser for integration tests that need to verify
427/// the repaired XML round-trips without parse errors.
428#[doc(hidden)]
429pub fn _test_parse(xml: &str) -> parse::VastDocument {
430 parse::parse(xml)
431}
432
/// The external standard or authority a rule is derived from.
///
/// Mirrors the standards listed in the README. Filter the catalog by this
/// value to treat authorities differently — e.g. alert hard on
/// [`RuleSource::VastSpec`] violations while only logging
/// [`RuleSource::Inferred`] advisories.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RuleSource {
    /// IAB Tech Lab VAST spec normative prose (explicit §-references)
    VastSpec,
    /// IAB Tech Lab VAST published XSD schemas (structural and enum constraints)
    VastXsd,
    /// W3C XML 1.0 well-formedness
    Xml,
    /// RFC 3986 URI syntax
    Rfc3986,
    /// IANA Media Types registry
    IanaMediaTypes,
    /// ISO 4217 currency codes
    Iso4217,
    /// Ad-ID registry format
    AdId,
    /// vastlint heuristic — no single external spec authority
    Inferred,
    /// IAB Tech Lab SIMID spec normative prose
    SimidSpec,
    /// Industry best practice derived from real-world ad serving patterns;
    /// violation has a direct revenue or measurement impact.
    IndustryBestPractice,
}

impl RuleSource {
    /// Returns a short, stable label for this source, suitable for JSON
    /// output and display.
    pub fn as_str(self) -> &'static str {
        match self {
            Self::VastSpec => "VAST spec",
            Self::VastXsd => "VAST XSD",
            Self::Xml => "W3C XML 1.0",
            Self::Rfc3986 => "RFC 3986",
            Self::IanaMediaTypes => "IANA Media Types",
            Self::Iso4217 => "ISO 4217",
            Self::AdId => "Ad-ID",
            Self::Inferred => "inferred",
            Self::SimidSpec => "IAB SIMID",
            Self::IndustryBestPractice => "revenue impact",
        }
    }
}
480
481/// Metadata about a single rule, as exposed by the public catalog.
482///
483/// Marked `#[non_exhaustive]` so that adding fields in future minor releases
484/// does not break downstream code that reads (but never constructs) `RuleMeta`.
485#[non_exhaustive]
486pub struct RuleMeta {
487 pub id: &'static str,
488 pub default_severity: Severity,
489 pub description: &'static str,
490 /// The external standard this rule is derived from.
491 pub source: RuleSource,
492}
493
494impl RuleMeta {
495 /// Returns `true` when violating this rule has a direct revenue or
496 /// measurement impact — lost impressions, broken tracking, zero fill.
497 ///
498 /// This covers both rules whose [`source`](RuleMeta::source) is
499 /// [`RuleSource::IndustryBestPractice`] and rules whose source is an IAB
500 /// spec standard but whose real-world consequence is measurable revenue
501 /// loss (missing `<Impression>`, dead wrapper redirect, etc.).
502 pub fn revenue_impact(&self) -> bool {
503 matches!(
504 self.id,
505 // IndustryBestPractice-sourced rules
506 "VAST-2.0-mediafile-https"
507 | "VAST-2.0-tracking-https"
508 | "VAST-2.0-duplicate-impression"
509 | "VAST-4.1-mezzanine-recommended"
510 | "VAST-4.1-vpaid-in-interactive-context"
511 | "VAST-2.0-linear-tracking-quartiles"
512 // VastSpec-sourced rules with direct revenue consequence
513 | "VAST-2.0-inline-impression"
514 | "VAST-2.0-wrapper-impression"
515 | "VAST-2.0-wrapper-vastadtaguri"
516 | "VAST-2.0-url-empty"
517 | "VAST-4.1-vpaid-apiframework"
518 | "VAST-2.0-flash-mediafile"
519 )
520 }
521}
522
523/// Returns the full catalog of known rules in definition order.
524///
525/// Use this to power `vastlint rules` output or to validate config-file rule
526/// IDs before passing them into `ValidationContext.rule_overrides`.
527pub fn all_rules() -> &'static [RuleMeta] {
528 rules::CATALOG
529}