Skip to main content

xarf/
validator.rs

//! Schema-based validator for raw XARF v4 report JSON.
//!
//! Performs three jobs in a single call:
//!
//! 1. Runs the bundled JSON Schema (master + type-specific) against the input.
//! 2. Detects unknown fields and reports them as warnings (or errors in strict
//!    mode).
//! 3. Optionally enumerates missing optional/recommended fields as
//!    informational hints.
//!
//! This mirrors the Python `XARFValidator` / TS `XARFValidator` reference
//! implementations.
13
14use std::collections::BTreeSet;
15
16use serde_json::Value;
17
18use crate::error::{Result, ValidationError, ValidationInfo, ValidationWarning};
19use crate::schemas::registry;
20
21/// Outcome of [`validate`]. `valid` is `true` iff `errors` is empty.
22#[derive(Debug, Clone)]
23pub struct ValidationResult {
24    pub valid: bool,
25    pub errors: Vec<ValidationError>,
26    pub warnings: Vec<ValidationWarning>,
27    pub info: Option<Vec<ValidationInfo>>,
28}
29
/// Options for [`validate`].
///
/// Both flags default to `false`. The type is a plain pair of booleans, so it
/// is `Copy` and can be compared with `==` (handy in tests and when caching
/// per-option state).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ValidateOptions {
    /// When `true`, fields marked `x-recommended: true` in the schema are
    /// treated as required and unknown-field warnings are promoted to errors.
    pub strict: bool,
    /// When `true`, [`ValidationResult::info`] is populated with details on
    /// every absent optional/recommended field.
    pub show_missing_optional: bool,
}
40
41/// Validate raw report data against the bundled XARF v4 schemas.
42///
43/// Accepts a `serde_json::Value` so callers can pre-parse from string or
44/// build dynamically. Returns a [`ValidationResult`]; never panics on bad
45/// data.
46pub fn validate(data: &Value, options: ValidateOptions) -> Result<ValidationResult> {
47    // ------------------------------------------------------------------
48    // Step 1 — JSON Schema validation against the (cached) master schema
49    // ------------------------------------------------------------------
50    let validator = registry().master_validator(options.strict)?;
51
52    let mut errors: Vec<ValidationError> = Vec::new();
53    let mut seen_errors: BTreeSet<(String, String)> = BTreeSet::new();
54    for err in validator.iter_errors(data) {
55        let field = err
56            .instance_path()
57            .as_str()
58            .trim_start_matches('/')
59            .replace('/', ".");
60        let message = err.to_string();
61        let key = (field.clone(), message.clone());
62        if seen_errors.insert(key) {
63            errors.push(ValidationError::new(field, message));
64        }
65    }
66
67    // ------------------------------------------------------------------
68    // Step 2 — Unknown-field detection (warnings, or errors in strict mode)
69    // ------------------------------------------------------------------
70    let mut warnings: Vec<ValidationWarning> = Vec::new();
71    let category = data.get("category").and_then(Value::as_str).unwrap_or("");
72    let type_name = data.get("type").and_then(Value::as_str).unwrap_or("");
73
74    if !category.is_empty() && !type_name.is_empty() {
75        if let Value::Object(obj) = data {
76            let type_fields = registry().type_known_fields(category, type_name);
77            for key in obj.keys() {
78                if key == "_internal" || is_known_field(key, type_fields) {
79                    continue;
80                }
81                warnings.push(ValidationWarning::new(
82                    key.clone(),
83                    format!("Unknown field '{key}' is not defined in the XARF schema"),
84                ));
85            }
86        }
87    }
88
89    // Strict mode: promote unknown-field warnings to errors.
90    if options.strict && !warnings.is_empty() {
91        for w in warnings.drain(..) {
92            let key = (w.field.clone(), w.message.clone());
93            if seen_errors.insert(key) {
94                errors.push(ValidationError::new(w.field, w.message));
95            }
96        }
97    }
98
99    // ------------------------------------------------------------------
100    // Step 3 — Missing optional/recommended-field discovery
101    // ------------------------------------------------------------------
102    let info = if options.show_missing_optional && !category.is_empty() && !type_name.is_empty() {
103        Some(collect_missing_optional(data, category, type_name))
104    } else {
105        None
106    };
107
108    Ok(ValidationResult {
109        valid: errors.is_empty(),
110        errors,
111        warnings,
112        info,
113    })
114}
115
/// Report whether `key` is a recognised top-level report field.
///
/// * `key` — the field name to look up.
/// * `type_fields` — the field list for the report's `(category, type)`, when
///   one is available. It is searched with a binary search, so it must be
///   sorted ascending; it is expected to already contain the core fields
///   (NOTE(review): that is the registry's contract — confirm there).
///
/// When `type_fields` is `None` the lookup falls back to
/// [`CORE_FIELD_NAMES`]. Either way this is a single binary search with no
/// allocation.
fn is_known_field(key: &str, type_fields: Option<&[String]>) -> bool {
    type_fields.map_or_else(
        || CORE_FIELD_NAMES.binary_search(&key).is_ok(),
        |fields| {
            fields
                .binary_search_by(|name| name.as_str().cmp(key))
                .is_ok()
        },
    )
}

/// Core field names, sorted ascending by byte value, used as the fallback
/// when the `(category, type)` combination has no field list of its own.
///
/// The order is load-bearing: [`is_known_field`] binary-searches this slice.
/// The assertion below fails the build if an entry is added out of order.
const CORE_FIELD_NAMES: &[&str] = &[
    "_internal",
    "category",
    "confidence",
    "description",
    "evidence",
    "evidence_source",
    "legacy_version",
    "report_id",
    "reporter",
    "sender",
    "source_identifier",
    "source_port",
    "tags",
    "timestamp",
    "type",
    "xarf_version",
];

// Compile-time guard for the binary-search invariant on `CORE_FIELD_NAMES`.
const _: () = assert!(
    is_strictly_ascending(CORE_FIELD_NAMES),
    "CORE_FIELD_NAMES must be sorted ascending with no duplicates"
);

/// `true` when every name is strictly greater than its predecessor, using
/// the same byte-wise ordering as `str`'s `Ord` implementation.
const fn is_strictly_ascending(names: &[&str]) -> bool {
    let mut i = 1;
    while i < names.len() {
        if !bytes_lt(names[i - 1].as_bytes(), names[i].as_bytes()) {
            return false;
        }
        i += 1;
    }
    true
}

/// Lexicographic `lhs < rhs` on raw bytes, usable in `const` context.
const fn bytes_lt(lhs: &[u8], rhs: &[u8]) -> bool {
    let mut i = 0;
    while i < lhs.len() && i < rhs.len() {
        if lhs[i] != rhs[i] {
            return lhs[i] < rhs[i];
        }
        i += 1;
    }
    // Equal up to the shorter length: the shorter string sorts first.
    lhs.len() < rhs.len()
}
145
146/// Collect informational entries for every optional/recommended field that is
147/// absent from `data`. Order follows: core fields (alphabetic) then type
148/// fields (insertion order from the schema).
149fn collect_missing_optional(data: &Value, category: &str, type_name: &str) -> Vec<ValidationInfo> {
150    let mut info: Vec<ValidationInfo> = Vec::new();
151    let Value::Object(obj) = data else {
152        return info;
153    };
154    let reg = registry();
155
156    for meta in reg.core_optional_fields() {
157        if obj.contains_key(&meta.name) {
158            continue;
159        }
160        info.push(meta_to_info(meta));
161    }
162
163    if let Some(opt) = reg.type_optional_fields(category, type_name) {
164        for meta in opt {
165            if obj.contains_key(&meta.name) {
166                continue;
167            }
168            info.push(meta_to_info(meta));
169        }
170    }
171
172    info
173}
174
175fn meta_to_info(meta: &crate::schemas::FieldMeta) -> ValidationInfo {
176    let prefix = if meta.recommended {
177        "RECOMMENDED"
178    } else {
179        "OPTIONAL"
180    };
181    ValidationInfo::new(meta.name.clone(), format!("{prefix}: {}", meta.description))
182}
183
184/// Convenience wrapper that returns just the `errors` list, useful for
185/// quick checks. Equivalent to `validate(...).errors`.
186pub fn quick_errors(data: &Value, strict: bool) -> Result<Vec<ValidationError>> {
187    Ok(validate(
188        data,
189        ValidateOptions {
190            strict,
191            show_missing_optional: false,
192        },
193    )?
194    .errors)
195}