use std::collections::BTreeSet;
use serde_json::{Map, Value};
use crate::error::{Result, ValidationError, ValidationInfo, ValidationWarning, XarfError};
use crate::schemas::{self, registry};
/// Outcome of a [`validate`] call.
#[derive(Debug, Clone)]
pub struct ValidationResult {
/// `true` exactly when `errors` is empty.
pub valid: bool,
/// De-duplicated schema violations; in strict mode this also contains the
/// unknown-field findings that would otherwise be reported as warnings.
pub errors: Vec<ValidationError>,
/// Unknown top-level fields. Empty in strict mode, because the findings are
/// moved into `errors` there.
pub warnings: Vec<ValidationWarning>,
/// Missing optional/recommended fields. `Some` only when
/// `ValidateOptions::show_missing_optional` was set and the report carried
/// non-empty string `category` and `type` values; `None` otherwise.
pub info: Option<Vec<ValidationInfo>>,
}
/// Switches controlling a [`validate`] run. The derived `Default` leaves both
/// flags `false`.
#[derive(Debug, Clone, Copy, Default)]
pub struct ValidateOptions {
/// Forwarded to `registry().master_validator(..)`; additionally makes
/// `validate` report unknown-field warnings as errors.
pub strict: bool,
/// When set, `validate` fills `ValidationResult::info` with the optional
/// fields that are absent from the report.
pub show_missing_optional: bool,
}
pub fn validate(data: &Value, options: ValidateOptions) -> Result<ValidationResult> {
let validator = registry().master_validator(options.strict)?;
let mut errors: Vec<ValidationError> = Vec::new();
let mut seen_errors: BTreeSet<(String, String)> = BTreeSet::new();
for err in validator.iter_errors(data) {
let field = err
.instance_path()
.as_str()
.trim_start_matches('/')
.replace('/', ".");
let message = err.to_string();
let key = (field.clone(), message.clone());
if seen_errors.insert(key) {
errors.push(ValidationError::new(field, message));
}
}
let mut warnings: Vec<ValidationWarning> = Vec::new();
let category = data.get("category").and_then(Value::as_str).unwrap_or("");
let type_name = data.get("type").and_then(Value::as_str).unwrap_or("");
if !category.is_empty() && !type_name.is_empty() {
if let Value::Object(obj) = data {
let known = known_field_names(category, type_name);
for key in obj.keys() {
if !known.contains(key.as_str()) && key != "_internal" {
warnings.push(ValidationWarning::new(
key.clone(),
format!("Unknown field '{key}' is not defined in the XARF schema"),
));
}
}
}
}
if options.strict && !warnings.is_empty() {
for w in warnings.drain(..) {
let key = (w.field.clone(), w.message.clone());
if seen_errors.insert(key) {
errors.push(ValidationError::new(w.field, w.message));
}
}
}
let info = if options.show_missing_optional && !category.is_empty() && !type_name.is_empty() {
Some(collect_missing_optional(data, category, type_name))
} else {
None
};
Ok(ValidationResult {
valid: errors.is_empty(),
errors,
warnings,
info,
})
}
/// Returns the set of top-level field names that count as "known" for the
/// given category/type: every entry of [`CORE_FIELDS`] plus the property names
/// found in the type schema, if the registry has one for this pair.
///
/// The names are returned as owned `String`s. Lookups by `&str`
/// (`set.contains(key.as_str())`) still work through `String: Borrow<str>`.
fn known_field_names(category: &str, type_name: &str) -> BTreeSet<String> {
    let mut names: BTreeSet<String> = CORE_FIELDS
        .iter()
        .map(|name| (*name).to_owned())
        .collect();
    if let Some(fields) = type_schema_fields(category, type_name) {
        names.extend(fields);
    }
    names
}

/// Field names treated as core: always considered known by
/// `known_field_names`, and skipped when type-specific optional fields are
/// collected.
const CORE_FIELDS: &[&str] = &[
    "xarf_version",
    "report_id",
    "timestamp",
    "reporter",
    "sender",
    "source_identifier",
    "source_port",
    "category",
    "type",
    "evidence_source",
    "evidence",
    "tags",
    "confidence",
    "description",
    "legacy_version",
    "_internal",
];

/// Collects the property names declared by the type schema registered for
/// `category`/`type_name`, or `None` when the registry has no such schema.
///
/// The names are owned; nothing is leaked to obtain a `'static` lifetime, so
/// repeated validation does not grow memory.
fn type_schema_fields(category: &str, type_name: &str) -> Option<Vec<String>> {
    let schema = registry().type_schema(category, type_name)?;
    let mut acc: Vec<String> = Vec::new();
    collect_property_names(schema, &mut acc);
    Some(acc)
}
/// Appends to `out` every property name declared by `schema`, skipping names
/// that are already present.
///
/// Names come from the schema's own `properties` object and, recursively, from
/// each inline member of `allOf`. A member whose `$ref` is a string is never
/// descended into. The only reference that is followed is one mentioning
/// `content-base.json`, which is resolved through `try_get_base_schema`;
/// references to `../xarf-core.json` and all other references are skipped.
///
/// The base schema is loaded unconditionally, matching
/// `collect_type_optional_fields`; it does not depend on any particular type
/// schema being present in the registry.
fn collect_property_names(schema: &Value, out: &mut Vec<String>) {
    if let Some(props) = schema.get("properties").and_then(Value::as_object) {
        for name in props.keys() {
            if !out.contains(name) {
                out.push(name.clone());
            }
        }
    }

    let Some(branches) = schema.get("allOf").and_then(Value::as_array) else {
        return;
    };
    for branch in branches {
        match branch.get("$ref").and_then(Value::as_str) {
            Some(href) => {
                // The core-schema check comes first so such a reference is
                // skipped even if it also mentions the content base.
                let is_core = href.contains("../xarf-core.json");
                if !is_core && href.contains("content-base.json") {
                    if let Some(base) = try_get_base_schema("content-base") {
                        collect_property_names(&base, out);
                    }
                }
            }
            // No string `$ref`: treat the member as an inline sub-schema.
            None => collect_property_names(branch, out),
        }
    }
}
/// Loads a shared base schema by file stem.
///
/// Only `"content-base"` is supported: its JSON is embedded at compile time
/// and parsed on every call. Returns `None` for any other stem, or if the
/// embedded document fails to parse.
fn try_get_base_schema(stem: &str) -> Option<Value> {
    if stem != "content-base" {
        return None;
    }
    serde_json::from_str::<Value>(include_str!("../schemas/v4/types/content-base.json")).ok()
}
/// Lists the optional fields that are absent from `data`.
///
/// Core fields come first, in sorted name order: every core property that is
/// neither required nor `_internal` and is missing from the report yields one
/// entry whose message is `"<prefix>: <description>"`, as supplied by
/// `core_field_metadata`. Type-specific fields follow, gathered by
/// `collect_type_optional_fields` when the registry has a schema for
/// `category`/`type_name`.
///
/// Returns an empty list when `data` is not a JSON object.
fn collect_missing_optional(
    data: &Value,
    category: &str,
    type_name: &str,
) -> Vec<ValidationInfo> {
    let mut findings: Vec<ValidationInfo> = Vec::new();
    let Value::Object(fields) = data else {
        return findings;
    };

    let required = schemas::core_required_fields();
    let mut core_names = schemas::core_property_names();
    core_names.sort();

    findings.extend(
        core_names
            .iter()
            .filter(|name| {
                let is_required = required.iter().any(|r| r == *name);
                !(is_required || *name == "_internal" || fields.contains_key(*name))
            })
            .map(|name| {
                let (prefix, description) = core_field_metadata(name);
                ValidationInfo::new(name.clone(), format!("{prefix}: {description}"))
            }),
    );

    if let Some(type_schema) = registry().type_schema(category, type_name) {
        // The walk starts with no inherited required names and nothing reported yet.
        let inherited_required: BTreeSet<String> = BTreeSet::new();
        let mut reported: BTreeSet<String> = BTreeSet::new();
        collect_type_optional_fields(
            type_schema,
            &inherited_required,
            &mut findings,
            fields,
            &mut reported,
        );
    }
    findings
}
/// Walks a type schema and appends one `ValidationInfo` per optional property
/// that the report does not contain.
///
/// * `schema` – schema node to inspect.
/// * `accumulated_required` – required names inherited from enclosing nodes;
///   this node's own `required` entries are added on top for it and its children.
/// * `out` – receives the findings, formatted as `"<prefix>: <description>"`
///   where the prefix is `RECOMMENDED` if the property has
///   `"x-recommended": true` and `OPTIONAL` otherwise.
/// * `data` – the report's top-level object.
/// * `seen` – names already reported, so a property is listed at most once.
///
/// Core fields, required fields and fields present in `data` are skipped.
/// Inline `allOf` members are visited recursively; among `$ref` members only a
/// reference to `content-base.json` is followed.
fn collect_type_optional_fields(
    schema: &Value,
    accumulated_required: &BTreeSet<String>,
    out: &mut Vec<ValidationInfo>,
    data: &Map<String, Value>,
    seen: &mut BTreeSet<String>,
) {
    let mut required_here = accumulated_required.clone();
    if let Some(list) = schema.get("required").and_then(Value::as_array) {
        required_here.extend(list.iter().filter_map(Value::as_str).map(str::to_owned));
    }

    if let Some(props) = schema.get("properties").and_then(Value::as_object) {
        for (name, spec) in props {
            let excluded = CORE_FIELDS.contains(&name.as_str())
                || matches!(name.as_str(), "category" | "type" | "_internal")
                || required_here.contains(name)
                || data.contains_key(name)
                || seen.contains(name);
            if excluded {
                continue;
            }

            let description = match spec.get("description").and_then(Value::as_str) {
                Some(text) => text.to_owned(),
                None => format!("Optional field: {name}"),
            };
            let prefix = if spec.get("x-recommended") == Some(&Value::Bool(true)) {
                "RECOMMENDED"
            } else {
                "OPTIONAL"
            };

            seen.insert(name.clone());
            out.push(ValidationInfo::new(
                name.clone(),
                format!("{prefix}: {description}"),
            ));
        }
    }

    let Some(branches) = schema.get("allOf").and_then(Value::as_array) else {
        return;
    };
    for branch in branches {
        match branch.get("$ref") {
            Some(Value::String(href)) => {
                // Only the content base is resolvable. Any reference naming
                // "content-base.json" necessarily also names "-base.json", so
                // this single test covers both of the original checks.
                if href.contains("content-base.json") {
                    if let Some(base) = try_get_base_schema("content-base") {
                        collect_type_optional_fields(&base, &required_here, out, data, seen);
                    }
                }
            }
            // Missing or non-string `$ref`: the member is an inline sub-schema.
            _ => collect_type_optional_fields(branch, &required_here, out, data, seen),
        }
    }
}
/// Looks up the display prefix and description of a core field.
///
/// The prefix is `"RECOMMENDED"` when the field's entry in the core schema has
/// `"x-recommended": true`, otherwise `"OPTIONAL"`. The description is the
/// entry's `description` string, falling back to `"Optional field: <name>"`.
/// When the core schema has no object-valued entry for `name`, the result is
/// `("OPTIONAL", "Optional field: <name>")`.
fn core_field_metadata(name: &str) -> (&'static str, String) {
    let fallback = format!("Optional field: {name}");
    let schema = registry().core_schema();
    let entry = schema
        .get("properties")
        .and_then(Value::as_object)
        .and_then(|props| props.get(name))
        .and_then(Value::as_object);

    match entry {
        None => ("OPTIONAL", fallback),
        Some(field) => {
            let prefix = if field.get("x-recommended") == Some(&Value::Bool(true)) {
                "RECOMMENDED"
            } else {
                "OPTIONAL"
            };
            let description = field
                .get("description")
                .and_then(Value::as_str)
                .map_or(fallback, String::from);
            (prefix, description)
        }
    }
}
/// Convenience wrapper around [`validate`] that returns only the error list.
///
/// Missing-optional hints are never requested; `strict` is passed through.
///
/// # Errors
///
/// Returns whatever error [`validate`] returns.
pub fn quick_errors(data: &Value, strict: bool) -> Result<Vec<ValidationError>> {
    let options = ValidateOptions {
        strict,
        show_missing_optional: false,
    };
    validate(data, options).map(|outcome| outcome.errors)
}
// NOTE(review): never called in the visible code; presumably this exists only
// so the `XarfError` import stays referenced. `dead_code` is allowed because
// the function is intentionally unused — confirm, or drop the import instead.
#[allow(dead_code)]
fn _used(_x: XarfError) {}