use std::collections::HashMap;
use std::sync::{Arc, OnceLock};
use jsonschema::{Retrieve, Uri, Validator};
use once_cell::sync::Lazy;
use serde_json::Value;
use crate::error::XarfError;
// URI under which the core schema is registered in the retriever's document map.
const CORE_SCHEMA_URI: &str = "https://xarf.org/schemas/v4/xarf-core.json";
// Core schema JSON text, embedded into the binary at compile time.
const CORE_SCHEMA_JSON: &str = include_str!("../schemas/v4/xarf-core.json");
// URI under which the master schema is registered in the retriever's document map.
const MASTER_SCHEMA_URI: &str = "https://xarf.org/schemas/v4/xarf-v4-master.json";
// Master schema JSON text, embedded into the binary at compile time.
const MASTER_SCHEMA_JSON: &str = include_str!("../schemas/v4/xarf-v4-master.json");
// Declares the `TYPE_SCHEMA_SOURCES` table from a list of
// `(category, type_name, file)` literal triples.
//
// Each triple expands to a `(category, type_name, uri, json_source)` tuple where
//   * `uri` is `https://xarf.org/schemas/v4/types/<file>.json`, and
//   * `json_source` is the text of `../schemas/v4/types/<file>.json`, embedded
//     at compile time with `include_str!`.
//
// A trailing comma after the last triple is accepted.
macro_rules! type_schemas {
( $( ($category:literal, $type_name:literal, $file:literal) ),* $(,)? ) => {
// Tuple layout: (category, type_name, schema URI, schema JSON text).
const TYPE_SCHEMA_SOURCES: &[(&str, &str, &str, &str)] = &[
$(
(
$category,
$type_name,
concat!("https://xarf.org/schemas/v4/types/", $file, ".json"),
include_str!(concat!("../schemas/v4/types/", $file, ".json")),
),
)*
];
};
}
// Table of bundled per-type schemas: (category, type name, schema file stem).
// The file stem is used both for the schema URI and for the embedded file path.
//
// NOTE(review): the `brand_infringement`, `suspicious_registration` and
// `remote_compromise` entries use underscores in the file stem while every
// other multi-word entry uses hyphens. Presumably this mirrors the on-disk
// file names (a wrong stem would fail `include_str!` at compile time), but
// the resulting URIs must also match the `$ref`s used by the schemas; confirm.
type_schemas! {
("messaging", "spam", "messaging-spam"),
("messaging", "bulk_messaging", "messaging-bulk-messaging"),
("connection", "login_attack", "connection-login-attack"),
("connection", "port_scan", "connection-port-scan"),
("connection", "ddos", "connection-ddos"),
("connection", "scraping", "connection-scraping"),
("connection", "sql_injection", "connection-sql-injection"),
("connection", "vulnerability_scan", "connection-vulnerability-scan"),
("connection", "infected_host", "connection-infected-host"),
("connection", "reconnaissance", "connection-reconnaissance"),
("content", "phishing", "content-phishing"),
("content", "malware", "content-malware"),
("content", "fraud", "content-fraud"),
("content", "csam", "content-csam"),
("content", "csem", "content-csem"),
("content", "exposed_data", "content-exposed-data"),
("content", "brand_infringement", "content-brand_infringement"),
("content", "suspicious_registration", "content-suspicious_registration"),
("content", "remote_compromise", "content-remote_compromise"),
("copyright", "copyright", "copyright-copyright"),
("copyright", "cyberlocker", "copyright-cyberlocker"),
("copyright", "link_site", "copyright-link-site"),
("copyright", "p2p", "copyright-p2p"),
("copyright", "usenet", "copyright-usenet"),
("copyright", "ugc_platform", "copyright-ugc-platform"),
("vulnerability", "cve", "vulnerability-cve"),
("vulnerability", "misconfiguration", "vulnerability-misconfiguration"),
("vulnerability", "open_service", "vulnerability-open-service"),
("infrastructure", "botnet", "infrastructure-botnet"),
("infrastructure", "compromised_server", "infrastructure-compromised-server"),
("reputation", "blocklist", "reputation-blocklist"),
("reputation", "threat_intelligence", "reputation-threat-intelligence"),
}
// URI under which the shared content-base schema is registered for `$ref` lookup.
// It is kept out of the `type_schemas!` table, so it is resolvable by URI but is
// not itself listed as a (category, type) combination.
const CONTENT_BASE_URI: &str = "https://xarf.org/schemas/v4/types/content-base.json";
// Content-base schema JSON text, embedded into the binary at compile time.
const CONTENT_BASE_JSON: &str = include_str!("../schemas/v4/types/content-base.json");
/// Schema retriever backed by a fixed, in-memory set of documents.
///
/// Cloning is cheap: the document map sits behind an `Arc`, so clones share it.
#[derive(Clone, Debug)]
struct StaticRetriever {
/// Parsed schema documents keyed by their full URI string.
documents: Arc<HashMap<String, Value>>,
}
impl Retrieve for StaticRetriever {
    /// Looks up `uri` in the in-memory document map.
    ///
    /// Returns a clone of the stored document, or an error naming the URI when
    /// no document is registered under it.
    fn retrieve(
        &self,
        uri: &Uri<String>,
    ) -> std::result::Result<Value, Box<dyn std::error::Error + Send + Sync>> {
        let key = uri.as_str();
        match self.documents.get(key) {
            Some(document) => Ok(document.clone()),
            None => Err(format!("unknown schema reference: {key}").into()),
        }
    }
}
/// Holds the parsed bundled schemas together with the retrievers and cached
/// validators built from them.
#[derive(Debug)]
pub struct SchemaRegistry {
/// Parsed core schema.
core_schema: Value,
/// Parsed master schema, as bundled.
master_schema: Value,
/// Copy of the master schema after `promote_recommended_to_required`.
master_schema_strict: Value,
/// Per-type schemas keyed by `(category, type_name)`.
type_schemas: HashMap<(String, String), Value>,
/// Resolves references against the schemas as bundled.
retriever: StaticRetriever,
/// Resolves references against the promoted ("strict") copies of the schemas.
retriever_strict: StaticRetriever,
/// Cache for the compiled non-strict master validator.
master_validator: OnceLock<Validator>,
/// Cache for the compiled strict master validator.
master_validator_strict: OnceLock<Validator>,
}
impl SchemaRegistry {
    /// Parses every bundled schema and assembles the registry.
    ///
    /// Two URI-keyed document sets are produced: one holding the schemas as
    /// bundled, and a "strict" one in which every document has been passed
    /// through `promote_recommended_to_required`.
    ///
    /// # Errors
    ///
    /// Returns `XarfError::Schema` if any bundled JSON document fails to parse.
    fn build() -> Result<Self, XarfError> {
        let core_schema = serde_json::from_str::<Value>(CORE_SCHEMA_JSON)
            .map_err(|e| XarfError::Schema(format!("core schema parse: {e}")))?;
        let master_schema = serde_json::from_str::<Value>(MASTER_SCHEMA_JSON)
            .map_err(|e| XarfError::Schema(format!("master schema parse: {e}")))?;
        let content_base = serde_json::from_str::<Value>(CONTENT_BASE_JSON)
            .map_err(|e| XarfError::Schema(format!("content-base parse: {e}")))?;

        // Seed the URI -> document map with the three fixed documents.
        let mut documents: HashMap<String, Value> = HashMap::from([
            (CORE_SCHEMA_URI.to_string(), core_schema.clone()),
            (MASTER_SCHEMA_URI.to_string(), master_schema.clone()),
            (CONTENT_BASE_URI.to_string(), content_base),
        ]);

        // Each type schema is stored twice: by URI (for reference resolution)
        // and by (category, type_name) (for direct lookup).
        let mut type_schemas: HashMap<(String, String), Value> = HashMap::new();
        for &(category, type_name, uri, source) in TYPE_SCHEMA_SOURCES {
            let parsed = serde_json::from_str::<Value>(source).map_err(|e| {
                XarfError::Schema(format!(
                    "type schema parse failed for {category}/{type_name}: {e}"
                ))
            })?;
            documents.insert(uri.to_string(), parsed.clone());
            type_schemas.insert((category.to_string(), type_name.to_string()), parsed);
        }

        let mut master_schema_strict = master_schema.clone();
        promote_recommended_to_required(&mut master_schema_strict);

        // Strict document set: the same URIs, every document promoted.
        let mut documents_strict: HashMap<String, Value> =
            HashMap::with_capacity(documents.len());
        for (uri, document) in &documents {
            let mut promoted = document.clone();
            promote_recommended_to_required(&mut promoted);
            documents_strict.insert(uri.clone(), promoted);
        }
        documents_strict.insert(MASTER_SCHEMA_URI.to_string(), master_schema_strict.clone());

        let retriever = StaticRetriever {
            documents: Arc::new(documents),
        };
        let retriever_strict = StaticRetriever {
            documents: Arc::new(documents_strict),
        };

        Ok(Self {
            core_schema,
            master_schema,
            master_schema_strict,
            type_schemas,
            retriever,
            retriever_strict,
            master_validator: OnceLock::new(),
            master_validator_strict: OnceLock::new(),
        })
    }

    /// Returns the parsed core schema.
    pub fn core_schema(&self) -> &Value {
        &self.core_schema
    }

    /// Returns the parsed (non-strict) master schema.
    pub fn master_schema(&self) -> &Value {
        &self.master_schema
    }

    /// Returns the schema registered for `category` / `type_name`, if any.
    pub fn type_schema(&self, category: &str, type_name: &str) -> Option<&Value> {
        let lookup = (category.to_string(), type_name.to_string());
        self.type_schemas.get(&lookup)
    }

    /// Returns the cached master validator, compiling it on first use.
    ///
    /// `strict` selects the promoted master schema and the strict document set.
    /// Compilation runs outside the cell initializer so that a compile error
    /// can be returned to the caller; a concurrent caller may therefore compile
    /// redundantly, and `get_or_init` keeps whichever value was stored first.
    ///
    /// # Errors
    ///
    /// Returns `XarfError::Schema` if the schema fails to compile.
    pub fn master_validator(&self, strict: bool) -> Result<&Validator, XarfError> {
        let (slot, schema) = if strict {
            (&self.master_validator_strict, &self.master_schema_strict)
        } else {
            (&self.master_validator, &self.master_schema)
        };
        match slot.get() {
            Some(ready) => Ok(ready),
            None => {
                let fresh = self.compile(schema, strict)?;
                Ok(slot.get_or_init(move || fresh))
            }
        }
    }

    /// Compiles a new validator for the core schema on every call (not cached),
    /// using the non-strict document set.
    ///
    /// # Errors
    ///
    /// Returns `XarfError::Schema` if the schema fails to compile.
    pub fn core_validator(&self) -> Result<Validator, XarfError> {
        self.compile(&self.core_schema, false)
    }

    /// Compiles a new validator for the given type schema on every call.
    ///
    /// Returns `Ok(None)` when no schema is registered for the combination.
    ///
    /// # Errors
    ///
    /// Returns `XarfError::Schema` if the schema fails to compile.
    pub fn type_validator(
        &self,
        category: &str,
        type_name: &str,
    ) -> Result<Option<Validator>, XarfError> {
        self.type_schema(category, type_name)
            .map(|schema| self.compile(schema, false))
            .transpose()
    }

    /// Compiles `schema` with format validation enabled, resolving references
    /// through the strict or non-strict retriever according to `strict`.
    fn compile(&self, schema: &Value, strict: bool) -> Result<Validator, XarfError> {
        let chosen = if strict {
            &self.retriever_strict
        } else {
            &self.retriever
        };
        let outcome = jsonschema::options()
            .with_retriever(chosen.clone())
            .should_validate_formats(true)
            .build(schema);
        outcome.map_err(|e| XarfError::Schema(format!("schema compile: {e}")))
    }

    /// Iterates over every registered `(category, type_name)` pair, in the
    /// (unspecified) iteration order of the underlying map.
    pub fn known_combinations(&self) -> impl Iterator<Item = (&str, &str)> {
        self.type_schemas
            .iter()
            .map(|((category, type_name), _)| (category.as_str(), type_name.as_str()))
    }

    /// Reports whether a schema is registered for `category` / `type_name`.
    pub fn is_known_combination(&self, category: &str, type_name: &str) -> bool {
        self.type_schema(category, type_name).is_some()
    }
}
/// Returns the process-wide [`SchemaRegistry`], building it on first access.
///
/// # Panics
///
/// Panics on first access if any bundled schema fails to parse. The schemas
/// are embedded at compile time, so this indicates a broken build rather than
/// a runtime condition.
pub fn registry() -> &'static SchemaRegistry {
    static REGISTRY: Lazy<SchemaRegistry> = Lazy::new(|| {
        SchemaRegistry::build().expect("bundled XARF schemas must parse")
    });
    // Borrowing the static `Lazy` deref-coerces to `&'static SchemaRegistry`
    // and forces initialization on first use.
    &REGISTRY
}
/// Returns the keys of the core schema's top-level `properties` object.
///
/// Yields an empty vector when `properties` is absent or is not a JSON object.
pub fn core_property_names() -> Vec<String> {
    let Some(properties) = registry()
        .core_schema()
        .get("properties")
        .and_then(Value::as_object)
    else {
        return Vec::new();
    };
    properties.keys().cloned().collect()
}
/// Rewrites a schema in place so that every property flagged with
/// `"x-recommended": true` is also listed in the enclosing `required` array.
///
/// The rewrite is applied to `node` and then recursively to the sub-schemas
/// found under `properties`, `$defs`, `allOf`, `anyOf`, `oneOf`, `items`, `if`,
/// `then`, `else`, `not` and `additionalProperties`. Non-object nodes are left
/// untouched.
///
/// When promotion happens, existing string entries of `required` are kept in
/// order (non-string entries are dropped) and promoted names that are not
/// already present are appended. A `required` value that is not an array is
/// replaced.
pub(crate) fn promote_recommended_to_required(node: &mut Value) {
    let Some(map) = node.as_object_mut() else {
        return;
    };

    // Collect the names of this node's recommended properties.
    let mut recommended: Vec<String> = Vec::new();
    if let Some(props) = map.get("properties").and_then(Value::as_object) {
        recommended.extend(
            props
                .iter()
                .filter(|(_, schema)| schema.get("x-recommended") == Some(&Value::Bool(true)))
                .map(|(name, _)| name.clone()),
        );
    }

    // Merge them into `required`, leaving it untouched when nothing is promoted.
    if !recommended.is_empty() {
        let mut merged: Vec<Value> = map
            .get("required")
            .and_then(Value::as_array)
            .map(|entries| {
                entries
                    .iter()
                    .filter(|entry| entry.is_string())
                    .cloned()
                    .collect()
            })
            .unwrap_or_default();
        for name in recommended {
            let already_listed = merged
                .iter()
                .any(|entry| entry.as_str() == Some(name.as_str()));
            if !already_listed {
                merged.push(Value::String(name));
            }
        }
        map.insert("required".to_string(), Value::Array(merged));
    }

    // Keywords whose value is an object mapping names to sub-schemas.
    for key in ["properties", "$defs"] {
        if let Some(children) = map.get_mut(key).and_then(Value::as_object_mut) {
            for child in children.values_mut() {
                promote_recommended_to_required(child);
            }
        }
    }

    // Keywords whose value is an array of sub-schemas.
    for key in ["allOf", "anyOf", "oneOf"] {
        if let Some(branches) = map.get_mut(key).and_then(Value::as_array_mut) {
            for branch in branches {
                promote_recommended_to_required(branch);
            }
        }
    }

    // Keywords whose value is a single sub-schema; boolean and array forms are skipped.
    for key in ["items", "if", "then", "else", "not", "additionalProperties"] {
        if let Some(child) = map.get_mut(key).filter(|child| child.is_object()) {
            promote_recommended_to_required(child);
        }
    }
}
/// Returns the string entries of the core schema's top-level `required` array.
///
/// Non-string entries are skipped. Yields an empty vector when `required` is
/// absent or is not an array.
pub fn core_required_fields() -> Vec<String> {
    match registry().core_schema().get("required") {
        Some(Value::Array(entries)) => entries
            .iter()
            .filter_map(Value::as_str)
            .map(str::to_owned)
            .collect(),
        _ => Vec::new(),
    }
}