use super::util::example_path_or_basename;
use crate::compiler::prelude::*;
#[cfg(not(target_arch = "wasm32"))]
use std::path::PathBuf;
use std::sync::LazyLock;
/// VRL example program that validates a `productUser` of `valid@email.com`
/// against the email-format schema, with `ignore_unknown_formats` set to `false`.
///
/// The schema location is resolved through `example_path_or_basename`, and the
/// formatted program is leaked once so it can be handed out as a `&'static str`.
static EXAMPLE_JSON_SCHEMA_VALID_EMAIL: LazyLock<&str> = LazyLock::new(|| {
    let schema_file =
        example_path_or_basename("jsonschema/validate_json_schema/schema_with_email_format.json");
    let program = format!(
        r#"validate_json_schema!(s'{{ "productUser": "valid@email.com" }}', "{schema_file}", false)"#
    );
    Box::leak(program.into_boxed_str())
});
/// VRL example program that validates a `productUser` of `invalidEmail`
/// against the email-format schema, with `ignore_unknown_formats` set to `false`.
///
/// The schema location is resolved through `example_path_or_basename`, and the
/// formatted program is leaked once so it can be handed out as a `&'static str`.
static EXAMPLE_JSON_SCHEMA_INVALID_EMAIL: LazyLock<&str> = LazyLock::new(|| {
    let schema_file =
        example_path_or_basename("jsonschema/validate_json_schema/schema_with_email_format.json");
    let program = format!(
        r#"validate_json_schema!(s'{{ "productUser": "invalidEmail" }}', "{schema_file}", false)"#
    );
    Box::leak(program.into_boxed_str())
});
/// VRL example program that validates a payload against the custom-format
/// schema with `ignore_unknown_formats` set to `false`.
///
/// The schema location is resolved through `example_path_or_basename`, and the
/// formatted program is leaked once so it can be handed out as a `&'static str`.
static EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_FALSE: LazyLock<&str> = LazyLock::new(|| {
    let schema_file =
        example_path_or_basename("jsonschema/validate_json_schema/schema_with_custom_format.json");
    let program = format!(
        r#"validate_json_schema!(s'{{ "productUser": "a-custom-formatted-string" }}', "{schema_file}", false)"#
    );
    Box::leak(program.into_boxed_str())
});
/// VRL example program that validates a payload against the custom-format
/// schema with `ignore_unknown_formats` set to `true`.
///
/// The schema location is resolved through `example_path_or_basename`, and the
/// formatted program is leaked once so it can be handed out as a `&'static str`.
static EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_TRUE: LazyLock<&str> = LazyLock::new(|| {
    let schema_file =
        example_path_or_basename("jsonschema/validate_json_schema/schema_with_custom_format.json");
    let program = format!(
        r#"validate_json_schema!(s'{{ "productUser": "a-custom-formatted-string" }}', "{schema_file}", true)"#
    );
    Box::leak(program.into_boxed_str())
});
static EXAMPLES: LazyLock<Vec<Example>> = LazyLock::new(|| {
vec![
example! {
title: "Payload contains a valid email",
source: &EXAMPLE_JSON_SCHEMA_VALID_EMAIL,
result: Ok("true"),
},
example! {
title: "Payload contains an invalid email",
source: &EXAMPLE_JSON_SCHEMA_INVALID_EMAIL,
result: Err(Box::leak(
format!(
r#"function call error for "validate_json_schema" at (0:{}): JSON schema validation failed: "invalidEmail" is not a "email" at /productUser"#,
EXAMPLE_JSON_SCHEMA_INVALID_EMAIL.len()
)
.into_boxed_str(),
)),
},
example! {
title: "Payload contains a custom format declaration",
source: &EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_FALSE,
result: Err(Box::leak(
format!(
r#"function call error for "validate_json_schema" at (0:{}): Failed to compile schema: Unknown format: 'my-custom-format'. Adjust configuration to ignore unrecognized formats"#,
EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_FALSE.len()
)
.into_boxed_str(),
)),
},
example! {
title: "Payload contains a custom format declaration, with ignore_unknown_formats set to true",
source: &EXAMPLE_JSON_SCHEMA_CUSTOM_FORMAT_TRUE,
result: Ok("true"),
},
]
});
#[cfg(not(target_arch = "wasm32"))]
use non_wasm::ValidateJsonSchemaFn;
/// The `validate_json_schema` function: checks a JSON payload against a JSON
/// Schema definition loaded from a file path.
#[derive(Clone, Copy, Debug)]
pub struct ValidateJsonSchema;
impl Function for ValidateJsonSchema {
    /// Name under which the function is invoked.
    fn identifier(&self) -> &'static str {
        "validate_json_schema"
    }

    /// Human-readable description of the function.
    fn usage(&self) -> &'static str {
        "Check if `value` conforms to a JSON Schema definition. This function validates a JSON payload against a JSON Schema definition. It can be used to ensure that the data structure and types in `value` match the expectations defined in `schema_definition`."
    }

    /// Documentation category the function is listed under.
    fn category(&self) -> &'static str {
        Category::Type.as_ref()
    }

    /// Documented reasons the function call can fail.
    fn internal_failure_reasons(&self) -> &'static [&'static str] {
        &[
            "`value` is not a valid JSON Schema payload.",
            "`value` contains custom format declarations and `ignore_unknown_formats` has not been set to `true`.",
            "`schema_definition` is not a valid JSON Schema definition.",
            "`schema_definition` file does not exist.",
        ]
    }

    /// The function returns a boolean.
    fn return_kind(&self) -> u16 {
        kind::BOOLEAN
    }

    /// Documented rules describing the returned value.
    fn return_rules(&self) -> &'static [&'static str] {
        &[
            "Returns `true` if `value` conforms to the JSON Schema definition.",
            "Returns `false` if `value` does not conform to the JSON Schema definition.",
        ]
    }

    /// Extra notes shown in the documentation (schema caching caveats).
    fn notices(&self) -> &'static [&'static str] {
        &[indoc! {"
            This function uses a compiled schema cache. The first time it is called with a specific
            `schema_definition`, it will compile the schema and cache it for subsequent calls. This
            improves performance when validating multiple values against the same schema. The cache
            implementation is fairly naive and does not support refreshing the schema if it changes.
            If you update the schema definition file, you must restart Vector to clear the cache.
        "}]
    }

    /// Documentation examples (built lazily in `EXAMPLES`).
    fn examples(&self) -> &'static [Example] {
        EXAMPLES.as_slice()
    }

    /// Accepted parameters: `value`, `schema_definition` and the optional
    /// `ignore_unknown_formats`.
    fn parameters(&self) -> &'static [Parameter] {
        const PARAMETERS: &[Parameter] = &[
            Parameter::required(
                "value",
                kind::BYTES,
                "The value to check if it conforms to the JSON schema definition.",
            ),
            Parameter::required(
                "schema_definition",
                kind::BYTES,
                "The location (path) of the JSON Schema definition.",
            ),
            Parameter::optional(
                "ignore_unknown_formats",
                kind::BOOLEAN,
                "Unknown formats can be silently ignored by setting this to `true` and validation continues without failing due to those fields.",
            ),
        ];
        PARAMETERS
    }

    /// Builds the runtime expression for non-wasm targets.
    ///
    /// `schema_definition` must be a literal so the schema path is known at
    /// compile time; `ignore_unknown_formats` defaults to `false` when omitted.
    #[cfg(not(target_arch = "wasm32"))]
    fn compile(
        &self,
        state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        let value = arguments.required("value");
        let schema_definition = arguments.required_literal("schema_definition", state)?;
        // Build the default expression only when the argument is actually absent.
        let ignore_unknown_formats = arguments
            .optional("ignore_unknown_formats")
            .unwrap_or_else(|| expr!(false));

        // NOTE(review): this panics if the literal is not a string; presumably
        // the `kind::BYTES` parameter declaration rules that out before
        // `compile` runs — confirm against the argument type checking.
        let schema_file_str = schema_definition
            .try_bytes_utf8_lossy()
            .expect("schema definition file must be a string");
        let schema_file_path = std::path::Path::new(schema_file_str.as_ref());

        Ok(ValidateJsonSchemaFn {
            value,
            schema_path: PathBuf::from(schema_file_path),
            ignore_unknown_formats,
        }
        .as_expr())
    }

    /// Builds the placeholder expression used on wasm32, where this function
    /// is not available.
    #[cfg(target_arch = "wasm32")]
    fn compile(
        &self,
        _state: &state::TypeState,
        ctx: &mut FunctionCompileContext,
        _arguments: ArgumentList,
    ) -> Compiled {
        // Advertise the same type as the real implementation (a fallible
        // boolean, matching `return_kind`) so programs type-check identically
        // on wasm and native targets.
        Ok(
            super::WasmUnsupportedFunction::new(ctx.span(), TypeDef::boolean().fallible())
                .as_expr(),
        )
    }
}
#[cfg(not(target_arch = "wasm32"))]
mod non_wasm {
    use super::{
        Context, Expression, FunctionExpression, Resolved, TypeDef, VrlValueConvert, state,
    };
    use crate::prelude::ExpressionError;
    use crate::stdlib::json_utils::bom::StripBomFromUTF8;
    use crate::value;
    use jsonschema;
    use std::collections::HashMap;
    use std::path::{Path, PathBuf};
    use std::sync::{Arc, LazyLock, PoisonError, RwLock};

    /// Compiled validators for one schema file, indexed by
    /// `usize::from(ignore_unknown_formats)`.
    ///
    /// The flag changes how the schema is compiled, so each flag value needs
    /// its own validator.
    type ValidatorSlots = [Option<Arc<jsonschema::Validator>>; 2];

    /// Process-wide cache of compiled validators, keyed by schema path.
    /// Entries are never refreshed or evicted.
    static SCHEMA_CACHE: LazyLock<RwLock<HashMap<PathBuf, ValidatorSlots>>> =
        LazyLock::new(|| RwLock::new(HashMap::new()));

    /// Runtime expression backing `validate_json_schema`.
    #[derive(Debug, Clone)]
    pub(super) struct ValidateJsonSchemaFn {
        /// Expression producing the JSON payload (as bytes) to validate.
        pub(super) value: Box<dyn Expression>,
        /// Location of the JSON Schema definition file.
        pub(super) schema_path: PathBuf,
        /// Expression producing the boolean `ignore_unknown_formats` flag.
        pub(super) ignore_unknown_formats: Box<dyn Expression>,
    }

    impl FunctionExpression for ValidateJsonSchemaFn {
        /// Validates the resolved payload against the schema.
        ///
        /// Returns `true` when there are no validation errors; otherwise fails
        /// with a message listing every error and the instance path it
        /// occurred at. Also fails for empty or unparsable payloads and when
        /// the schema cannot be loaded or compiled.
        fn resolve(&self, ctx: &mut Context) -> Resolved {
            let value = self.value.resolve(ctx)?;
            let ignore_unknown_formats = self.ignore_unknown_formats.resolve(ctx)?.try_boolean()?;

            let bytes = value.try_bytes()?;
            if bytes.is_empty() {
                return Err(ExpressionError::from("Empty JSON value"));
            }

            // NOTE(review): a payload that is non-empty but empty after
            // `strip_bom` (presumably a bare BOM) is validated as JSON `null`
            // rather than rejected — confirm this is intended.
            let stripped_bytes = bytes.strip_bom();
            let json_value = if stripped_bytes.is_empty() {
                serde_json::Value::Null
            } else {
                serde_json::from_slice(stripped_bytes).map_err(|e| format!("Invalid JSON: {e}"))?
            };

            let schema_validator =
                get_or_compile_schema(&self.schema_path, ignore_unknown_formats)?;

            let validation_errors = schema_validator
                .iter_errors(&json_value)
                .map(|e| {
                    format!(
                        "{} at {}",
                        e,
                        // An empty instance path is reported as "/".
                        if e.instance_path().as_str().is_empty() {
                            "/"
                        } else {
                            e.instance_path().as_str()
                        }
                    )
                })
                .collect::<Vec<String>>()
                .join(", ");

            if validation_errors.is_empty() {
                Ok(value!(true))
            } else {
                Err(ExpressionError::from(format!(
                    "JSON schema validation failed: {validation_errors}"
                )))
            }
        }

        /// The expression yields a boolean and can fail.
        fn type_def(&self, _: &state::TypeState) -> TypeDef {
            TypeDef::boolean().fallible()
        }
    }

    /// Reads the file at `path` and parses it as JSON.
    ///
    /// # Errors
    ///
    /// Returns a message naming the file when it cannot be read or does not
    /// contain valid JSON.
    pub(super) fn get_json_schema_definition(path: &Path) -> Result<serde_json::Value, String> {
        let b = std::fs::read(path).map_err(|e| {
            format!(
                "Failed to open schema definition file '{}': {e}",
                path.display()
            )
        })?;
        let schema: serde_json::Value = serde_json::from_slice(&b).map_err(|e| {
            format!(
                "Failed to parse schema definition file '{}': {e}",
                path.display()
            )
        })?;
        Ok(schema)
    }

    /// Looks up the validator cached for `schema_path` and this flag value.
    fn cached_validator(
        cache: &HashMap<PathBuf, ValidatorSlots>,
        schema_path: &Path,
        ignore_unknown_formats: bool,
    ) -> Option<Arc<jsonschema::Validator>> {
        cache
            .get(schema_path)
            .and_then(|slots| slots[usize::from(ignore_unknown_formats)].clone())
    }

    /// Returns the compiled validator for `schema_path`, compiling and caching
    /// it on first use.
    ///
    /// The cache distinguishes `ignore_unknown_formats` values: a validator
    /// compiled with one setting is never returned for a call using the other.
    ///
    /// # Errors
    ///
    /// Returns a message when the schema file cannot be read or parsed, or
    /// when the schema fails to compile.
    pub(super) fn get_or_compile_schema(
        schema_path: &Path,
        ignore_unknown_formats: bool,
    ) -> Result<Arc<jsonschema::Validator>, String> {
        // Fast path: shared lock only. A poisoned lock is recovered rather
        // than propagated as a panic: the map is only changed by the single
        // insert below, so it is never left half-updated.
        {
            let cache = SCHEMA_CACHE
                .read()
                .unwrap_or_else(PoisonError::into_inner);
            if let Some(validator) = cached_validator(&cache, schema_path, ignore_unknown_formats)
            {
                return Ok(validator);
            }
        }

        let mut cache = SCHEMA_CACHE
            .write()
            .unwrap_or_else(PoisonError::into_inner);
        // Re-check: another thread may have compiled the schema while this one
        // was waiting for the write lock.
        if let Some(validator) = cached_validator(&cache, schema_path, ignore_unknown_formats) {
            return Ok(validator);
        }

        let schema_definition = get_json_schema_definition(schema_path)
            .map_err(|e| format!("JSON schema not found: {e}"))?;
        let compiled_schema = jsonschema::options()
            .should_validate_formats(true)
            .should_ignore_unknown_formats(ignore_unknown_formats)
            .build(&schema_definition)
            .map_err(|e| format!("Failed to compile schema: {e}"))?;
        let compiled_schema = Arc::new(compiled_schema);

        cache.entry(schema_path.to_path_buf()).or_default()
            [usize::from(ignore_unknown_formats)] = Some(Arc::clone(&compiled_schema));
        Ok(compiled_schema)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::value;
    use std::env;

    /// Directory holding the JSON Schema fixtures, relative to the crate root.
    fn test_data_dir() -> PathBuf {
        let manifest_dir = env::var_os("CARGO_MANIFEST_DIR").unwrap();
        PathBuf::from(manifest_dir).join("tests/data/jsonschema/")
    }

    /// Full path of a fixture (given relative to `test_data_dir`) as an owned string.
    fn schema_file(relative: &str) -> String {
        let location = test_data_dir().join(relative);
        location.to_str().unwrap().to_owned()
    }

    test_function![
        validate_json_schema => ValidateJsonSchema;

        valid_with_email_format_json {
            args: func_args![
                value: value!("{\"productUser\":\"email@domain.com\"}"),
                schema_definition: schema_file("validate_json_schema/schema_with_email_format.json"),
                ignore_unknown_formats: false],
            want: Ok(value!(true)),
            tdef: TypeDef::boolean().fallible(),
        }

        valid_with_array_of_things_json {
            args: func_args![
                value: value!("{\"fruits\":[\"apple\",\"orange\",\"pear\"],\"vegetables\":[{\"veggieName\":\"potato\",\"veggieLike\":true},{\"veggieName\":\"broccoli\",\"veggieLike\":false}]}"),
                schema_definition: schema_file("validate_json_schema/schema_arrays_of_things.json"),
                ignore_unknown_formats: false],
            want: Ok(value!(true)),
            tdef: TypeDef::boolean().fallible(),
        }

        invalid_email_json {
            args: func_args![
                value: value!("{\"productUser\":\"invalid-email\"}"),
                schema_definition: schema_file("validate_json_schema/schema_with_email_format.json"),
                ignore_unknown_formats: false],
            want: Err("JSON schema validation failed: \"invalid-email\" is not a \"email\" at /productUser"),
            tdef: TypeDef::boolean().fallible(),
        }

        custom_format_ignored_json {
            args: func_args![
                value: value!("{\"productUser\":\"just-a-string\"}"),
                schema_definition: schema_file("validate_json_schema/schema_with_custom_format.json"),
                ignore_unknown_formats: true],
            want: Ok(value!(true)),
            tdef: TypeDef::boolean().fallible(),
        }

        invalid_empty_json {
            args: func_args![
                value: value!(""),
                schema_definition: schema_file("validate_json_schema/schema_with_email_format.json"),
                ignore_unknown_formats: false],
            want: Err("Empty JSON value"),
            tdef: TypeDef::boolean().fallible(),
        }
    ];
}