use std::path::Path;
use std::time::Duration;
use reqwest::Url;
use reqwest::blocking::Client;
use saphyr::LoadableYamlNode;
use saphyr::MarkedYaml;
use saphyr::Scalar;
use saphyr::YamlData;
use url::Url as ParseUrl;
use crate::Error;
use crate::Number;
use crate::Result;
use crate::RootSchema;
use crate::schemas::BooleanOrSchema;
use crate::schemas::YamlSchema;
use crate::utils::format_marker;
use crate::utils::try_unwrap_saphyr_scalar;
pub fn load_file<S: AsRef<str>>(path: S) -> Result<RootSchema> {
let fs_metadata = std::fs::metadata(path.as_ref())?;
if !fs_metadata.is_file() {
return Err(Error::FileNotFound(path.as_ref().to_string()));
}
let s = std::fs::read_to_string(path.as_ref())?;
let mut root = load_from_str(&s)?;
let canonical = Path::new(path.as_ref()).canonicalize()?;
root.base_uri = Some(
ParseUrl::from_file_path(canonical)
.map_err(|_| Error::GenericError("Failed to convert file path to URL".to_string()))?,
);
Ok(root)
}
pub fn load_from_str(s: &str) -> Result<RootSchema> {
let docs = MarkedYaml::load_from_str(s).map_err(Error::YamlParsingError)?;
load_from_docs(docs)
}
/// Builds a root schema from the first document in `docs`.
///
/// Only the first document is consulted. An empty list yields
/// `RootSchema::empty()` rather than an error.
pub fn load_from_docs<'f>(docs: Vec<MarkedYaml<'f>>) -> Result<RootSchema> {
    match docs.first() {
        Some(document) => load_from_doc(document),
        None => Ok(RootSchema::empty()),
    }
}
/// Converts a single parsed YAML document into a [`RootSchema`].
///
/// This is a thin wrapper over the `TryFrom<&MarkedYaml>` conversion
/// implemented for `RootSchema`; any conversion error is returned as-is.
pub fn load_from_doc<'f>(doc: &MarkedYaml<'f>) -> Result<RootSchema> {
    doc.try_into()
}
#[derive(thiserror::Error, Debug)]
pub enum UrlLoadError {
#[error("Failed to download from URL: {0}")]
DownloadError(#[from] reqwest::Error),
#[error("Failed to parse URL: {0}")]
ParseUrlError(#[from] url::ParseError),
#[error("Failed to parse YAML: {0}")]
ParseError(#[from] saphyr::ScanError),
#[error("No YAML documents found in the downloaded content")]
NoDocuments,
}
impl From<reqwest::Error> for crate::Error {
fn from(value: reqwest::Error) -> Self {
crate::Error::UrlLoadError(UrlLoadError::DownloadError(value))
}
}
pub fn load_from_content(content: &str, base_uri: Option<ParseUrl>) -> Result<RootSchema> {
let docs = MarkedYaml::load_from_str(content).map_err(Error::YamlParsingError)?;
let doc = docs
.first()
.ok_or_else(|| crate::generic_error!("No YAML documents in content"))?;
let mut root = load_from_doc(doc)?;
root.base_uri = base_uri;
Ok(root)
}
pub fn load_external_schema(doc_url: &str) -> Result<RootSchema> {
let parsed = ParseUrl::parse(doc_url).map_err(|e| Error::UrlLoadError(e.into()))?;
match parsed.scheme() {
"file" => {
let path = parsed
.to_file_path()
.map_err(|_| Error::GenericError("Invalid file URL".to_string()))?;
let path_str = path
.to_str()
.ok_or_else(|| Error::GenericError("Non-UTF-8 file path".to_string()))?;
load_file(path_str)
}
"http" | "https" => {
let (content, url) = fetch_url(doc_url, None)?;
load_from_content(&content, Some(url))
}
_ => Err(Error::GenericError(format!(
"Unsupported URL scheme for $ref: {}",
parsed.scheme()
))),
}
}
pub fn extract_dollar_schema_from_yaml(contents: &str) -> Result<Option<String>> {
let docs = MarkedYaml::load_from_str(contents).map_err(Error::YamlParsingError)?;
let Some(first) = docs.first() else {
return Ok(None);
};
match &first.data {
YamlData::Mapping(mapping) => {
let key = MarkedYaml::value_from_str("$schema");
match mapping.get(&key) {
Some(v) => Ok(Some(marked_yaml_to_string(v, "$schema must be a string")?)),
None => Ok(None),
}
}
_ => Ok(None),
}
}
/// Loads the root schema named by a `$schema` value.
///
/// `schema_ref` is trimmed and then interpreted as follows:
/// - an `http`, `https` or `file` URL is loaded with [`load_external_schema`];
/// - a URL with any other scheme of two or more characters is rejected;
/// - everything else is treated as a filesystem path and loaded with
///   [`load_file`]; relative paths are joined onto `instance_parent`.
///
/// A one-letter "scheme" is treated as a Windows drive letter, not a URL
/// scheme, so absolute paths such as `C:\schemas\a.yaml` load as files.
///
/// Returns the loaded schema together with its base URI rendered as a string.
///
/// # Errors
///
/// - Returns a generic error if `schema_ref` is empty after trimming, or if it
///   is a URL with an unsupported scheme.
/// - Returns [`Error::GenericError`] if the resolved path is not valid UTF-8,
///   or if the loaded schema has no base URI.
/// - Propagates any error from the underlying loader.
pub fn load_root_schema_from_ref(
    schema_ref: &str,
    instance_parent: &Path,
) -> Result<(RootSchema, String)> {
    let trimmed = schema_ref.trim();
    if trimmed.is_empty() {
        return Err(crate::generic_error!("$schema value is empty"));
    }

    // `Url::parse` accepts drive-letter paths (`C:\dir\s.yaml`, `C:/dir/s.yaml`)
    // and reports the drive letter as a one-character scheme. Real URL schemes
    // handled here are all longer, so a one-character scheme means "path".
    let as_url = ParseUrl::parse(trimmed)
        .ok()
        .filter(|parsed| parsed.scheme().len() > 1);

    let root = match as_url {
        Some(parsed) => match parsed.scheme() {
            "http" | "https" | "file" => load_external_schema(trimmed)?,
            other => {
                return Err(crate::generic_error!(
                    "Unsupported URL scheme in $schema: {}",
                    other
                ));
            }
        },
        None => {
            let path = Path::new(trimmed);
            let resolved = if path.is_absolute() {
                path.to_path_buf()
            } else {
                instance_parent.join(path)
            };
            let path_str = resolved
                .to_str()
                .ok_or_else(|| Error::GenericError("Non-UTF-8 schema path".to_string()))?;
            load_file(path_str)?
        }
    };

    // Both loaders above set `base_uri`; a missing value is an internal error.
    let base_uri = root
        .base_uri
        .as_ref()
        .map(|uri| uri.to_string())
        .ok_or_else(|| {
            Error::GenericError("Internal error: loaded schema missing base URI".to_string())
        })?;

    Ok((root, base_uri))
}
pub fn fetch_url(url_string: &str, timeout_seconds: Option<u64>) -> Result<(String, Url)> {
let url_owned = url_string.to_string();
let timeout = Duration::from_secs(timeout_seconds.unwrap_or(30));
std::thread::spawn(move || {
let client = Client::builder()
.timeout(timeout)
.use_native_tls()
.build()?;
let url = Url::parse(&url_owned).map_err(|e| Error::UrlLoadError(e.into()))?;
let response = client.get(url.clone()).send()?;
if !response.status().is_success() {
match response.error_for_status() {
Ok(_) => unreachable!(),
Err(e) => return Err(e.into()),
}
}
let content = response.text()?;
Ok((content, url))
})
.join()
.unwrap_or_else(|_| {
Err(Error::GenericError(
"HTTP fetch thread panicked".to_string(),
))
})
}
/// Downloads a YAML schema from `url_string` and loads it as a root schema.
///
/// The fetched URL becomes the schema's `base_uri`. `timeout_seconds` is
/// forwarded to [`fetch_url`].
///
/// # Errors
///
/// - Propagates any error from [`fetch_url`].
/// - Returns [`UrlLoadError::ParseError`] (converted into the crate error) if
///   the body is not valid YAML.
/// - Returns [`UrlLoadError::NoDocuments`] (converted into the crate error) if
///   the body holds no YAML documents.
/// - Propagates any error from [`load_from_doc`].
pub fn download_from_url(url_string: &str, timeout_seconds: Option<u64>) -> Result<RootSchema> {
    let (body, source_url) = fetch_url(url_string, timeout_seconds)?;
    let documents = MarkedYaml::load_from_str(&body).map_err(UrlLoadError::ParseError)?;

    let Some(first) = documents.first() else {
        return Err(UrlLoadError::NoDocuments.into());
    };

    let mut schema = load_from_doc(first)?;
    schema.base_uri = Some(source_url);
    Ok(schema)
}
pub fn marked_yaml_to_string<S: Into<String> + Copy>(yaml: &MarkedYaml, msg: S) -> Result<String> {
if let YamlData::Value(Scalar::String(s)) = &yaml.data {
Ok(s.to_string())
} else {
Err(Error::ExpectedScalar(msg.into()))
}
}
/// Converts a YAML sequence of mappings into a list of schemas.
///
/// Each element must be a mapping and is converted with the
/// `TryFrom<&MarkedYaml>` implementation for [`YamlSchema`]. Processing stops
/// at the first element that fails.
///
/// # Errors
///
/// - Returns a generic error, prefixed with the node's position, if `value` is
///   not a sequence.
/// - Returns a generic error if an element is not a mapping.
/// - Propagates any conversion error for an element.
pub fn load_array_of_schemas_marked<'f>(value: &MarkedYaml<'f>) -> Result<Vec<YamlSchema>> {
    let YamlData::Sequence(items) = &value.data else {
        return Err(generic_error!(
            "{} Expected a sequence, but got: {:?}",
            format_marker(&value.span.start),
            value
        ));
    };

    let mut schemas: Vec<YamlSchema> = Vec::with_capacity(items.len());
    for item in items.iter() {
        if !item.is_mapping() {
            return Err(generic_error!("Expected a mapping, but got: {:?}", item));
        }
        let schema: YamlSchema = item.try_into()?;
        schemas.push(schema);
    }
    Ok(schemas)
}
/// Reads an integer from an unmarked YAML node.
///
/// # Errors
///
/// Propagates the error from `try_unwrap_saphyr_scalar` if `value` cannot be
/// unwrapped to a scalar, and returns an unsupported-type error if the scalar
/// is not an integer.
pub fn load_integer(value: &saphyr::Yaml) -> Result<i64> {
    if let saphyr::Scalar::Integer(number) = try_unwrap_saphyr_scalar(value)? {
        return Ok(*number);
    }
    Err(unsupported_type!(
        "Expected type: integer, but got: {:?}",
        value
    ))
}
/// Reads an integer from a marked YAML node.
///
/// # Errors
///
/// Returns a generic error, prefixed with the node's position, if `value` is
/// not an integer scalar.
pub fn load_integer_marked(value: &MarkedYaml) -> Result<i64> {
    match &value.data {
        YamlData::Value(Scalar::Integer(number)) => Ok(*number),
        _ => Err(generic_error!(
            "{} Expected integer value, got: {:?}",
            format_marker(&value.span.start),
            value
        )),
    }
}
/// Reads a numeric value (integer or float) from an unmarked YAML node.
///
/// Integer scalars become `Number::integer`; floating-point scalars become
/// `Number::float`.
///
/// # Errors
///
/// Propagates the error from `try_unwrap_saphyr_scalar` if `value` cannot be
/// unwrapped to a scalar, and returns an unsupported-type error for any
/// non-numeric scalar.
pub fn load_number(value: &saphyr::Yaml) -> Result<Number> {
    let number = match try_unwrap_saphyr_scalar(value)? {
        Scalar::Integer(int) => Number::integer(*int),
        Scalar::FloatingPoint(float) => Number::float(float.into_inner()),
        _ => {
            return Err(unsupported_type!(
                "Expected type: integer or float, but got: {:?}",
                value
            ));
        }
    };
    Ok(number)
}
/// Loads the value of an array `items`-style keyword: a boolean or a schema mapping.
///
/// A boolean scalar yields `BooleanOrSchema::Boolean`; a mapping is converted
/// to a [`YamlSchema`] and wrapped with `BooleanOrSchema::schema`.
///
/// # Errors
///
/// Returns a generic error for any other node (non-boolean scalars included),
/// and propagates any schema conversion error.
pub fn load_array_items_marked<'input>(value: &MarkedYaml<'input>) -> Result<BooleanOrSchema> {
    match &value.data {
        YamlData::Value(Scalar::Boolean(flag)) => Ok(BooleanOrSchema::Boolean(*flag)),
        YamlData::Mapping(_) => {
            let schema: YamlSchema = value.try_into()?;
            Ok(BooleanOrSchema::schema(schema))
        }
        // Non-boolean scalars and every other node kind share one message.
        _ => Err(generic_error!(
            "array: boolean or mapping with type or $ref, but got: {:?}",
            value
        )),
    }
}
/// Loads a node that may be either a boolean or a schema mapping.
///
/// A boolean scalar yields `BooleanOrSchema::Boolean`; a mapping is converted
/// to a [`YamlSchema`] and wrapped with `BooleanOrSchema::schema`.
///
/// # Errors
///
/// - Returns a generic error, prefixed with the node's position, for a
///   non-boolean scalar.
/// - Returns an unsupported-type error for any node that is neither a scalar
///   nor a mapping.
/// - Propagates any schema conversion error.
pub fn load_boolean_or_schema_marked(value: &MarkedYaml<'_>) -> Result<BooleanOrSchema> {
    match &value.data {
        YamlData::Value(Scalar::Boolean(flag)) => Ok(BooleanOrSchema::Boolean(*flag)),
        YamlData::Value(other_scalar) => Err(generic_error!(
            "{} Expected a boolean scalar, but got: {:?}",
            format_marker(&value.span.start),
            other_scalar
        )),
        YamlData::Mapping(_) => {
            let schema: YamlSchema = value.try_into()?;
            Ok(BooleanOrSchema::schema(schema))
        }
        _ => Err(unsupported_type!(
            "Expected boolean or mapping, but got: {:?}",
            value
        )),
    }
}
// Unit tests for the loader: schema parsing from inline YAML, `$schema`
// extraction, path-based schema loading, and (network-dependent) URL download.
#[cfg(test)]
mod tests {
use regex::Regex;
use saphyr::LoadableYamlNode;
use saphyr::MarkedYaml;
use crate::ConstValue;
use crate::Engine;
use crate::Result;
use crate::Validator as _;
use crate::loader;
use crate::schemas::EnumSchema;
use crate::schemas::IntegerSchema;
use crate::schemas::SchemaType;
use crate::schemas::StringSchema;
use super::*;
// A bare `true` document loads as the boolean-literal schema `true`.
#[test]
fn test_boolean_literal_true() {
let root_schema = load_from_doc(&MarkedYaml::value_from_str("true")).unwrap();
assert_eq!(root_schema.schema, YamlSchema::BooleanLiteral(true));
}
// A bare `false` document loads as the boolean-literal schema `false`.
#[test]
fn test_boolean_literal_false() {
let root_schema = load_from_doc(&MarkedYaml::value_from_str("false")).unwrap();
assert_eq!(root_schema.schema, YamlSchema::BooleanLiteral(false));
}
// `const` with a string value is stored as a string `ConstValue`.
#[test]
fn test_const_string() {
let docs = MarkedYaml::load_from_str("const: string value").unwrap();
let root_schema = load_from_doc(docs.first().unwrap()).unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert_eq!(subschema.r#const, Some(ConstValue::string("string value")));
}
// `const` with an integer value is stored as an integer `ConstValue`.
#[test]
fn test_const_integer() {
let docs = MarkedYaml::load_from_str("const: 42").unwrap();
let root_schema = load_from_doc(docs.first().unwrap()).unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert_eq!(subschema.r#const, Some(ConstValue::integer(42)));
}
// `const` with a sequence value is stored as `ConstValue::Array`.
#[test]
fn test_const_array() {
let docs = MarkedYaml::load_from_str("const: [1, 2]").unwrap();
let root_schema = load_from_doc(docs.first().unwrap()).unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
let expected = ConstValue::Array(vec![ConstValue::integer(1), ConstValue::integer(2)]);
assert_eq!(subschema.r#const, Some(expected));
}
// `const` with a mapping value is stored as `ConstValue::Object`.
#[test]
fn test_const_object() {
let docs = MarkedYaml::load_from_str("const:\n a: 1").unwrap();
let root_schema = load_from_doc(docs.first().unwrap()).unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
let mut expected_obj = hashlink::LinkedHashMap::new();
expected_obj.insert("a".into(), ConstValue::integer(1));
assert_eq!(subschema.r#const, Some(ConstValue::Object(expected_obj)));
}
// An unknown `type` name is rejected, and the exact error text is pinned.
#[test]
fn test_type_foo_should_error() {
let docs = MarkedYaml::load_from_str("type: foo").unwrap();
let root_schema = load_from_doc(docs.first().unwrap());
assert!(root_schema.is_err());
assert_eq!(
root_schema.unwrap_err().to_string(),
"Unsupported type: Expected type: string, number, integer, object, array, boolean, or null, but got: foo"
);
}
// `type: string` is recorded as the schema's type.
#[test]
fn test_type_string() {
let docs = MarkedYaml::load_from_str("type: string").unwrap();
let root_schema = load_from_doc(docs.first().unwrap()).unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert_eq!(subschema.r#type, SchemaType::new("string"));
}
// An object schema with a described string property exposes the property's
// type, default string constraints and description.
#[test]
fn test_type_object_with_string_with_description() {
let root_schema = loader::load_from_str(
r#"
type: object
properties:
name:
type: string
description: This is a description
"#,
)
.expect("Failed to load schema");
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
let Some(object_schema) = &subschema.object_schema else {
panic!(
"Expected ObjectSchema, but got: {:?}",
&subschema.object_schema
);
};
let name_property = object_schema
.properties
.as_ref()
.expect("Expected properties")
.get("name")
.expect("Expected `name` property");
let YamlSchema::Subschema(name_property_schema) = &name_property else {
panic!(
"Expected Subschema for `name` property, but got: {:?}",
&name_property
);
};
assert_eq!(name_property_schema.r#type, SchemaType::new("string"));
assert_eq!(
name_property_schema.string_schema,
Some(StringSchema::default())
);
assert_eq!(
name_property_schema.metadata_and_annotations.description,
Some("This is a description".to_string())
);
}
// A string schema's `pattern` is compiled into a regex on the string schema.
#[test]
fn test_type_string_with_pattern() {
let root_schema = loader::load_from_str(
r#"
type: string
pattern: "^(\\([0-9]{3}\\))?[0-9]{3}-[0-9]{4}$"
"#,
)
.unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert_eq!(subschema.r#type, SchemaType::new("string"));
let expected = StringSchema {
pattern: Some(Regex::new("^(\\([0-9]{3}\\))?[0-9]{3}-[0-9]{4}$").unwrap()),
..Default::default()
};
assert_eq!(subschema.string_schema, Some(expected));
}
// `type: integer` produces a default integer schema.
#[test]
fn test_integer_schema() {
let root_schema = loader::load_from_str("type: integer").unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
let integer_schema = IntegerSchema::default();
assert_eq!(subschema.integer_schema, Some(integer_schema));
}
// An `enum` of strings is loaded as string const values, in order.
#[test]
fn test_enum() {
let root_schema = loader::load_from_str(
r#"
enum:
- foo
- bar
- baz
"#,
)
.unwrap();
let enum_values = ["foo", "bar", "baz"]
.iter()
.map(|s| ConstValue::string(s.to_string()))
.collect::<Vec<ConstValue>>();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert_eq!(
subschema.r#enum,
Some(EnumSchema {
r#enum: enum_values
})
);
}
// An `enum` may mix strings, null and integers when no `type` is given.
#[test]
fn test_enum_without_type() {
let root_schema = loader::load_from_str(
r#"
enum:
- red
- amber
- green
- null
- 42
"#,
)
.unwrap();
let enum_values = vec![
ConstValue::string("red".to_string()),
ConstValue::string("amber".to_string()),
ConstValue::string("green".to_string()),
ConstValue::null(),
ConstValue::integer(42),
];
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert_eq!(
subschema.r#enum,
Some(EnumSchema {
r#enum: enum_values
})
);
}
// `$defs` entries are loaded and retrievable by name.
#[test]
fn test_defs() {
let root_schema = loader::load_from_str(
r##"
$defs:
foo:
type: boolean
"##,
)
.unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert!(subschema.defs.is_some());
let Some(defs) = &subschema.defs else {
panic!("Expected defs, but got: {:?}", &subschema.defs);
};
assert_eq!(defs.len(), 1);
assert_eq!(defs.get("foo"), Some(&YamlSchema::typed_boolean()));
}
// `oneOf` holding an inline schema and a `$ref` into `$defs` is loaded as two
// alternatives; the document `false` then validates against it without errors.
#[test]
fn test_one_of_with_ref() {
let root_schema = loader::load_from_str(
r##"
$defs:
foo:
type: boolean
oneOf:
- type: string
- $ref: "#/$defs/foo"
"##,
)
.unwrap();
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert!(subschema.one_of.is_some());
let Some(one_of) = &subschema.one_of else {
panic!("Expected oneOf, but got: {:?}", &subschema.one_of);
};
assert_eq!(one_of.one_of.len(), 2);
assert_eq!(
one_of.one_of[0],
YamlSchema::typed_string(StringSchema::default()),
"one_of[0] should be a string schema"
);
assert_eq!(
one_of.one_of[1],
YamlSchema::ref_str("#/$defs/foo"),
"one_of[1] should be a reference to '#/$defs/foo'"
);
let s = r#"
false
"#;
let docs = MarkedYaml::load_from_str(s).unwrap();
let value = docs.first().unwrap();
let context = crate::Context::with_root_schema(&root_schema, true);
let result = root_schema.validate(&context, value);
assert!(result.is_ok());
assert!(!context.has_errors());
}
// A top-level `$schema` string in a mapping is returned verbatim.
#[test]
fn extract_dollar_schema_from_mapping() {
let yaml = "$schema: ./x.yaml\nfoo: 1\n";
assert_eq!(
extract_dollar_schema_from_yaml(yaml).unwrap(),
Some("./x.yaml".to_string())
);
}
// A mapping without `$schema` yields `None`.
#[test]
fn extract_dollar_schema_missing() {
assert_eq!(extract_dollar_schema_from_yaml("foo: 1\n").unwrap(), None);
}
// A document whose root is not a mapping yields `None`.
#[test]
fn extract_dollar_schema_non_mapping_root() {
assert_eq!(extract_dollar_schema_from_yaml("- a\n").unwrap(), None);
}
// A non-string `$schema` value is an error.
#[test]
fn extract_dollar_schema_not_string_errors() {
let result = extract_dollar_schema_from_yaml("$schema: 42\n");
assert!(result.is_err());
}
// A relative `$schema` path is resolved against the given parent directory,
// and the returned base URI is a `file://` URL.
#[test]
fn load_root_schema_from_ref_relative_path() {
// The process id keeps the temp directory name distinct per test process.
let dir = std::env::temp_dir().join(format!("yaml_schema_ref_test_{}", std::process::id()));
std::fs::create_dir_all(&dir).expect("create temp dir");
let schema_path = dir.join("sch.yaml");
std::fs::write(
&schema_path,
"type: object\nproperties:\n a:\n type: string\n",
)
.expect("write schema");
let (root, uri) = load_root_schema_from_ref("sch.yaml", &dir).expect("load");
assert!(uri.starts_with("file://"));
let YamlSchema::Subschema(sub) = &root.schema else {
panic!("expected Subschema");
};
assert_eq!(sub.r#type, SchemaType::new("object"));
// Best-effort cleanup; a failure to remove the directory is ignored.
std::fs::remove_dir_all(&dir).ok();
}
// The schema file `yaml-schema.yaml` (relative to the working directory) must
// validate against itself.
#[test]
fn test_self_validate() -> Result<()> {
let schema_filename = "yaml-schema.yaml";
let root_schema = match loader::load_file(schema_filename) {
Ok(schema) => schema,
Err(e) => {
eprintln!("Failed to read YAML schema file: {schema_filename}");
log::error!("{e}");
return Err(e);
}
};
let yaml_contents = std::fs::read_to_string(schema_filename)?;
let context = Engine::evaluate(&root_schema, &yaml_contents, false)?;
if context.has_errors() {
for error in context.errors.borrow().iter() {
eprintln!("{error}");
}
}
assert!(!context.has_errors());
Ok(())
}
// Downloads the published schema and checks its shape. The test is skipped
// when the `CI` environment variable is set, and is treated as skipped when
// the failure looks like a network outage.
#[test]
fn test_download_from_url() {
if std::env::var("CI").is_ok() {
return;
}
// Run the body under `catch_unwind` so the panic message can be inspected.
let result = std::panic::catch_unwind(|| {
let url = "https://yaml-schema.net/yaml-schema.yaml";
let result = download_from_url(url, Some(10));
let root_schema = result.expect("Failed to download and parse YAML schema from URL");
let YamlSchema::Subschema(subschema) = &root_schema.schema else {
panic!("Expected Subschema, but got: {:?}", &root_schema.schema);
};
assert_eq!(subschema.r#type, SchemaType::new("object"));
assert!(subschema.object_schema.is_some());
// If the local schema file is readable, validate it with the downloaded schema.
if let Ok(local_schema) = std::fs::read_to_string("yaml-schema.yaml") {
let context = Engine::evaluate(&root_schema, &local_schema, false);
if let Ok(ctx) = context {
if ctx.has_errors() {
for error in ctx.errors.borrow().iter() {
eprintln!("Validation error: {}", error);
}
panic!("Downloaded schema failed validation against local schema");
}
} else if let Err(e) = context {
panic!("Failed to validate downloaded schema: {}", e);
}
}
});
if let Err(e) = result {
// Only `String` panic payloads naming a network failure are swallowed;
// every other panic is re-raised unchanged.
if let Some(s) = e.downcast_ref::<String>()
&& (s.contains("Network is unreachable")
|| s.contains("failed to lookup address information"))
{
eprintln!("Warning: Network unreachable, skipping download test");
return;
}
std::panic::resume_unwind(e);
}
}
}