use std::collections::{HashMap, HashSet};
use serde_json::Value;
use tower_lsp::lsp_types::Url;
/// Maximum number of body bytes accepted for a fetched schema or catalog (5 MiB).
pub(crate) const MAX_SCHEMA_BYTES: u64 = 5 * 1024 * 1024;
/// Longest URL, measured in bytes, that `validate_and_normalize_url` accepts.
const MAX_URL_LENGTH: usize = 2048;
/// Deepest object/array nesting `check_json_depth` tolerates in a fetched document.
const MAX_JSON_DEPTH: usize = 50;
/// Recursion ceiling used by `parse_schema_with_root` and `resolve_ref`.
const MAX_REF_DEPTH: usize = 32;
/// Maximum number of distinct remote URLs a single `ParseContext` will mark as visited.
const MAX_REMOTE_FETCH_COUNT: usize = 20;
/// Vocabulary URIs (Draft 2019-09 and Draft 2020-12) that `check_vocabulary`
/// treats as known; any other vocabulary marked as required is reported.
const KNOWN_VOCABULARIES: &[&str] = &[
"https://json-schema.org/draft/2019-09/vocab/core",
"https://json-schema.org/draft/2019-09/vocab/applicator",
"https://json-schema.org/draft/2019-09/vocab/validation",
"https://json-schema.org/draft/2019-09/vocab/meta-data",
"https://json-schema.org/draft/2019-09/vocab/format",
"https://json-schema.org/draft/2019-09/vocab/content",
"https://json-schema.org/draft/2020-12/vocab/core",
"https://json-schema.org/draft/2020-12/vocab/applicator",
"https://json-schema.org/draft/2020-12/vocab/unevaluated",
"https://json-schema.org/draft/2020-12/vocab/validation",
"https://json-schema.org/draft/2020-12/vocab/meta-data",
"https://json-schema.org/draft/2020-12/vocab/format-annotation",
"https://json-schema.org/draft/2020-12/vocab/format-assertion",
"https://json-schema.org/draft/2020-12/vocab/content",
];
/// Failures that can occur while validating a schema URL, fetching it, or
/// parsing the response.
#[derive(Debug)]
pub enum SchemaError {
/// The URL was rejected before any request was made (too long, unparsable,
/// non-HTTP(S) scheme, or a blocked host); the payload is the reason.
UrlNotPermitted(String),
/// The HTTP request or reading the response body failed; the payload is the
/// underlying error rendered as text.
FetchFailed(String),
/// The response body was larger than `MAX_SCHEMA_BYTES`.
ResponseTooLarge,
/// The body was not valid JSON, or did not parse as a schema / catalog.
ParseFailed(String),
/// The JSON document nests deeper than `MAX_JSON_DEPTH`.
TooDeep,
/// The remote fetch budget (`MAX_REMOTE_FETCH_COUNT`) was exhausted.
// NOTE(review): no code in this part of the file constructs this variant — confirm it is still used.
TooManyRemoteFetches,
/// The response `content-type` was neither JSON nor a schema media type;
/// the payload is the header value that was received.
UnexpectedContentType(String),
}
/// Human-readable rendering of each [`SchemaError`] variant.
impl std::fmt::Display for SchemaError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Variants without a payload are fixed messages.
            Self::ResponseTooLarge => f.write_str("schema response exceeded size limit"),
            Self::TooDeep => f.write_str("schema nesting depth exceeded limit"),
            Self::TooManyRemoteFetches => write!(
                f,
                "remote fetch count exceeded limit ({MAX_REMOTE_FETCH_COUNT})"
            ),
            // Variants with a payload append it after a fixed prefix.
            Self::UrlNotPermitted(reason) => write!(f, "URL not permitted: {reason}"),
            Self::FetchFailed(cause) => write!(f, "fetch failed: {cause}"),
            Self::ParseFailed(cause) => write!(f, "schema parse failed: {cause}"),
            Self::UnexpectedContentType(received) => {
                write!(f, "unexpected content type: {received}")
            }
        }
    }
}
/// Value of a schema's `type` keyword as produced by `parse_type`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaType {
/// `type` was a single string, e.g. `"object"`.
Single(String),
/// `type` was an array; only its string members are kept.
Multiple(Vec<String>),
}
/// Parsed form of a keyword that accepts either `false` or a sub-schema
/// (used for `additionalProperties`, `additionalItems` and
/// `unevaluatedProperties`).
#[derive(Debug, Clone)]
pub enum AdditionalProperties {
/// The keyword was the literal `false`.
Denied,
/// The keyword was `true` or an object schema that parsed successfully.
Schema(Box<JsonSchema>),
}
/// JSON Schema draft identified from a schema's `$schema` URI by `detect_draft`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SchemaDraft {
Draft04,
Draft06,
Draft07,
Draft201909,
Draft202012,
/// `$schema` was absent or not one of the recognised meta-schema URIs.
#[default]
Unknown,
}
/// In-memory representation of one (sub-)schema as built by
/// `parse_schema_with_root`. Every field is optional; a keyword that is absent
/// or has an unexpected JSON type is left as `None`.
#[derive(Debug, Clone, Default)]
pub struct JsonSchema {
/// Draft detected from `$schema` (`Unknown` when absent or unrecognised).
pub draft: SchemaDraft,
/// `$id` (or legacy `id`), resolved against the enclosing base URI when possible.
pub id: Option<String>,
/// The `type` keyword.
pub schema_type: Option<SchemaType>,
pub title: Option<String>,
pub description: Option<String>,
pub format: Option<String>,
/// The `contentEncoding` keyword.
pub content_encoding: Option<String>,
/// The `contentMediaType` keyword.
pub content_media_type: Option<String>,
pub properties: Option<HashMap<String, Self>>,
pub required: Option<Vec<String>>,
/// The `enum` keyword.
pub enum_values: Option<Vec<Value>>,
pub default: Option<Value>,
pub examples: Option<Vec<Value>>,
/// `items` when it is a single schema (not the array form).
pub items: Option<Box<Self>>,
/// `prefixItems`, or the array form of `items` when `prefixItems` is absent.
pub prefix_items: Option<Vec<Self>>,
pub contains: Option<Box<Self>>,
pub min_items: Option<u64>,
pub max_items: Option<u64>,
pub max_contains: Option<u64>,
pub min_contains: Option<u64>,
pub unique_items: Option<bool>,
pub additional_properties: Option<AdditionalProperties>,
/// `additionalItems`; only read when `items` is an array and `prefixItems` is absent.
pub additional_items: Option<AdditionalProperties>,
pub min_properties: Option<u64>,
pub max_properties: Option<u64>,
/// `patternProperties` as (pattern, schema) pairs.
pub pattern_properties: Option<Vec<(String, Self)>>,
pub property_names: Option<Box<Self>>,
pub all_of: Option<Vec<Self>>,
pub any_of: Option<Vec<Self>>,
pub one_of: Option<Vec<Self>>,
pub not: Option<Box<Self>>,
/// The `if` keyword.
pub if_schema: Option<Box<Self>>,
/// The `then` keyword.
pub then_schema: Option<Box<Self>>,
/// The `else` keyword.
pub else_schema: Option<Box<Self>>,
/// Raw `$ref` string; only present on schemas whose reference could not be
/// resolved (a resolved reference is replaced by its target).
pub ref_path: Option<String>,
/// The `$anchor` keyword.
pub anchor: Option<String>,
/// The `$dynamicAnchor` keyword.
pub dynamic_anchor: Option<String>,
pub pattern: Option<String>,
pub minimum: Option<f64>,
pub maximum: Option<f64>,
pub min_length: Option<u64>,
pub max_length: Option<u64>,
/// Numeric form of `exclusiveMinimum`.
pub exclusive_minimum: Option<f64>,
/// Numeric form of `exclusiveMaximum`.
pub exclusive_maximum: Option<f64>,
/// Boolean form of `exclusiveMinimum`.
pub exclusive_minimum_draft04: Option<bool>,
/// Boolean form of `exclusiveMaximum`.
pub exclusive_maximum_draft04: Option<bool>,
pub multiple_of: Option<f64>,
/// The `const` keyword.
pub const_value: Option<serde_json::Value>,
/// Array-valued entries of `dependencies` plus `dependentRequired`.
pub dependent_required: Option<HashMap<String, Vec<String>>>,
/// Schema-valued entries of `dependencies` plus `dependentSchemas`.
pub dependent_schemas: Option<HashMap<String, Self>>,
/// Union of `definitions` and `$defs` (`$defs` wins on a name clash).
pub definitions: Option<HashMap<String, Self>>,
pub deprecated: Option<bool>,
pub unevaluated_properties: Option<AdditionalProperties>,
pub unevaluated_items: Option<Box<Self>>,
/// `$vocabulary`: vocabulary URI mapped to its "required" flag.
pub vocabulary: Option<HashMap<String, bool>>,
}
/// Associates a filename glob with a schema URL; consumed by
/// `match_schema_by_filename`.
#[derive(Debug, Clone)]
pub struct SchemaAssociation {
/// Glob pattern matched against the filename.
pub pattern: String,
/// Schema URL returned when the pattern matches.
pub url: String,
}
/// One SchemaStore catalog entry retained by `parse_schemastore_catalog`.
#[derive(Debug, Clone)]
pub struct SchemaStoreEntry {
/// The entry's `url` value (never empty).
pub url: String,
/// The entry's `fileMatch` patterns, restricted to those with a `yml`/`yaml` extension.
pub file_match: Vec<String>,
}
/// The subset of the SchemaStore catalog relevant to YAML files.
#[derive(Debug, Clone, Default)]
pub struct SchemaStoreCatalog {
/// Entries in catalog order; `match_schemastore` returns the first match.
pub entries: Vec<SchemaStoreEntry>,
}
/// Cache of fetched schemas keyed by URL string.
#[derive(Debug, Default)]
pub struct SchemaCache {
// Each entry keeps both the raw JSON document and the schema parsed from it.
inner: HashMap<String, (Value, JsonSchema)>,
}
impl SchemaCache {
    /// Creates an empty cache.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the parsed schema cached under `url`, if any.
    #[must_use]
    pub fn get(&self, url: &str) -> Option<&JsonSchema> {
        self.get_raw(url).map(|entry| &entry.1)
    }

    /// Stores `value` / `schema` under `url` unless an entry already exists,
    /// in which case the existing entry is kept untouched.
    pub fn insert(&mut self, url: String, value: Value, schema: JsonSchema) {
        self.inner.entry(url).or_insert_with(|| (value, schema));
    }

    /// Returns the raw JSON document together with its parsed schema.
    #[must_use]
    fn get_raw(&self, url: &str) -> Option<&(Value, JsonSchema)> {
        self.inner.get(url)
    }

    /// Returns the cached schema for `url`, fetching and caching it first when
    /// it is not present yet.
    ///
    /// # Errors
    ///
    /// Propagates any [`SchemaError`] from `fetch_schema_raw`; nothing is
    /// cached in that case.
    pub fn get_or_fetch(
        &mut self,
        url: &str,
        proxy: Option<&str>,
    ) -> Result<&JsonSchema, SchemaError> {
        use std::collections::hash_map::Entry;

        let cached = match self.inner.entry(url.to_string()) {
            Entry::Occupied(hit) => hit.into_mut(),
            Entry::Vacant(miss) => miss.insert(fetch_schema_raw(url, proxy)?),
        };
        Ok(&cached.1)
    }

    /// Reports whether an entry exists for `url`.
    #[must_use]
    pub fn contains(&self, url: &str) -> bool {
        self.get_raw(url).is_some()
    }
}
/// Checks that `raw` is an acceptable schema URL and returns its normalized
/// string form.
///
/// # Errors
///
/// Returns [`SchemaError::UrlNotPermitted`] when the input is longer than
/// `MAX_URL_LENGTH`, does not parse as a URL, uses a scheme other than
/// `http`/`https`, or names a host rejected by `is_ssrf_blocked_host`.
pub fn validate_and_normalize_url(raw: &str) -> Result<String, SchemaError> {
    if raw.len() > MAX_URL_LENGTH {
        let reason = "URL exceeds maximum length".to_string();
        return Err(SchemaError::UrlNotPermitted(reason));
    }

    let parsed = match Url::parse(raw) {
        Ok(parsed) => parsed,
        Err(e) => return Err(SchemaError::UrlNotPermitted(format!("invalid URL: {e}"))),
    };

    let scheme = parsed.scheme();
    if !matches!(scheme, "http" | "https") {
        let reason = format!("scheme '{scheme}' is not permitted");
        return Err(SchemaError::UrlNotPermitted(reason));
    }

    // Only a host that is both present and on the block list is rejected.
    if let Some(host) = parsed.host_str().filter(|h| is_ssrf_blocked_host(h)) {
        let reason = format!("host '{host}' is not permitted");
        return Err(SchemaError::UrlNotPermitted(reason));
    }

    Ok(parsed.to_string())
}
/// Reports whether `host` must not be contacted because it designates the
/// local machine or a private / link-local network.
///
/// Blocked hosts:
/// - `localhost` and any `*.localhost` name, case-insensitively and with or
///   without a trailing root dot (`localhost.` resolves like `localhost`);
/// - IPv4 loopback, link-local, private and unspecified addresses;
/// - IPv6 loopback, unspecified, link-local (`fe80::/10`) and unique-local
///   (`fc00::/7`) addresses, plus IPv4-mapped forms of the blocked IPv4 ranges.
///
/// IPv6 literals may be passed with or without surrounding brackets. Names
/// that are not IP literals are not resolved; only the textual form is checked.
fn is_ssrf_blocked_host(host: &str) -> bool {
    use std::net::IpAddr;

    /// IPv4 ranges that must never be fetched from.
    fn is_blocked_v4(v4: std::net::Ipv4Addr) -> bool {
        v4.is_loopback() || v4.is_link_local() || v4.is_private() || v4.is_unspecified()
    }

    // A fully-qualified name may end with the root dot; it names the same host.
    let host = host.strip_suffix('.').unwrap_or(host);

    let lowered = host.to_ascii_lowercase();
    if lowered == "localhost" || lowered.ends_with(".localhost") {
        return true;
    }

    let bare = host
        .strip_prefix('[')
        .and_then(|s| s.strip_suffix(']'))
        .unwrap_or(host);

    match bare.parse::<IpAddr>() {
        Ok(IpAddr::V4(v4)) => is_blocked_v4(v4),
        Ok(IpAddr::V6(v6)) => {
            let [first, ..] = v6.segments();
            v6.is_loopback()
                || v6.is_unspecified()
                // fe80::/10 — link-local
                || (first & 0xffc0) == 0xfe80
                // fc00::/7 — unique local
                || (first & 0xfe00) == 0xfc00
                || v6.to_ipv4_mapped().is_some_and(is_blocked_v4)
        }
        Err(_) => false,
    }
}
/// Builds the HTTP agent used for all fetches: redirects are disabled, the
/// connect timeout is 5 s and the overall timeout is 15 s.
///
/// When `proxy` is given and parses as a proxy URL it is installed on the
/// agent; a proxy string that fails to parse is ignored and the agent is built
/// without a proxy.
fn build_agent(proxy: Option<&str>) -> ureq::Agent {
    use std::time::Duration;

    let base = ureq::Agent::config_builder()
        .max_redirects(0)
        .timeout_connect(Some(Duration::from_secs(5)))
        .timeout_global(Some(Duration::from_secs(15)));

    let configured = match proxy.map(ureq::Proxy::new) {
        Some(Ok(parsed)) => base.proxy(Some(parsed)),
        Some(Err(_)) | None => base,
    };

    configured.build().new_agent()
}
/// Downloads the JSON Schema at `url` and returns both the raw JSON document
/// and the schema parsed from it.
///
/// The URL is validated with `validate_and_normalize_url` and the request is
/// sent to the *normalized* form, so the address that was checked is exactly
/// the address that is contacted. The agent comes from `build_agent`, which
/// disables redirects. At most `MAX_SCHEMA_BYTES` of body are accepted.
///
/// # Errors
///
/// - [`SchemaError::UrlNotPermitted`] if the URL fails validation.
/// - [`SchemaError::FetchFailed`] if the request or reading the body fails.
/// - [`SchemaError::UnexpectedContentType`] if the `content-type` header does
///   not mention `application/json` or `application/schema` (compared
///   case-insensitively, since media types are case-insensitive).
/// - [`SchemaError::ResponseTooLarge`] if the body exceeds `MAX_SCHEMA_BYTES`.
/// - [`SchemaError::ParseFailed`] if the body is not JSON or not a schema.
/// - [`SchemaError::TooDeep`] if the document nests beyond `MAX_JSON_DEPTH`.
pub fn fetch_schema_raw(
    url: &str,
    proxy: Option<&str>,
) -> Result<(Value, JsonSchema), SchemaError> {
    use std::io::Read as _;

    // Fetch what was validated: handing the raw string to the HTTP client would
    // let the validator and the client disagree about what the URL means.
    let normalized = validate_and_normalize_url(url)?;

    let agent = build_agent(proxy);
    let response = agent
        .get(normalized.as_str())
        .call()
        .map_err(|e| SchemaError::FetchFailed(e.to_string()))?;

    let content_type = response
        .headers()
        .get("content-type")
        .and_then(|v| v.to_str().ok())
        .unwrap_or("");
    let media_type = content_type.to_ascii_lowercase();
    if !media_type.contains("application/json") && !media_type.contains("application/schema") {
        return Err(SchemaError::UnexpectedContentType(content_type.to_string()));
    }

    // Read one byte past the limit so an oversized body can be told apart from
    // one that is exactly at the limit.
    let mut limited = response
        .into_body()
        .into_reader()
        .take(MAX_SCHEMA_BYTES + 1);
    let mut buf = Vec::new();
    limited
        .read_to_end(&mut buf)
        .map_err(|e| SchemaError::FetchFailed(e.to_string()))?;
    if buf.len() as u64 > MAX_SCHEMA_BYTES {
        return Err(SchemaError::ResponseTooLarge);
    }

    let value: Value =
        serde_json::from_slice(&buf).map_err(|e| SchemaError::ParseFailed(e.to_string()))?;
    check_json_depth(&value, 0)?;
    let schema = parse_schema(&value)
        .ok_or_else(|| SchemaError::ParseFailed("not a JSON Schema".to_string()))?;
    Ok((value, schema))
}
/// Fetches and parses the schema at `url`, discarding the raw JSON document.
///
/// # Errors
///
/// Returns whatever [`SchemaError`] `fetch_schema_raw` reports.
pub fn fetch_schema(url: &str, proxy: Option<&str>) -> Result<JsonSchema, SchemaError> {
    let (_raw, schema) = fetch_schema_raw(url, proxy)?;
    Ok(schema)
}
/// Location of the SchemaStore catalog document.
const SCHEMASTORE_CATALOG_URL: &str = "https://www.schemastore.org/api/json/catalog.json";

/// Downloads the SchemaStore catalog and reduces it to the entries that apply
/// to YAML files (see `parse_schemastore_catalog`).
///
/// # Errors
///
/// - [`SchemaError::FetchFailed`] if the request or reading the body fails.
/// - [`SchemaError::ResponseTooLarge`] if the body exceeds `MAX_SCHEMA_BYTES`.
/// - [`SchemaError::ParseFailed`] if the body is not JSON or lacks the
///   expected catalog shape.
pub fn fetch_schemastore_catalog(proxy: Option<&str>) -> Result<SchemaStoreCatalog, SchemaError> {
    use std::io::Read as _;

    let agent = build_agent(proxy);
    let response = match agent.get(SCHEMASTORE_CATALOG_URL).call() {
        Ok(response) => response,
        Err(e) => return Err(SchemaError::FetchFailed(e.to_string())),
    };

    // One extra byte is requested so that "too large" is detectable.
    let mut bounded = response
        .into_body()
        .into_reader()
        .take(MAX_SCHEMA_BYTES + 1);
    let mut body = Vec::new();
    if let Err(e) = bounded.read_to_end(&mut body) {
        return Err(SchemaError::FetchFailed(e.to_string()));
    }
    if body.len() as u64 > MAX_SCHEMA_BYTES {
        return Err(SchemaError::ResponseTooLarge);
    }

    let document: Value = match serde_json::from_slice(&body) {
        Ok(document) => document,
        Err(e) => return Err(SchemaError::ParseFailed(e.to_string())),
    };

    match parse_schemastore_catalog(&document) {
        Some(catalog) => Ok(catalog),
        None => Err(SchemaError::ParseFailed(
            "not a SchemaStore catalog".to_string(),
        )),
    }
}
fn parse_schemastore_catalog(value: &Value) -> Option<SchemaStoreCatalog> {
let obj = value.as_object()?;
let schemas = obj.get("schemas")?.as_array()?;
let entries = schemas
.iter()
.filter_map(|entry| {
let entry_obj = entry.as_object()?;
let url = entry_obj.get("url")?.as_str()?.to_string();
if url.is_empty() {
return None;
}
let file_match: Vec<String> = entry_obj
.get("fileMatch")?
.as_array()?
.iter()
.filter_map(|v| v.as_str().map(String::from))
.filter(|p| {
std::path::Path::new(p.as_str())
.extension()
.is_some_and(|ext| {
ext.eq_ignore_ascii_case("yml") || ext.eq_ignore_ascii_case("yaml")
})
})
.collect();
if file_match.is_empty() {
None
} else {
Some(SchemaStoreEntry { url, file_match })
}
})
.collect();
Some(SchemaStoreCatalog { entries })
}
/// Returns the schema URL of the first catalog entry that has a `fileMatch`
/// pattern matching `filename`, or `None` when no entry matches.
#[must_use]
pub fn match_schemastore(filename: &str, catalog: &SchemaStoreCatalog) -> Option<String> {
    catalog
        .entries
        .iter()
        .find(|candidate| {
            candidate
                .file_match
                .iter()
                .any(|glob| glob_matches(glob, filename))
        })
        .map(|hit| hit.url.clone())
}
/// Verifies that `value` does not nest objects/arrays deeper than
/// `MAX_JSON_DEPTH`, where `depth` is the nesting level of `value` itself.
///
/// # Errors
///
/// Returns [`SchemaError::TooDeep`] as soon as a value deeper than the limit
/// is reached.
fn check_json_depth(value: &Value, depth: usize) -> Result<(), SchemaError> {
    if depth > MAX_JSON_DEPTH {
        return Err(SchemaError::TooDeep);
    }

    let child_depth = depth + 1;
    match value {
        Value::Object(fields) => fields
            .values()
            .try_for_each(|child| check_json_depth(child, child_depth)),
        Value::Array(elements) => elements
            .iter()
            .try_for_each(|child| check_json_depth(child, child_depth)),
        // Scalars cannot nest any further.
        Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_) => Ok(()),
    }
}
/// State threaded through parsing when remote `$ref`s may be fetched.
struct ParseContext<'a> {
    /// Cache that fetched documents are stored in and served from.
    cache: &'a mut SchemaCache,
    /// Optional proxy URL forwarded to every fetch.
    proxy: Option<&'a str>,
    /// Remote URLs already claimed during this parse.
    visited: HashSet<String>,
}

impl<'a> ParseContext<'a> {
    /// Creates a context with an empty visited set.
    fn new(cache: &'a mut SchemaCache, proxy: Option<&'a str>) -> Self {
        let visited = HashSet::new();
        Self {
            cache,
            proxy,
            visited,
        }
    }

    /// Claims `url` for fetching.
    ///
    /// Returns `false` — without recording anything — once
    /// `MAX_REMOTE_FETCH_COUNT` URLs have been claimed, and also when `url`
    /// itself was claimed before; returns `true` for a fresh URL within budget.
    fn try_visit(&mut self, url: &str) -> bool {
        let within_budget = self.visited.len() < MAX_REMOTE_FETCH_COUNT;
        within_budget && self.visited.insert(url.to_string())
    }
}
/// Parses `value` as a self-contained JSON Schema.
///
/// `value` serves as its own root for local `$ref` resolution; no base URI is
/// assumed and remote references are not fetched. Returns `None` when `value`
/// is not a schema (see `parse_schema_with_root`).
#[must_use]
pub fn parse_schema(value: &Value) -> Option<JsonSchema> {
    let root = value;
    parse_schema_with_root(value, root, None, None, 0)
}
/// Parses `value` as a JSON Schema, allowing remote `$ref`s to be fetched.
///
/// Fetched documents are stored in `cache`; `proxy` is forwarded to each
/// fetch. A fresh `ParseContext` (and therefore a fresh visited set) is used
/// for every call.
#[must_use]
pub fn parse_schema_with_remote(
    value: &Value,
    cache: &mut SchemaCache,
    proxy: Option<&str>,
) -> Option<JsonSchema> {
    let mut remote = ParseContext::new(cache, proxy);
    let root = value;
    parse_schema_with_root(value, root, None, Some(&mut remote), 0)
}
/// Produces one message per vocabulary that the schema marks as required but
/// that is not listed in `KNOWN_VOCABULARIES`.
///
/// Returns an empty vector when the schema has no `$vocabulary` or every
/// required vocabulary is known.
#[must_use]
pub fn check_vocabulary(schema: &JsonSchema) -> Vec<String> {
    let mut messages = Vec::new();
    if let Some(declared) = schema.vocabulary.as_ref() {
        for (uri, &required) in declared {
            if required && !KNOWN_VOCABULARIES.contains(&uri.as_str()) {
                messages.push(format!("Unknown required vocabulary: {uri}"));
            }
        }
    }
    messages
}
/// Copies the keywords that hold plain values (strings, numbers, booleans,
/// raw JSON) from `obj` into `schema`.
///
/// A keyword that is missing or has an unexpected JSON type leaves the
/// corresponding field `None`. `exclusiveMinimum` / `exclusiveMaximum` are
/// stored in the numeric field when they are numbers and in the `*_draft04`
/// field when they are booleans.
fn parse_scalar_fields(obj: &serde_json::Map<String, Value>, schema: &mut JsonSchema) {
    let float = |keyword: &str| obj.get(keyword).and_then(Value::as_f64);
    let unsigned = |keyword: &str| obj.get(keyword).and_then(Value::as_u64);
    let list = |keyword: &str| obj.get(keyword).and_then(Value::as_array).cloned();

    // Annotations and string-valued keywords.
    schema.title = string_field(obj, "title");
    schema.description = string_field(obj, "description");
    schema.format = string_field(obj, "format");
    schema.pattern = string_field(obj, "pattern");
    schema.anchor = string_field(obj, "$anchor");
    schema.dynamic_anchor = string_field(obj, "$dynamicAnchor");
    schema.content_encoding = string_field(obj, "contentEncoding");
    schema.content_media_type = string_field(obj, "contentMediaType");
    schema.deprecated = obj.get("deprecated").and_then(Value::as_bool);

    // Numeric and length bounds.
    schema.minimum = float("minimum");
    schema.maximum = float("maximum");
    schema.multiple_of = float("multipleOf");
    schema.min_length = unsigned("minLength");
    schema.max_length = unsigned("maxLength");

    match obj.get("exclusiveMinimum") {
        Some(Value::Number(bound)) => schema.exclusive_minimum = bound.as_f64(),
        Some(Value::Bool(flag)) => schema.exclusive_minimum_draft04 = Some(*flag),
        Some(Value::Null | Value::String(_) | Value::Array(_) | Value::Object(_)) | None => {}
    }
    match obj.get("exclusiveMaximum") {
        Some(Value::Number(bound)) => schema.exclusive_maximum = bound.as_f64(),
        Some(Value::Bool(flag)) => schema.exclusive_maximum_draft04 = Some(*flag),
        Some(Value::Null | Value::String(_) | Value::Array(_) | Value::Object(_)) | None => {}
    }

    // Keywords kept as raw JSON.
    schema.const_value = obj.get("const").cloned();
    schema.default = obj.get("default").cloned();
    schema.examples = list("examples");
    schema.enum_values = list("enum");
}
/// Parses the object-related keywords `properties`, `minProperties`,
/// `maxProperties` and `patternProperties` from `obj` into `schema`.
///
/// Sub-schemas that fail to parse are dropped; `properties` /
/// `pattern_properties` stay `None` when nothing parsable remains.
/// Sub-schemas are parsed at `depth + 1`.
fn parse_object_fields(
    obj: &serde_json::Map<String, Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
    schema: &mut JsonSchema,
) {
    if let Some(declared) = obj.get("properties").and_then(Value::as_object) {
        let parsed: HashMap<String, JsonSchema> = declared
            .iter()
            .filter_map(|(name, raw)| {
                parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
                    .map(|sub| (name.clone(), sub))
            })
            .collect();
        if !parsed.is_empty() {
            schema.properties = Some(parsed);
        }
    }

    schema.min_properties = obj.get("minProperties").and_then(Value::as_u64);
    schema.max_properties = obj.get("maxProperties").and_then(Value::as_u64);

    if let Some(declared) = obj.get("patternProperties").and_then(Value::as_object) {
        let parsed: Vec<(String, JsonSchema)> = declared
            .iter()
            .filter_map(|(regex, raw)| {
                parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
                    .map(|sub| (regex.clone(), sub))
            })
            .collect();
        if !parsed.is_empty() {
            schema.pattern_properties = Some(parsed);
        }
    }
}
/// Parses the array-related keywords from `obj` into `schema`:
/// `prefixItems`, `items`, `additionalItems`, `contains`, `minItems`,
/// `maxItems`, `minContains`, `maxContains` and `uniqueItems`.
///
/// Sub-schemas that fail to parse are dropped. The statement order matters:
/// the handling of `items` depends on whether `prefixItems` already produced
/// `schema.prefix_items`.
fn parse_array_fields(
obj: &serde_json::Map<String, Value>,
root: &Value,
base_uri: Option<&str>,
mut ctx: Option<&mut ParseContext<'_>>,
depth: usize,
schema: &mut JsonSchema,
) {
// `prefixItems`: positional schemas.
if let Some(arr) = obj.get("prefixItems").and_then(Value::as_array) {
let mut items = Vec::new();
for v in arr {
if let Some(s) =
parse_schema_with_root(v, root, base_uri, ctx.as_deref_mut(), depth + 1)
{
items.push(s);
}
}
if !items.is_empty() {
schema.prefix_items = Some(items);
}
}
match obj.get("items") {
// Array-form `items` is stored as positional schemas, but only when
// `prefixItems` did not already provide them.
Some(Value::Array(arr)) if schema.prefix_items.is_none() => {
let mut items = Vec::new();
for v in arr {
if let Some(s) =
parse_schema_with_root(v, root, base_uri, ctx.as_deref_mut(), depth + 1)
{
items.push(s);
}
}
if !items.is_empty() {
schema.prefix_items = Some(items);
}
}
// Any other `items` value is parsed as a single schema. An array that
// reaches this arm (because `prefix_items` is already set) does not parse
// as a schema, so `schema.items` ends up `None`.
Some(v) => {
schema.items = parse_schema_with_root(v, root, base_uri, ctx.as_deref_mut(), depth + 1)
.map(Box::new);
}
None => {}
}
// `additionalItems` is only read alongside array-form `items` without `prefixItems`.
if obj.get("items").is_some_and(Value::is_array) && obj.get("prefixItems").is_none() {
schema.additional_items = parse_additional_properties(
obj.get("additionalItems"),
root,
base_uri,
ctx.as_deref_mut(),
depth,
);
}
// Last use of `ctx`, so it is moved rather than reborrowed.
schema.contains = obj
.get("contains")
.and_then(|v| parse_schema_with_root(v, root, base_uri, ctx, depth + 1))
.map(Box::new);
schema.min_items = obj.get("minItems").and_then(Value::as_u64);
schema.max_items = obj.get("maxItems").and_then(Value::as_u64);
schema.min_contains = obj.get("minContains").and_then(Value::as_u64);
schema.max_contains = obj.get("maxContains").and_then(Value::as_u64);
schema.unique_items = obj.get("uniqueItems").and_then(Value::as_bool);
}
/// Parses the combinator keywords `allOf`, `anyOf`, `oneOf`, `not`, `if`,
/// `then` and `else` from `obj` into `schema`, in that order.
///
/// Sub-schemas are parsed at `depth + 1`; a keyword that is absent or does
/// not parse leaves its field `None`.
fn parse_combinator_fields(
    obj: &serde_json::Map<String, Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
    schema: &mut JsonSchema,
) {
    schema.all_of = parse_schema_array(obj.get("allOf"), root, base_uri, ctx.as_deref_mut(), depth);
    schema.any_of = parse_schema_array(obj.get("anyOf"), root, base_uri, ctx.as_deref_mut(), depth);
    schema.one_of = parse_schema_array(obj.get("oneOf"), root, base_uri, ctx.as_deref_mut(), depth);

    // Shared by the four keywords that hold exactly one sub-schema.
    let mut boxed_subschema = |keyword: &str| -> Option<Box<JsonSchema>> {
        let raw = obj.get(keyword)?;
        parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1).map(Box::new)
    };

    schema.not = boxed_subschema("not");
    schema.if_schema = boxed_subschema("if");
    schema.then_schema = boxed_subschema("then");
    schema.else_schema = boxed_subschema("else");
}
fn parse_extension_fields(
obj: &serde_json::Map<String, Value>,
root: &Value,
base_uri: Option<&str>,
mut ctx: Option<&mut ParseContext<'_>>,
depth: usize,
schema: &mut JsonSchema,
) {
schema.unevaluated_properties = parse_additional_properties(
obj.get("unevaluatedProperties"),
root,
base_uri,
ctx.as_deref_mut(),
depth,
);
schema.unevaluated_items = obj
.get("unevaluatedItems")
.and_then(|v| parse_schema_with_root(v, root, base_uri, ctx.as_deref_mut(), depth + 1))
.map(Box::new);
let defs_04 = parse_definitions(
obj.get("definitions"),
root,
base_uri,
ctx.as_deref_mut(),
depth,
);
let defs_07 = parse_definitions(obj.get("$defs"), root, base_uri, ctx, depth);
schema.definitions = match (defs_04, defs_07) {
(Some(mut a), Some(b)) => {
a.extend(b);
Some(a)
}
(a, b) => a.or(b),
};
schema.vocabulary = obj
.get("$vocabulary")
.and_then(Value::as_object)
.map(|map| {
map.iter()
.filter_map(|(k, v)| v.as_bool().map(|b| (k.clone(), b)))
.collect()
});
}
/// Resolves `relative` against `base`.
///
/// When `relative` already parses as an absolute URL it is returned
/// unchanged (not normalized). Otherwise it is joined onto `base`; `None` is
/// returned when there is no base, the base does not parse, or the join fails.
fn resolve_uri(base: Option<&str>, relative: &str) -> Option<String> {
    match Url::parse(relative) {
        Ok(_) => Some(relative.to_string()),
        Err(_) => {
            let anchor = Url::parse(base?).ok()?;
            let joined = anchor.join(relative).ok()?;
            Some(joined.to_string())
        }
    }
}
/// Maps a `$schema` URI to the JSON Schema draft it identifies.
///
/// A single trailing `#` (empty fragment) is ignored for every draft, so
/// `…/draft-07/schema#` and `…/draft/2020-12/schema#` are recognised just like
/// their fragment-less forms. Any other URI yields [`SchemaDraft::Unknown`].
fn detect_draft(uri: &str) -> SchemaDraft {
    // An empty fragment identifies the same meta-schema resource.
    let canonical = uri.strip_suffix('#').unwrap_or(uri);
    match canonical {
        "http://json-schema.org/draft-04/schema" => SchemaDraft::Draft04,
        "http://json-schema.org/draft-06/schema" => SchemaDraft::Draft06,
        "http://json-schema.org/draft-07/schema" => SchemaDraft::Draft07,
        "https://json-schema.org/draft/2019-09/schema" => SchemaDraft::Draft201909,
        "https://json-schema.org/draft/2020-12/schema" => SchemaDraft::Draft202012,
        _ => SchemaDraft::Unknown,
    }
}
/// Core recursive parser: converts the JSON `value` into a [`JsonSchema`].
///
/// * `root` — document used to resolve local (`#…`) references.
/// * `base_uri` — base against which `$id` and relative references resolve.
/// * `ctx` — when `Some`, remote references may be fetched through it.
/// * `depth` — current recursion depth; parsing stops (returns `None`) once it
///   exceeds `MAX_REF_DEPTH`.
///
/// Returns `None` for anything that is not `true` or a JSON object.
fn parse_schema_with_root(
value: &Value,
root: &Value,
base_uri: Option<&str>,
mut ctx: Option<&mut ParseContext<'_>>,
depth: usize,
) -> Option<JsonSchema> {
if depth > MAX_REF_DEPTH {
return None;
}
match value {
// `true` is represented by an all-`None` schema.
Value::Bool(true) => return Some(JsonSchema::default()),
// `false` and every non-object value yield no schema here.
Value::Bool(false)
| Value::Null
| Value::Number(_)
| Value::String(_)
| Value::Array(_) => {
return None;
}
Value::Object(_) => {}
}
let obj = value.as_object()?;
let mut schema = JsonSchema::default();
// `$ref` short-circuits: sibling keywords are never parsed. On success the
// resolved target is returned as-is; on failure a schema carrying only
// `ref_path` is returned.
if let Some(Value::String(ref_str)) = obj.get("$ref") {
schema.ref_path = Some(ref_str.clone());
if let Some(resolved) = resolve_ref(ref_str, root, base_uri, ctx.as_deref_mut(), depth + 1)
{
return Some(resolved);
}
return Some(schema);
}
// `$dynamicRef` is resolved like `$ref`; if resolution fails, parsing
// continues with the remaining keywords of this object.
if let Some(Value::String(ref_str)) = obj.get("$dynamicRef") {
if let Some(resolved) = resolve_ref(ref_str, root, base_uri, ctx.as_deref_mut(), depth + 1)
{
return Some(resolved);
}
}
schema.draft = obj
.get("$schema")
.and_then(Value::as_str)
.map(detect_draft)
.unwrap_or_default();
// `$id` takes precedence over the legacy `id` keyword.
let raw_id = obj
.get("$id")
.or_else(|| obj.get("id"))
.and_then(Value::as_str);
// An identifier establishes a new base URI for everything nested below;
// if it cannot be resolved against the current base it is used verbatim.
let effective_base: Option<String> = if let Some(raw) = raw_id {
let resolved = resolve_uri(base_uri, raw).unwrap_or_else(|| raw.to_string());
schema.id = Some(resolved.clone());
Some(resolved)
} else {
base_uri.map(String::from)
};
let effective_base = effective_base.as_deref();
schema.schema_type = parse_type(obj.get("type"));
parse_scalar_fields(obj, &mut schema);
// Non-string members of `required` are ignored.
schema.required = obj.get("required").and_then(Value::as_array).map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
});
// `ctx` is reborrowed for every helper and moved into the final call.
parse_object_fields(
obj,
root,
effective_base,
ctx.as_deref_mut(),
depth,
&mut schema,
);
parse_array_fields(
obj,
root,
effective_base,
ctx.as_deref_mut(),
depth,
&mut schema,
);
schema.additional_properties = parse_additional_properties(
obj.get("additionalProperties"),
root,
effective_base,
ctx.as_deref_mut(),
depth,
);
schema.property_names = obj
.get("propertyNames")
.and_then(|v| {
parse_schema_with_root(v, root, effective_base, ctx.as_deref_mut(), depth + 1)
})
.map(Box::new);
let (dep_req, dep_sch) =
parse_dependencies(obj, root, effective_base, ctx.as_deref_mut(), depth);
schema.dependent_required = dep_req;
schema.dependent_schemas = dep_sch;
parse_combinator_fields(
obj,
root,
effective_base,
ctx.as_deref_mut(),
depth,
&mut schema,
);
parse_extension_fields(obj, root, effective_base, ctx, depth, &mut schema);
Some(schema)
}
/// Result of `parse_dependencies`: (`dependent_required`, `dependent_schemas`).
type ParsedDependencies = (
    Option<HashMap<String, Vec<String>>>,
    Option<HashMap<String, JsonSchema>>,
);

/// Collects dependency information from `dependencies`, `dependentRequired`
/// and `dependentSchemas`.
///
/// In `dependencies`, an array value contributes a list of required property
/// names and any other value is parsed as a schema. Entries from
/// `dependentRequired` / `dependentSchemas` are applied afterwards and replace
/// entries with the same key. Each half of the result is `None` when empty.
fn parse_dependencies(
    obj: &serde_json::Map<String, Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> ParsedDependencies {
    /// String members of a JSON array; `None` when `raw` is not an array.
    fn property_names(raw: &Value) -> Option<Vec<String>> {
        let members = raw.as_array()?;
        Some(
            members
                .iter()
                .filter_map(Value::as_str)
                .map(String::from)
                .collect(),
        )
    }

    let mut required_by: HashMap<String, Vec<String>> = HashMap::new();
    let mut schema_by: HashMap<String, JsonSchema> = HashMap::new();

    if let Some(Value::Object(mixed)) = obj.get("dependencies") {
        for (trigger, raw) in mixed {
            match property_names(raw) {
                Some(names) => {
                    required_by.insert(trigger.clone(), names);
                }
                None => {
                    if let Some(sub) = parse_schema_with_root(
                        raw,
                        root,
                        base_uri,
                        ctx.as_deref_mut(),
                        depth + 1,
                    ) {
                        schema_by.insert(trigger.clone(), sub);
                    }
                }
            }
        }
    }

    if let Some(Value::Object(lists)) = obj.get("dependentRequired") {
        for (trigger, raw) in lists {
            if let Some(names) = property_names(raw) {
                required_by.insert(trigger.clone(), names);
            }
        }
    }

    if let Some(Value::Object(schemas)) = obj.get("dependentSchemas") {
        for (trigger, raw) in schemas {
            if let Some(sub) =
                parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
            {
                schema_by.insert(trigger.clone(), sub);
            }
        }
    }

    (
        (!required_by.is_empty()).then_some(required_by),
        (!schema_by.is_empty()).then_some(schema_by),
    )
}
/// Interprets the `type` keyword.
///
/// A string becomes [`SchemaType::Single`]; an array becomes
/// [`SchemaType::Multiple`] containing its string members, or `None` when it
/// has no string members. Every other JSON type yields `None`.
fn parse_type(value: Option<&Value>) -> Option<SchemaType> {
    let raw = value?;
    if let Some(name) = raw.as_str() {
        return Some(SchemaType::Single(name.to_string()));
    }

    let names: Vec<String> = raw
        .as_array()?
        .iter()
        .filter_map(Value::as_str)
        .map(String::from)
        .collect();
    if names.is_empty() {
        return None;
    }
    Some(SchemaType::Multiple(names))
}
fn string_field(obj: &serde_json::Map<String, Value>, key: &str) -> Option<String> {
obj.get(key)?.as_str().map(String::from)
}
/// Parses a keyword that is either `false` or a sub-schema.
///
/// Returns `None` when the keyword is absent or its value does not parse as a
/// schema, [`AdditionalProperties::Denied`] for the literal `false`, and
/// [`AdditionalProperties::Schema`] otherwise. The sub-schema is parsed at
/// `depth + 1`.
fn parse_additional_properties(
    value: Option<&Value>,
    root: &Value,
    base_uri: Option<&str>,
    ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> Option<AdditionalProperties> {
    let raw = value?;
    if matches!(raw, Value::Bool(false)) {
        return Some(AdditionalProperties::Denied);
    }
    let sub = parse_schema_with_root(raw, root, base_uri, ctx, depth + 1)?;
    Some(AdditionalProperties::Schema(Box::new(sub)))
}
/// Parses a JSON array of sub-schemas (each at `depth + 1`).
///
/// Elements that do not parse are skipped. Returns `None` when the keyword is
/// absent, is not an array, or no element parses.
fn parse_schema_array(
    value: Option<&Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> Option<Vec<JsonSchema>> {
    let parsed: Vec<JsonSchema> = value?
        .as_array()?
        .iter()
        .filter_map(|raw| {
            parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
        })
        .collect();
    (!parsed.is_empty()).then_some(parsed)
}
/// Parses a JSON object whose values are sub-schemas (each at `depth + 1`)
/// into a name → schema map.
///
/// Entries that do not parse are skipped. Returns `None` when the keyword is
/// absent, is not an object, or no entry parses.
fn parse_definitions(
    value: Option<&Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> Option<HashMap<String, JsonSchema>> {
    let parsed: HashMap<String, JsonSchema> = value?
        .as_object()?
        .iter()
        .filter_map(|(name, raw)| {
            parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
                .map(|sub| (name.clone(), sub))
        })
        .collect();
    (!parsed.is_empty()).then_some(parsed)
}
/// Resolves a `$ref` / `$dynamicRef` string to the schema it points at.
///
/// * A reference starting with `#` is local: `#` alone is the document root,
///   `#/…` is a JSON Pointer into `root`, and `#name` is looked up as an
///   `$anchor` / `$dynamicAnchor`. Local targets are parsed without a base URI
///   and without remote access.
/// * Any other reference is remote and needs `ctx`. The URI part is resolved
///   against `base_uri`, validated, and fetched (or served from the cache);
///   the optional fragment is then applied to the fetched document exactly
///   like a local fragment. The remote target is parsed without `ctx`, so
///   remote references nested inside it are not followed.
///
/// Fragments are percent-decoded before use, as required for the URI fragment
/// representation of a JSON Pointer (RFC 6901 §6): `#/definitions/a%20b`
/// addresses the key `a b`.
///
/// Returns `None` when `depth` exceeds `MAX_REF_DEPTH`, the target does not
/// exist, the URL is not permitted, the fetch budget is exhausted, or the
/// fetch fails.
fn resolve_ref(
    ref_str: &str,
    root: &Value,
    base_uri: Option<&str>,
    ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> Option<JsonSchema> {
    /// Percent-decodes a URI fragment. Malformed escapes are kept verbatim,
    /// and the raw input is returned if decoding would not yield valid UTF-8.
    fn decode_fragment(raw: &str) -> std::borrow::Cow<'_, str> {
        use std::borrow::Cow;

        if !raw.contains('%') {
            return Cow::Borrowed(raw);
        }
        let bytes = raw.as_bytes();
        let hex_at = |idx: usize| {
            bytes
                .get(idx)
                .and_then(|b| char::from(*b).to_digit(16))
        };
        let mut decoded = Vec::with_capacity(bytes.len());
        let mut pos = 0;
        while let Some(&byte) = bytes.get(pos) {
            let escaped = if byte == b'%' {
                hex_at(pos + 1)
                    .zip(hex_at(pos + 2))
                    .and_then(|(hi, lo)| u8::try_from(hi * 16 + lo).ok())
            } else {
                None
            };
            match escaped {
                Some(value) => {
                    decoded.push(value);
                    pos += 3;
                }
                None => {
                    decoded.push(byte);
                    pos += 1;
                }
            }
        }
        String::from_utf8(decoded).map_or(Cow::Borrowed(raw), Cow::Owned)
    }

    if depth > MAX_REF_DEPTH {
        return None;
    }

    // Local reference: resolved entirely inside `root`.
    if let Some(raw_fragment) = ref_str.strip_prefix('#') {
        let fragment = decode_fragment(raw_fragment);
        if fragment.is_empty() {
            return parse_schema_with_root(root, root, None, None, depth + 1);
        }
        if fragment.starts_with('/') {
            let target = root.pointer(&fragment)?;
            return parse_schema_with_root(target, root, None, None, depth + 1);
        }
        return find_anchor_in_value(&fragment, root)
            .and_then(|v| parse_schema_with_root(v, root, None, None, depth + 1));
    }

    // Remote reference: only possible when a fetch context is available.
    let ctx = ctx?;
    let (uri_part, fragment) = ref_str
        .split_once('#')
        .map_or((ref_str, None), |(uri, frag)| (uri, Some(frag)));
    let absolute_uri = resolve_uri(base_uri, uri_part)?;
    let normalized = validate_and_normalize_url(&absolute_uri).ok()?;

    if !ctx.cache.contains(&normalized) {
        // Uncached documents consume fetch budget; a URL that was already
        // claimed but never cached (an earlier failed fetch) is not retried.
        if !ctx.try_visit(&normalized) {
            return None;
        }
        let (value, schema) = fetch_schema_raw(&normalized, ctx.proxy).ok()?;
        ctx.cache.insert(normalized.clone(), value, schema);
    }

    // `ctx` is not needed past this point, so the cached document can simply
    // be borrowed instead of cloned.
    let (remote_value, _) = ctx.cache.get_raw(&normalized)?;
    let fragment = fragment.map(decode_fragment);
    match fragment.as_deref() {
        None | Some("") => parse_schema_with_root(
            remote_value,
            remote_value,
            Some(&normalized),
            None,
            depth + 1,
        ),
        Some(frag) if frag.starts_with('/') => {
            let target = remote_value.pointer(frag)?;
            parse_schema_with_root(target, remote_value, Some(&normalized), None, depth + 1)
        }
        Some(name) => find_anchor_in_value(name, remote_value).and_then(|v| {
            parse_schema_with_root(v, remote_value, Some(&normalized), None, depth + 1)
        }),
    }
}
/// Depth-first search of `value` for the first object whose `$anchor` or
/// `$dynamicAnchor` equals `name`.
///
/// An object is checked before its children; children are visited in the
/// container's iteration order. Scalars never match.
fn find_anchor_in_value<'a>(name: &str, value: &'a Value) -> Option<&'a Value> {
    match value {
        Value::Object(fields) => {
            let declares =
                |keyword: &str| fields.get(keyword).and_then(Value::as_str) == Some(name);
            if declares("$anchor") || declares("$dynamicAnchor") {
                return Some(value);
            }
            fields
                .values()
                .find_map(|child| find_anchor_in_value(name, child))
        }
        Value::Array(elements) => elements
            .iter()
            .find_map(|child| find_anchor_in_value(name, child)),
        Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_) => None,
    }
}
/// Extracts the schema reference from a
/// `# yaml-language-server: $schema=<value>` modeline.
///
/// Only the first ten lines of `text` are inspected. Lines are trimmed before
/// matching, the value is trimmed as well, and a modeline with an empty value
/// is skipped in favour of a later one. The value is returned verbatim — it
/// is not validated as a URL.
#[must_use]
pub fn extract_schema_url(text: &str) -> Option<String> {
    const PREFIX: &str = "# yaml-language-server: $schema=";
    text.lines()
        .take(10)
        .filter_map(|line| line.trim().strip_prefix(PREFIX))
        .map(str::trim)
        .find(|candidate| !candidate.is_empty())
        .map(str::to_string)
}
/// Extracts the custom tag list from a
/// `# yaml-language-server: $tags=<a>,<b>,…` modeline.
///
/// Only the first ten lines of `text` are inspected and only the first
/// matching modeline is used, even if it lists no tags. Entries are trimmed
/// and empty entries are dropped. Returns an empty vector when there is no
/// modeline.
#[must_use]
pub fn extract_custom_tags(text: &str) -> Vec<String> {
    const PREFIX: &str = "# yaml-language-server: $tags=";
    text.lines()
        .take(10)
        .find_map(|line| line.trim().strip_prefix(PREFIX))
        .map(|listing| {
            listing
                .split(',')
                .map(str::trim)
                .filter(|tag| !tag.is_empty())
                .map(str::to_string)
                .collect()
        })
        .unwrap_or_default()
}
/// Detects a Kubernetes manifest by looking at the first YAML document.
///
/// Returns `(apiVersion, kind)` when the first document is a mapping that
/// contains both keys with string values. Entries whose key or value is not a
/// plain string scalar are ignored; if a key occurs more than once the last
/// occurrence is used.
#[must_use]
pub fn detect_kubernetes_resource(docs: &[saphyr::YamlOwned]) -> Option<(String, String)> {
    use saphyr::{ScalarOwned, YamlOwned};

    let YamlOwned::Mapping(entries) = docs.first()? else {
        return None;
    };

    let mut api_version: Option<String> = None;
    let mut kind: Option<String> = None;
    for (raw_key, raw_value) in entries {
        // Only string → string entries are of interest.
        let (
            YamlOwned::Value(ScalarOwned::String(name)),
            YamlOwned::Value(ScalarOwned::String(text)),
        ) = (raw_key, raw_value)
        else {
            continue;
        };
        match name.as_str() {
            "apiVersion" => api_version = Some(text.clone()),
            "kind" => kind = Some(text.clone()),
            _ => {}
        }
    }

    let api_version = api_version?;
    let kind = kind?;
    Some((api_version, kind))
}
/// Builds the URL of the strict standalone JSON Schema for a Kubernetes
/// resource in the `yannh/kubernetes-json-schema` repository.
///
/// * `api_version` — the manifest's `apiVersion`, e.g. `v1`, `apps/v1` or
///   `networking.k8s.io/v1`.
/// * `kind` — the manifest's `kind`; it is lower-cased for the file name.
/// * `k8s_version` — `"master"`, or a version without the leading `v`
///   (e.g. `1.29.0`).
///
/// File names in that repository use only the first dot-separated label of
/// the API group, so `networking.k8s.io/v1` + `Ingress` maps to
/// `ingress-networking-v1.json`.
#[must_use]
pub fn kubernetes_schema_url(api_version: &str, kind: &str, k8s_version: &str) -> String {
    let kind_lower = kind.to_lowercase();
    let filename = match api_version.split_once('/') {
        Some((group, version)) => {
            // `rbac.authorization.k8s.io` → `rbac`; un-dotted groups are unchanged.
            let short_group = group.split('.').next().unwrap_or(group);
            format!("{kind_lower}-{short_group}-{version}.json")
        }
        None => format!("{kind_lower}-{api_version}.json"),
    };
    let dir_prefix = if k8s_version == "master" {
        "master-standalone-strict".to_string()
    } else {
        format!("v{k8s_version}-standalone-strict")
    };
    format!(
        "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{dir_prefix}/{filename}"
    )
}
/// Returns the URL of the first association whose glob pattern matches
/// `filename`, or `None` when none matches.
#[must_use]
pub fn match_schema_by_filename(
    filename: &str,
    associations: &[SchemaAssociation],
) -> Option<String> {
    associations.iter().find_map(|candidate| {
        glob_matches(&candidate.pattern, filename).then(|| candidate.url.clone())
    })
}
/// Matches `text` against a minimal glob `pattern`.
///
/// Supported syntax:
/// - `*` matches any run of bytes (possibly empty) that contains no `/`;
/// - `**` matches any run of bytes including `/`; a `/` directly after `**`
///   is part of the wildcard, so `**/x` also matches plain `x`;
/// - every other byte matches itself.
///
/// The whole of `text` must be consumed for a match.
fn glob_matches(pattern: &str, text: &str) -> bool {
    glob_matches_inner(pattern.as_bytes(), text.as_bytes())
}

/// Byte-level recursive matcher behind [`glob_matches`].
fn glob_matches_inner(pattern: &[u8], text: &[u8]) -> bool {
    // Tries `rest` against every suffix of `text` starting at 0..=limit.
    let any_suffix_matches = |rest: &[u8], limit: usize| {
        (0..=limit).any(|start| {
            text.get(start..)
                .is_some_and(|tail| glob_matches_inner(rest, tail))
        })
    };

    match pattern {
        [] => text.is_empty(),
        [b'*', b'*', rest @ ..] => {
            let rest = rest.strip_prefix(b"/").unwrap_or(rest);
            any_suffix_matches(rest, text.len())
        }
        [b'*', rest @ ..] => {
            // A single `*` may not cross a `/`. Locate the first separator once
            // instead of re-scanning the consumed prefix for every candidate.
            let limit = text
                .iter()
                .position(|&byte| byte == b'/')
                .unwrap_or(text.len());
            any_suffix_matches(rest, limit)
        }
        [expected, rest @ ..] => match text {
            [actual, tail @ ..] if actual == expected => glob_matches_inner(rest, tail),
            _ => false,
        },
    }
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
fn schema_type_str(s: &JsonSchema) -> Option<&str> {
match s.schema_type.as_ref()? {
SchemaType::Single(t) => Some(t.as_str()),
SchemaType::Multiple(_) => None,
}
}
#[test]
fn should_extract_url_from_modeline_on_first_line() {
let text = "# yaml-language-server: $schema=https://example.com/schema.json\nkey: value\n";
assert_eq!(
extract_schema_url(text),
Some("https://example.com/schema.json".to_string())
);
}
#[test]
fn should_extract_url_from_modeline_on_second_line() {
let text = "key: value\n# yaml-language-server: $schema=https://example.com/schema.json\n";
assert_eq!(
extract_schema_url(text),
Some("https://example.com/schema.json".to_string())
);
}
#[test]
fn should_extract_url_from_modeline_on_tenth_line() {
let mut text = String::new();
for _ in 0..9 {
text.push_str("key: value\n");
}
text.push_str("# yaml-language-server: $schema=https://example.com/schema.json\n");
assert_eq!(
extract_schema_url(&text),
Some("https://example.com/schema.json".to_string())
);
}
#[test]
fn should_return_none_when_modeline_beyond_tenth_line() {
let mut text = String::new();
for _ in 0..10 {
text.push_str("key: value\n");
}
text.push_str("# yaml-language-server: $schema=https://example.com/schema.json\n");
assert_eq!(extract_schema_url(&text), None);
}
#[test]
fn should_return_none_when_no_modeline_present() {
let text = "key: value\nother: stuff\n";
assert_eq!(extract_schema_url(text), None);
}
/// A modeline using a space instead of `=` after `$schema` is not accepted.
#[test]
fn should_return_none_for_malformed_modeline_missing_equals() {
    let source = "# yaml-language-server: $schema https://example.com/schema.json\n";
    assert!(extract_schema_url(source).is_none());
}
/// A comment prefixed `yaml-ls:` rather than `yaml-language-server:` is ignored.
#[test]
fn should_return_none_for_modeline_with_wrong_prefix() {
    let source = "# yaml-ls: $schema=https://example.com/schema.json\n";
    assert!(extract_schema_url(source).is_none());
}
/// Whitespace between `=` and the URL does not end up in the extracted value.
#[test]
fn should_handle_modeline_with_extra_leading_whitespace_in_url() {
    let source = "# yaml-language-server: $schema= https://example.com/schema.json\n";
    let found = extract_schema_url(source);
    assert_eq!(found.as_deref(), Some("https://example.com/schema.json"));
}
/// Plain `http://` URLs are extracted verbatim.
#[test]
fn should_extract_http_url() {
    let source = "# yaml-language-server: $schema=http://example.com/schema.json\n";
    let found = extract_schema_url(source);
    assert_eq!(found.as_deref(), Some("http://example.com/schema.json"));
}
/// `file://` URLs are extracted verbatim by the modeline parser.
#[test]
fn should_extract_file_url() {
    let source = "# yaml-language-server: $schema=file:///path/to/schema.json\n";
    let found = extract_schema_url(source);
    assert_eq!(found.as_deref(), Some("file:///path/to/schema.json"));
}
/// Empty text contains no modeline.
#[test]
fn should_return_none_for_empty_input() {
    let found = extract_schema_url("");
    assert!(found.is_none());
}
/// The lowercase `none` value is returned as-is.
#[test]
fn should_extract_none_sentinel_lowercase() {
    let source = "# yaml-language-server: $schema=none\nkey: value\n";
    assert_eq!(extract_schema_url(source).as_deref(), Some("none"));
}
/// The mixed-case `None` value is returned with its casing preserved.
#[test]
fn should_extract_none_sentinel_mixed_case() {
    let source = "# yaml-language-server: $schema=None\nkey: value\n";
    assert_eq!(extract_schema_url(source).as_deref(), Some("None"));
}
/// The uppercase `NONE` value is returned with its casing preserved.
#[test]
fn should_extract_none_sentinel_uppercase() {
    let source = "# yaml-language-server: $schema=NONE\nkey: value\n";
    assert_eq!(extract_schema_url(source).as_deref(), Some("NONE"));
}
/// A `$tags=` modeline with one entry yields that single tag.
#[test]
fn should_extract_single_tag_from_modeline() {
    let source = "# yaml-language-server: $tags=!include\nkey: value\n";
    let tags = extract_custom_tags(source);
    assert_eq!(tags, ["!include"]);
}
/// Comma-separated tags come back in order, with casing preserved.
#[test]
fn should_extract_multiple_tags_from_modeline() {
    let source = "# yaml-language-server: $tags=!include,!ref,!Ref\nkey: value\n";
    let tags = extract_custom_tags(source);
    assert_eq!(tags, ["!include", "!ref", "!Ref"]);
}
/// Spaces around each comma-separated tag are stripped.
#[test]
fn should_trim_whitespace_around_tags() {
    let source = "# yaml-language-server: $tags= !include , !ref \nkey: value\n";
    let tags = extract_custom_tags(source);
    assert_eq!(tags, ["!include", "!ref"]);
}
/// Without a `$tags=` modeline no custom tags are reported.
#[test]
fn should_return_empty_vec_when_no_tags_modeline() {
    let source = "key: value\nother: stuff\n";
    assert!(extract_custom_tags(source).is_empty());
}
/// A `$tags=` modeline on line eleven is ignored.
#[test]
fn should_return_empty_vec_when_tags_modeline_beyond_line_10() {
    let mut source = "key: value\n".repeat(10);
    source.push_str("# yaml-language-server: $tags=!include\n");
    assert!(extract_custom_tags(&source).is_empty());
}
/// Empty text yields no custom tags.
#[test]
fn should_return_empty_vec_for_empty_input() {
    let tags = extract_custom_tags("");
    assert!(tags.is_empty());
}
/// A `$tags=` modeline is honoured when it appears on the second line.
#[test]
fn should_extract_tags_from_modeline_on_second_line() {
    let source = "key: value\n# yaml-language-server: $tags=!include,!ref\n";
    let tags = extract_custom_tags(source);
    assert_eq!(tags, ["!include", "!ref"]);
}
/// Builds a `SchemaAssociation` from borrowed pattern and URL strings.
fn assoc(pattern: &str, url: &str) -> SchemaAssociation {
    let pattern = String::from(pattern);
    let url = String::from(url);
    SchemaAssociation { pattern, url }
}
/// A pattern equal to the filename selects that association's URL.
#[test]
fn should_return_url_for_exact_filename_match() {
    let table = [assoc(
        "config.yaml",
        "https://example.com/config-schema.json",
    )];
    let matched = match_schema_by_filename("config.yaml", &table);
    assert_eq!(
        matched.as_deref(),
        Some("https://example.com/config-schema.json")
    );
}
/// A `*.yaml` pattern matches a bare YAML filename.
#[test]
fn should_return_url_for_glob_wildcard_match() {
    let table = [assoc("*.yaml", "https://example.com/generic.json")];
    let matched = match_schema_by_filename("myfile.yaml", &table);
    assert_eq!(matched.as_deref(), Some("https://example.com/generic.json"));
}
/// A `**` segment in the pattern matches an intermediate directory.
#[test]
fn should_return_url_for_double_star_glob_match() {
    let table = [assoc(
        "configs/**/*.yaml",
        "https://example.com/schema.json",
    )];
    let matched = match_schema_by_filename("configs/nested/file.yaml", &table);
    assert_eq!(matched.as_deref(), Some("https://example.com/schema.json"));
}
/// A `*.json` pattern does not select a `.yaml` file.
#[test]
fn should_return_none_when_no_association_matches() {
    let table = [assoc("*.json", "https://example.com/schema.json")];
    assert!(match_schema_by_filename("myfile.yaml", &table).is_none());
}
/// With no associations configured nothing can match.
#[test]
fn should_return_none_for_empty_associations() {
    let matched = match_schema_by_filename("myfile.yaml", &[]);
    assert!(matched.is_none());
}
/// When two associations share a pattern, the earlier one wins.
#[test]
fn should_return_first_matching_association_when_multiple_match() {
    let first = assoc("*.yaml", "https://example.com/first.json");
    let second = assoc("*.yaml", "https://example.com/second.json");
    let table = [first, second];
    let matched = match_schema_by_filename("test.yaml", &table);
    assert_eq!(matched.as_deref(), Some("https://example.com/first.json"));
}
/// A literal pattern must match the whole filename, not just a suffix.
#[test]
fn should_not_match_partial_filename() {
    let table = [assoc("config.yaml", "https://example.com/schema.json")];
    let matched = match_schema_by_filename("my-config.yaml", &table);
    assert!(matched.is_none());
}
/// The smallest useful schema, `{"type": "object"}`, parses with its type set.
#[test]
fn should_parse_minimal_object_schema() {
    let raw = json!({"type": "object"});
    let schema = parse_schema(&raw).expect("should parse");
    let declared = schema_type_str(&schema);
    assert_eq!(declared, Some("object"));
}
/// Each entry under `properties` is parsed into its own sub-schema.
#[test]
fn should_parse_schema_with_properties() {
    let raw = json!({"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}});
    let schema = parse_schema(&raw).expect("should parse");
    let props = schema.properties.as_ref().expect("should have properties");
    for (key, expected) in [("name", "string"), ("age", "integer")] {
        let sub = props.get(key).expect(key);
        assert_eq!(schema_type_str(sub), Some(expected));
    }
}
/// Names listed under `required` are all retained.
#[test]
fn should_parse_required_fields() {
    let raw = json!({"type": "object", "required": ["name", "age"]});
    let schema = parse_schema(&raw).expect("should parse");
    let required = schema.required.as_ref().expect("should have required");
    for field in ["name", "age"] {
        assert!(required.contains(&field.to_string()));
    }
}
/// All three `enum` members are kept as JSON values.
#[test]
fn should_parse_enum_values() {
    let raw = json!({"type": "string", "enum": ["alpha", "beta", "gamma"]});
    let schema = parse_schema(&raw).expect("should parse");
    let members = schema.enum_values.as_ref().expect("should have enum");
    assert_eq!(members.len(), 3);
    for label in ["alpha", "beta", "gamma"] {
        assert!(members.contains(&json!(label)));
    }
}
/// The `description` keyword is carried onto the parsed schema.
#[test]
fn should_parse_description() {
    let raw = json!({"type": "string", "description": "A human-readable name"});
    let schema = parse_schema(&raw).expect("should parse");
    let description = schema.description.as_deref();
    assert_eq!(description, Some("A human-readable name"));
}
/// The `default` keyword is stored as the raw JSON value.
#[test]
fn should_parse_default_value() {
    let raw = json!({"type": "integer", "default": 42});
    let schema = parse_schema(&raw).expect("should parse");
    let expected = json!(42);
    assert_eq!(schema.default.as_ref(), Some(&expected));
}
/// A single-schema `items` keyword is parsed into the `items` field.
#[test]
fn should_parse_array_schema_with_items() {
    let raw = json!({"type": "array", "items": {"type": "string"}});
    let schema = parse_schema(&raw).expect("should parse");
    let element = schema.items.as_ref().expect("should have items");
    assert_eq!(schema_type_str(element), Some("string"));
}
/// `additionalProperties: false` maps to the `Denied` variant.
#[test]
fn should_parse_additional_properties_false() {
    let raw = json!({"type": "object", "additionalProperties": false});
    let schema = parse_schema(&raw).expect("should parse");
    let denied = matches!(
        schema.additional_properties,
        Some(AdditionalProperties::Denied)
    );
    assert!(denied);
}
/// An object-valued `additionalProperties` maps to the `Schema` variant.
#[test]
fn should_parse_additional_properties_as_schema() {
    let raw = json!({"type": "object", "additionalProperties": {"type": "string"}});
    let schema = parse_schema(&raw).expect("should parse");
    let is_schema = matches!(
        schema.additional_properties,
        Some(AdditionalProperties::Schema(_))
    );
    assert!(is_schema);
}
/// `minProperties` and `maxProperties` are both read as numeric bounds.
#[test]
fn should_parse_min_properties_and_max_properties() {
    let raw = json!({"type": "object", "minProperties": 1, "maxProperties": 5});
    let schema = parse_schema(&raw).expect("should parse");
    let lower = schema.min_properties;
    let upper = schema.max_properties;
    assert_eq!(lower, Some(1));
    assert_eq!(upper, Some(5));
}
/// Array-form `items` populates `prefix_items`, and `additionalItems: false`
/// alongside it maps to `Denied`.
#[test]
fn should_parse_additional_items_false() {
    let raw = json!({"items": [{"type": "string"}], "additionalItems": false});
    let schema = parse_schema(&raw).expect("should parse");
    assert!(schema.prefix_items.is_some());
    let denied = matches!(
        schema.additional_items,
        Some(AdditionalProperties::Denied)
    );
    assert!(denied);
}
/// An object-valued `additionalItems` next to array-form `items` maps to `Schema`.
#[test]
fn should_parse_additional_items_schema() {
    let raw = json!({"items": [{"type": "string"}], "additionalItems": {"type": "integer"}});
    let schema = parse_schema(&raw).expect("should parse");
    let is_schema = matches!(
        schema.additional_items,
        Some(AdditionalProperties::Schema(_))
    );
    assert!(is_schema);
}
/// `additionalItems` is ignored when the tuple comes from the `prefixItems` key.
#[test]
fn should_not_parse_additional_items_when_prefix_items_set_from_prefix_items_key() {
    let raw = json!({"prefixItems": [{"type": "string"}], "additionalItems": false});
    let schema = parse_schema(&raw).expect("should parse");
    let extra = &schema.additional_items;
    assert!(extra.is_none());
}
/// `additionalItems` is ignored when there is no array-form `items`.
#[test]
fn should_not_parse_additional_items_when_no_array_items() {
    let raw = json!({"type": "array", "additionalItems": false});
    let schema = parse_schema(&raw).expect("should parse");
    let extra = &schema.additional_items;
    assert!(extra.is_none());
}
/// Both `allOf` branches are parsed.
#[test]
fn should_parse_all_of() {
    let raw = json!({"allOf": [{"type": "object"}, {"required": ["name"]}]});
    let schema = parse_schema(&raw).expect("should parse");
    let branch_count = schema.all_of.as_ref().map(|branches| branches.len());
    assert_eq!(branch_count, Some(2));
}
/// Both `anyOf` branches are parsed.
#[test]
fn should_parse_any_of() {
    let raw = json!({"anyOf": [{"type": "string"}, {"type": "integer"}]});
    let schema = parse_schema(&raw).expect("should parse");
    let branch_count = schema.any_of.as_ref().map(|branches| branches.len());
    assert_eq!(branch_count, Some(2));
}
/// Both `oneOf` branches are parsed.
#[test]
fn should_parse_one_of() {
    let raw = json!({"oneOf": [{"type": "string"}, {"type": "null"}]});
    let schema = parse_schema(&raw).expect("should parse");
    let branch_count = schema.one_of.as_ref().map(|branches| branches.len());
    assert_eq!(branch_count, Some(2));
}
/// JSON `null` is not a schema.
#[test]
fn should_return_none_for_null_input() {
    let parsed = parse_schema(&Value::Null);
    assert!(parsed.is_none());
}
/// A bare JSON string is not a schema.
#[test]
fn should_return_none_for_non_object_json() {
    let raw = Value::String(String::from("not a schema"));
    let parsed = parse_schema(&raw);
    assert!(parsed.is_none());
}
/// `{}` parses successfully and leaves type, properties and required unset.
#[test]
fn should_parse_empty_object_as_permissive_schema() {
    let raw = json!({});
    let schema = parse_schema(&raw).expect("should parse");
    let has_type = schema.schema_type.is_some();
    let has_props = schema.properties.is_some();
    let has_required = schema.required.is_some();
    assert!(!has_type);
    assert!(!has_props);
    assert!(!has_required);
}
/// The boolean schema `true` parses to a schema with no type constraint.
#[test]
fn should_parse_boolean_true_schema() {
    let raw = Value::Bool(true);
    let schema = parse_schema(&raw).expect("should return Some for true");
    assert!(schema.schema_type.is_none());
}
/// The boolean schema `false` yields `None` from `parse_schema`.
#[test]
fn should_parse_boolean_false_schema() {
    let raw = Value::Bool(false);
    let parsed = parse_schema(&raw);
    assert!(parsed.is_none());
}
/// Entries under the `definitions` keyword land in `definitions`.
#[test]
fn should_parse_draft04_definitions() {
    let raw = json!({"definitions": {"addr": {"type": "string"}}});
    let schema = parse_schema(&raw).expect("should parse");
    let table = schema.definitions.as_ref().expect("should have definitions");
    assert!(table.contains_key("addr"));
}
/// Entries under the `$defs` keyword also land in `definitions`.
#[test]
fn should_parse_draft07_defs() {
    let raw = json!({"$defs": {"addr": {"type": "string"}}});
    let schema = parse_schema(&raw).expect("should parse");
    let table = schema.definitions.as_ref().expect("should have $defs");
    assert!(table.contains_key("addr"));
}
/// `deprecated: true` is carried onto the parsed schema.
#[test]
fn should_parse_deprecated_true() {
    let raw = json!({"type": "string", "deprecated": true});
    let schema = parse_schema(&raw).expect("should parse");
    let flag = schema.deprecated;
    assert_eq!(flag, Some(true));
}
/// A root-level `$ref` into `definitions` resolves to the target's type.
#[test]
fn should_resolve_simple_local_ref() {
    let raw = json!({
        "$ref": "#/definitions/MyType",
        "definitions": {"MyType": {"type": "string"}}
    });
    let resolved = parse_schema(&raw).expect("should resolve");
    let declared = schema_type_str(&resolved);
    assert_eq!(declared, Some("string"));
}
/// Parses a schema whose `$ref` names a definition that does not exist.
///
/// The result is discarded without any assertion, so this only checks that
/// the call returns without panicking.
#[test]
fn should_return_none_for_missing_ref_target() {
    let dangling = json!({"$ref": "#/definitions/Missing"});
    let _ = parse_schema(&dangling);
}
/// A `$ref` inside a property resolves against the root `definitions`.
#[test]
fn should_handle_nested_ref_resolution() {
    let raw = json!({
        "type": "object",
        "properties": {
            "foo": {"$ref": "#/definitions/Bar"}
        },
        "definitions": {"Bar": {"type": "integer"}}
    });
    let schema = parse_schema(&raw).expect("should parse");
    let foo = schema
        .properties
        .as_ref()
        .expect("should have properties")
        .get("foo")
        .expect("should have foo");
    assert_eq!(schema_type_str(foo), Some("integer"));
}
/// Parses a definition that refers to itself.
///
/// The result is discarded; the test passes as long as the call returns.
#[test]
fn should_not_infinite_loop_on_circular_ref() {
    let self_referential = json!({
        "$ref": "#/definitions/A",
        "definitions": {
            "A": {"$ref": "#/definitions/A"}
        }
    });
    let _ = parse_schema(&self_referential);
}
/// A freshly created cache has no entry for an arbitrary URL.
#[test]
fn should_return_none_on_cache_miss() {
    let cache = SchemaCache::new();
    let lookup = cache.get("https://example.com/schema.json");
    assert!(lookup.is_none());
}
/// A schema inserted under a URL can be read back under the same URL.
#[test]
fn should_return_cached_schema_on_cache_hit() {
    let key = "https://example.com/schema.json";
    let mut stored = JsonSchema::default();
    stored.description = Some("test".to_string());

    let mut cache = SchemaCache::new();
    cache.insert(key.to_string(), Value::Null, stored);

    let hit = cache.get(key).expect("should be cached");
    assert_eq!(hit.description.as_deref(), Some("test"));
}
/// Inserting twice under one URL keeps the first schema.
#[test]
fn should_not_overwrite_existing_cache_entry() {
    let key = "https://example.com/schema.json";
    let mut earlier = JsonSchema::default();
    earlier.description = Some("first".to_string());
    let mut later = JsonSchema::default();
    later.description = Some("second".to_string());

    let mut cache = SchemaCache::new();
    cache.insert(key.to_string(), Value::Null, earlier);
    cache.insert(key.to_string(), Value::Null, later);

    let hit = cache.get(key).expect("should be cached");
    assert_eq!(hit.description.as_deref(), Some("first"));
}
/// Fetching from `127.0.0.1:19999` must produce an error.
#[test]
fn should_return_error_for_unreachable_url() {
    let outcome = fetch_schema("http://127.0.0.1:19999/nonexistent.json", None);
    assert!(outcome.is_err());
}
/// Runs a literal response body through the deserialise, depth-check and
/// parse steps and inspects the resulting schema.
#[test]
fn should_parse_fetched_schema_from_valid_response() {
    let body =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
    let value: Value =
        serde_json::from_slice(body.as_bytes()).expect("valid JSON should deserialise");
    check_json_depth(&value, 0).expect("shallow schema should pass depth check");

    let schema = parse_schema(&value).expect("should produce a schema");
    assert_eq!(schema_type_str(&schema), Some("object"));

    let props = schema.properties.as_ref().expect("should have properties");
    assert!(props.contains_key("name"));
    let name_schema = props.get("name").expect("name");
    assert_eq!(schema_type_str(name_schema), Some("string"));

    let required = schema.required.as_ref().expect("should have required");
    assert!(required.contains(&"name".to_string()));
}
/// `file://` URLs are refused by URL validation.
#[test]
fn should_reject_file_scheme_url() {
    let outcome = validate_and_normalize_url("file:///etc/passwd");
    assert!(outcome.is_err());
}
/// The `localhost` host name is refused by URL validation.
#[test]
fn should_reject_localhost_url() {
    let outcome = validate_and_normalize_url("http://localhost/schema.json");
    assert!(outcome.is_err());
}
/// The link-local address `169.254.169.254` is refused by URL validation.
#[test]
fn should_reject_link_local_ip_url() {
    let outcome = validate_and_normalize_url("http://169.254.169.254/latest/meta-data/");
    assert!(outcome.is_err());
}
/// `fetch_schema` returns an error for a loopback address.
#[test]
fn should_reject_loopback_ip_in_fetch() {
    let outcome = fetch_schema("http://127.0.0.1:8080/schema.json", None);
    assert!(outcome.is_err());
}
/// A URL with a 2050-character path segment is refused.
#[test]
fn should_reject_url_exceeding_max_length() {
    let mut long_url = String::from("https://example.com/");
    long_url.push_str(&"a".repeat(2050));
    let outcome = validate_and_normalize_url(&long_url);
    assert!(outcome.is_err());
}
/// Upper-case scheme and host normalise to the same value as lower-case.
#[test]
fn should_normalize_cache_key_url() {
    let lower = validate_and_normalize_url("https://example.com/schema").expect("valid");
    let upper = validate_and_normalize_url("HTTPS://EXAMPLE.COM/schema").expect("valid");
    assert_eq!(lower, upper, "scheme+host should be normalized to lowercase");
}
/// Parses a schema wrapped in 100 levels of `properties.x`.
///
/// The result is discarded; the test passes as long as the call returns.
#[test]
fn should_reject_excessively_nested_schema() {
    let nested = (0..100).fold(json!({"type": "string"}), |inner, _| {
        json!({"type": "object", "properties": {"x": inner}})
    });
    let _ = parse_schema(&nested);
}
/// `file://` URLs are refused by URL validation.
#[test]
fn should_reject_file_scheme_url_47() {
    let outcome = validate_and_normalize_url("file:///etc/passwd");
    assert!(outcome.is_err());
}
/// The `localhost` host name is refused by URL validation.
#[test]
fn should_reject_localhost_url_48() {
    let outcome = validate_and_normalize_url("http://localhost/schema.json");
    assert!(outcome.is_err());
}
/// The IPv4 loopback address is refused by URL validation.
#[test]
fn should_reject_loopback_ip_url() {
    let outcome = validate_and_normalize_url("http://127.0.0.1/schema.json");
    assert!(outcome.is_err());
}
/// The IPv6 loopback address `::1` is refused by URL validation.
#[test]
fn should_reject_ipv6_loopback_url() {
    let outcome = validate_and_normalize_url("http://[::1]/schema.json");
    assert!(outcome.is_err());
}
/// The `169.254.169.254` metadata-style URL is refused by URL validation.
#[test]
fn should_reject_link_local_aws_metadata_url() {
    let outcome = validate_and_normalize_url("http://169.254.169.254/latest/meta-data/");
    assert!(outcome.is_err());
}
/// A URL with a 2048-character path segment (plus the prefix) is refused.
#[test]
fn should_reject_url_exceeding_max_length_52() {
    let mut long_url = String::from("https://example.com/");
    long_url.push_str(&"a".repeat(2048));
    let outcome = validate_and_normalize_url(&long_url);
    assert!(outcome.is_err());
}
/// An ordinary public `https://` URL passes validation.
#[test]
fn should_accept_valid_https_url() {
    let candidate = "https://schemastore.azurewebsites.net/schemas/json/package.json";
    let outcome = validate_and_normalize_url(candidate);
    assert!(outcome.is_ok(), "valid https URL should be accepted");
}
/// An ordinary public `http://` URL passes validation.
#[test]
fn should_accept_valid_http_url() {
    let candidate = "http://json.schemastore.org/package";
    let outcome = validate_and_normalize_url(candidate);
    assert!(outcome.is_ok(), "valid http URL should be accepted");
}
/// Boundary case for the response size limit: a body of exactly
/// `MAX_SCHEMA_BYTES` bytes must not satisfy the over-limit condition.
///
/// The body is read through a reader capped at `MAX_SCHEMA_BYTES + 1` bytes,
/// the same capped read the sibling over-limit test uses, so the assertion
/// exercises a real read instead of comparing a freshly built `Vec` against
/// its own length.
// NOTE(review): presumably the production fetch path uses the same
// `take(MAX_SCHEMA_BYTES + 1)` cap — confirm against `fetch_schema`.
#[test]
fn should_return_error_when_response_exceeds_size_limit() {
    use std::io::Read as _;
    let limit = usize::try_from(MAX_SCHEMA_BYTES).expect("size limit fits in usize");
    let body = vec![b'x'; limit];
    let mut limited = std::io::Cursor::new(&body).take(MAX_SCHEMA_BYTES + 1);
    let mut buf = Vec::new();
    limited.read_to_end(&mut buf).expect("read succeeds");
    // The whole body must come through: the cap sits one byte above the limit.
    assert_eq!(buf.len(), body.len(), "capped read must not truncate an at-limit body");
    assert!(
        buf.len() as u64 <= MAX_SCHEMA_BYTES,
        "exactly MAX_SCHEMA_BYTES bytes must not trigger ResponseTooLarge"
    );
}
/// A body one byte over `MAX_SCHEMA_BYTES`, read through a reader capped at
/// `MAX_SCHEMA_BYTES + 1`, ends up longer than the limit.
#[test]
fn should_return_error_when_response_exceeds_size_limit_over() {
    use std::io::{Cursor, Read};
    let oversized = vec![b'x'; MAX_SCHEMA_BYTES as usize + 1];
    let mut received = Vec::new();
    Cursor::new(&oversized)
        .take(MAX_SCHEMA_BYTES + 1)
        .read_to_end(&mut received)
        .expect("read succeeds");
    let received_len = received.len() as u64;
    assert!(
        received_len > MAX_SCHEMA_BYTES,
        "over-limit read should trigger ResponseTooLarge condition"
    );
}
/// Parses a schema wrapped in 60 levels of `properties.child`.
///
/// The result is discarded; the test passes as long as the call returns.
#[test]
fn should_reject_schema_exceeding_nesting_depth() {
    let nested = (0..60).fold(json!({"type": "string"}), |inner, _| {
        json!({"type": "object", "properties": {"child": inner}})
    });
    let _ = parse_schema(&nested);
}
/// A schema nested 10 levels deep still parses.
#[test]
fn should_accept_schema_within_nesting_depth() {
    let nested = (0..10).fold(json!({"type": "string"}), |inner, _| {
        json!({"type": "object", "properties": {"child": inner}})
    });
    let parsed = parse_schema(&nested);
    assert!(
        parsed.is_some(),
        "schema within depth limit should be accepted"
    );
}
/// Parses two definitions that refer to each other.
///
/// The result is discarded; the test passes as long as the call returns.
#[test]
fn should_not_hang_on_two_node_circular_ref() {
    let cyclic = json!({
        "$ref": "#/definitions/A",
        "definitions": {
            "A": {"$ref": "#/definitions/B"},
            "B": {"$ref": "#/definitions/A"}
        }
    });
    let _ = parse_schema(&cyclic);
}
/// URLs that differ only by a trailing slash normalise to different values.
#[test]
fn should_normalize_cache_key_trailing_slash() {
    let bare = validate_and_normalize_url("https://example.com/schema").expect("valid");
    let slashed = validate_and_normalize_url("https://example.com/schema/").expect("valid");
    assert_ne!(
        bare, slashed,
        "trailing-slash variants are distinct paths and must not share a cache key"
    );
}
/// An upper-case host normalises to the same value as its lower-case form.
#[test]
fn should_normalize_cache_key_host_case() {
    let from_upper = validate_and_normalize_url("https://EXAMPLE.COM/schema").expect("valid");
    let from_lower = validate_and_normalize_url("https://example.com/schema").expect("valid");
    assert_eq!(
        from_upper, from_lower,
        "host should be normalized to lowercase in cache key"
    );
}
// Starts a local tiny_http server that answers its first request with a 302
// and a `Location` header, then checks that the agent from `build_agent`
// reports the 302 status itself.
#[test]
fn should_not_follow_redirects() {
// Port 0 in the bind address; the actual address is read back from the server below.
let server = tiny_http::Server::http("127.0.0.1:0").unwrap();
let addr = server.server_addr().to_ip().unwrap();
let url = format!("http://{addr}/schema.json");
let redirect_target = format!("http://{addr}/redirected");
// The responder thread serves a single request; its handle is not joined.
std::thread::spawn(move || {
if let Ok(req) = server.recv() {
let location =
tiny_http::Header::from_bytes(b"Location", redirect_target.as_bytes()).unwrap();
let response = tiny_http::Response::empty(302).with_header(location);
let _ = req.respond(response);
}
});
// This request bypasses `validate_and_normalize_url`: the agent is called directly.
let agent = build_agent(None);
let response = agent.get(&url).call().expect("request should succeed");
assert_eq!(
response.status(),
302,
"agent must return 302 without following the redirect"
);
}
/// `FetchFailed` displays both its fixed prefix and the wrapped detail.
#[test]
fn schema_error_display_fetch_failed() {
    let msg = SchemaError::FetchFailed("connection refused".to_string()).to_string();
    for fragment in ["fetch failed", "connection refused"] {
        assert!(msg.contains(fragment), "got: {msg}");
    }
}
/// `ResponseTooLarge` mentions the size limit in its display text.
#[test]
fn schema_error_display_response_too_large() {
    let msg = SchemaError::ResponseTooLarge.to_string();
    assert!(msg.contains("size limit"), "got: {msg}");
}
/// `ParseFailed` displays both its fixed prefix and the wrapped detail.
#[test]
fn schema_error_display_parse_failed() {
    let msg = SchemaError::ParseFailed("unexpected token".to_string()).to_string();
    for fragment in ["parse failed", "unexpected token"] {
        assert!(msg.contains(fragment), "got: {msg}");
    }
}
/// `TooDeep` mentions depth in its display text.
#[test]
fn schema_error_display_too_deep() {
    let msg = SchemaError::TooDeep.to_string();
    assert!(msg.contains("depth"), "got: {msg}");
}
/// `UrlNotPermitted` says "not permitted" in its display text.
#[test]
fn schema_error_display_url_not_permitted() {
    let msg = SchemaError::UrlNotPermitted("ftp://bad".to_string()).to_string();
    assert!(msg.contains("not permitted"), "got: {msg}");
}
/// Addresses in `10.0.0.0/8` are refused.
#[test]
fn should_reject_private_ipv4_10_range() {
    let outcome = validate_and_normalize_url("http://10.0.0.1/schema.json");
    assert!(outcome.is_err(), "private 10.x.x.x must be rejected");
}
/// Addresses in `192.168.0.0/16` are refused.
#[test]
fn should_reject_private_ipv4_192_168_range() {
    let outcome = validate_and_normalize_url("http://192.168.1.1/schema.json");
    assert!(outcome.is_err(), "private 192.168.x.x must be rejected");
}
/// The private address `172.16.0.1` is refused.
#[test]
fn should_reject_private_ipv4_172_16_range() {
    let outcome = validate_and_normalize_url("http://172.16.0.1/schema.json");
    assert!(outcome.is_err(), "private 172.16.x.x must be rejected");
}
/// The unspecified IPv4 address `0.0.0.0` is refused.
#[test]
fn should_reject_unspecified_ipv4_0_0_0_0() {
    let outcome = validate_and_normalize_url("http://0.0.0.0/schema.json");
    assert!(outcome.is_err(), "unspecified 0.0.0.0 must be rejected");
}
/// The unspecified IPv6 address `::` is refused.
#[test]
fn should_reject_ipv6_unspecified_double_colon() {
    let outcome = validate_and_normalize_url("http://[::]/schema.json");
    assert!(outcome.is_err(), "IPv6 unspecified :: must be rejected");
}
/// The IPv6 link-local address `fe80::1` is refused.
#[test]
fn should_reject_ipv6_link_local_fe80() {
    let outcome = validate_and_normalize_url("http://[fe80::1]/schema.json");
    assert!(outcome.is_err(), "IPv6 link-local fe80:: must be rejected");
}
/// `ftp://` URLs are refused, and the error text names the scheme.
#[test]
fn should_reject_ftp_scheme() {
    let outcome = validate_and_normalize_url("ftp://example.com/schema.json");
    assert!(outcome.is_err(), "ftp:// scheme must be rejected");
    let msg = outcome.unwrap_err().to_string();
    let mentions_scheme = msg.contains("ftp");
    assert!(
        mentions_scheme,
        "error message should mention the scheme, got: {msg}"
    );
}
/// Text that is not a URL at all is refused.
#[test]
fn should_reject_unparseable_url() {
    let outcome = validate_and_normalize_url("not a url at all");
    assert!(outcome.is_err(), "unparseable string must be rejected");
}
/// The IPv6 unique-local address `fd00::1` is refused.
#[test]
fn should_reject_ipv6_ula_fd00() {
    let outcome = validate_and_normalize_url("http://[fd00::1]/schema.json");
    assert!(outcome.is_err(), "IPv6 ULA fd00:: must be rejected");
}
/// The IPv6 unique-local address `fc00::1` is refused.
#[test]
fn should_reject_ipv6_ula_fc00() {
    let outcome = validate_and_normalize_url("http://[fc00::1]/schema.json");
    assert!(outcome.is_err(), "IPv6 ULA fc00:: must be rejected");
}
/// An IPv4-mapped IPv6 form of a private address is refused.
#[test]
fn should_reject_ipv4_mapped_private() {
    let outcome = validate_and_normalize_url("http://[::ffff:192.168.1.1]/schema.json");
    let rejected = outcome.is_err();
    assert!(rejected, "IPv4-mapped private address must be rejected");
}
/// An IPv4-mapped IPv6 form of the loopback address is refused.
#[test]
fn should_reject_ipv4_mapped_loopback() {
    let outcome = validate_and_normalize_url("http://[::ffff:127.0.0.1]/schema.json");
    let rejected = outcome.is_err();
    assert!(rejected, "IPv4-mapped loopback address must be rejected");
}
/// An IPv4-mapped IPv6 form of a public address passes validation.
#[test]
fn should_allow_ipv4_mapped_public() {
    let candidate = "http://[::ffff:8.8.8.8]/schema.json";
    let result = validate_and_normalize_url(candidate);
    assert!(
        result.is_ok(),
        "IPv4-mapped public address must be allowed: {result:?}"
    );
}
/// A numeric `type` keyword leaves `schema_type` unset.
#[test]
fn parse_type_returns_none_for_non_string_non_array() {
    let raw = json!({"type": 42});
    let schema = parse_schema(&raw).expect("should parse as object schema");
    let unset = schema.schema_type.is_none();
    assert!(unset, "non-string/non-array type should yield None");
}
/// An empty `type` array leaves `schema_type` unset.
#[test]
fn parse_type_returns_none_for_empty_type_array() {
    let raw = json!({"type": []});
    let schema = parse_schema(&raw).expect("should parse");
    let unset = schema.schema_type.is_none();
    assert!(unset, "empty type array should yield None schema_type");
}
/// A `type` array mixing a number and a string still yields a schema type.
#[test]
fn parse_type_filters_non_string_items_from_array() {
    let raw = json!({"type": [42, "string"]});
    let schema = parse_schema(&raw).expect("should parse");
    let present = schema.schema_type.is_some();
    assert!(present, "string item should survive filtering");
}
/// A definition whose `$ref` is `#` does not stop the root from parsing as an object.
#[test]
fn ref_pointing_to_root_returns_parsed_root() {
    let raw = json!({
        "definitions": {
            "Root": {"$ref": "#"}
        },
        "type": "object"
    });
    let schema = parse_schema(&raw).expect("should parse");
    let declared = schema_type_str(&schema);
    assert_eq!(declared, Some("object"));
}
/// For a `$ref` that is an absolute URL, a parsed schema (if one is
/// returned) carries the URL in `ref_path`. A `None` result also passes.
#[test]
fn ref_without_hash_prefix_yields_ref_path_only() {
    let raw = json!({"$ref": "http://example.com/other-schema.json"});
    if let Some(schema) = parse_schema(&raw) {
        let target = schema.ref_path.as_deref();
        assert_eq!(target, Some("http://example.com/other-schema.json"));
    }
}
/// An empty `allOf` array leaves `all_of` unset.
#[test]
fn empty_all_of_array_yields_none() {
    let raw = json!({"allOf": []});
    let schema = parse_schema(&raw).expect("should parse");
    let unset = schema.all_of.is_none();
    assert!(unset, "empty allOf should yield None");
}
/// An `allOf` containing only a bare string leaves `all_of` unset.
#[test]
fn all_of_with_non_object_entries_filtered_out_yields_none() {
    let raw = json!({"allOf": ["not a schema"]});
    let schema = parse_schema(&raw).expect("should parse");
    let unset = schema.all_of.is_none();
    assert!(unset, "allOf with only invalid entries should yield None");
}
/// An empty `definitions` object leaves `definitions` unset.
#[test]
fn empty_definitions_object_yields_none() {
    let raw = json!({"definitions": {}});
    let schema = parse_schema(&raw).expect("should parse");
    let unset = schema.definitions.is_none();
    assert!(unset, "empty definitions should yield None");
}
/// Entries from `definitions` and `$defs` end up in one combined table.
#[test]
fn both_definitions_and_defs_are_merged() {
    let raw = json!({
        "definitions": {"TypeA": {"type": "string"}},
        "$defs": {"TypeB": {"type": "integer"}}
    });
    let schema = parse_schema(&raw).expect("should parse");
    let merged = schema
        .definitions
        .as_ref()
        .expect("should have merged definitions");
    let has_a = merged.contains_key("TypeA");
    let has_b = merged.contains_key("TypeB");
    assert!(has_a, "TypeA from definitions should be present");
    assert!(has_b, "TypeB from $defs should be present");
}
/// `additionalProperties: true` maps to the `Schema` variant.
#[test]
fn additional_properties_true_parsed_as_permissive_schema() {
    let raw = json!({"type": "object", "additionalProperties": true});
    let schema = parse_schema(&raw).expect("should parse");
    let is_schema = matches!(
        schema.additional_properties,
        Some(AdditionalProperties::Schema(_))
    );
    assert!(
        is_schema,
        "additionalProperties: true should yield Schema variant"
    );
}
/// A string wrapped in 55 nested arrays fails the depth check.
#[test]
fn check_json_depth_rejects_deeply_nested_array() {
    let nested = (0..55).fold(json!("leaf"), |inner, _| json!([inner]));
    let outcome = check_json_depth(&nested, 0);
    assert!(
        outcome.is_err(),
        "deeply nested array should exceed depth limit"
    );
}
/// A flat three-element array passes the depth check.
#[test]
fn check_json_depth_accepts_shallow_array() {
    let flat = json!(["a", "b", "c"]);
    let outcome = check_json_depth(&flat, 0);
    assert!(outcome.is_ok());
}
/// Non-string members of `required` are dropped; the string one is kept.
#[test]
fn required_with_non_string_values_filtered() {
    let raw = json!({"required": [42, "name", true]});
    let schema = parse_schema(&raw).expect("should parse");
    let required = schema.required.as_ref().expect("should have required");
    let kept = required.len();
    assert_eq!(kept, 1, "only string 'name' should survive filtering");
    assert!(required.contains(&"name".to_string()));
}
/// Loads `text` as YAML documents via saphyr, yielding an empty list when
/// loading fails.
fn parse_docs(text: &str) -> Vec<saphyr::YamlOwned> {
    use saphyr::LoadableYamlNode;
    match saphyr::YamlOwned::load_from_str(text) {
        Ok(documents) => documents,
        Err(_) => Vec::new(),
    }
}
/// A core-API `Pod` document is detected, and its schema URL has no group segment.
#[test]
fn should_detect_core_api_pod_and_build_url() {
    let docs = parse_docs("apiVersion: v1\nkind: Pod\n");
    let detected = detect_kubernetes_resource(&docs);
    assert_eq!(detected, Some((String::from("v1"), String::from("Pod"))));
    assert_eq!(
        kubernetes_schema_url("v1", "Pod", "1.29.0"),
        "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/v1.29.0-standalone-strict/pod-v1.json"
    );
}
/// An `apps/v1` `Deployment` is detected, and its schema URL includes the group.
#[test]
fn should_detect_grouped_api_deployment_and_build_url() {
    let docs = parse_docs("apiVersion: apps/v1\nkind: Deployment\n");
    let detected = detect_kubernetes_resource(&docs);
    let expected = (String::from("apps/v1"), String::from("Deployment"));
    assert_eq!(detected, Some(expected));
    assert_eq!(
        kubernetes_schema_url("apps/v1", "Deployment", "1.29.0"),
        "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/v1.29.0-standalone-strict/deployment-apps-v1.json"
    );
}
/// An `autoscaling/v2` `HorizontalPodAutoscaler` is detected, and its kind is
/// lower-cased in the schema URL.
#[test]
fn should_detect_hpa_and_build_url() {
    let docs = parse_docs("apiVersion: autoscaling/v2\nkind: HorizontalPodAutoscaler\n");
    let detected = detect_kubernetes_resource(&docs);
    let expected = (
        String::from("autoscaling/v2"),
        String::from("HorizontalPodAutoscaler"),
    );
    assert_eq!(detected, Some(expected));
    assert_eq!(
        kubernetes_schema_url("autoscaling/v2", "HorizontalPodAutoscaler", "1.29.0"),
        "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/v1.29.0-standalone-strict/horizontalpodautoscaler-autoscaling-v2.json"
    );
}
/// A document with `kind` but no `apiVersion` is not detected.
#[test]
fn should_return_none_when_api_version_missing() {
    let docs = parse_docs("kind: Pod\nmetadata:\n name: test\n");
    let detected = detect_kubernetes_resource(&docs);
    assert!(detected.is_none());
}
/// A document with `apiVersion` but no `kind` is not detected.
#[test]
fn should_return_none_when_kind_missing() {
    let docs = parse_docs("apiVersion: v1\nmetadata:\n name: test\n");
    let detected = detect_kubernetes_resource(&docs);
    assert!(detected.is_none());
}
/// With no documents at all nothing is detected.
#[test]
fn should_return_none_for_empty_docs() {
    let detected = detect_kubernetes_resource(&[]);
    assert!(detected.is_none());
}
/// A resource appearing only in the second document is not detected.
#[test]
fn should_inspect_only_first_document() {
    let docs = parse_docs("other: value\n---\napiVersion: v1\nkind: Pod\n");
    let detected = detect_kubernetes_resource(&docs);
    assert!(detected.is_none());
}
/// A mapping-valued `apiVersion` and a sequence-valued `kind` are not detected.
#[test]
fn should_return_none_when_api_version_or_kind_is_non_string() {
    let docs = parse_docs("apiVersion:\n nested: true\nkind:\n - item\n");
    let detected = detect_kubernetes_resource(&docs);
    assert!(detected.is_none());
}
/// The version `master` is used without a `v` prefix for a core-API kind.
#[test]
fn should_build_url_with_master_standalone_strict_for_core_api() {
    let expected = "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/master-standalone-strict/pod-v1.json";
    assert_eq!(kubernetes_schema_url("v1", "Pod", "master"), expected);
}
/// The version `master` is used without a `v` prefix for a grouped-API kind.
#[test]
fn should_build_url_with_master_standalone_strict_for_grouped_api() {
    let expected = "https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/master-standalone-strict/deployment-apps-v1.json";
    assert_eq!(
        kubernetes_schema_url("apps/v1", "Deployment", "master"),
        expected
    );
}
/// `Master` with a capital M is treated like a version number and gets the `v` prefix.
#[test]
fn should_treat_capitalised_master_as_versioned_prefix() {
    let url = kubernetes_schema_url("v1", "Pod", "Master");
    let has_prefixed_segment = url.contains("vMaster-standalone-strict/");
    assert!(
        has_prefixed_segment,
        "expected vMaster-standalone-strict/ in URL, got: {url}"
    );
}
/// Builds a catalog-shaped JSON document from `(url, fileMatch patterns)` pairs.
///
/// Every entry gets the fixed name `"Schema Name"`; the entries are wrapped
/// in a top-level `schemas` array.
fn make_catalog_json(schemas: &[(&str, &[&str])]) -> Value {
    let mut schemas_json: Vec<Value> = Vec::with_capacity(schemas.len());
    for (url, patterns) in schemas {
        schemas_json.push(json!({
            "name": "Schema Name",
            "url": url,
            "fileMatch": patterns
        }));
    }
    json!({ "schemas": schemas_json })
}
/// An entry whose only pattern is `*.yaml` is kept with its URL and pattern.
#[test]
fn should_parse_catalog_entry_with_yaml_pattern() {
    let raw = make_catalog_json(&[("https://example.com/schema.json", &["*.yaml"])]);
    let catalog = parse_schemastore_catalog(&raw).expect("should parse");
    assert_eq!(catalog.entries.len(), 1);
    let entry = &catalog.entries[0];
    assert_eq!(entry.url, "https://example.com/schema.json");
    assert_eq!(entry.file_match, vec!["*.yaml"]);
}
/// An entry whose only pattern is `*.json` is dropped.
#[test]
fn should_filter_out_entry_with_only_json_patterns() {
    let raw = make_catalog_json(&[("https://example.com/schema.json", &["*.json"])]);
    let catalog = parse_schemastore_catalog(&raw).expect("should parse");
    let kept = catalog.entries.len();
    assert_eq!(kept, 0, "JSON-only entries must be excluded");
}
/// An entry with both JSON and YAML patterns is kept with only the YAML pattern.
#[test]
fn should_keep_entry_with_mixed_yaml_and_json_patterns() {
    let raw = make_catalog_json(&[(
        "https://example.com/schema.json",
        &["*.json", "docker-compose.yml"],
    )]);
    let catalog = parse_schemastore_catalog(&raw).expect("should parse");
    assert_eq!(catalog.entries.len(), 1);
    let entry = &catalog.entries[0];
    assert_eq!(entry.file_match, vec!["docker-compose.yml"]);
}
/// A `*.yml` pattern counts as YAML, so the entry is kept.
#[test]
fn should_keep_entry_with_yml_extension_pattern() {
    let raw = make_catalog_json(&[("https://example.com/schema.json", &["*.yml"])]);
    let catalog = parse_schemastore_catalog(&raw).expect("should parse");
    let kept = catalog.entries.len();
    assert_eq!(kept, 1);
}
/// An entry with no `fileMatch` key is dropped.
#[test]
fn should_skip_entry_without_file_match() {
    let raw = json!({
        "schemas": [
            { "name": "No FileMatch", "url": "https://example.com/schema.json" }
        ]
    });
    let catalog = parse_schemastore_catalog(&raw).expect("should parse catalog");
    let kept = catalog.entries.len();
    assert_eq!(kept, 0, "entry missing fileMatch must be skipped");
}
/// A catalog with an empty `schemas` array parses to zero entries.
#[test]
fn should_parse_empty_catalog() {
    let raw = json!({ "schemas": [] });
    let catalog = parse_schemastore_catalog(&raw).expect("should parse");
    let kept = catalog.entries.len();
    assert_eq!(kept, 0);
}
/// A top-level JSON array is not a catalog.
#[test]
fn should_return_none_for_non_object_catalog() {
    let raw = json!(["not", "an", "object"]);
    let parsed = parse_schemastore_catalog(&raw);
    assert!(parsed.is_none());
}
/// An object without a `schemas` key is not a catalog.
#[test]
fn should_return_none_for_catalog_missing_schemas_key() {
    let raw = json!({ "other": "data" });
    let parsed = parse_schemastore_catalog(&raw);
    assert!(parsed.is_none());
}
/// An entry whose `url` is the empty string is dropped.
#[test]
fn should_skip_entry_with_empty_url() {
    let raw = json!({
        "schemas": [
            { "name": "Empty URL", "url": "", "fileMatch": ["*.yaml"] }
        ]
    });
    let catalog = parse_schemastore_catalog(&raw).expect("should parse catalog");
    let kept = catalog.entries.len();
    assert_eq!(kept, 0, "entry with empty url must be skipped");
}
/// Of three entries, the two with YAML patterns are kept and the JSON-only one is dropped.
#[test]
fn should_filter_multiple_entries_correctly() {
    let raw = make_catalog_json(&[
        (
            "https://example.com/workflow.json",
            &["**/.github/workflows/*.yml"],
        ),
        ("https://example.com/compose.json", &["docker-compose.yaml"]),
        ("https://example.com/package.json", &["package.json"]),
    ]);
    let catalog = parse_schemastore_catalog(&raw).expect("should parse");
    let kept = catalog.entries.len();
    assert_eq!(kept, 2);
}
// A path under `.github/workflows/` with a `.yml` extension matches the
// workflow glob, so the entry's URL is returned.
#[test]
fn should_return_url_for_matching_filename() {
    let workflow_entry = SchemaStoreEntry {
        url: String::from("https://example.com/workflow.json"),
        file_match: vec![String::from("**/.github/workflows/*.yml")],
    };
    let catalog = SchemaStoreCatalog {
        entries: vec![workflow_entry],
    };
    let matched = match_schemastore(".github/workflows/ci.yml", &catalog);
    assert_eq!(
        matched.as_deref(),
        Some("https://example.com/workflow.json")
    );
}
// A file name that does not match the only entry's glob yields `None`.
#[test]
fn should_return_none_when_no_catalog_entry_matches() {
    let workflow_entry = SchemaStoreEntry {
        url: String::from("https://example.com/workflow.json"),
        file_match: vec![String::from("**/.github/workflows/*.yml")],
    };
    let catalog = SchemaStoreCatalog {
        entries: vec![workflow_entry],
    };
    let matched = match_schemastore("docker-compose.yaml", &catalog);
    assert!(matched.is_none());
}
// When two entries carry the same pattern, the URL of the one listed first
// is the one returned.
#[test]
fn should_return_first_matching_catalog_entry() {
    let urls = [
        "https://example.com/first.json",
        "https://example.com/second.json",
    ];
    let catalog = SchemaStoreCatalog {
        entries: urls
            .iter()
            .map(|url| SchemaStoreEntry {
                url: (*url).to_string(),
                file_match: vec!["*.yaml".to_string()],
            })
            .collect(),
    };
    let matched = match_schemastore("config.yaml", &catalog);
    assert_eq!(matched.as_deref(), Some("https://example.com/first.json"));
}
// With no entries in the catalog there is nothing to match against.
#[test]
fn should_return_none_for_empty_catalog() {
    let catalog = SchemaStoreCatalog {
        entries: Vec::new(),
    };
    let matched = match_schemastore("config.yaml", &catalog);
    assert!(matched.is_none());
}
// An entry matches as soon as one of its patterns matches; here the file
// name equals the second of three patterns.
#[test]
fn should_match_if_any_file_match_pattern_matches() {
    let patterns = ["docker-compose.yml", "docker-compose.yaml", "compose.yaml"];
    let compose_entry = SchemaStoreEntry {
        url: "https://example.com/compose.json".to_string(),
        file_match: patterns.iter().map(|p| (*p).to_string()).collect(),
    };
    let catalog = SchemaStoreCatalog {
        entries: vec![compose_entry],
    };
    let matched = match_schemastore("docker-compose.yaml", &catalog);
    assert_eq!(matched.as_deref(), Some("https://example.com/compose.json"));
}
// Smoke test: building an agent with no proxy must return without panicking.
// The agent itself is not inspected.
#[test]
fn build_agent_without_proxy_does_not_panic() {
    let no_proxy = None;
    let _agent = build_agent(no_proxy);
}
// Smoke test: building an agent with a well-formed proxy URL must return
// without panicking. The agent itself is not inspected.
#[test]
fn build_agent_with_valid_proxy_does_not_panic() {
    let proxy_url = "http://proxy.example.com:8080";
    let _agent = build_agent(Some(proxy_url));
}
// Smoke test: a string that is not a proxy URL must not make `build_agent`
// panic. Only the absence of a panic is checked; what the returned agent
// falls back to is not asserted here.
#[test]
fn build_agent_with_invalid_proxy_falls_back_gracefully() {
    let bogus_proxy = "not-a-valid-proxy-url";
    let _agent = build_agent(Some(bogus_proxy));
}
// A `dependencies` entry whose value is an array of property names must be
// exposed through `dependent_required`, leaving `dependent_schemas` unset.
#[test]
fn draft04_dependencies_array_maps_to_dependent_required() {
    let doc = json!({
        "type": "object",
        "dependencies": {
            "credit_card": ["billing_address", "billing_zip"]
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let required_by = schema.dependent_required.unwrap();
    let card_deps = required_by.get("credit_card").unwrap();
    for expected in ["billing_address", "billing_zip"] {
        assert!(card_deps.contains(&expected.to_string()));
    }
    assert!(schema.dependent_schemas.is_none());
}
// A `dependencies` entry whose value is a schema object must be exposed
// through `dependent_schemas`, leaving `dependent_required` unset.
#[test]
fn draft04_dependencies_object_maps_to_dependent_schemas() {
    let doc = json!({
        "type": "object",
        "dependencies": {
            "name": { "required": ["age"] }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let schemas_by_property = schema.dependent_schemas.unwrap();
    let name_dep = schemas_by_property.get("name").unwrap();
    let expected_required = vec![String::from("age")];
    assert_eq!(name_dep.required, Some(expected_required));
    assert!(schema.dependent_required.is_none());
}
// When both `dependencies` and `dependentRequired` define the same key, the
// value from `dependentRequired` is the one that ends up on the schema.
#[test]
fn draft2019_dependent_required_overrides_draft04() {
    let doc = json!({
        "dependencies": { "a": ["b"] },
        "dependentRequired": { "a": ["c"] }
    });
    let schema = parse_schema(&doc).unwrap();
    let required_by = schema.dependent_required.unwrap();
    let expected = vec!["c".to_string()];
    assert_eq!(required_by.get("a").unwrap(), &expected);
}
// `$ref: "#item"` must resolve to the subschema that declares
// `$anchor: "item"`, so the property ends up with that subschema's type.
#[test]
fn ref_resolves_named_anchor() {
    let doc = json!({
        "type": "object",
        "properties": {
            "foo": { "$ref": "#item" }
        },
        "$defs": {
            "Item": {
                "$anchor": "item",
                "type": "string"
            }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let props = schema.properties.as_ref().unwrap();
    let foo = props.get("foo").unwrap();
    let expected = SchemaType::Single("string".to_string());
    assert_eq!(foo.schema_type, Some(expected));
}
// A plain `$ref: "#loop"` must also find a subschema that declares the name
// through `$dynamicAnchor` rather than `$anchor`.
#[test]
fn ref_resolves_dynamic_anchor() {
    let doc = json!({
        "type": "object",
        "properties": {
            "bar": { "$ref": "#loop" }
        },
        "$defs": {
            "Node": {
                "$dynamicAnchor": "loop",
                "type": "integer"
            }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let props = schema.properties.as_ref().unwrap();
    let bar = props.get("bar").unwrap();
    let expected = SchemaType::Single("integer".to_string());
    assert_eq!(bar.schema_type, Some(expected));
}
// `$dynamicRef: "#node"` must resolve to the subschema declaring
// `$dynamicAnchor: "node"`.
#[test]
fn dynamic_ref_resolves_to_dynamic_anchor() {
    let doc = json!({
        "type": "object",
        "properties": {
            "val": { "$dynamicRef": "#node" }
        },
        "$defs": {
            "Node": {
                "$dynamicAnchor": "node",
                "type": "boolean"
            }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let props = schema.properties.as_ref().unwrap();
    let val = props.get("val").unwrap();
    let expected = SchemaType::Single("boolean".to_string());
    assert_eq!(val.schema_type, Some(expected));
}
// A `$ref` to an anchor that does not exist still parses; the schema keeps
// the raw reference in `ref_path` and carries no type.
#[test]
fn ref_returns_schema_with_ref_path_when_anchor_not_found() {
    let doc = json!({ "$ref": "#nonexistent" });
    let schema = parse_schema(&doc).unwrap();
    assert_eq!(schema.ref_path.as_deref(), Some("#nonexistent"));
    assert!(schema.schema_type.is_none());
}
// The anchor lookup must find an `$anchor` that sits two levels below
// `$defs` (inside a definition's own `properties`).
#[test]
fn ref_resolves_anchor_nested_inside_definitions() {
    let doc = json!({
        "$defs": {
            "outer": {
                "type": "object",
                "properties": {
                    "inner": {
                        "$anchor": "nested",
                        "type": "number"
                    }
                }
            }
        },
        "properties": {
            "x": { "$ref": "#nested" }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let props = schema.properties.as_ref().unwrap();
    let x = props.get("x").unwrap();
    let expected = SchemaType::Single("number".to_string());
    assert_eq!(x.schema_type, Some(expected));
}
// JSON-pointer style references (`#/$defs/Name`) keep resolving alongside
// the anchor-based lookups exercised by the neighbouring tests.
#[test]
fn json_pointer_ref_still_resolves_correctly() {
    let doc = json!({
        "properties": {
            "name": { "$ref": "#/$defs/Name" }
        },
        "$defs": {
            "Name": { "type": "string" }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let props = schema.properties.as_ref().unwrap();
    let name = props.get("name").unwrap();
    let expected = SchemaType::Single("string".to_string());
    assert_eq!(name.schema_type, Some(expected));
}
// The `$anchor` keyword is copied verbatim into `JsonSchema::anchor`.
#[test]
fn anchor_field_stored_on_schema() {
    let doc = json!({ "$anchor": "myanchor", "type": "string" });
    let schema = parse_schema(&doc).unwrap();
    assert_eq!(schema.anchor.as_deref(), Some("myanchor"));
}
// The `$dynamicAnchor` keyword is copied verbatim into
// `JsonSchema::dynamic_anchor`.
#[test]
fn dynamic_anchor_field_stored_on_schema() {
    let doc = json!({ "$dynamicAnchor": "myloop", "type": "array" });
    let schema = parse_schema(&doc).unwrap();
    assert_eq!(schema.dynamic_anchor.as_deref(), Some("myloop"));
}
// A `$vocabulary` made only of 2020-12 vocabulary URIs (required or not)
// is stored on the schema and yields no warnings.
#[test]
fn known_vocabularies_produce_no_warnings() {
    let doc = json!({
        "$vocabulary": {
            "https://json-schema.org/draft/2020-12/vocab/core": true,
            "https://json-schema.org/draft/2020-12/vocab/validation": true,
            "https://json-schema.org/draft/2020-12/vocab/applicator": false
        }
    });
    let schema = parse_schema(&doc).unwrap();
    assert!(schema.vocabulary.is_some());
    assert!(check_vocabulary(&schema).is_empty());
}
// A vocabulary URI outside the 2019-09/2020-12 set that is marked as
// required (`true`) yields exactly one warning, and that warning names the URI.
#[test]
fn unknown_required_vocabulary_produces_warning() {
    let doc = json!({
        "$vocabulary": {
            "https://example.com/my-custom-vocab": true
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let issued = check_vocabulary(&schema);
    assert_eq!(issued.len(), 1);
    let only_warning = &issued[0];
    assert!(only_warning.contains("https://example.com/my-custom-vocab"));
}
// An unrecognised vocabulary that is marked optional (`false`) is tolerated
// silently.
#[test]
fn unknown_optional_vocabulary_produces_no_warning() {
    let doc = json!({
        "$vocabulary": {
            "https://example.com/optional-vocab": false
        }
    });
    let schema = parse_schema(&doc).unwrap();
    assert!(check_vocabulary(&schema).is_empty());
}
// Without a `$vocabulary` keyword the field stays `None` and the vocabulary
// check has nothing to report.
#[test]
fn schema_without_vocabulary_has_none_and_no_warnings() {
    let doc = json!({ "type": "object" });
    let schema = parse_schema(&doc).unwrap();
    assert!(schema.vocabulary.is_none());
    assert!(check_vocabulary(&schema).is_empty());
}
// Declaring `$vocabulary` must not prevent sibling keywords (`type`,
// `properties`) from being parsed.
#[test]
fn vocabulary_does_not_interfere_with_other_fields() {
    let doc = json!({
        "$vocabulary": {
            "https://json-schema.org/draft/2020-12/vocab/core": true
        },
        "type": "object",
        "properties": {
            "name": { "type": "string" }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    assert!(schema.vocabulary.is_some());
    assert!(schema.properties.is_some());
    let expected = SchemaType::Single("object".to_string());
    assert_eq!(schema.schema_type, Some(expected));
}
// The draft-04 meta-schema URI with a trailing `#` maps to `Draft04`.
#[test]
fn draft04_uri_with_hash_is_detected() {
    let meta_uri = "http://json-schema.org/draft-04/schema#";
    let doc = json!({ "$schema": meta_uri, "type": "object" });
    let parsed = parse_schema(&doc).unwrap();
    assert_eq!(parsed.draft, SchemaDraft::Draft04);
}
// The draft-07 meta-schema URI with a trailing `#` maps to `Draft07`.
#[test]
fn draft07_uri_with_hash_is_detected() {
    let meta_uri = "http://json-schema.org/draft-07/schema#";
    let doc = json!({ "$schema": meta_uri, "type": "object" });
    let parsed = parse_schema(&doc).unwrap();
    assert_eq!(parsed.draft, SchemaDraft::Draft07);
}
// The 2020-12 meta-schema URI maps to `Draft202012`.
#[test]
fn draft202012_uri_is_detected() {
    let meta_uri = "https://json-schema.org/draft/2020-12/schema";
    let doc = json!({ "$schema": meta_uri, "type": "object" });
    let parsed = parse_schema(&doc).unwrap();
    assert_eq!(parsed.draft, SchemaDraft::Draft202012);
}
// With no `$schema` keyword present, the draft is reported as `Unknown`.
#[test]
fn schema_without_dollar_schema_defaults_to_unknown() {
    let doc = json!({ "type": "object" });
    let parsed = parse_schema(&doc).unwrap();
    assert_eq!(parsed.draft, SchemaDraft::Unknown);
}
// A `$schema` URI that is not one of the recognised meta-schemas is reported
// as `Unknown` rather than failing the parse.
#[test]
fn unrecognized_schema_uri_defaults_to_unknown() {
    let meta_uri = "https://example.com/my-custom-schema";
    let doc = json!({ "$schema": meta_uri, "type": "object" });
    let parsed = parse_schema(&doc).unwrap();
    assert_eq!(parsed.draft, SchemaDraft::Unknown);
}
// `$schema` is honoured on a nested subschema too: the property's own
// `draft` reflects the URI declared inside it.
#[test]
fn sub_schema_dollar_schema_is_parsed() {
    let doc = json!({
        "type": "object",
        "properties": {
            "nested": {
                "$schema": "https://json-schema.org/draft/2019-09/schema",
                "type": "string"
            }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let nested = schema.properties.as_ref().unwrap().get("nested").unwrap();
    assert_eq!(nested.draft, SchemaDraft::Draft201909);
}
// The draft-04 meta-schema URI is recognised even without the trailing `#`.
#[test]
fn draft04_uri_without_hash_is_detected() {
    let meta_uri = "http://json-schema.org/draft-04/schema";
    let doc = json!({ "$schema": meta_uri, "type": "object" });
    let parsed = parse_schema(&doc).unwrap();
    assert_eq!(parsed.draft, SchemaDraft::Draft04);
}
// The draft-06 meta-schema URI with a trailing `#` maps to `Draft06`.
#[test]
fn draft06_uri_with_hash_is_detected() {
    let meta_uri = "http://json-schema.org/draft-06/schema#";
    let doc = json!({ "$schema": meta_uri, "type": "object" });
    let parsed = parse_schema(&doc).unwrap();
    assert_eq!(parsed.draft, SchemaDraft::Draft06);
}
// The 2019-09 meta-schema URI maps to `Draft201909`.
#[test]
fn draft201909_uri_is_detected() {
    let meta_uri = "https://json-schema.org/draft/2019-09/schema";
    let doc = json!({ "$schema": meta_uri, "type": "object" });
    let parsed = parse_schema(&doc).unwrap();
    assert_eq!(parsed.draft, SchemaDraft::Draft201909);
}
// An absolute `$id` is stored on the schema unchanged.
#[test]
fn absolute_dollar_id_is_stored() {
    let doc = json!({
        "$id": "https://example.com/schema.json",
        "type": "object"
    });
    let schema = parse_schema(&doc).unwrap();
    assert_eq!(schema.id.as_deref(), Some("https://example.com/schema.json"));
}
// A relative `$id` is joined with the base URI handed to
// `parse_schema_with_root`, producing an absolute identifier.
#[test]
fn relative_dollar_id_is_resolved_against_base_uri() {
    let base = "https://example.com/root.json";
    let doc = json!({ "$id": "sub.json", "type": "object" });
    // The document acts as its own root; no remote context, depth 0.
    let schema = parse_schema_with_root(&doc, &doc, Some(base), None, 0).unwrap();
    assert_eq!(schema.id.as_deref(), Some("https://example.com/sub.json"));
}
// A child's relative `$id` is resolved against the parent's absolute `$id`.
#[test]
fn nested_dollar_id_overrides_parent_base() {
    let doc = json!({
        "$id": "https://example.com/root.json",
        "properties": {
            "child": {
                "$id": "child.json",
                "type": "string"
            }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let props = schema.properties.as_ref().unwrap();
    let child = props.get("child").unwrap();
    assert_eq!(child.id.as_deref(), Some("https://example.com/child.json"));
}
// The draft-04 spelling `id` (no dollar sign) is accepted as the schema
// identifier.
#[test]
fn draft04_id_without_dollar_is_parsed() {
    let doc = json!({
        "id": "https://example.com/schema.json",
        "type": "object"
    });
    let schema = parse_schema(&doc).unwrap();
    assert_eq!(schema.id.as_deref(), Some("https://example.com/schema.json"));
}
// When both `$id` and `id` are present, the value of `$id` wins.
#[test]
fn dollar_id_takes_precedence_over_id() {
    let doc = json!({
        "$id": "https://example.com/preferred.json",
        "id": "https://example.com/ignored.json",
        "type": "object"
    });
    let schema = parse_schema(&doc).unwrap();
    assert_eq!(
        schema.id.as_deref(),
        Some("https://example.com/preferred.json")
    );
}
// An intermediate schema without `$id` keeps `id == None` but still passes
// the root's base URI down, so the leaf's relative `$id` resolves against it.
#[test]
fn schema_without_dollar_id_propagates_parent_base() {
    let doc = json!({
        "$id": "https://example.com/root.json",
        "properties": {
            "middle": {
                "type": "object",
                "properties": {
                    "leaf": {
                        "$id": "leaf.json",
                        "type": "string"
                    }
                }
            }
        }
    });
    let schema = parse_schema(&doc).unwrap();
    let top_props = schema.properties.as_ref().unwrap();
    let middle = top_props.get("middle").unwrap();
    assert!(middle.id.is_none(), "middle has no $id");
    let middle_props = middle.properties.as_ref().unwrap();
    let leaf = middle_props.get("leaf").unwrap();
    assert_eq!(leaf.id.as_deref(), Some("https://example.com/leaf.json"));
}
// A `$ref` pointing at a loopback address must be left unresolved: the
// schema keeps the raw reference and no cache entry is created for the URL.
#[test]
fn remote_ref_to_loopback_is_blocked_by_ssrf_guard() {
    let loopback_url = "http://127.0.0.1/evil.json";
    let doc = json!({ "$ref": loopback_url });
    let mut cache = SchemaCache::new();
    let schema = parse_schema_with_remote(&doc, &mut cache, None).unwrap();
    assert_eq!(schema.ref_path.as_deref(), Some(loopback_url));
    assert!(cache.get(loopback_url).is_none());
}
// A relative `$ref` combined with a loopback base URI must not result in a
// cached fetch of the joined URL; the schema keeps the relative reference.
#[test]
fn relative_ref_resolved_against_base_uri_before_ssrf_check() {
    let loopback_base = "http://127.0.0.1/";
    let doc = json!({ "$ref": "evil.json" });
    let mut cache = SchemaCache::new();
    // The context is a temporary so its borrow of `cache` ends with this
    // statement, leaving `cache` free for the lookup below.
    let schema = parse_schema_with_root(
        &doc,
        &doc,
        Some(loopback_base),
        Some(&mut ParseContext::new(&mut cache, None)),
        0,
    )
    .unwrap();
    assert_eq!(schema.ref_path.as_deref(), Some("evil.json"));
    assert!(cache.get("http://127.0.0.1/evil.json").is_none());
}
// Two cached documents reference each other (a -> b -> a). The outcome of
// `resolve_ref` is deliberately discarded: the test passes as long as the
// call returns without panicking.
// NOTE(review): presumably this guards against unbounded recursion on
// circular references; no property of the result is asserted.
#[test]
fn circular_remote_refs_are_deduplicated() {
    let url_a = "https://example.com/a.json";
    let url_b = "https://example.com/b.json";
    let schema_a_value = json!({ "$ref": url_b });
    let schema_b_value = json!({ "$ref": url_a });
    let schema_a = parse_schema(&schema_a_value).unwrap();
    let schema_b = parse_schema(&schema_b_value).unwrap();

    // Pre-populate the cache with both documents.
    let mut cache = SchemaCache::new();
    cache.insert(url_a.to_string(), schema_a_value.clone(), schema_a);
    cache.insert(url_b.to_string(), schema_b_value.clone(), schema_b);

    let mut ctx = ParseContext::new(&mut cache, None);
    let _ = resolve_ref(url_a, &schema_a_value, None, Some(&mut ctx), 0);
}
// `try_visit` accepts exactly `MAX_REMOTE_FETCH_COUNT` distinct URLs and
// rejects the next one.
#[test]
fn breadth_fan_out_stops_at_max_fetch_count() {
    let mut cache = SchemaCache::new();
    let mut ctx = ParseContext::new(&mut cache, None);
    for i in 0..MAX_REMOTE_FETCH_COUNT {
        let accepted = ctx.try_visit(&format!("https://example.com/schema{i}.json"));
        assert!(accepted, "should accept visit {i}");
    }
    let over_budget = ctx.try_visit("https://example.com/one-too-many.json");
    assert!(
        !over_budget,
        "visit beyond MAX_REMOTE_FETCH_COUNT must be rejected"
    );
}
// Spins up a one-shot local HTTP server that answers with `text/html`, then
// requests it through an agent from `build_agent` and checks that the
// returned Content-Type would not be classified as JSON.
//
// NOTE(review): despite its name, this test never calls `fetch_schema`; the
// content-type check is re-implemented inline at the bottom. A regression in
// a production content-type guard would therefore not be caught here.
// Confirm whether that is intentional.
#[test]
fn fetch_schema_rejects_non_json_content_type() {
// Bind to port 0 and read the actual address back from the server.
let server = tiny_http::Server::http("127.0.0.1:0").unwrap();
let addr = server.server_addr().to_ip().unwrap();
let url = format!("http://{addr}/schema.json");
// The server thread handles at most one request and is never joined.
std::thread::spawn(move || {
if let Ok(req) = server.recv() {
let ct = tiny_http::Header::from_bytes(b"Content-Type", b"text/html").unwrap();
let response =
tiny_http::Response::from_string("<html>not json</html>").with_header(ct);
// A failure to respond is ignored here; it would surface as a failed
// request in the test thread.
let _ = req.respond(response);
}
});
let agent = build_agent(None);
let response = agent.get(&url).call().expect("request should succeed");
// A missing or non-UTF-8 header is treated as an empty content type.
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
assert!(
!content_type.contains("application/json"),
"server returned non-JSON content type: {content_type}"
);
// Inline classification; the assertion above is already implied by this one.
let is_json = content_type.contains("application/json")
|| content_type.contains("application/schema");
assert!(!is_json, "guard should reject this content type");
}
// Inserting a second schema under a URL that is already cached must not
// replace the first one (first-write-wins). The test drives `SchemaCache`
// directly with two schemas that differ only in `description`.
#[test]
fn dollar_id_spoofing_cannot_overwrite_cache_entry() {
    let meta_url = "https://json-schema.org/draft/2020-12/schema";
    let mut cache = SchemaCache::new();

    // Struct-update syntax instead of `default()` followed by reassignment.
    let legitimate = JsonSchema {
        description: Some("legitimate".to_string()),
        ..JsonSchema::default()
    };
    cache.insert(meta_url.to_string(), Value::Null, legitimate);

    let malicious = JsonSchema {
        description: Some("malicious".to_string()),
        ..JsonSchema::default()
    };
    cache.insert(meta_url.to_string(), Value::Null, malicious);

    let cached = cache.get(meta_url).unwrap();
    assert_eq!(
        cached.description.as_deref(),
        Some("legitimate"),
        "first-write-wins must prevent $id spoofing overwrite"
    );
}
// A `file://` reference must be left unresolved: the schema keeps the raw
// reference and no cache entry is created for it.
#[test]
fn file_scheme_ref_is_blocked_by_ssrf_guard() {
    let local_file = "file:///etc/passwd";
    let doc = json!({ "$ref": local_file });
    let mut cache = SchemaCache::new();
    let schema = parse_schema_with_remote(&doc, &mut cache, None).unwrap();
    assert_eq!(schema.ref_path.as_deref(), Some(local_file));
    assert!(cache.get(local_file).is_none());
}
// An absolute-URL reference resolves to the schema already stored in the
// cache under that URL (the cache is pre-populated by the test itself).
#[test]
fn remote_ref_absolute_url_resolves_to_fetched_schema() {
    let ref_url = "https://example.com/other.json";
    let remote_value: Value = json!({ "type": "string", "description": "remote schema" });
    let remote_schema = parse_schema(&remote_value).unwrap();
    let mut cache = SchemaCache::new();
    // `remote_value` is not needed afterwards, so it is moved, not cloned.
    cache.insert(ref_url.to_string(), remote_value, remote_schema);

    let root = json!({});
    let mut ctx = ParseContext::new(&mut cache, None);
    let resolved = resolve_ref(ref_url, &root, None, Some(&mut ctx), 0)
        .expect("remote ref should resolve from cache");
    assert_eq!(resolved.description.as_deref(), Some("remote schema"));
}
// A reference of the form `<url>#/definitions/Address` resolves to the
// subschema at that JSON pointer inside the cached remote document.
#[test]
fn remote_ref_with_fragment_navigates_fetched_document() {
    let remote_value = json!({
        "definitions": {
            "Address": {
                "type": "object",
                "description": "an address"
            }
        }
    });
    let remote_schema = parse_schema(&remote_value).unwrap();
    let mut cache = SchemaCache::new();
    let url = "https://example.com/types.json".to_string();
    // `remote_value` is not needed afterwards, so it is moved, not cloned.
    cache.insert(url.clone(), remote_value, remote_schema);

    let root = json!({});
    let ref_str = format!("{url}#/definitions/Address");
    let mut ctx = ParseContext::new(&mut cache, None);
    let resolved = resolve_ref(&ref_str, &root, None, Some(&mut ctx), 0)
        .expect("fragment ref into remote doc should resolve");
    assert_eq!(resolved.description.as_deref(), Some("an address"));
}
// A remote `$ref` nested under `properties` is resolved from the cache when
// the document is parsed with `parse_schema_with_remote`.
#[test]
fn remote_ref_inside_properties_resolves_via_ctx() {
    let ref_url = "https://example.com/address.json";
    let remote_value: Value = json!({ "type": "object", "description": "an address" });
    let remote_schema = parse_schema(&remote_value).unwrap();
    let mut cache = SchemaCache::new();
    // `remote_value` is not needed afterwards, so it is moved, not cloned.
    cache.insert(ref_url.to_string(), remote_value, remote_schema);

    let root = json!({
        "type": "object",
        "properties": {
            "address": { "$ref": ref_url }
        }
    });
    let schema =
        parse_schema_with_remote(&root, &mut cache, None).expect("outer schema should parse");
    let props = schema
        .properties
        .as_ref()
        .expect("properties should be present");
    let address = props
        .get("address")
        .expect("address property should be present");
    assert_eq!(
        address.description.as_deref(),
        Some("an address"),
        "address property should be resolved from remote cache"
    );
}
// The `Display` text for `TooManyRemoteFetches` mentions the remote fetch
// count.
#[test]
fn schema_error_display_too_many_remote_fetches() {
    let msg = SchemaError::TooManyRemoteFetches.to_string();
    let mentions_count = msg.contains("remote fetch count");
    assert!(mentions_count, "got: {msg}");
}
// The `Display` text for `UnexpectedContentType` contains both the fixed
// prefix and the offending content type.
#[test]
fn schema_error_display_unexpected_content_type() {
    let msg = SchemaError::UnexpectedContentType("text/html".to_string()).to_string();
    for needle in ["unexpected content type", "text/html"] {
        assert!(msg.contains(needle), "got: {msg}");
    }
}
}