use std::collections::{HashMap, HashSet};
use serde_json::Value;
use tower_lsp::lsp_types::Url;
pub mod association;
pub use association::*;
/// Upper bound, in bytes, on a fetched schema or catalog response body (5 MiB).
pub(crate) const MAX_SCHEMA_BYTES: u64 = 5 * 1024 * 1024;
/// Longest URL, in bytes, that `validate_and_normalize_url` accepts.
const MAX_URL_LENGTH: usize = 2048;
/// Deepest nesting `check_json_depth` tolerates in a fetched JSON document.
const MAX_JSON_DEPTH: usize = 50;
/// Recursion ceiling shared by `parse_schema_with_root` and `resolve_ref`.
const MAX_REF_DEPTH: usize = 32;
/// Maximum number of distinct URLs one `ParseContext` will record as visited.
const MAX_REMOTE_FETCH_COUNT: usize = 20;
/// Failures that can occur while validating a schema URL, fetching a schema,
/// or decoding the response.
#[derive(Debug)]
pub enum SchemaError {
/// The URL was rejected before any request was made; the payload explains why.
UrlNotPermitted(String),
/// The HTTP request or the body read failed; the payload is the underlying error text.
FetchFailed(String),
/// The response body exceeded `MAX_SCHEMA_BYTES`.
ResponseTooLarge,
/// The body was not valid JSON, or the JSON was not a usable schema/catalog.
ParseFailed(String),
/// The JSON document nests deeper than `MAX_JSON_DEPTH`.
TooDeep,
/// The remote fetch budget was exhausted.
// NOTE(review): this variant is not constructed in the visible part of the file.
TooManyRemoteFetches,
/// The response `Content-Type` was not a JSON type; the payload is the sanitized header value.
UnexpectedContentType(String),
}
/// Human-readable rendering of each [`SchemaError`] variant.
impl std::fmt::Display for SchemaError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Variants without a payload render as fixed text.
            Self::ResponseTooLarge => f.write_str("Schema response exceeded size limit"),
            Self::TooDeep => f.write_str("Schema nesting depth exceeded limit"),
            Self::TooManyRemoteFetches => write!(
                f,
                "Remote fetch count exceeded limit ({MAX_REMOTE_FETCH_COUNT})"
            ),
            // Variants with a payload append it after a fixed prefix.
            Self::UrlNotPermitted(detail) => write!(f, "URL not permitted: {detail}"),
            Self::FetchFailed(detail) => write!(f, "Fetch failed: {detail}"),
            Self::ParseFailed(detail) => write!(f, "Schema parse failed: {detail}"),
            Self::UnexpectedContentType(content_type) => {
                write!(f, "Unexpected content type: {content_type}")
            }
        }
    }
}
/// Value of a schema's `type` keyword.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaType {
/// `type` was a single string, e.g. `"object"`.
Single(String),
/// `type` was an array; only its string entries are kept.
Multiple(Vec<String>),
}
/// Parsed form of keywords that accept either `false` or a subschema
/// (`additionalProperties`, `additionalItems`, `unevaluatedProperties`).
#[derive(Debug, Clone)]
pub enum AdditionalProperties {
/// The keyword was the literal `false`.
Denied,
/// The keyword held a value that parsed as a schema (including `true`).
Schema(Box<JsonSchema>),
}
/// In-memory model of a JSON Schema object.
///
/// Each field mirrors one schema keyword and is `None` when the keyword is
/// absent or its value has an unexpected JSON type. `Default` yields the
/// permissive schema that is also produced for the boolean schema `true`.
#[derive(Debug, Clone, Default)]
pub struct JsonSchema {
/// `$id` (or `id` when `$id` is absent), resolved against the base URI when possible.
pub id: Option<String>,
/// `type`.
pub schema_type: Option<SchemaType>,
/// `title`.
pub title: Option<String>,
/// `description`.
pub description: Option<String>,
/// `format`.
pub format: Option<String>,
/// `contentEncoding`.
pub content_encoding: Option<String>,
/// `contentMediaType`.
pub content_media_type: Option<String>,
/// `contentSchema`.
pub content_schema: Option<Box<Self>>,
/// `properties`; `None` when no property parsed successfully.
pub properties: Option<HashMap<String, Self>>,
/// String entries of `required`.
pub required: Option<Vec<String>>,
/// `enum`.
pub enum_values: Option<Vec<Value>>,
/// `default`.
pub default: Option<Value>,
/// `examples`.
pub examples: Option<Vec<Value>>,
/// `items` when it holds a single schema.
pub items: Option<Box<Self>>,
/// `prefixItems`, or the array form of `items` when no `prefixItems` were parsed.
pub prefix_items: Option<Vec<Self>>,
/// `contains`.
pub contains: Option<Box<Self>>,
/// `minItems`.
pub min_items: Option<u64>,
/// `maxItems`.
pub max_items: Option<u64>,
/// `maxContains`.
pub max_contains: Option<u64>,
/// `minContains`.
pub min_contains: Option<u64>,
/// `uniqueItems`.
pub unique_items: Option<bool>,
/// `additionalProperties`.
pub additional_properties: Option<AdditionalProperties>,
/// `additionalItems`; only parsed when `items` is an array and `prefixItems` is absent.
pub additional_items: Option<AdditionalProperties>,
/// `minProperties`.
pub min_properties: Option<u64>,
/// `maxProperties`.
pub max_properties: Option<u64>,
/// `patternProperties` as `(pattern, schema)` pairs.
pub pattern_properties: Option<Vec<(String, Self)>>,
/// `propertyNames`.
pub property_names: Option<Box<Self>>,
/// `allOf`.
pub all_of: Option<Vec<Self>>,
/// `anyOf`.
pub any_of: Option<Vec<Self>>,
/// `oneOf`.
pub one_of: Option<Vec<Self>>,
/// `not`.
pub not: Option<Box<Self>>,
/// `if`.
pub if_schema: Option<Box<Self>>,
/// `then`.
pub then_schema: Option<Box<Self>>,
/// `else`.
pub else_schema: Option<Box<Self>>,
/// The `$ref` string, recorded when the reference could not be resolved.
pub ref_path: Option<String>,
/// `$anchor`.
pub anchor: Option<String>,
/// `$dynamicAnchor`.
pub dynamic_anchor: Option<String>,
/// `pattern`.
pub pattern: Option<String>,
/// `minimum`.
pub minimum: Option<f64>,
/// `maximum`.
pub maximum: Option<f64>,
/// `minLength`.
pub min_length: Option<u64>,
/// `maxLength`.
pub max_length: Option<u64>,
/// `exclusiveMinimum` when it is a number.
pub exclusive_minimum: Option<f64>,
/// `exclusiveMaximum` when it is a number.
pub exclusive_maximum: Option<f64>,
/// `exclusiveMinimum` when it is a boolean (draft-04 form).
pub exclusive_minimum_draft04: Option<bool>,
/// `exclusiveMaximum` when it is a boolean (draft-04 form).
pub exclusive_maximum_draft04: Option<bool>,
/// `multipleOf`.
pub multiple_of: Option<f64>,
/// `const`.
pub const_value: Option<serde_json::Value>,
/// `dependentRequired`, plus array-valued entries of `dependencies`.
pub dependent_required: Option<HashMap<String, Vec<String>>>,
/// `dependentSchemas`, plus schema-valued entries of `dependencies`.
pub dependent_schemas: Option<HashMap<String, Self>>,
/// `definitions` merged with `$defs` (entries of `$defs` win on a name clash).
pub definitions: Option<HashMap<String, Self>>,
/// `deprecated`.
pub deprecated: Option<bool>,
/// `unevaluatedProperties`.
pub unevaluated_properties: Option<AdditionalProperties>,
/// `unevaluatedItems`.
pub unevaluated_items: Option<Box<Self>>,
}
/// Pairs a pattern with a schema URL.
// NOTE(review): not used in the visible part of this file; presumably `pattern`
// is a file glob matched by the `association` module — confirm there.
#[derive(Debug, Clone)]
pub struct SchemaAssociation {
/// Pattern the association applies to.
pub pattern: String,
/// URL of the associated schema.
pub url: String,
}
/// One catalog entry retained by `parse_schemastore_catalog`.
#[derive(Debug, Clone)]
pub struct SchemaStoreEntry {
/// The entry's non-empty `url`.
pub url: String,
/// The entry's `fileMatch` patterns whose extension is `yml` or `yaml`; never empty.
pub file_match: Vec<String>,
}
/// The subset of a SchemaStore catalog that is kept after parsing.
#[derive(Debug, Clone, Default)]
pub struct SchemaStoreCatalog {
/// Entries in catalog order; `match_schemastore` returns the first one that matches.
pub entries: Vec<SchemaStoreEntry>,
}
/// Cache of fetched schemas keyed by URL string.
#[derive(Debug, Default)]
pub struct SchemaCache {
// Maps a URL to the raw JSON document and the schema parsed from it.
inner: HashMap<String, (Value, JsonSchema)>,
}
impl SchemaCache {
    /// Creates an empty cache.
    #[must_use]
    pub fn new() -> Self {
        Self {
            inner: HashMap::new(),
        }
    }

    /// Returns the parsed schema stored under `url`, if any.
    #[must_use]
    pub fn get(&self, url: &str) -> Option<&JsonSchema> {
        let (_, parsed) = self.inner.get(url)?;
        Some(parsed)
    }

    /// Stores `value` and `schema` under `url` unless an entry already exists;
    /// an existing entry is left untouched.
    pub fn insert(&mut self, url: String, value: Value, schema: JsonSchema) {
        self.inner.entry(url).or_insert_with(|| (value, schema));
    }

    /// Returns the raw document together with the parsed schema for `url`.
    #[must_use]
    fn get_raw(&self, url: &str) -> Option<&(Value, JsonSchema)> {
        self.inner.get(url)
    }

    /// Returns the schema cached under `url`, fetching and caching it first
    /// when it is not present yet.
    ///
    /// # Errors
    ///
    /// Propagates any [`SchemaError`] from `fetch_schema_raw`.
    pub fn get_or_fetch(
        &mut self,
        url: &str,
        proxy: Option<&str>,
    ) -> Result<&JsonSchema, SchemaError> {
        if !self.contains(url) {
            let fetched = fetch_schema_raw(url, proxy, None)?;
            self.inner.insert(url.to_owned(), fetched);
        }
        // The entry was either already present or inserted just above.
        self.inner
            .get(url)
            .map(|(_, parsed)| parsed)
            .ok_or_else(|| SchemaError::FetchFailed("cache miss after insert".to_string()))
    }

    /// Reports whether an entry exists for `url`.
    #[must_use]
    pub fn contains(&self, url: &str) -> bool {
        self.inner.get(url).is_some()
    }

    /// Consumes the cache and hands back the underlying map.
    pub(crate) fn into_inner(self) -> HashMap<String, (Value, JsonSchema)> {
        let Self { inner } = self;
        inner
    }
}
/// Checks that `raw` is an acceptable schema URL and returns its normalised form.
///
/// A URL is accepted when it is at most `MAX_URL_LENGTH` bytes long, parses,
/// uses the `http` or `https` scheme, and its host is not rejected by
/// `is_ssrf_blocked_host`.
///
/// # Errors
///
/// Returns [`SchemaError::UrlNotPermitted`] describing the first failed check.
pub fn validate_and_normalize_url(raw: &str) -> Result<String, SchemaError> {
    fn denied(reason: String) -> SchemaError {
        SchemaError::UrlNotPermitted(reason)
    }

    if raw.len() > MAX_URL_LENGTH {
        return Err(denied("URL exceeds maximum length".to_string()));
    }

    let parsed = match Url::parse(raw) {
        Ok(parsed) => parsed,
        Err(e) => return Err(denied(format!("invalid URL: {e}"))),
    };

    let scheme = parsed.scheme();
    if !matches!(scheme, "http" | "https") {
        return Err(denied(format!("scheme '{scheme}' is not permitted")));
    }

    let blocked_host = parsed.host_str().filter(|h| is_ssrf_blocked_host(h));
    if let Some(host) = blocked_host {
        return Err(denied(format!("host '{host}' is not permitted")));
    }

    Ok(parsed.to_string())
}
/// Reports whether `host` names a destination that must never be fetched.
///
/// Blocked destinations:
/// * `localhost` and any name whose last label is `localhost`
///   (e.g. `api.localhost`), compared case-insensitively;
/// * IPv4 loopback, link-local, private and unspecified addresses;
/// * IPv6 loopback, unspecified, link-local (`fe80::/10`) and unique-local
///   (`fc00::/7`) addresses, plus IPv4-mapped addresses whose embedded IPv4
///   address is blocked.
///
/// A single trailing dot (the fully-qualified form, `localhost.`) is ignored,
/// and IPv6 literals may be wrapped in brackets.
///
/// Only the literal host text is inspected: a public-looking name that
/// resolves to an internal address is not detected here.
fn is_ssrf_blocked_host(host: &str) -> bool {
    use std::net::{IpAddr, Ipv4Addr};

    // `localhost.` is the same name as `localhost`; drop the root dot so it
    // cannot be used to slip past the checks below.
    let host = host.strip_suffix('.').unwrap_or(host);

    // Matches `localhost` itself as well as every `*.localhost` name.
    let last_label_is_localhost = host
        .rsplit('.')
        .next()
        .is_some_and(|label| label.eq_ignore_ascii_case("localhost"));
    if last_label_is_localhost {
        return true;
    }

    let bare = host
        .strip_prefix('[')
        .and_then(|s| s.strip_suffix(']'))
        .unwrap_or(host);
    let Ok(ip) = bare.parse::<IpAddr>() else {
        return false;
    };

    let blocked_v4 = |v4: Ipv4Addr| {
        v4.is_loopback() || v4.is_link_local() || v4.is_private() || v4.is_unspecified()
    };

    match ip {
        IpAddr::V4(v4) => blocked_v4(v4),
        IpAddr::V6(v6) => {
            let first_segment = v6.segments().first().copied();
            v6.is_loopback()
                || v6.is_unspecified()
                // fe80::/10 — link-local
                || first_segment.is_some_and(|s| (s & 0xffc0) == 0xfe80)
                // fc00::/7 — unique local
                || first_segment.is_some_and(|s| (s & 0xfe00) == 0xfc00)
                // ::ffff:a.b.c.d — judge by the embedded IPv4 address
                || v6.to_ipv4_mapped().is_some_and(blocked_v4)
        }
    }
}
/// Builds the HTTP agent used for every schema request.
///
/// Redirects are disabled, connecting is limited to 5 seconds and the whole
/// request to 15 seconds. When `proxy` is given and parses as a proxy URL it
/// is installed; an unparsable proxy string is ignored and the agent is built
/// without a proxy setting.
fn build_agent(proxy: Option<&str>) -> ureq::Agent {
    use std::time::Duration;

    let base = ureq::Agent::config_builder()
        .max_redirects(0)
        .timeout_connect(Some(Duration::from_secs(5)))
        .timeout_global(Some(Duration::from_secs(15)));

    let configured = match proxy.map(ureq::Proxy::new) {
        Some(Ok(parsed)) => base.proxy(Some(parsed)),
        Some(Err(_)) | None => base,
    };

    configured.build().new_agent()
}
/// Produces a version of a `Content-Type` value that is safe to embed in an
/// error message: only printable ASCII (space through `~`) is kept, and the
/// result holds at most 256 characters.
fn sanitize_content_type(raw: &str) -> String {
    const MAX_CHARS: usize = 256;

    let mut cleaned = String::new();
    for ch in raw.chars() {
        // Every retained character is one byte, so byte length equals char count.
        if cleaned.len() == MAX_CHARS {
            break;
        }
        if matches!(ch, ' '..='~') {
            cleaned.push(ch);
        }
    }
    cleaned
}
/// Downloads the JSON Schema at `url` and returns the raw document together
/// with its parsed form.
///
/// The URL is validated first and the request is issued against the
/// validated, normalised URL, so the string that passed the checks is exactly
/// the string that is fetched. The response must declare a JSON media type
/// (`application/json` or `application/schema…`, matched case-insensitively),
/// must not exceed `MAX_SCHEMA_BYTES`, and must not nest deeper than
/// `MAX_JSON_DEPTH`.
///
/// When `ctx` is supplied, the document is parsed with `url` as its base URI
/// and remote references are resolved through the context; otherwise it is
/// parsed with `parse_schema`.
///
/// # Errors
///
/// * [`SchemaError::UrlNotPermitted`] — the URL failed validation.
/// * [`SchemaError::FetchFailed`] — the request or the body read failed.
/// * [`SchemaError::UnexpectedContentType`] — the response is not declared as JSON.
/// * [`SchemaError::ResponseTooLarge`] — the body exceeds the size limit.
/// * [`SchemaError::ParseFailed`] — the body is not JSON or not a schema.
/// * [`SchemaError::TooDeep`] — the document is nested too deeply.
pub fn fetch_schema_raw(
    url: &str,
    proxy: Option<&str>,
    ctx: Option<&mut ParseContext<'_>>,
) -> Result<(Value, JsonSchema), SchemaError> {
    use std::io::Read as _;

    // Request the normalised form so the HTTP client never re-interprets a
    // string that differs from the one that was validated.
    let normalized = validate_and_normalize_url(url)?;
    let agent = build_agent(proxy);
    let response = agent
        .get(normalized.as_str())
        .call()
        .map_err(|e| SchemaError::FetchFailed(e.to_string()))?;

    let content_type = response
        .headers()
        .get("content-type")
        .and_then(|v| v.to_str().ok())
        .unwrap_or("");
    // Media types are case-insensitive, so compare on a lowercased copy.
    let lowered = content_type.to_ascii_lowercase();
    if !lowered.contains("application/json") && !lowered.contains("application/schema") {
        return Err(SchemaError::UnexpectedContentType(sanitize_content_type(
            content_type,
        )));
    }

    // Read one byte beyond the limit so an oversized body can be detected
    // without buffering all of it.
    let mut limited = response
        .into_body()
        .into_reader()
        .take(MAX_SCHEMA_BYTES + 1);
    let mut buf = Vec::new();
    limited
        .read_to_end(&mut buf)
        .map_err(|e| SchemaError::FetchFailed(e.to_string()))?;
    if buf.len() as u64 > MAX_SCHEMA_BYTES {
        return Err(SchemaError::ResponseTooLarge);
    }

    let value: Value =
        serde_json::from_slice(&buf).map_err(|e| SchemaError::ParseFailed(e.to_string()))?;
    check_json_depth(&value, 0)?;

    let schema = ctx
        .map_or_else(
            || parse_schema(&value),
            |ctx| parse_schema_with_root(&value, &value, Some(url), Some(ctx), 0),
        )
        .ok_or_else(|| SchemaError::ParseFailed("not a JSON Schema".to_string()))?;
    Ok((value, schema))
}
/// Location of the SchemaStore catalog document.
const SCHEMASTORE_CATALOG_URL: &str = "https://www.schemastore.org/api/json/catalog.json";

/// Downloads the SchemaStore catalog and parses it.
///
/// The body is limited to `MAX_SCHEMA_BYTES`.
///
/// # Errors
///
/// * [`SchemaError::FetchFailed`] — the request or the body read failed.
/// * [`SchemaError::ResponseTooLarge`] — the body exceeds the size limit.
/// * [`SchemaError::ParseFailed`] — the body is not JSON or not a catalog.
pub fn fetch_schemastore_catalog(proxy: Option<&str>) -> Result<SchemaStoreCatalog, SchemaError> {
    use std::io::Read as _;

    fn fetch_failed<E: std::fmt::Display>(err: E) -> SchemaError {
        SchemaError::FetchFailed(err.to_string())
    }

    let agent = build_agent(proxy);
    let response = agent
        .get(SCHEMASTORE_CATALOG_URL)
        .call()
        .map_err(fetch_failed)?;

    // One byte past the limit is enough to tell "too large" from "fits".
    let mut body = Vec::new();
    response
        .into_body()
        .into_reader()
        .take(MAX_SCHEMA_BYTES + 1)
        .read_to_end(&mut body)
        .map_err(fetch_failed)?;
    if body.len() as u64 > MAX_SCHEMA_BYTES {
        return Err(SchemaError::ResponseTooLarge);
    }

    let document: Value = match serde_json::from_slice(&body) {
        Ok(document) => document,
        Err(e) => return Err(SchemaError::ParseFailed(e.to_string())),
    };
    match parse_schemastore_catalog(&document) {
        Some(catalog) => Ok(catalog),
        None => Err(SchemaError::ParseFailed(
            "not a SchemaStore catalog".to_string(),
        )),
    }
}
fn parse_schemastore_catalog(value: &Value) -> Option<SchemaStoreCatalog> {
let obj = value.as_object()?;
let schemas = obj.get("schemas")?.as_array()?;
let entries = schemas
.iter()
.filter_map(|entry| {
let entry_obj = entry.as_object()?;
let url = entry_obj.get("url")?.as_str()?.to_string();
if url.is_empty() {
return None;
}
let file_match: Vec<String> = entry_obj
.get("fileMatch")?
.as_array()?
.iter()
.filter_map(|v| v.as_str().map(String::from))
.filter(|p| {
std::path::Path::new(p.as_str())
.extension()
.is_some_and(|ext| {
ext.eq_ignore_ascii_case("yml") || ext.eq_ignore_ascii_case("yaml")
})
})
.collect();
if file_match.is_empty() {
None
} else {
Some(SchemaStoreEntry { url, file_match })
}
})
.collect();
Some(SchemaStoreCatalog { entries })
}
/// Returns the schema URL of the first catalog entry that has a `fileMatch`
/// pattern matching `filename`, or `None` when no entry matches.
#[must_use]
pub fn match_schemastore(filename: &str, catalog: &SchemaStoreCatalog) -> Option<String> {
    for entry in &catalog.entries {
        for pattern in &entry.file_match {
            if association::glob_matches(pattern, filename) {
                return Some(entry.url.clone());
            }
        }
    }
    None
}
/// Walks `value` and fails as soon as any node sits deeper than
/// `MAX_JSON_DEPTH`; `depth` is the depth of `value` itself.
///
/// # Errors
///
/// Returns [`SchemaError::TooDeep`] when the limit is exceeded.
fn check_json_depth(value: &Value, depth: usize) -> Result<(), SchemaError> {
    if depth > MAX_JSON_DEPTH {
        return Err(SchemaError::TooDeep);
    }
    let child_depth = depth + 1;
    match value {
        Value::Object(members) => members
            .values()
            .try_for_each(|child| check_json_depth(child, child_depth)),
        Value::Array(elements) => elements
            .iter()
            .try_for_each(|child| check_json_depth(child, child_depth)),
        Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_) => Ok(()),
    }
}
/// State shared across one parse run so remote `$ref` targets can be fetched.
pub struct ParseContext<'a> {
// Cache that fetched remote documents are read from and written to.
cache: &'a mut SchemaCache,
// Optional proxy URL forwarded to every remote fetch.
proxy: Option<&'a str>,
// URLs already recorded by `try_visit`; its size is capped by MAX_REMOTE_FETCH_COUNT.
visited: HashSet<String>,
}
impl<'a> ParseContext<'a> {
    /// Creates a context over `cache` with an empty visited set; `proxy` is
    /// forwarded to every remote fetch made through this context.
    pub fn new(cache: &'a mut SchemaCache, proxy: Option<&'a str>) -> Self {
        let visited = HashSet::new();
        Self {
            cache,
            proxy,
            visited,
        }
    }

    /// Records `url` as visited and reports whether a fetch may proceed.
    ///
    /// Returns `false` when the visited set already holds
    /// `MAX_REMOTE_FETCH_COUNT` URLs or when `url` was recorded before.
    fn try_visit(&mut self, url: &str) -> bool {
        let budget_left = self.visited.len() < MAX_REMOTE_FETCH_COUNT;
        budget_left && self.visited.insert(url.to_string())
    }
}
/// Parses a standalone schema document.
///
/// `value` doubles as the root for fragment-only references. No base URI and
/// no [`ParseContext`] are supplied, so references to other documents stay
/// unresolved. Returns `None` when `value` is neither an object nor `true`.
#[must_use]
pub fn parse_schema(value: &Value) -> Option<JsonSchema> {
    let (base_uri, ctx, depth) = (None, None, 0);
    parse_schema_with_root(value, value, base_uri, ctx, depth)
}
/// Copies the keywords that need no recursive parsing from `obj` into `schema`.
///
/// Every covered field is overwritten, becoming `None` when the keyword is
/// missing or has the wrong JSON type. The exception is the `exclusive*`
/// family, which is written only when the keyword is a number or a boolean.
fn parse_scalar_fields(obj: &serde_json::Map<String, Value>, schema: &mut JsonSchema) {
    let text = |key: &str| string_field(obj, key);
    let float = |key: &str| obj.get(key).and_then(Value::as_f64);
    let unsigned = |key: &str| obj.get(key).and_then(Value::as_u64);
    let array = |key: &str| obj.get(key).and_then(Value::as_array).cloned();

    // Annotations and string-valued keywords.
    schema.title = text("title");
    schema.description = text("description");
    schema.pattern = text("pattern");
    schema.anchor = text("$anchor");
    schema.dynamic_anchor = text("$dynamicAnchor");
    schema.format = text("format");
    schema.content_encoding = text("contentEncoding");
    schema.content_media_type = text("contentMediaType");
    schema.deprecated = obj.get("deprecated").and_then(Value::as_bool);

    // Numeric and length bounds.
    schema.minimum = float("minimum");
    schema.maximum = float("maximum");
    schema.multiple_of = float("multipleOf");
    schema.min_length = unsigned("minLength");
    schema.max_length = unsigned("maxLength");

    // `exclusiveMinimum`/`exclusiveMaximum` are numbers in newer drafts and
    // booleans in draft-04; each form has its own field.
    match obj.get("exclusiveMinimum") {
        Some(Value::Number(bound)) => schema.exclusive_minimum = bound.as_f64(),
        Some(Value::Bool(flag)) => schema.exclusive_minimum_draft04 = Some(*flag),
        Some(Value::Null | Value::String(_) | Value::Array(_) | Value::Object(_)) | None => {}
    }
    match obj.get("exclusiveMaximum") {
        Some(Value::Number(bound)) => schema.exclusive_maximum = bound.as_f64(),
        Some(Value::Bool(flag)) => schema.exclusive_maximum_draft04 = Some(*flag),
        Some(Value::Null | Value::String(_) | Value::Array(_) | Value::Object(_)) | None => {}
    }

    // Keywords stored as raw JSON.
    schema.const_value = obj.get("const").cloned();
    schema.default = obj.get("default").cloned();
    schema.examples = array("examples");
    schema.enum_values = array("enum");
}
/// Parses `contentSchema` into `schema.content_schema`.
///
/// The field is left untouched when the keyword is absent; when present, it
/// is set to the parsed subschema, or to `None` if parsing fails.
fn parse_content_schema(
    schema: &mut JsonSchema,
    obj: &serde_json::Map<String, Value>,
    root: &Value,
    base_uri: Option<&str>,
    ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) {
    let Some(raw) = obj.get("contentSchema") else {
        return;
    };
    let parsed = parse_schema_with_root(raw, root, base_uri, ctx, depth + 1);
    schema.content_schema = parsed.map(Box::new);
}
/// Parses the object-related keywords `properties`, `minProperties`,
/// `maxProperties` and `patternProperties` into `schema`.
///
/// Subschemas that fail to parse are dropped. `properties` and
/// `pattern_properties` are written only when at least one subschema was
/// parsed; the two numeric bounds are always overwritten.
fn parse_object_fields(
    obj: &serde_json::Map<String, Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
    schema: &mut JsonSchema,
) {
    if let Some(raw_props) = obj.get("properties").and_then(Value::as_object) {
        let parsed: HashMap<String, JsonSchema> = raw_props
            .iter()
            .filter_map(|(name, raw)| {
                parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
                    .map(|sub| (name.clone(), sub))
            })
            .collect();
        if !parsed.is_empty() {
            schema.properties = Some(parsed);
        }
    }

    let count_bound = |key: &str| obj.get(key).and_then(Value::as_u64);
    schema.min_properties = count_bound("minProperties");
    schema.max_properties = count_bound("maxProperties");

    if let Some(raw_patterns) = obj.get("patternProperties").and_then(Value::as_object) {
        let parsed: Vec<(String, JsonSchema)> = raw_patterns
            .iter()
            .filter_map(|(pattern, raw)| {
                parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
                    .map(|sub| (pattern.clone(), sub))
            })
            .collect();
        if !parsed.is_empty() {
            schema.pattern_properties = Some(parsed);
        }
    }
}
/// Parses the array-related keywords into `schema`: `prefixItems`, `items`,
/// `additionalItems`, `contains`, `minItems`, `maxItems`, `minContains`,
/// `maxContains` and `uniqueItems`.
///
/// Subschemas that fail to parse are dropped from `prefixItems` and from the
/// array form of `items`.
fn parse_array_fields(
obj: &serde_json::Map<String, Value>,
root: &Value,
base_uri: Option<&str>,
mut ctx: Option<&mut ParseContext<'_>>,
depth: usize,
schema: &mut JsonSchema,
) {
// `prefixItems` holds the positional (tuple) schemas.
if let Some(arr) = obj.get("prefixItems").and_then(Value::as_array) {
let mut items = Vec::new();
for v in arr {
if let Some(s) =
parse_schema_with_root(v, root, base_uri, ctx.as_deref_mut(), depth + 1)
{
items.push(s);
}
}
if !items.is_empty() {
schema.prefix_items = Some(items);
}
}
match obj.get("items") {
// Array-form `items` is treated as the positional list, but only when
// `prefixItems` did not already populate it.
Some(Value::Array(arr)) if schema.prefix_items.is_none() => {
let mut items = Vec::new();
for v in arr {
if let Some(s) =
parse_schema_with_root(v, root, base_uri, ctx.as_deref_mut(), depth + 1)
{
items.push(s);
}
}
if !items.is_empty() {
schema.prefix_items = Some(items);
}
}
// Any other `items` value is parsed as a single schema. An array that
// reaches this arm (because `prefix_items` is already set) parses to `None`.
Some(v) => {
schema.items = parse_schema_with_root(v, root, base_uri, ctx.as_deref_mut(), depth + 1)
.map(Box::new);
}
None => {}
}
// `additionalItems` is honoured only next to array-form `items` and only
// when the `prefixItems` key is absent.
if obj.get("items").is_some_and(Value::is_array) && obj.get("prefixItems").is_none() {
// `depth` is passed unchanged: `parse_additional_properties` adds the +1 itself.
schema.additional_items = parse_additional_properties(
obj.get("additionalItems"),
root,
base_uri,
ctx.as_deref_mut(),
depth,
);
}
// Last recursive call in this function, so `ctx` is moved rather than reborrowed.
schema.contains = obj
.get("contains")
.and_then(|v| parse_schema_with_root(v, root, base_uri, ctx, depth + 1))
.map(Box::new);
schema.min_items = obj.get("minItems").and_then(Value::as_u64);
schema.max_items = obj.get("maxItems").and_then(Value::as_u64);
schema.min_contains = obj.get("minContains").and_then(Value::as_u64);
schema.max_contains = obj.get("maxContains").and_then(Value::as_u64);
schema.unique_items = obj.get("uniqueItems").and_then(Value::as_bool);
}
/// Parses the applicator keywords `allOf`, `anyOf`, `oneOf`, `not`, `if`,
/// `then` and `else` into `schema`, overwriting each corresponding field.
fn parse_combinator_fields(
    obj: &serde_json::Map<String, Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
    schema: &mut JsonSchema,
) {
    // List-valued combinators.
    schema.all_of = parse_schema_array(obj.get("allOf"), root, base_uri, ctx.as_deref_mut(), depth);
    schema.any_of = parse_schema_array(obj.get("anyOf"), root, base_uri, ctx.as_deref_mut(), depth);
    schema.one_of = parse_schema_array(obj.get("oneOf"), root, base_uri, ctx.as_deref_mut(), depth);

    // Single-schema combinators all follow the same look-up/parse/box recipe.
    let mut boxed_subschema = |keyword: &str| {
        obj.get(keyword)
            .and_then(|raw| {
                parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
            })
            .map(Box::new)
    };
    schema.not = boxed_subschema("not");
    schema.if_schema = boxed_subschema("if");
    schema.then_schema = boxed_subschema("then");
    schema.else_schema = boxed_subschema("else");
}
/// Parses `unevaluatedProperties`, `unevaluatedItems` and the definition
/// tables (`definitions` and `$defs`) into `schema`.
///
/// When both definition tables are present they are merged, with `$defs`
/// entries replacing `definitions` entries of the same name.
fn parse_extension_fields(
    obj: &serde_json::Map<String, Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
    schema: &mut JsonSchema,
) {
    let unevaluated_props = obj.get("unevaluatedProperties");
    schema.unevaluated_properties =
        parse_additional_properties(unevaluated_props, root, base_uri, ctx.as_deref_mut(), depth);

    let unevaluated_items = obj.get("unevaluatedItems").and_then(|raw| {
        parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
    });
    schema.unevaluated_items = unevaluated_items.map(Box::new);

    let legacy = parse_definitions(
        obj.get("definitions"),
        root,
        base_uri,
        ctx.as_deref_mut(),
        depth,
    );
    let modern = parse_definitions(obj.get("$defs"), root, base_uri, ctx, depth);
    schema.definitions = match legacy {
        Some(mut merged) => {
            if let Some(extra) = modern {
                merged.extend(extra);
            }
            Some(merged)
        }
        None => modern,
    };
}
/// Turns `relative` into an absolute URI string.
///
/// A reference that already parses as an absolute URL is returned unchanged.
/// Otherwise it is joined onto `base`; `None` is returned when `base` is
/// missing, is not a valid URL, or the join fails.
fn resolve_uri(base: Option<&str>, relative: &str) -> Option<String> {
    match Url::parse(relative) {
        Ok(_) => Some(relative.to_string()),
        Err(_) => {
            let anchor = Url::parse(base?).ok()?;
            let joined = anchor.join(relative).ok()?;
            Some(joined.to_string())
        }
    }
}
/// Core recursive parser: converts one JSON value into a [`JsonSchema`].
///
/// * `value` — the (sub)schema being parsed.
/// * `root` — the document that fragment-only references are resolved against.
/// * `base_uri` — URI used to resolve relative `$id` and `$ref` values, if known.
/// * `ctx` — when present, enables fetching of remote references.
/// * `depth` — current recursion depth; parsing stops beyond `MAX_REF_DEPTH`.
///
/// Returns `None` when the depth limit is exceeded or when `value` is neither
/// an object nor the boolean `true`.
fn parse_schema_with_root(
value: &Value,
root: &Value,
base_uri: Option<&str>,
mut ctx: Option<&mut ParseContext<'_>>,
depth: usize,
) -> Option<JsonSchema> {
if depth > MAX_REF_DEPTH {
return None;
}
match value {
// The boolean schema `true` is represented by the all-`None` default.
Value::Bool(true) => return Some(JsonSchema::default()),
// `false` and every other non-object value yield no schema.
Value::Bool(false)
| Value::Null
| Value::Number(_)
| Value::String(_)
| Value::Array(_) => {
return None;
}
Value::Object(_) => {}
}
let obj = value.as_object()?;
let mut schema = JsonSchema::default();
// A string `$ref` short-circuits: the resolved target is returned as-is and
// sibling keywords are not parsed. If resolution fails, a schema carrying
// only `ref_path` is returned.
if let Some(Value::String(ref_str)) = obj.get("$ref") {
schema.ref_path = Some(ref_str.clone());
if let Some(resolved) = resolve_ref(ref_str, root, base_uri, ctx.as_deref_mut(), depth + 1)
{
return Some(resolved);
}
return Some(schema);
}
// `$dynamicRef` is resolved like `$ref`, but a failed resolution falls
// through to parsing the object's own keywords.
if let Some(Value::String(ref_str)) = obj.get("$dynamicRef") {
if let Some(resolved) = resolve_ref(ref_str, root, base_uri, ctx.as_deref_mut(), depth + 1)
{
return Some(resolved);
}
}
// `$id` takes precedence over the draft-04 spelling `id`.
let raw_id = obj
.get("$id")
.or_else(|| obj.get("id"))
.and_then(Value::as_str);
// A declared id becomes the base URI for everything nested below; when it
// cannot be resolved against `base_uri`, the raw string is used verbatim.
let effective_base: Option<String> = if let Some(raw) = raw_id {
let resolved = resolve_uri(base_uri, raw).unwrap_or_else(|| raw.to_string());
schema.id = Some(resolved.clone());
Some(resolved)
} else {
base_uri.map(String::from)
};
let effective_base = effective_base.as_deref();
schema.schema_type = parse_type(obj.get("type"));
parse_scalar_fields(obj, &mut schema);
// The helpers below receive `depth` unchanged; each adds 1 itself before recursing.
parse_content_schema(
&mut schema,
obj,
root,
effective_base,
ctx.as_deref_mut(),
depth,
);
// Non-string entries of `required` are skipped.
schema.required = obj.get("required").and_then(Value::as_array).map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
});
parse_object_fields(
obj,
root,
effective_base,
ctx.as_deref_mut(),
depth,
&mut schema,
);
parse_array_fields(
obj,
root,
effective_base,
ctx.as_deref_mut(),
depth,
&mut schema,
);
schema.additional_properties = parse_additional_properties(
obj.get("additionalProperties"),
root,
effective_base,
ctx.as_deref_mut(),
depth,
);
schema.property_names = obj
.get("propertyNames")
.and_then(|v| {
parse_schema_with_root(v, root, effective_base, ctx.as_deref_mut(), depth + 1)
})
.map(Box::new);
let (dep_req, dep_sch) =
parse_dependencies(obj, root, effective_base, ctx.as_deref_mut(), depth);
schema.dependent_required = dep_req;
schema.dependent_schemas = dep_sch;
parse_combinator_fields(
obj,
root,
effective_base,
ctx.as_deref_mut(),
depth,
&mut schema,
);
// Last use of `ctx`, so it is moved instead of reborrowed.
parse_extension_fields(obj, root, effective_base, ctx, depth, &mut schema);
Some(schema)
}
/// Result of [`parse_dependencies`]: the "dependent required" table followed
/// by the "dependent schemas" table, each `None` when empty.
type ParsedDependencies = (
    Option<HashMap<String, Vec<String>>>,
    Option<HashMap<String, JsonSchema>>,
);

/// Gathers dependency information from `dependencies`, `dependentRequired`
/// and `dependentSchemas`.
///
/// In `dependencies`, an array value contributes to the required-names table
/// and any other value is parsed as a schema. Entries from
/// `dependentRequired` / `dependentSchemas` are applied afterwards and replace
/// entries with the same key. Non-string array members are skipped.
fn parse_dependencies(
    obj: &serde_json::Map<String, Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> ParsedDependencies {
    fn string_members(values: &[Value]) -> Vec<String> {
        values
            .iter()
            .filter_map(Value::as_str)
            .map(String::from)
            .collect()
    }

    let mut required_by_key: HashMap<String, Vec<String>> = HashMap::new();
    let mut schema_by_key: HashMap<String, JsonSchema> = HashMap::new();

    // Legacy keyword: each value is either a list of names or a schema.
    if let Some(legacy) = obj.get("dependencies").and_then(Value::as_object) {
        for (key, val) in legacy {
            match val.as_array() {
                Some(names) => {
                    required_by_key.insert(key.clone(), string_members(names));
                }
                None => {
                    let parsed =
                        parse_schema_with_root(val, root, base_uri, ctx.as_deref_mut(), depth + 1);
                    if let Some(sub) = parsed {
                        schema_by_key.insert(key.clone(), sub);
                    }
                }
            }
        }
    }

    if let Some(table) = obj.get("dependentRequired").and_then(Value::as_object) {
        for (key, val) in table {
            if let Some(names) = val.as_array() {
                required_by_key.insert(key.clone(), string_members(names));
            }
        }
    }

    if let Some(table) = obj.get("dependentSchemas").and_then(Value::as_object) {
        for (key, val) in table {
            let parsed = parse_schema_with_root(val, root, base_uri, ctx.as_deref_mut(), depth + 1);
            if let Some(sub) = parsed {
                schema_by_key.insert(key.clone(), sub);
            }
        }
    }

    (
        (!required_by_key.is_empty()).then_some(required_by_key),
        (!schema_by_key.is_empty()).then_some(schema_by_key),
    )
}
/// Interprets the value of a `type` keyword.
///
/// A string becomes [`SchemaType::Single`]. An array becomes
/// [`SchemaType::Multiple`] holding its string members, or `None` if it has
/// none. Every other JSON type, and a missing keyword, yields `None`.
fn parse_type(value: Option<&Value>) -> Option<SchemaType> {
    match value? {
        Value::String(name) => Some(SchemaType::Single(name.to_owned())),
        Value::Array(candidates) => {
            let names: Vec<String> = candidates
                .iter()
                .filter_map(Value::as_str)
                .map(str::to_owned)
                .collect();
            (!names.is_empty()).then(|| SchemaType::Multiple(names))
        }
        Value::Null | Value::Bool(_) | Value::Number(_) | Value::Object(_) => None,
    }
}
fn string_field(obj: &serde_json::Map<String, Value>, key: &str) -> Option<String> {
obj.get(key)?.as_str().map(String::from)
}
/// Parses a keyword that is either the literal `false` or a subschema
/// (`additionalProperties`, `additionalItems`, `unevaluatedProperties`).
///
/// Returns `None` when the keyword is absent or its value does not parse as a
/// schema, [`AdditionalProperties::Denied`] for `false`, and
/// [`AdditionalProperties::Schema`] otherwise. The subschema is parsed at
/// `depth + 1`, so callers pass their own depth unchanged.
fn parse_additional_properties(
    value: Option<&Value>,
    root: &Value,
    base_uri: Option<&str>,
    // `ctx` is only ever moved into the recursive call, so the binding needs
    // neither `mut` nor a lint expectation.
    ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> Option<AdditionalProperties> {
    match value? {
        Value::Bool(false) => Some(AdditionalProperties::Denied),
        v @ (Value::Bool(true)
        | Value::Null
        | Value::Number(_)
        | Value::String(_)
        | Value::Array(_)
        | Value::Object(_)) => parse_schema_with_root(v, root, base_uri, ctx, depth + 1)
            .map(|s| AdditionalProperties::Schema(Box::new(s))),
    }
}
/// Parses a keyword whose value is an array of schemas (`allOf`, `anyOf`,
/// `oneOf`).
///
/// Members that fail to parse are skipped. Returns `None` when the keyword is
/// absent, is not an array, or no member parsed.
fn parse_schema_array(
    value: Option<&Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> Option<Vec<JsonSchema>> {
    let parsed: Vec<JsonSchema> = value?
        .as_array()?
        .iter()
        .filter_map(|member| {
            parse_schema_with_root(member, root, base_uri, ctx.as_deref_mut(), depth + 1)
        })
        .collect();
    (!parsed.is_empty()).then_some(parsed)
}
/// Parses a definitions table (`definitions` or `$defs`) into a name → schema map.
///
/// Entries that fail to parse are skipped. Returns `None` when the keyword is
/// absent, is not an object, or no entry parsed.
fn parse_definitions(
    value: Option<&Value>,
    root: &Value,
    base_uri: Option<&str>,
    mut ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> Option<HashMap<String, JsonSchema>> {
    let parsed: HashMap<String, JsonSchema> = value?
        .as_object()?
        .iter()
        .filter_map(|(name, raw)| {
            parse_schema_with_root(raw, root, base_uri, ctx.as_deref_mut(), depth + 1)
                .map(|sub| (name.clone(), sub))
        })
        .collect();
    (!parsed.is_empty()).then_some(parsed)
}
/// Resolves a reference string (`$ref` / `$dynamicRef`) to a parsed schema.
///
/// Fragment-only references are resolved inside `root`:
/// * `#` — the root itself;
/// * `#/json/pointer` — the value at that JSON Pointer;
/// * `#name` — the first subschema whose `$anchor` or `$dynamicAnchor` is `name`.
///
/// Fragment-only targets are parsed without a base URI and without a context,
/// so references to other documents nested inside them stay unresolved.
///
/// Any other reference is remote and requires `ctx`. Its URI part is resolved
/// against `base_uri`, validated, then served from the cache or fetched
/// (subject to the context's fetch budget) and cached. The optional fragment
/// is applied to the remote document by the same three rules. Remote targets
/// are parsed with the remote URL as base URI but without a context.
///
/// Returns `None` when the depth limit is exceeded, the target does not
/// exist, the URL is not permitted, the fetch budget is exhausted, or the
/// fetch fails.
fn resolve_ref(
    ref_str: &str,
    root: &Value,
    base_uri: Option<&str>,
    ctx: Option<&mut ParseContext<'_>>,
    depth: usize,
) -> Option<JsonSchema> {
    if depth > MAX_REF_DEPTH {
        return None;
    }

    // Same-document reference.
    if let Some(pointer) = ref_str.strip_prefix('#') {
        if pointer.is_empty() {
            return parse_schema_with_root(root, root, None, None, depth + 1);
        }
        if pointer.starts_with('/') {
            let target = root.pointer(pointer)?;
            return parse_schema_with_root(target, root, None, None, depth + 1);
        }
        return find_anchor_in_value(pointer, root)
            .and_then(|v| parse_schema_with_root(v, root, None, None, depth + 1));
    }

    // Remote reference: impossible without a context to fetch through.
    let ctx = ctx?;
    let (uri_part, fragment) = ref_str.find('#').map_or((ref_str, None), |pos| {
        (&ref_str[..pos], Some(&ref_str[pos + 1..]))
    });
    let absolute_uri = resolve_uri(base_uri, uri_part)?;
    let normalized = validate_and_normalize_url(&absolute_uri).ok()?;

    if !ctx.cache.contains(&normalized) {
        // Charge the fetch budget before touching the network; a URL that was
        // already attempted, or an exhausted budget, ends resolution here.
        if !ctx.try_visit(&normalized) {
            return None;
        }
        let (value, schema) = fetch_schema_raw(&normalized, ctx.proxy, None).ok()?;
        ctx.cache.insert(normalized.clone(), value, schema);
    }

    // Borrow the cached document directly; nothing below needs mutable access
    // to the context, so copying the whole document is unnecessary.
    let (remote_value, _) = ctx.cache.get_raw(&normalized)?;
    match fragment {
        None | Some("") => parse_schema_with_root(
            remote_value,
            remote_value,
            Some(&normalized),
            None,
            depth + 1,
        ),
        Some(frag) if frag.starts_with('/') => {
            let target = remote_value.pointer(frag)?;
            parse_schema_with_root(target, remote_value, Some(&normalized), None, depth + 1)
        }
        Some(name) => find_anchor_in_value(name, remote_value).and_then(|v| {
            parse_schema_with_root(v, remote_value, Some(&normalized), None, depth + 1)
        }),
    }
}
/// Depth-first search for the first object inside `value` whose `$anchor` or
/// `$dynamicAnchor` equals `name`.
///
/// An object is checked before its members; object members and array
/// elements are visited in iteration order.
fn find_anchor_in_value<'a>(name: &str, value: &'a Value) -> Option<&'a Value> {
    match value {
        Value::Object(members) => {
            let declares =
                |keyword: &str| members.get(keyword).and_then(Value::as_str) == Some(name);
            if declares("$anchor") || declares("$dynamicAnchor") {
                return Some(value);
            }
            members
                .values()
                .find_map(|child| find_anchor_in_value(name, child))
        }
        Value::Array(elements) => elements
            .iter()
            .find_map(|child| find_anchor_in_value(name, child)),
        Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_) => None,
    }
}
#[cfg(test)]
#[expect(
clippy::indexing_slicing,
clippy::expect_used,
clippy::unwrap_used,
clippy::cast_possible_truncation,
reason = "test code"
)]
mod tests {
use rstest::rstest;
use super::*;
use serde_json::json;
fn schema_type_str(s: &JsonSchema) -> Option<&str> {
match s.schema_type.as_ref()? {
SchemaType::Single(t) => Some(t.as_str()),
SchemaType::Multiple(_) => None,
}
}
#[test]
fn should_parse_minimal_object_schema() {
let v = json!({"type": "object"});
let s = parse_schema(&v).expect("should parse");
assert_eq!(schema_type_str(&s), Some("object"));
}
#[test]
fn should_parse_schema_with_properties() {
let v = json!({"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}});
let s = parse_schema(&v).expect("should parse");
let props = s.properties.as_ref().expect("should have properties");
assert_eq!(
schema_type_str(props.get("name").expect("name")),
Some("string")
);
assert_eq!(
schema_type_str(props.get("age").expect("age")),
Some("integer")
);
}
#[test]
fn should_parse_required_fields() {
let v = json!({"type": "object", "required": ["name", "age"]});
let s = parse_schema(&v).expect("should parse");
let req = s.required.as_ref().expect("should have required");
assert!(req.contains(&"name".to_string()));
assert!(req.contains(&"age".to_string()));
}
#[test]
fn should_parse_enum_values() {
let v = json!({"type": "string", "enum": ["alpha", "beta", "gamma"]});
let s = parse_schema(&v).expect("should parse");
let enums = s.enum_values.as_ref().expect("should have enum");
assert_eq!(enums.len(), 3);
assert!(enums.contains(&json!("alpha")));
assert!(enums.contains(&json!("beta")));
assert!(enums.contains(&json!("gamma")));
}
#[test]
fn should_parse_description() {
let v = json!({"type": "string", "description": "A human-readable name"});
let s = parse_schema(&v).expect("should parse");
assert_eq!(s.description.as_deref(), Some("A human-readable name"));
}
#[test]
fn should_parse_default_value() {
let v = json!({"type": "integer", "default": 42});
let s = parse_schema(&v).expect("should parse");
assert_eq!(s.default, Some(json!(42)));
}
#[test]
fn should_parse_array_schema_with_items() {
let v = json!({"type": "array", "items": {"type": "string"}});
let s = parse_schema(&v).expect("should parse");
let items = s.items.as_ref().expect("should have items");
assert_eq!(schema_type_str(items), Some("string"));
}
#[test]
fn should_parse_additional_properties_false() {
let v = json!({"type": "object", "additionalProperties": false});
let s = parse_schema(&v).expect("should parse");
assert!(matches!(
s.additional_properties,
Some(AdditionalProperties::Denied)
));
}
#[test]
fn should_parse_additional_properties_as_schema() {
let v = json!({"type": "object", "additionalProperties": {"type": "string"}});
let s = parse_schema(&v).expect("should parse");
assert!(matches!(
s.additional_properties,
Some(AdditionalProperties::Schema(_))
));
}
#[test]
fn should_parse_min_properties_and_max_properties() {
let v = json!({"type": "object", "minProperties": 1, "maxProperties": 5});
let s = parse_schema(&v).expect("should parse");
assert_eq!(s.min_properties, Some(1));
assert_eq!(s.max_properties, Some(5));
}
#[test]
fn should_parse_additional_items_false() {
let v = json!({"items": [{"type": "string"}], "additionalItems": false});
let s = parse_schema(&v).expect("should parse");
assert!(s.prefix_items.is_some());
assert!(matches!(
s.additional_items,
Some(AdditionalProperties::Denied)
));
}
#[test]
fn should_parse_additional_items_schema() {
let v = json!({"items": [{"type": "string"}], "additionalItems": {"type": "integer"}});
let s = parse_schema(&v).expect("should parse");
assert!(matches!(
s.additional_items,
Some(AdditionalProperties::Schema(_))
));
}
#[test]
fn should_not_parse_additional_items_when_prefix_items_set_from_prefix_items_key() {
let v = json!({"prefixItems": [{"type": "string"}], "additionalItems": false});
let s = parse_schema(&v).expect("should parse");
assert!(s.additional_items.is_none());
}
#[test]
fn should_not_parse_additional_items_when_no_array_items() {
let v = json!({"type": "array", "additionalItems": false});
let s = parse_schema(&v).expect("should parse");
assert!(s.additional_items.is_none());
}
#[test]
fn should_parse_all_of() {
let v = json!({"allOf": [{"type": "object"}, {"required": ["name"]}]});
let s = parse_schema(&v).expect("should parse");
assert_eq!(s.all_of.as_ref().map(Vec::len), Some(2));
}
#[test]
fn should_parse_any_of() {
let v = json!({"anyOf": [{"type": "string"}, {"type": "integer"}]});
let s = parse_schema(&v).expect("should parse");
assert_eq!(s.any_of.as_ref().map(Vec::len), Some(2));
}
#[test]
fn should_parse_one_of() {
let v = json!({"oneOf": [{"type": "string"}, {"type": "null"}]});
let s = parse_schema(&v).expect("should parse");
assert_eq!(s.one_of.as_ref().map(Vec::len), Some(2));
}
#[test]
fn should_return_none_for_null_input() {
assert!(parse_schema(&Value::Null).is_none());
}
#[test]
fn should_return_none_for_non_object_json() {
assert!(parse_schema(&Value::String("not a schema".into())).is_none());
}
#[test]
fn should_parse_empty_object_as_permissive_schema() {
let v = json!({});
let s = parse_schema(&v).expect("should parse");
assert!(s.schema_type.is_none());
assert!(s.properties.is_none());
assert!(s.required.is_none());
}
#[test]
fn should_parse_boolean_true_schema() {
let s = parse_schema(&Value::Bool(true)).expect("should return Some for true");
assert!(s.schema_type.is_none());
}
#[test]
fn should_parse_boolean_false_schema() {
assert!(parse_schema(&Value::Bool(false)).is_none());
}
#[test]
fn should_parse_draft04_definitions() {
let v = json!({"definitions": {"addr": {"type": "string"}}});
let s = parse_schema(&v).expect("should parse");
let defs = s.definitions.as_ref().expect("should have definitions");
assert!(defs.contains_key("addr"));
}
#[test]
fn should_parse_draft07_defs() {
let v = json!({"$defs": {"addr": {"type": "string"}}});
let s = parse_schema(&v).expect("should parse");
let defs = s.definitions.as_ref().expect("should have $defs");
assert!(defs.contains_key("addr"));
}
#[test]
fn should_parse_deprecated_true() {
let v = json!({"type": "string", "deprecated": true});
let s = parse_schema(&v).expect("should parse");
assert_eq!(s.deprecated, Some(true));
}
#[test]
fn should_resolve_simple_local_ref() {
let v = json!({
"$ref": "#/definitions/MyType",
"definitions": {"MyType": {"type": "string"}}
});
let s = parse_schema(&v).expect("should resolve");
assert_eq!(schema_type_str(&s), Some("string"));
}
/// A `$ref` whose JSON-pointer target does not exist must neither panic nor
/// resolve to a typed schema.
#[test]
fn should_return_none_for_missing_ref_target() {
    let v = json!({"$ref": "#/definitions/Missing"});
    // NOTE(review): the test name promises `None`, but the parser may instead
    // hand back a placeholder schema — confirm which contract is intended.
    // Either way nothing was resolvable, so no type may appear on the result.
    if let Some(s) = parse_schema(&v) {
        assert!(
            s.schema_type.is_none(),
            "dangling $ref must not produce a resolved type"
        );
    }
}
/// A `$ref` inside `properties` resolves against the root's `definitions`.
#[test]
fn should_handle_nested_ref_resolution() {
    let input = json!({
        "type": "object",
        "properties": {
            "foo": {"$ref": "#/definitions/Bar"}
        },
        "definitions": {"Bar": {"type": "integer"}}
    });
    let parsed = parse_schema(&input).expect("should parse");
    let foo = parsed
        .properties
        .as_ref()
        .expect("should have properties")
        .get("foo")
        .expect("should have foo");
    assert_eq!(schema_type_str(foo), Some("integer"));
}
/// A definition that references itself must not hang the parser.
/// The test passes as soon as `parse_schema` returns; the result is ignored.
#[test]
fn should_not_infinite_loop_on_circular_ref() {
    let self_referential = json!({
        "$ref": "#/definitions/A",
        "definitions": {
            "A": {"$ref": "#/definitions/A"}
        }
    });
    let _ = parse_schema(&self_referential);
}
/// Looking up a URL in a fresh cache finds nothing.
#[test]
fn should_return_none_on_cache_miss() {
    let empty = SchemaCache::new();
    let lookup = empty.get("https://example.com/schema.json");
    assert!(lookup.is_none());
}
/// A schema inserted under a URL can be read back under the same URL.
#[test]
fn should_return_cached_schema_on_cache_hit() {
    let url = "https://example.com/schema.json";
    let mut cache = SchemaCache::new();
    cache.insert(
        url.to_string(),
        Value::Null,
        JsonSchema {
            description: Some(String::from("test")),
            ..JsonSchema::default()
        },
    );
    let hit = cache.get(url).expect("should be cached");
    assert_eq!(hit.description.as_deref(), Some("test"));
}
/// Inserting twice under one URL keeps the first schema (first write wins).
#[test]
fn should_not_overwrite_existing_cache_entry() {
    let url = "https://example.com/schema.json";
    let mut cache = SchemaCache::new();
    // Insert "first", then try to replace it with "second" under the same key.
    for description in ["first", "second"] {
        cache.insert(
            url.to_string(),
            Value::Null,
            JsonSchema {
                description: Some(description.to_string()),
                ..JsonSchema::default()
            },
        );
    }
    let kept = cache.get(url).expect("should be cached");
    assert_eq!(kept.description.as_deref(), Some("first"));
}
/// Fetching from a local port that serves nothing must return an error.
// NOTE(review): another test in this file expects `fetch_schema_raw` to reject
// a 127.0.0.1 URL outright, so this error may come from that guard rather than
// from a failed connection — confirm.
#[test]
fn should_return_error_for_unreachable_url() {
    let outcome = fetch_schema_raw("http://127.0.0.1:19999/nonexistent.json", None, None);
    assert!(outcome.is_err());
}
/// Runs a literal response body through deserialisation, the depth check, and
/// `parse_schema`, then inspects the resulting schema.
#[test]
fn should_parse_fetched_schema_from_valid_response() {
    let raw: &[u8] =
        br#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
    let document: Value = serde_json::from_slice(raw).expect("valid JSON should deserialise");
    check_json_depth(&document, 0).expect("shallow schema should pass depth check");

    let schema = parse_schema(&document).expect("should produce a schema");
    assert_eq!(schema_type_str(&schema), Some("object"));

    let props = schema.properties.as_ref().expect("should have properties");
    assert!(props.contains_key("name"));
    let name_schema = props.get("name").expect("name");
    assert_eq!(schema_type_str(name_schema), Some("string"));

    let required = schema.required.as_ref().expect("should have required");
    assert!(required.iter().any(|field| field == "name"));
}
/// `file://` URLs are refused by URL validation.
#[test]
fn should_reject_file_scheme_url() {
    let outcome = validate_and_normalize_url("file:///etc/passwd");
    assert!(outcome.is_err());
}
/// The `localhost` hostname is refused by URL validation.
#[test]
fn should_reject_localhost_url() {
    let outcome = validate_and_normalize_url("http://localhost/schema.json");
    assert!(outcome.is_err());
}
/// The link-local address 169.254.169.254 is refused by URL validation.
#[test]
fn should_reject_link_local_ip_url() {
    let outcome = validate_and_normalize_url("http://169.254.169.254/latest/meta-data/");
    assert!(outcome.is_err());
}
/// `fetch_schema_raw` itself must fail for a loopback address.
#[test]
fn should_reject_loopback_ip_in_fetch() {
    let outcome = fetch_schema_raw("http://127.0.0.1:8080/schema.json", None, None);
    assert!(outcome.is_err());
}
/// A URL with a 2050-character path is refused as too long.
#[test]
fn should_reject_url_exceeding_max_length() {
    let padding = "a".repeat(2050);
    let outcome = validate_and_normalize_url(&format!("https://example.com/{padding}"));
    assert!(outcome.is_err());
}
/// Upper-case scheme and host normalize to the same key as lower-case ones.
#[test]
fn should_normalize_cache_key_url() {
    let lower = validate_and_normalize_url("https://example.com/schema").expect("valid");
    let upper = validate_and_normalize_url("HTTPS://EXAMPLE.COM/schema").expect("valid");
    assert_eq!(
        lower, upper,
        "scheme+host should be normalized to lowercase"
    );
}
/// Feeds a schema nested 100 `properties` levels deep to the parser.
// NOTE(review): the result is discarded, so this only proves the call returns
// without panicking; the name suggests a rejection should be asserted — confirm
// the intended return value.
#[test]
fn should_reject_excessively_nested_schema() {
    let nested = (0..100).fold(json!({"type": "string"}), |inner, _| {
        json!({"type": "object", "properties": {"x": inner}})
    });
    let _ = parse_schema(&nested);
}
/// `file://` URLs are refused by URL validation.
#[test]
fn should_reject_file_scheme_url_47() {
    let outcome = validate_and_normalize_url("file:///etc/passwd");
    assert!(outcome.is_err());
}
/// The `localhost` hostname is refused by URL validation.
#[test]
fn should_reject_localhost_url_48() {
    let outcome = validate_and_normalize_url("http://localhost/schema.json");
    assert!(outcome.is_err());
}
/// The IPv4 loopback address is refused by URL validation.
#[test]
fn should_reject_loopback_ip_url() {
    let outcome = validate_and_normalize_url("http://127.0.0.1/schema.json");
    assert!(outcome.is_err());
}
/// The IPv6 loopback address `::1` is refused by URL validation.
#[test]
fn should_reject_ipv6_loopback_url() {
    let outcome = validate_and_normalize_url("http://[::1]/schema.json");
    assert!(outcome.is_err());
}
/// The 169.254.169.254 metadata-style address is refused by URL validation.
#[test]
fn should_reject_link_local_aws_metadata_url() {
    let outcome = validate_and_normalize_url("http://169.254.169.254/latest/meta-data/");
    assert!(outcome.is_err());
}
/// A URL with a 2048-character path (longer still once the prefix is added)
/// is refused as too long.
#[test]
fn should_reject_url_exceeding_max_length_52() {
    let padding = "a".repeat(2048);
    let outcome = validate_and_normalize_url(&format!("https://example.com/{padding}"));
    assert!(outcome.is_err());
}
/// An ordinary public `https` URL passes validation.
#[test]
fn should_accept_valid_https_url() {
    let url = "https://schemastore.azurewebsites.net/schemas/json/package.json";
    assert!(
        validate_and_normalize_url(url).is_ok(),
        "valid https URL should be accepted"
    );
}
/// An ordinary public `http` URL passes validation.
#[test]
fn should_accept_valid_http_url() {
    let url = "http://json.schemastore.org/package";
    assert!(
        validate_and_normalize_url(url).is_ok(),
        "valid http URL should be accepted"
    );
}
/// Boundary case for the size limit: a body of exactly `MAX_SCHEMA_BYTES`
/// bytes, read through a `take(MAX_SCHEMA_BYTES + 1)` limiter, must stay
/// within the limit.
///
/// Previously this test only compared a freshly built vector's length with
/// the constant used to build it, which can never fail. It now performs the
/// same limited read as the over-limit sibling test.
// NOTE(review): presumably mirrors the limiter used by the real fetch path —
// confirm against `fetch_schema_raw`.
#[test]
fn should_return_error_when_response_exceeds_size_limit() {
    use std::io::Read as _;
    let body = vec![b'x'; MAX_SCHEMA_BYTES as usize];
    let mut limited = std::io::Cursor::new(&body).take(MAX_SCHEMA_BYTES + 1);
    let mut buf = Vec::new();
    limited.read_to_end(&mut buf).expect("read succeeds");
    assert!(
        buf.len() as u64 <= MAX_SCHEMA_BYTES,
        "exactly MAX_SCHEMA_BYTES bytes must not trigger ResponseTooLarge"
    );
}
/// A body one byte over the limit, read through a `take(MAX_SCHEMA_BYTES + 1)`
/// limiter, yields more than `MAX_SCHEMA_BYTES` bytes.
#[test]
fn should_return_error_when_response_exceeds_size_limit_over() {
    use std::io::Read as _;
    let oversized = vec![b'x'; MAX_SCHEMA_BYTES as usize + 1];
    let mut received = Vec::new();
    std::io::Cursor::new(&oversized)
        .take(MAX_SCHEMA_BYTES + 1)
        .read_to_end(&mut received)
        .expect("read succeeds");
    assert!(
        received.len() as u64 > MAX_SCHEMA_BYTES,
        "over-limit read should trigger ResponseTooLarge condition"
    );
}
/// Feeds a schema nested 60 `properties` levels deep to the parser.
// NOTE(review): the result is discarded, so this only proves the call returns
// without panicking — confirm whether a rejection should be asserted.
#[test]
fn should_reject_schema_exceeding_nesting_depth() {
    let nested = (0..60).fold(json!({"type": "string"}), |inner, _| {
        json!({"type": "object", "properties": {"child": inner}})
    });
    let _ = parse_schema(&nested);
}
/// A schema nested 10 `properties` levels deep still parses.
#[test]
fn should_accept_schema_within_nesting_depth() {
    let nested = (0..10).fold(json!({"type": "string"}), |inner, _| {
        json!({"type": "object", "properties": {"child": inner}})
    });
    assert!(
        parse_schema(&nested).is_some(),
        "schema within depth limit should be accepted"
    );
}
/// Two definitions that reference each other must not hang the parser.
/// The test passes as soon as `parse_schema` returns; the result is ignored.
#[test]
fn should_not_hang_on_two_node_circular_ref() {
    let mutual_cycle = json!({
        "$ref": "#/definitions/A",
        "definitions": {
            "A": {"$ref": "#/definitions/B"},
            "B": {"$ref": "#/definitions/A"}
        }
    });
    let _ = parse_schema(&mutual_cycle);
}
/// URLs that differ only by a trailing slash normalize to different keys.
#[test]
fn should_normalize_cache_key_trailing_slash() {
    let bare = validate_and_normalize_url("https://example.com/schema").expect("valid");
    let slashed = validate_and_normalize_url("https://example.com/schema/").expect("valid");
    assert_ne!(
        bare, slashed,
        "trailing-slash variants are distinct paths and must not share a cache key"
    );
}
/// An upper-case host normalizes to the same key as its lower-case form.
#[test]
fn should_normalize_cache_key_host_case() {
    let shouting = validate_and_normalize_url("https://EXAMPLE.COM/schema").expect("valid");
    let quiet = validate_and_normalize_url("https://example.com/schema").expect("valid");
    assert_eq!(
        shouting, quiet,
        "host should be normalized to lowercase in cache key"
    );
}
// Verifies that the agent returned by `build_agent(None)` surfaces a 302
// response to the caller instead of following its `Location` header.
#[test]
fn should_not_follow_redirects() {
// Bind to port 0 and read back the address the server actually got.
let server = tiny_http::Server::http("127.0.0.1:0").unwrap();
let addr = server.server_addr().to_ip().unwrap();
let url = format!("http://{addr}/schema.json");
let redirect_target = format!("http://{addr}/redirected");
// The background thread answers exactly one request with a 302 that points
// at `redirect_target`, then exits. A followed redirect would get no answer.
std::thread::spawn(move || {
if let Ok(req) = server.recv() {
let location =
tiny_http::Header::from_bytes(b"Location", redirect_target.as_bytes()).unwrap();
let response = tiny_http::Response::empty(302).with_header(location);
// A failed write is ignored; the assertion below would catch it.
let _ = req.respond(response);
}
});
// The agent is used directly here rather than through `fetch_schema_raw`.
let agent = build_agent(None);
let response = agent.get(&url).call().expect("request should succeed");
assert_eq!(
response.status(),
302,
"agent must return 302 without following the redirect"
);
}
/// `FetchFailed` renders both its label and the wrapped detail.
#[test]
fn schema_error_display_fetch_failed() {
    let msg = SchemaError::FetchFailed(String::from("connection refused")).to_string();
    for fragment in ["Fetch failed", "connection refused"] {
        assert!(msg.contains(fragment), "got: {msg}");
    }
}
/// `ResponseTooLarge` mentions the size limit in its message.
#[test]
fn schema_error_display_response_too_large() {
    let msg = SchemaError::ResponseTooLarge.to_string();
    assert!(msg.contains("size limit"), "got: {msg}");
}
/// `ParseFailed` renders both its label and the wrapped detail.
#[test]
fn schema_error_display_parse_failed() {
    let msg = SchemaError::ParseFailed(String::from("unexpected token")).to_string();
    for fragment in ["parse failed", "unexpected token"] {
        assert!(msg.contains(fragment), "got: {msg}");
    }
}
/// `TooDeep` mentions depth in its message.
#[test]
fn schema_error_display_too_deep() {
    let msg = SchemaError::TooDeep.to_string();
    assert!(msg.contains("depth"), "got: {msg}");
}
/// `UrlNotPermitted` says the URL is not permitted.
#[test]
fn schema_error_display_url_not_permitted() {
    let msg = SchemaError::UrlNotPermitted(String::from("ftp://bad")).to_string();
    assert!(msg.contains("not permitted"), "got: {msg}");
}
/// Addresses in 10.0.0.0/8 are refused.
#[test]
fn should_reject_private_ipv4_10_range() {
    assert!(
        validate_and_normalize_url("http://10.0.0.1/schema.json").is_err(),
        "private 10.x.x.x must be rejected"
    );
}
/// Addresses in 192.168.0.0/16 are refused.
#[test]
fn should_reject_private_ipv4_192_168_range() {
    assert!(
        validate_and_normalize_url("http://192.168.1.1/schema.json").is_err(),
        "private 192.168.x.x must be rejected"
    );
}
/// The private address 172.16.0.1 is refused.
#[test]
fn should_reject_private_ipv4_172_16_range() {
    assert!(
        validate_and_normalize_url("http://172.16.0.1/schema.json").is_err(),
        "private 172.16.x.x must be rejected"
    );
}
/// The unspecified IPv4 address 0.0.0.0 is refused.
#[test]
fn should_reject_unspecified_ipv4_0_0_0_0() {
    assert!(
        validate_and_normalize_url("http://0.0.0.0/schema.json").is_err(),
        "unspecified 0.0.0.0 must be rejected"
    );
}
/// The unspecified IPv6 address `::` is refused.
#[test]
fn should_reject_ipv6_unspecified_double_colon() {
    assert!(
        validate_and_normalize_url("http://[::]/schema.json").is_err(),
        "IPv6 unspecified :: must be rejected"
    );
}
/// IPv6 link-local addresses (`fe80::`) are refused.
#[test]
fn should_reject_ipv6_link_local_fe80() {
    assert!(
        validate_and_normalize_url("http://[fe80::1]/schema.json").is_err(),
        "IPv6 link-local fe80:: must be rejected"
    );
}
/// `ftp://` URLs are refused, and the error text names the scheme.
#[test]
fn should_reject_ftp_scheme() {
    let outcome = validate_and_normalize_url("ftp://example.com/schema.json");
    assert!(outcome.is_err(), "ftp:// scheme must be rejected");

    let msg = outcome.unwrap_err().to_string();
    assert!(
        msg.contains("ftp"),
        "error message should mention the scheme, got: {msg}"
    );
}
/// Text that is not a URL at all is refused.
#[test]
fn should_reject_unparseable_url() {
    assert!(
        validate_and_normalize_url("not a url at all").is_err(),
        "unparseable string must be rejected"
    );
}
/// IPv6 unique-local addresses starting with `fd00::` are refused.
#[test]
fn should_reject_ipv6_ula_fd00() {
    assert!(
        validate_and_normalize_url("http://[fd00::1]/schema.json").is_err(),
        "IPv6 ULA fd00:: must be rejected"
    );
}
/// IPv6 unique-local addresses starting with `fc00::` are refused.
#[test]
fn should_reject_ipv6_ula_fc00() {
    assert!(
        validate_and_normalize_url("http://[fc00::1]/schema.json").is_err(),
        "IPv6 ULA fc00:: must be rejected"
    );
}
/// A private IPv4 address written in IPv4-mapped IPv6 form is still refused.
#[test]
fn should_reject_ipv4_mapped_private() {
    let outcome = validate_and_normalize_url("http://[::ffff:192.168.1.1]/schema.json");
    let rejected = outcome.is_err();
    assert!(rejected, "IPv4-mapped private address must be rejected");
}
/// The IPv4 loopback written in IPv4-mapped IPv6 form is still refused.
#[test]
fn should_reject_ipv4_mapped_loopback() {
    let outcome = validate_and_normalize_url("http://[::ffff:127.0.0.1]/schema.json");
    let rejected = outcome.is_err();
    assert!(rejected, "IPv4-mapped loopback address must be rejected");
}
/// A public IPv4 address in IPv4-mapped IPv6 form passes validation.
#[test]
fn should_allow_ipv4_mapped_public() {
    let mapped_public = "http://[::ffff:8.8.8.8]/schema.json";
    let result = validate_and_normalize_url(mapped_public);
    assert!(
        result.is_ok(),
        "IPv4-mapped public address must be allowed: {result:?}"
    );
}
/// A numeric `type` keyword is ignored; the schema still parses.
#[test]
fn parse_type_returns_none_for_non_string_non_array() {
    let input = json!({"type": 42});
    let parsed = parse_schema(&input).expect("should parse as object schema");
    assert!(
        parsed.schema_type.is_none(),
        "non-string/non-array type should yield None"
    );
}
/// An empty `type` array leaves `schema_type` unset.
#[test]
fn parse_type_returns_none_for_empty_type_array() {
    let input = json!({"type": []});
    let parsed = parse_schema(&input).expect("should parse");
    assert!(
        parsed.schema_type.is_none(),
        "empty type array should yield None schema_type"
    );
}
/// A `type` array mixing a number and a string still produces a schema type.
#[test]
fn parse_type_filters_non_string_items_from_array() {
    let input = json!({"type": [42, "string"]});
    let parsed = parse_schema(&input).expect("should parse");
    assert!(
        parsed.schema_type.is_some(),
        "string item should survive filtering"
    );
}
/// A definition that points back at the document root (`"#"`) does not stop
/// the root itself from parsing with its own type.
#[test]
fn ref_pointing_to_root_returns_parsed_root() {
    let input = json!({
        "definitions": {
            "Root": {"$ref": "#"}
        },
        "type": "object"
    });
    let parsed = parse_schema(&input).expect("should parse");
    assert_eq!(schema_type_str(&parsed), Some("object"));
}
/// A remote (non-fragment) `$ref` parsed without a fetch context is kept
/// verbatim in `ref_path`.
///
/// The assertion used to sit inside `if let Some(..)`, so a `None` result
/// passed silently. Other tests in this file unwrap `parse_schema` on the same
/// document shape, so the result is now required to be `Some`.
#[test]
fn ref_without_hash_prefix_yields_ref_path_only() {
    let v = json!({"$ref": "http://example.com/other-schema.json"});
    let s = parse_schema(&v).expect("remote $ref should still yield a schema carrying ref_path");
    assert_eq!(
        s.ref_path.as_deref(),
        Some("http://example.com/other-schema.json")
    );
}
/// An empty `allOf` array leaves `all_of` unset.
#[test]
fn empty_all_of_array_yields_none() {
    let input = json!({"allOf": []});
    let parsed = parse_schema(&input).expect("should parse");
    assert!(parsed.all_of.is_none(), "empty allOf should yield None");
}
/// An `allOf` holding only an unparseable entry leaves `all_of` unset.
#[test]
fn all_of_with_non_object_entries_filtered_out_yields_none() {
    let input = json!({"allOf": ["not a schema"]});
    let parsed = parse_schema(&input).expect("should parse");
    assert!(
        parsed.all_of.is_none(),
        "allOf with only invalid entries should yield None"
    );
}
/// An empty `definitions` object leaves `definitions` unset.
#[test]
fn empty_definitions_object_yields_none() {
    let input = json!({"definitions": {}});
    let parsed = parse_schema(&input).expect("should parse");
    assert!(
        parsed.definitions.is_none(),
        "empty definitions should yield None"
    );
}
/// Entries from `definitions` and `$defs` land in the same map.
#[test]
fn both_definitions_and_defs_are_merged() {
    let input = json!({
        "definitions": {"TypeA": {"type": "string"}},
        "$defs": {"TypeB": {"type": "integer"}}
    });
    let parsed = parse_schema(&input).expect("should parse");
    let merged = parsed
        .definitions
        .as_ref()
        .expect("should have merged definitions");

    let has_type_a = merged.contains_key("TypeA");
    assert!(has_type_a, "TypeA from definitions should be present");
    let has_type_b = merged.contains_key("TypeB");
    assert!(has_type_b, "TypeB from $defs should be present");
}
/// `additionalProperties: true` is represented by the `Schema` variant.
#[test]
fn additional_properties_true_parsed_as_permissive_schema() {
    let input = json!({"type": "object", "additionalProperties": true});
    let parsed = parse_schema(&input).expect("should parse");
    let is_schema_variant = matches!(
        parsed.additional_properties,
        Some(AdditionalProperties::Schema(_))
    );
    assert!(
        is_schema_variant,
        "additionalProperties: true should yield Schema variant"
    );
}
/// A string wrapped in 55 nested arrays fails the depth check.
#[test]
fn check_json_depth_rejects_deeply_nested_array() {
    let nested = (0..55).fold(json!("leaf"), |inner, _| json!([inner]));
    let verdict = check_json_depth(&nested, 0);
    assert!(
        verdict.is_err(),
        "deeply nested array should exceed depth limit"
    );
}
/// A flat array passes the depth check.
#[test]
fn check_json_depth_accepts_shallow_array() {
    let flat = json!(["a", "b", "c"]);
    let verdict = check_json_depth(&flat, 0);
    assert!(verdict.is_ok());
}
/// Non-string members of `required` are dropped; strings are kept.
#[test]
fn required_with_non_string_values_filtered() {
    let input = json!({"required": [42, "name", true]});
    let parsed = parse_schema(&input).expect("should parse");
    let kept = parsed.required.as_ref().expect("should have required");
    assert_eq!(kept.len(), 1, "only string 'name' should survive filtering");
    assert!(kept.iter().any(|field| field == "name"));
}
/// Builds a catalog document of the form `{"schemas": [...]}` for the tests.
///
/// Each `(url, patterns)` pair becomes one entry with a fixed `"name"`, the
/// given `"url"`, and the patterns as its `"fileMatch"` list.
fn make_catalog_json(schemas: &[(&str, &[&str])]) -> Value {
    let mut entries: Vec<Value> = Vec::with_capacity(schemas.len());
    for (url, patterns) in schemas {
        entries.push(json!({
            "name": "Schema Name",
            "url": url,
            "fileMatch": patterns
        }));
    }
    json!({ "schemas": entries })
}
/// A single entry with a `*.yaml` pattern is kept as-is.
#[test]
fn should_parse_catalog_entry_with_yaml_pattern() {
    let document = make_catalog_json(&[("https://example.com/schema.json", &["*.yaml"])]);
    let catalog = parse_schemastore_catalog(&document).expect("should parse");
    assert_eq!(catalog.entries.len(), 1);

    let only = &catalog.entries[0];
    assert_eq!(only.url, "https://example.com/schema.json");
    assert_eq!(only.file_match, vec!["*.yaml"]);
}
/// An entry mixing `*.json` and `.yml` patterns is kept, but only the `.yml`
/// pattern remains in its `file_match`.
#[test]
fn should_keep_entry_with_mixed_yaml_and_json_patterns() {
    let patterns: &[&str] = &["*.json", "docker-compose.yml"];
    let document = make_catalog_json(&[("https://example.com/schema.json", patterns)]);
    let catalog = parse_schemastore_catalog(&document).expect("should parse");
    assert_eq!(catalog.entries.len(), 1);

    let only = &catalog.entries[0];
    assert_eq!(only.file_match, vec!["docker-compose.yml"]);
}
// Table-driven check of how many entries `parse_schemastore_catalog` keeps.
// Each `#[case]` supplies a catalog document and the expected entry count.
// The cases cover: `.json`-only patterns (dropped), `.yml` patterns (kept),
// an empty `schemas` array, an empty `url`, a missing `fileMatch`, and a mix
// of YAML and JSON entries.
#[rstest]
#[case::json_only_entry_filtered(
make_catalog_json(&[("https://example.com/schema.json", &["*.json"])]),
0
)]
#[case::yml_extension_kept(
make_catalog_json(&[("https://example.com/schema.json", &["*.yml"])]),
1
)]
#[case::empty_schemas_array(json!({ "schemas": [] }), 0)]
#[case::empty_url_skipped(
json!({"schemas": [{"name": "Empty URL", "url": "", "fileMatch": ["*.yaml"]}]}),
0
)]
#[case::missing_file_match_skipped(
json!({"schemas": [{"name": "No FileMatch", "url": "https://example.com/schema.json"}]}),
0
)]
#[case::two_yaml_kept_one_json_filtered(
make_catalog_json(&[
("https://example.com/workflow.json", &["**/.github/workflows/*.yml"]),
("https://example.com/compose.json", &["docker-compose.yaml"]),
("https://example.com/package.json", &["package.json"]),
]),
2
)]
fn parse_schemastore_catalog_entry_count(#[case] input: Value, #[case] expected_len: usize) {
// Every case here is a well-formed catalog, so parsing itself must succeed.
let catalog = parse_schemastore_catalog(&input).expect("should parse");
assert_eq!(catalog.entries.len(), expected_len);
}
// Table-driven check of inputs that are not a catalog at all: a top-level
// array, and an object without a `schemas` key. Both must yield `None`.
#[rstest]
#[case::non_object_input(json!(["not", "an", "object"]))]
#[case::missing_schemas_key(json!({ "other": "data" }))]
fn parse_schemastore_catalog_returns_none(#[case] input: Value) {
assert!(parse_schemastore_catalog(&input).is_none());
}
/// A path that fits an entry's glob pattern returns that entry's URL.
#[test]
fn should_return_url_for_matching_filename() {
    let workflow_entry = SchemaStoreEntry {
        url: String::from("https://example.com/workflow.json"),
        file_match: vec![String::from("**/.github/workflows/*.yml")],
    };
    let catalog = SchemaStoreCatalog {
        entries: vec![workflow_entry],
    };
    let matched = match_schemastore(".github/workflows/ci.yml", &catalog);
    assert_eq!(
        matched,
        Some(String::from("https://example.com/workflow.json"))
    );
}
/// A filename that fits no pattern in the catalog returns `None`.
#[test]
fn should_return_none_when_no_catalog_entry_matches() {
    let workflow_entry = SchemaStoreEntry {
        url: String::from("https://example.com/workflow.json"),
        file_match: vec![String::from("**/.github/workflows/*.yml")],
    };
    let catalog = SchemaStoreCatalog {
        entries: vec![workflow_entry],
    };
    let matched = match_schemastore("docker-compose.yaml", &catalog);
    assert_eq!(matched, None);
}
/// When two entries share a pattern, the earlier entry's URL is returned.
#[test]
fn should_return_first_matching_catalog_entry() {
    let entries = ["https://example.com/first.json", "https://example.com/second.json"]
        .iter()
        .map(|url| SchemaStoreEntry {
            url: url.to_string(),
            file_match: vec!["*.yaml".to_string()],
        })
        .collect();
    let catalog = SchemaStoreCatalog { entries };
    let matched = match_schemastore("config.yaml", &catalog);
    assert_eq!(matched, Some("https://example.com/first.json".to_string()));
}
/// A catalog without entries matches nothing.
#[test]
fn should_return_none_for_empty_catalog() {
    let empty = SchemaStoreCatalog {
        entries: Vec::new(),
    };
    let matched = match_schemastore("config.yaml", &empty);
    assert_eq!(matched, None);
}
/// An entry matches when any one of its patterns fits the filename.
#[test]
fn should_match_if_any_file_match_pattern_matches() {
    let patterns = ["docker-compose.yml", "docker-compose.yaml", "compose.yaml"];
    let compose_entry = SchemaStoreEntry {
        url: "https://example.com/compose.json".to_string(),
        file_match: patterns.iter().map(|p| p.to_string()).collect(),
    };
    let catalog = SchemaStoreCatalog {
        entries: vec![compose_entry],
    };
    let matched = match_schemastore("docker-compose.yaml", &catalog);
    assert_eq!(matched, Some("https://example.com/compose.json".to_string()));
}
/// Smoke test: building an agent with no proxy completes without panicking.
#[test]
fn build_agent_without_proxy_does_not_panic() {
    let proxy: Option<&str> = None;
    let _agent = build_agent(proxy);
}
/// Smoke test: building an agent with a well-formed proxy URL completes
/// without panicking.
#[test]
fn build_agent_with_valid_proxy_does_not_panic() {
    let proxy = Some("http://proxy.example.com:8080");
    let _agent = build_agent(proxy);
}
/// Smoke test: a malformed proxy string must not make `build_agent` panic.
#[test]
fn build_agent_with_invalid_proxy_falls_back_gracefully() {
    let proxy = Some("not-a-valid-proxy-url");
    let _agent = build_agent(proxy);
}
/// An array-valued `dependencies` entry is exposed as `dependent_required`
/// and leaves `dependent_schemas` unset.
#[test]
fn draft04_dependencies_array_maps_to_dependent_required() {
    let input = json!({
        "type": "object",
        "dependencies": {
            "credit_card": ["billing_address", "billing_zip"]
        }
    });
    let parsed = parse_schema(&input).unwrap();
    let needed = parsed
        .dependent_required
        .as_ref()
        .unwrap()
        .get("credit_card")
        .unwrap();
    for field in ["billing_address", "billing_zip"] {
        assert!(needed.iter().any(|name| name == field));
    }
    assert!(parsed.dependent_schemas.is_none());
}
/// An object-valued `dependencies` entry is exposed as `dependent_schemas`
/// and leaves `dependent_required` unset.
#[test]
fn draft04_dependencies_object_maps_to_dependent_schemas() {
    let input = json!({
        "type": "object",
        "dependencies": {
            "name": { "required": ["age"] }
        }
    });
    let parsed = parse_schema(&input).unwrap();
    let name_dependency = parsed
        .dependent_schemas
        .as_ref()
        .unwrap()
        .get("name")
        .unwrap();
    assert_eq!(name_dependency.required, Some(vec![String::from("age")]));
    assert!(parsed.dependent_required.is_none());
}
/// When both spellings define key `"a"`, the `dependentRequired` value wins
/// over the `dependencies` one.
#[test]
fn draft2019_dependent_required_overrides_draft04() {
    let input = json!({
        "dependencies": {
            "a": ["b"]
        },
        "dependentRequired": {
            "a": ["c"]
        }
    });
    let parsed = parse_schema(&input).unwrap();
    let by_property = parsed.dependent_required.unwrap();
    let expected = vec![String::from("c")];
    assert_eq!(by_property.get("a").unwrap(), &expected);
}
/// `"$ref": "#item"` resolves to the definition that declares
/// `"$anchor": "item"`.
#[test]
fn ref_resolves_named_anchor() {
    let document = json!({
        "type": "object",
        "properties": {
            "foo": { "$ref": "#item" }
        },
        "$defs": {
            "Item": {
                "$anchor": "item",
                "type": "string"
            }
        }
    });
    let expected = Some(SchemaType::Single(String::from("string")));
    let props = parse_schema(&document).unwrap().properties.unwrap();
    assert_eq!(props.get("foo").unwrap().schema_type, expected);
}
/// A plain `$ref` to `#loop` resolves to the definition that declares
/// `"$dynamicAnchor": "loop"`.
#[test]
fn ref_resolves_dynamic_anchor() {
    let document = json!({
        "type": "object",
        "properties": {
            "bar": { "$ref": "#loop" }
        },
        "$defs": {
            "Node": {
                "$dynamicAnchor": "loop",
                "type": "integer"
            }
        }
    });
    let expected = Some(SchemaType::Single(String::from("integer")));
    let props = parse_schema(&document).unwrap().properties.unwrap();
    assert_eq!(props.get("bar").unwrap().schema_type, expected);
}
/// `"$dynamicRef": "#node"` resolves to the definition that declares
/// `"$dynamicAnchor": "node"`.
#[test]
fn dynamic_ref_resolves_to_dynamic_anchor() {
    let document = json!({
        "type": "object",
        "properties": {
            "val": { "$dynamicRef": "#node" }
        },
        "$defs": {
            "Node": {
                "$dynamicAnchor": "node",
                "type": "boolean"
            }
        }
    });
    let expected = Some(SchemaType::Single(String::from("boolean")));
    let props = parse_schema(&document).unwrap().properties.unwrap();
    assert_eq!(props.get("val").unwrap().schema_type, expected);
}
/// A `$ref` to an anchor nobody declares still yields a schema, with the raw
/// reference in `ref_path` and no type.
#[test]
fn ref_returns_schema_with_ref_path_when_anchor_not_found() {
    let document = json!({ "$ref": "#nonexistent" });
    let parsed = parse_schema(&document).unwrap();
    assert_eq!(parsed.ref_path, Some(String::from("#nonexistent")));
    assert!(parsed.schema_type.is_none());
}
/// An `$anchor` declared on a property inside a `$defs` entry is still found
/// by a `$ref` elsewhere in the document.
#[test]
fn ref_resolves_anchor_nested_inside_definitions() {
    let document = json!({
        "$defs": {
            "outer": {
                "type": "object",
                "properties": {
                    "inner": {
                        "$anchor": "nested",
                        "type": "number"
                    }
                }
            }
        },
        "properties": {
            "x": { "$ref": "#nested" }
        }
    });
    let expected = Some(SchemaType::Single(String::from("number")));
    let props = parse_schema(&document).unwrap().properties.unwrap();
    assert_eq!(props.get("x").unwrap().schema_type, expected);
}
/// JSON-pointer references (`#/$defs/Name`) resolve to the pointed-at schema.
#[test]
fn json_pointer_ref_still_resolves_correctly() {
    let document = json!({
        "properties": {
            "name": { "$ref": "#/$defs/Name" }
        },
        "$defs": {
            "Name": { "type": "string" }
        }
    });
    let expected = Some(SchemaType::Single(String::from("string")));
    let props = parse_schema(&document).unwrap().properties.unwrap();
    assert_eq!(props.get("name").unwrap().schema_type, expected);
}
/// The `$anchor` keyword is copied into the schema's `anchor` field.
#[test]
fn anchor_field_stored_on_schema() {
    let document = json!({ "$anchor": "myanchor", "type": "string" });
    let parsed = parse_schema(&document).unwrap();
    assert_eq!(parsed.anchor, Some(String::from("myanchor")));
}
/// The `$dynamicAnchor` keyword is copied into `dynamic_anchor`.
#[test]
fn dynamic_anchor_field_stored_on_schema() {
    let document = json!({ "$dynamicAnchor": "myloop", "type": "array" });
    let parsed = parse_schema(&document).unwrap();
    assert_eq!(parsed.dynamic_anchor, Some(String::from("myloop")));
}
/// An absolute `$id` is stored unchanged in `id`.
#[test]
fn absolute_dollar_id_is_stored() {
    let document = json!({
        "$id": "https://example.com/schema.json",
        "type": "object"
    });
    let parsed = parse_schema(&document).unwrap();
    let expected = Some(String::from("https://example.com/schema.json"));
    assert_eq!(parsed.id, expected);
}
/// A relative `$id` is joined onto the base URI passed to the parser.
#[test]
fn relative_dollar_id_is_resolved_against_base_uri() {
    let document = json!({ "$id": "sub.json", "type": "object" });
    let base_uri = Some("https://example.com/root.json");
    let parsed = parse_schema_with_root(&document, &document, base_uri, None, 0).unwrap();
    assert_eq!(parsed.id, Some(String::from("https://example.com/sub.json")));
}
/// A child's relative `$id` is resolved against the parent's absolute `$id`.
#[test]
fn nested_dollar_id_overrides_parent_base() {
    let document = json!({
        "$id": "https://example.com/root.json",
        "properties": {
            "child": {
                "$id": "child.json",
                "type": "string"
            }
        }
    });
    let parsed = parse_schema(&document).unwrap();
    let props = parsed.properties.as_ref().unwrap();
    let child = props.get("child").unwrap();
    assert_eq!(child.id, Some(String::from("https://example.com/child.json")));
}
/// The `id` keyword (no dollar sign) is also read into `id`.
#[test]
fn draft04_id_without_dollar_is_parsed() {
    let document = json!({
        "id": "https://example.com/schema.json",
        "type": "object"
    });
    let parsed = parse_schema(&document).unwrap();
    let expected = Some(String::from("https://example.com/schema.json"));
    assert_eq!(parsed.id, expected);
}
/// When both `$id` and `id` are present, `$id` is the one that is kept.
#[test]
fn dollar_id_takes_precedence_over_id() {
    let document = json!({
        "$id": "https://example.com/preferred.json",
        "id": "https://example.com/ignored.json",
        "type": "object"
    });
    let parsed = parse_schema(&document).unwrap();
    let expected = Some(String::from("https://example.com/preferred.json"));
    assert_eq!(parsed.id, expected);
}
/// An intermediate schema without `$id` gets no `id` of its own, yet the
/// root's base still reaches the leaf below it.
#[test]
fn schema_without_dollar_id_propagates_parent_base() {
    let document = json!({
        "$id": "https://example.com/root.json",
        "properties": {
            "middle": {
                "type": "object",
                "properties": {
                    "leaf": {
                        "$id": "leaf.json",
                        "type": "string"
                    }
                }
            }
        }
    });
    let parsed = parse_schema(&document).unwrap();

    let root_props = parsed.properties.as_ref().unwrap();
    let middle = root_props.get("middle").unwrap();
    assert!(middle.id.is_none(), "middle has no $id");

    let middle_props = middle.properties.as_ref().unwrap();
    let leaf = middle_props.get("leaf").unwrap();
    assert_eq!(leaf.id, Some(String::from("https://example.com/leaf.json")));
}
/// A `$ref` to a loopback URL stays unresolved (only `ref_path` is set) and
/// nothing is cached for it, even with a fetch context present.
#[test]
fn remote_ref_to_loopback_is_blocked_by_ssrf_guard() {
    let target = "http://127.0.0.1/evil.json";
    let document = json!({ "$ref": target });
    let mut cache = SchemaCache::new();
    let mut ctx = ParseContext::new(&mut cache, None);
    let parsed = parse_schema_with_root(&document, &document, None, Some(&mut ctx), 0).unwrap();
    assert_eq!(parsed.ref_path.as_deref(), Some(target));
    assert!(cache.get(target).is_none());
}
/// A relative `$ref` under a loopback base URI stays unresolved, and the
/// joined loopback URL never appears in the cache.
#[test]
fn relative_ref_resolved_against_base_uri_before_ssrf_check() {
    let document = json!({ "$ref": "evil.json" });
    let mut cache = SchemaCache::new();
    let parsed = {
        // Scope the context so the cache can be inspected afterwards.
        let mut ctx = ParseContext::new(&mut cache, None);
        parse_schema_with_root(
            &document,
            &document,
            Some("http://127.0.0.1/"),
            Some(&mut ctx),
            0,
        )
        .unwrap()
    };
    assert_eq!(parsed.ref_path.as_deref(), Some("evil.json"));
    assert!(cache.get("http://127.0.0.1/evil.json").is_none());
}
/// Two cached documents that reference each other must not make
/// `resolve_ref` loop forever. The result is ignored; returning is the test.
#[test]
fn circular_remote_refs_are_deduplicated() {
    let url_a = "https://example.com/a.json";
    let url_b = "https://example.com/b.json";
    let json_a = json!({ "$ref": url_b });
    let json_b = json!({ "$ref": url_a });

    // Pre-populate the cache with both documents.
    let mut cache = SchemaCache::new();
    let schema_a = parse_schema(&json_a).unwrap();
    cache.insert(url_a.to_string(), json_a.clone(), schema_a);
    let schema_b = parse_schema(&json_b).unwrap();
    cache.insert(url_b.to_string(), json_b, schema_b);

    let mut ctx = ParseContext::new(&mut cache, None);
    let _ = resolve_ref(url_a, &json_a, None, Some(&mut ctx), 0);
}
/// `try_visit` accepts `MAX_REMOTE_FETCH_COUNT` distinct URLs and refuses the
/// next one.
#[test]
fn breadth_fan_out_stops_at_max_fetch_count() {
    let mut cache = SchemaCache::new();
    let mut ctx = ParseContext::new(&mut cache, None);
    for i in 0..MAX_REMOTE_FETCH_COUNT {
        let accepted = ctx.try_visit(&format!("https://example.com/schema{i}.json"));
        assert!(accepted, "should accept visit {i}");
    }
    let overflow_accepted = ctx.try_visit("https://example.com/one-too-many.json");
    assert!(
        !overflow_accepted,
        "visit beyond MAX_REMOTE_FETCH_COUNT must be rejected"
    );
}
// Serves a `text/html` response from a local server and checks that a
// content-type test for JSON classifies it as non-JSON.
// NOTE(review): the body never calls a fetch function from this module; the
// `is_json` expression below is a local copy of the check, presumably
// mirroring the production guard — confirm it stays in sync.
#[test]
fn fetch_schema_rejects_non_json_content_type() {
// Bind to port 0 and read back the address the server actually got.
let server = tiny_http::Server::http("127.0.0.1:0").unwrap();
let addr = server.server_addr().to_ip().unwrap();
let url = format!("http://{addr}/schema.json");
// The background thread answers exactly one request with an HTML body.
std::thread::spawn(move || {
if let Ok(req) = server.recv() {
let ct = tiny_http::Header::from_bytes(b"Content-Type", b"text/html").unwrap();
let response =
tiny_http::Response::from_string("<html>not json</html>").with_header(ct);
let _ = req.respond(response);
}
});
let agent = build_agent(None);
let response = agent.get(&url).call().expect("request should succeed");
// A missing or non-UTF-8 header is treated as an empty content type.
let content_type = response
.headers()
.get("content-type")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
assert!(
!content_type.contains("application/json"),
"server returned non-JSON content type: {content_type}"
);
// Locally re-implemented acceptance rule (see the note at the top).
let is_json = content_type.contains("application/json")
|| content_type.contains("application/schema");
assert!(!is_json, "guard should reject this content type");
}
#[test]
fn dollar_id_spoofing_cannot_overwrite_cache_entry() {
let mut cache = SchemaCache::new();
cache.insert(
"https://json-schema.org/draft/2020-12/schema".to_string(),
Value::Null,
JsonSchema {
description: Some("legitimate".to_string()),
..JsonSchema::default()
},
);
cache.insert(
"https://json-schema.org/draft/2020-12/schema".to_string(),
Value::Null,
JsonSchema {
description: Some("malicious".to_string()),
..JsonSchema::default()
},
);
let cached = cache
.get("https://json-schema.org/draft/2020-12/schema")
.unwrap();
assert_eq!(
cached.description.as_deref(),
Some("legitimate"),
"first-write-wins must prevent $id spoofing overwrite"
);
}
/// A `$ref` with a `file://` URL stays unresolved (only `ref_path` is set)
/// and nothing is cached for it.
#[test]
fn file_scheme_ref_is_blocked_by_ssrf_guard() {
    let target = "file:///etc/passwd";
    let document = json!({ "$ref": target });
    let mut cache = SchemaCache::new();
    let mut ctx = ParseContext::new(&mut cache, None);
    let parsed = parse_schema_with_root(&document, &document, None, Some(&mut ctx), 0).unwrap();
    assert_eq!(parsed.ref_path.as_deref(), Some(target));
    assert!(cache.get(target).is_none());
}
/// An absolute remote `$ref` resolves to the schema already cached under
/// that URL.
#[test]
fn remote_ref_absolute_url_resolves_to_fetched_schema() {
    let ref_url = "https://example.com/other.json";
    let remote_value: Value = json!({ "type": "string", "description": "remote schema" });
    let remote_schema = parse_schema(&remote_value).unwrap();
    // Seed the cache with the remote document under its URL.
    let mut cache = SchemaCache::new();
    cache.insert(ref_url.to_string(), remote_value, remote_schema);
    let root = json!({});
    let mut ctx = ParseContext::new(&mut cache, None);
    // `expect` replaces the former `assert!(is_some())` + `unwrap()` pair.
    let s = resolve_ref(ref_url, &root, None, Some(&mut ctx), 0)
        .expect("remote ref should resolve from cache");
    assert_eq!(s.description.as_deref(), Some("remote schema"));
}
/// A remote `$ref` with a JSON-pointer fragment resolves to the pointed-at
/// definition inside the cached remote document.
#[test]
fn remote_ref_with_fragment_navigates_fetched_document() {
    let remote_value = json!({
        "definitions": {
            "Address": {
                "type": "object",
                "description": "an address"
            }
        }
    });
    let remote_schema = parse_schema(&remote_value).unwrap();
    // Seed the cache with the remote document under its URL.
    let mut cache = SchemaCache::new();
    let url = "https://example.com/types.json".to_string();
    cache.insert(url.clone(), remote_value, remote_schema);
    let root = json!({});
    let ref_str = format!("{url}#/definitions/Address");
    let mut ctx = ParseContext::new(&mut cache, None);
    // `expect` replaces the former `assert!(is_some())` + `unwrap()` pair.
    let s = resolve_ref(&ref_str, &root, None, Some(&mut ctx), 0)
        .expect("fragment ref into remote doc should resolve");
    assert_eq!(s.description.as_deref(), Some("an address"));
}
/// A remote `$ref` nested under `properties` resolves through the context's
/// cache while the enclosing schema is parsed.
#[test]
fn remote_ref_inside_properties_resolves_via_ctx() {
    let ref_url = "https://example.com/address.json";
    let remote_value: Value = json!({ "type": "object", "description": "an address" });
    let remote_schema = parse_schema(&remote_value).unwrap();
    // Seed the cache with the remote document under its URL.
    let mut cache = SchemaCache::new();
    cache.insert(ref_url.to_string(), remote_value, remote_schema);
    let root = json!({
        "type": "object",
        "properties": {
            "address": { "$ref": ref_url }
        }
    });
    let mut ctx = ParseContext::new(&mut cache, None);
    // `expect` replaces the former `assert!(is_some())` + `unwrap()` pair.
    let schema = parse_schema_with_root(&root, &root, None, Some(&mut ctx), 0)
        .expect("outer schema should parse");
    let props = schema
        .properties
        .as_ref()
        .expect("properties should be present");
    let address = props
        .get("address")
        .expect("address property should be present");
    assert_eq!(
        address.description.as_deref(),
        Some("an address"),
        "address property should be resolved from remote cache"
    );
}
/// `TooManyRemoteFetches` mentions the remote fetch count in its message.
#[test]
fn schema_error_display_too_many_remote_fetches() {
    let msg = SchemaError::TooManyRemoteFetches.to_string();
    assert!(msg.contains("Remote fetch count"), "got: {msg}");
}
/// The `Display` output for `SchemaError::UnexpectedContentType` must contain
/// both the fixed prefix and the offending content type.
#[test]
fn schema_error_display_unexpected_content_type() {
    let msg = SchemaError::UnexpectedContentType("text/html".to_string()).to_string();
    // Checked in the same order as before: prefix first, then the payload.
    for expected_fragment in ["Unexpected content type", "text/html"] {
        assert!(msg.contains(expected_fragment), "got: {msg}");
    }
}
/// Parsing a body that carries its own URL, with a parse context, must
/// resolve a remote `$ref` property from the cache.
///
/// NOTE(review): despite the name, this test drives `parse_schema_with_root`
/// directly rather than `fetch_schema_raw` — confirm that is intended.
#[test]
fn fetch_schema_raw_ctx_resolves_remote_ref_in_fetched_body() {
    let address_url = "https://example.com/address.json";
    let document_url = "https://example.com/schema.json";

    let address_doc: Value = json!({ "type": "object", "description": "an address" });
    let address_schema = parse_schema(&address_doc).unwrap();
    let mut cache = SchemaCache::new();
    cache.insert(address_url.to_string(), address_doc, address_schema);

    let document: Value = json!({
        "type": "object",
        "properties": {
            "home": { "$ref": address_url }
        }
    });
    let mut ctx = ParseContext::new(&mut cache, None);
    let parsed = parse_schema_with_root(
        &document,
        &document,
        Some(document_url),
        Some(&mut ctx),
        0,
    )
    .expect("should parse top-level schema");

    let home = parsed
        .properties
        .as_ref()
        .expect("should have properties")
        .get("home")
        .expect("should have home property");
    assert_eq!(
        home.description.as_deref(),
        Some("an address"),
        "remote $ref should resolve to the cached schema"
    );

    // The entry was inserted above; this checks it is still present afterwards.
    assert!(
        cache.get(address_url).is_some(),
        "ctx cache should contain the resolved $ref target"
    );
}
/// When parsed through plain `parse_schema`, a remote `$ref` property must
/// keep only its `ref_path` and carry none of the remote schema's description.
#[test]
fn fetch_schema_raw_no_ctx_leaves_remote_ref_unresolved() {
    let address_url = "https://example.com/address.json";
    let document: Value = json!({
        "type": "object",
        "properties": {
            "home": { "$ref": address_url }
        }
    });

    let parsed = parse_schema(&document).expect("should parse");
    let home = parsed
        .properties
        .as_ref()
        .expect("should have properties")
        .get("home")
        .expect("should have home property");

    assert_eq!(
        home.ref_path.as_deref(),
        Some(address_url),
        "without ctx the $ref is unresolved, only ref_path is set"
    );
    assert!(
        home.description.is_none(),
        "without ctx the remote schema description should not be present"
    );
}
/// NUL, SOH, US and DEL appended to a content type must be removed by
/// `sanitize_content_type`.
#[test]
fn sanitize_content_type_strips_control_characters() {
    let sanitized = sanitize_content_type("text/html\x00\x01\x1f\x7f");
    assert_eq!(sanitized, "text/html", "control chars must be stripped");
}
/// A 300-character ASCII input must come back with a length of exactly 256.
#[test]
fn sanitize_content_type_truncates_at_256_chars() {
    let oversized = "a".repeat(300);
    assert_eq!(
        sanitize_content_type(&oversized).len(),
        256,
        "result must be truncated to 256 chars"
    );
}
/// A short, fully printable content type must pass through
/// `sanitize_content_type` unchanged.
#[test]
fn sanitize_content_type_preserves_printable_content() {
    let printable = "application/json; charset=utf-8";
    assert_eq!(
        sanitize_content_type(printable),
        printable,
        "printable content must be preserved"
    );
}
/// Serves one response with an oversized `Content-Type` header from a local
/// `tiny_http` server, fetches it with the agent from `build_agent`, and
/// checks that `sanitize_content_type` bounds the received header to at most
/// 256 bytes.
///
/// NOTE(review): the test name mentions `fetch_schema_raw`, but only the
/// agent and `sanitize_content_type` are exercised here — confirm intent.
#[test]
fn fetch_schema_raw_sanitizes_content_type_in_error() {
    // Port 0 lets the OS choose a free port; the bound address is read back.
    let server = tiny_http::Server::http("127.0.0.1:0").unwrap();
    let addr = server.server_addr().to_ip().unwrap();
    let url = format!("http://{addr}/schema.json");

    // Answer a single request in the background, then let the thread end.
    std::thread::spawn(move || {
        if let Ok(incoming) = server.recv() {
            let header_value = format!("text/html; x={}", "a".repeat(300));
            let header =
                tiny_http::Header::from_bytes(b"Content-Type", header_value.as_bytes()).unwrap();
            let reply = tiny_http::Response::from_string("not json").with_header(header);
            let _ = incoming.respond(reply);
        }
    });

    let agent = build_agent(None);
    let response = agent.get(&url).call().expect("request should succeed");

    // A missing or non-textual header is treated as an empty string.
    let received = match response.headers().get("content-type") {
        Some(value) => value.to_str().unwrap_or(""),
        None => "",
    };
    assert!(
        received.len() > 256,
        "server must have sent an oversized Content-Type"
    );

    let bounded = sanitize_content_type(received);
    assert!(
        bounded.len() <= 256,
        "sanitized Content-Type must be truncated to ≤256 chars"
    );
}
}