use std::collections::HashMap;
use serde_json::Value;
use tower_lsp::lsp_types::Url;
/// Largest response body, in bytes, that `fetch_schema` accepts (5 MiB); a
/// longer body fails with `SchemaError::ResponseTooLarge`.
pub(crate) const MAX_SCHEMA_BYTES: u64 = 5 * 1024 * 1024;
/// Longest raw URL, in bytes, that `validate_and_normalize_url` accepts.
const MAX_URL_LENGTH: usize = 2048;
/// Deepest JSON nesting level `check_json_depth` tolerates before returning
/// `SchemaError::TooDeep`.
const MAX_JSON_DEPTH: usize = 50;
/// Recursion cap used by `parse_schema_with_root` and `resolve_ref`; anything
/// nested or referenced beyond this depth is not parsed.
const MAX_REF_DEPTH: usize = 32;
/// Failure modes of validating a schema URL, downloading it, and parsing the
/// response into a [`JsonSchema`].
#[derive(Debug)]
pub enum SchemaError {
    /// The URL was rejected before any request was made; the payload is the reason.
    UrlNotPermitted(String),
    /// The HTTP request or the body read failed; the payload is the underlying error text.
    FetchFailed(String),
    /// The response body was larger than `MAX_SCHEMA_BYTES`.
    ResponseTooLarge,
    /// The body was not valid JSON, or was JSON that is not a usable schema.
    ParseFailed(String),
    /// The JSON document was nested deeper than `MAX_JSON_DEPTH`.
    TooDeep,
}

impl std::fmt::Display for SchemaError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::UrlNotPermitted(u) => write!(f, "URL not permitted: {u}"),
            Self::FetchFailed(e) => write!(f, "fetch failed: {e}"),
            Self::ResponseTooLarge => write!(f, "schema response exceeded size limit"),
            Self::ParseFailed(e) => write!(f, "schema parse failed: {e}"),
            Self::TooDeep => write!(f, "schema nesting depth exceeded limit"),
        }
    }
}

// Implementing `Error` lets callers box this type as `dyn Error` and use `?`
// with generic error containers; no variant wraps a source error, so the
// default method implementations are sufficient.
impl std::error::Error for SchemaError {}
/// Value of a schema's `type` keyword as produced by `parse_type`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaType {
/// `type` was a single string, e.g. `"object"`.
Single(String),
/// `type` was an array; only its string entries are kept.
Multiple(Vec<String>),
}
/// Parsed form of the `additionalProperties` keyword.
#[derive(Debug, Clone)]
pub enum AdditionalProperties {
/// `additionalProperties: false`.
Denied,
/// Any other value that parsed as a schema (including `true`, which parses
/// to an empty, permissive schema).
Schema(Box<JsonSchema>),
}
/// The subset of JSON Schema keywords this module understands. Every field is
/// `None` when the keyword is absent or has an unexpected JSON type.
#[derive(Debug, Clone, Default)]
pub struct JsonSchema {
/// `type` keyword.
pub schema_type: Option<SchemaType>,
/// `title` keyword.
pub title: Option<String>,
/// `description` keyword.
pub description: Option<String>,
/// `properties` keyword: property name to sub-schema.
pub properties: Option<HashMap<String, Self>>,
/// `required` keyword; non-string entries are dropped by the parser.
pub required: Option<Vec<String>>,
/// `enum` keyword, kept as raw JSON values.
pub enum_values: Option<Vec<Value>>,
/// `default` keyword, kept as a raw JSON value.
pub default: Option<Value>,
/// `examples` keyword, kept as raw JSON values.
pub examples: Option<Vec<Value>>,
/// `items` keyword (single-schema form).
pub items: Option<Box<Self>>,
/// `additionalProperties` keyword.
pub additional_properties: Option<AdditionalProperties>,
/// `allOf` keyword.
pub all_of: Option<Vec<Self>>,
/// `anyOf` keyword.
pub any_of: Option<Vec<Self>>,
/// `oneOf` keyword.
pub one_of: Option<Vec<Self>>,
/// Raw `$ref` string. The parser sets this only when the reference could not
/// be resolved inside the same document; resolved references are replaced by
/// their target schema instead.
pub ref_path: Option<String>,
/// `pattern` keyword (stored as written; not compiled here).
pub pattern: Option<String>,
/// `minimum` keyword.
pub minimum: Option<f64>,
/// `maximum` keyword.
pub maximum: Option<f64>,
/// `minLength` keyword.
pub min_length: Option<u64>,
/// `maxLength` keyword.
pub max_length: Option<u64>,
/// Union of the `definitions` and `$defs` maps; when both define the same
/// name, the `$defs` entry replaces the `definitions` one.
pub definitions: Option<HashMap<String, Self>>,
/// `deprecated` keyword.
pub deprecated: Option<bool>,
}
/// Links a filename glob to the schema that applies to matching files; used by
/// `match_schema_by_filename`.
#[derive(Debug, Clone)]
pub struct SchemaAssociation {
/// Glob matched against the filename (`*` stays within a path segment,
/// `**` may cross `/`).
pub pattern: String,
/// Schema URL returned when `pattern` matches.
pub url: String,
}
/// In-memory cache of parsed schemas keyed by the URL string supplied by the caller.
#[derive(Debug, Default)]
pub struct SchemaCache {
// Keys are stored exactly as passed in; this type does not normalize them.
inner: HashMap<String, JsonSchema>,
}
impl SchemaCache {
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
pub fn get(&self, url: &str) -> Option<&JsonSchema> {
self.inner.get(url)
}
pub fn insert(&mut self, url: String, schema: JsonSchema) {
self.inner.entry(url).or_insert(schema);
}
pub fn get_or_fetch(&mut self, url: &str) -> Result<&JsonSchema, SchemaError> {
if !self.inner.contains_key(url) {
let schema = fetch_schema(url)?;
self.inner.insert(url.to_string(), schema);
}
Ok(self.inner.get(url).expect("just inserted"))
}
}
/// Checks that `raw` is a schema URL this module is willing to fetch and
/// returns its normalized string form (as serialized by `Url`).
///
/// # Errors
///
/// Returns [`SchemaError::UrlNotPermitted`] when `raw` is longer than
/// `MAX_URL_LENGTH`, does not parse as a URL, uses a scheme other than
/// `http`/`https`, or names a host rejected by `is_ssrf_blocked_host`.
pub fn validate_and_normalize_url(raw: &str) -> Result<String, SchemaError> {
    let denied = SchemaError::UrlNotPermitted;

    if raw.len() > MAX_URL_LENGTH {
        return Err(denied("URL exceeds maximum length".to_string()));
    }

    let parsed = match Url::parse(raw) {
        Ok(parsed) => parsed,
        Err(e) => return Err(denied(format!("invalid URL: {e}"))),
    };

    let scheme = parsed.scheme();
    if !matches!(scheme, "http" | "https") {
        return Err(denied(format!("scheme '{scheme}' is not permitted")));
    }

    // Only a host that is present *and* blocked causes a rejection.
    if let Some(host) = parsed.host_str().filter(|h| is_ssrf_blocked_host(h)) {
        return Err(denied(format!("host '{host}' is not permitted")));
    }

    Ok(parsed.to_string())
}
/// Reports whether `host` (a URL host component, IPv6 literals optionally in
/// brackets) points at the local machine or a non-public network and must
/// therefore not be fetched.
///
/// Blocked:
/// - `localhost`, any `*.localhost` name, with or without a trailing dot;
/// - IPv4 loopback, link-local, private (RFC 1918) and unspecified addresses;
/// - IPv6 loopback, unspecified, link-local (`fe80::/10`) and unique-local
///   (`fc00::/7`) addresses;
/// - IPv4-mapped IPv6 addresses (`::ffff:a.b.c.d`) whose embedded IPv4 address
///   is blocked.
///
/// Only literal hosts are inspected: a public-looking DNS name that resolves
/// to an internal address is not detected here.
fn is_ssrf_blocked_host(host: &str) -> bool {
    use std::net::{IpAddr, Ipv4Addr};

    let blocked_v4 = |v4: Ipv4Addr| {
        v4.is_loopback() || v4.is_link_local() || v4.is_private() || v4.is_unspecified()
    };

    // A fully-qualified name ("localhost.") resolves the same as the bare one.
    let name = host.strip_suffix('.').unwrap_or(host);
    if name.eq_ignore_ascii_case("localhost") {
        return true;
    }
    // RFC 6761 reserves every name under ".localhost" for loopback.
    const LOCALHOST_SUFFIX: &[u8] = b".localhost";
    let name_bytes = name.as_bytes();
    if let Some(start) = name_bytes.len().checked_sub(LOCALHOST_SUFFIX.len()) {
        let is_subdomain = name_bytes
            .get(start..)
            .is_some_and(|tail| tail.eq_ignore_ascii_case(LOCALHOST_SUFFIX));
        if is_subdomain {
            return true;
        }
    }

    let bare = host
        .strip_prefix('[')
        .and_then(|s| s.strip_suffix(']'))
        .unwrap_or(host);

    match bare.parse::<IpAddr>() {
        Ok(IpAddr::V4(v4)) => blocked_v4(v4),
        Ok(IpAddr::V6(v6)) => {
            // `::ffff:127.0.0.1` reaches the same socket as `127.0.0.1`.
            if let Some(mapped) = v6.to_ipv4_mapped() {
                return blocked_v4(mapped);
            }
            let [head, ..] = v6.segments();
            let link_local = (head & 0xffc0) == 0xfe80;
            let unique_local = (head & 0xfe00) == 0xfc00;
            v6.is_loopback() || v6.is_unspecified() || link_local || unique_local
        }
        Err(_) => false,
    }
}
/// Downloads the JSON Schema at `url` and parses it.
///
/// The URL is validated first, and the request is issued against the
/// *normalized* form returned by `validate_and_normalize_url`, so the string
/// that was checked is the string that is fetched. Redirects are not followed,
/// the whole call is bounded by a timeout, and at most `MAX_SCHEMA_BYTES` of
/// body are accepted.
///
/// # Errors
///
/// - [`SchemaError::UrlNotPermitted`] if validation rejects `url`.
/// - [`SchemaError::FetchFailed`] if the request or the body read fails
///   (including a timeout).
/// - [`SchemaError::ResponseTooLarge`] if the body exceeds `MAX_SCHEMA_BYTES`.
/// - [`SchemaError::ParseFailed`] if the body is not JSON or not a schema.
/// - [`SchemaError::TooDeep`] if the JSON nests deeper than `MAX_JSON_DEPTH`.
pub fn fetch_schema(url: &str) -> Result<JsonSchema, SchemaError> {
    use std::io::Read as _;

    // Without an overall timeout an unresponsive server would block the caller forever.
    const FETCH_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);

    let normalized = validate_and_normalize_url(url)?;
    let agent = ureq::Agent::config_builder()
        .max_redirects(0)
        .timeout_global(Some(FETCH_TIMEOUT))
        .build()
        .new_agent();
    let response = agent
        .get(normalized.as_str())
        .call()
        .map_err(|e| SchemaError::FetchFailed(e.to_string()))?;

    // Read one byte past the limit so "exactly at the limit" and "over the
    // limit" can be told apart without buffering an unbounded body.
    let mut limited = response
        .into_body()
        .into_reader()
        .take(MAX_SCHEMA_BYTES + 1);
    let mut buf = Vec::new();
    limited
        .read_to_end(&mut buf)
        .map_err(|e| SchemaError::FetchFailed(e.to_string()))?;
    if buf.len() as u64 > MAX_SCHEMA_BYTES {
        return Err(SchemaError::ResponseTooLarge);
    }

    let value: Value =
        serde_json::from_slice(&buf).map_err(|e| SchemaError::ParseFailed(e.to_string()))?;
    check_json_depth(&value, 0)?;
    parse_schema(&value).ok_or_else(|| SchemaError::ParseFailed("not a JSON Schema".to_string()))
}
/// Verifies that no node of `value` sits deeper than `MAX_JSON_DEPTH`, where
/// `value` itself is at level `depth` and each object/array adds one level.
///
/// # Errors
///
/// Returns [`SchemaError::TooDeep`] as soon as a node beyond the limit is seen.
fn check_json_depth(value: &Value, depth: usize) -> Result<(), SchemaError> {
    // Explicit work list of (node, level) pairs instead of recursion.
    let mut pending: Vec<(&Value, usize)> = vec![(value, depth)];

    while let Some((node, level)) = pending.pop() {
        if level > MAX_JSON_DEPTH {
            return Err(SchemaError::TooDeep);
        }
        match node {
            Value::Object(members) => {
                pending.extend(members.values().map(|child| (child, level + 1)));
            }
            Value::Array(elements) => {
                pending.extend(elements.iter().map(|child| (child, level + 1)));
            }
            Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_) => {}
        }
    }

    Ok(())
}
/// Parses `value` as a JSON Schema document, using `value` itself as the root
/// against which local `$ref` pointers are resolved.
///
/// Returns `None` when `value` is neither an object nor the boolean `true`.
#[must_use]
pub fn parse_schema(value: &Value) -> Option<JsonSchema> {
    let (root, start_depth) = (value, 0);
    parse_schema_with_root(value, root, start_depth)
}
fn parse_schema_with_root(value: &Value, root: &Value, depth: usize) -> Option<JsonSchema> {
if depth > MAX_REF_DEPTH {
return None;
}
match value {
Value::Bool(true) => return Some(JsonSchema::default()),
Value::Bool(false)
| Value::Null
| Value::Number(_)
| Value::String(_)
| Value::Array(_) => {
return None;
}
Value::Object(_) => {}
}
let obj = value.as_object()?;
let mut schema = JsonSchema::default();
if let Some(Value::String(ref_str)) = obj.get("$ref") {
schema.ref_path = Some(ref_str.clone());
if let Some(resolved) = resolve_ref(ref_str, root, depth + 1) {
return Some(resolved);
}
return Some(schema);
}
schema.schema_type = parse_type(obj.get("type"));
schema.title = string_field(obj, "title");
schema.description = string_field(obj, "description");
schema.pattern = string_field(obj, "pattern");
schema.deprecated = obj.get("deprecated").and_then(Value::as_bool);
schema.minimum = obj.get("minimum").and_then(Value::as_f64);
schema.maximum = obj.get("maximum").and_then(Value::as_f64);
schema.min_length = obj.get("minLength").and_then(Value::as_u64);
schema.max_length = obj.get("maxLength").and_then(Value::as_u64);
schema.default = obj.get("default").cloned();
schema.examples = obj.get("examples").and_then(Value::as_array).cloned();
schema.enum_values = obj.get("enum").and_then(Value::as_array).cloned();
schema.required = obj.get("required").and_then(Value::as_array).map(|arr| {
arr.iter()
.filter_map(|v| v.as_str().map(String::from))
.collect()
});
schema.properties = obj.get("properties").and_then(Value::as_object).map(|map| {
map.iter()
.filter_map(|(k, v)| parse_schema_with_root(v, root, depth + 1).map(|s| (k.clone(), s)))
.collect()
});
schema.items = obj
.get("items")
.and_then(|v| parse_schema_with_root(v, root, depth + 1))
.map(Box::new);
schema.additional_properties =
parse_additional_properties(obj.get("additionalProperties"), root, depth);
schema.all_of = parse_schema_array(obj.get("allOf"), root, depth);
schema.any_of = parse_schema_array(obj.get("anyOf"), root, depth);
schema.one_of = parse_schema_array(obj.get("oneOf"), root, depth);
let defs_04 = parse_definitions(obj.get("definitions"), root, depth);
let defs_07 = parse_definitions(obj.get("$defs"), root, depth);
schema.definitions = match (defs_04, defs_07) {
(Some(mut a), Some(b)) => {
a.extend(b);
Some(a)
}
(a, b) => a.or(b),
};
Some(schema)
}
/// Interprets the `type` keyword: a string becomes [`SchemaType::Single`], an
/// array becomes [`SchemaType::Multiple`] of its string entries. Returns
/// `None` when the keyword is missing, has another JSON type, or is an array
/// without any string entry.
fn parse_type(value: Option<&Value>) -> Option<SchemaType> {
    let entries = match value? {
        Value::String(single) => return Some(SchemaType::Single(single.clone())),
        Value::Array(entries) => entries,
        Value::Null | Value::Bool(_) | Value::Number(_) | Value::Object(_) => return None,
    };

    let names: Vec<String> = entries
        .iter()
        .filter_map(Value::as_str)
        .map(String::from)
        .collect();

    if names.is_empty() {
        return None;
    }
    Some(SchemaType::Multiple(names))
}
fn string_field(obj: &serde_json::Map<String, Value>, key: &str) -> Option<String> {
obj.get(key)?.as_str().map(String::from)
}
/// Interprets the `additionalProperties` keyword.
///
/// `false` maps to [`AdditionalProperties::Denied`]; any other value is parsed
/// as a sub-schema one level below `depth` and wrapped in
/// [`AdditionalProperties::Schema`]. Returns `None` when the keyword is absent
/// or the value does not parse as a schema.
fn parse_additional_properties(
    value: Option<&Value>,
    root: &Value,
    depth: usize,
) -> Option<AdditionalProperties> {
    let raw = value?;
    if matches!(raw, Value::Bool(false)) {
        return Some(AdditionalProperties::Denied);
    }
    let schema = parse_schema_with_root(raw, root, depth + 1)?;
    Some(AdditionalProperties::Schema(Box::new(schema)))
}
fn parse_schema_array(
value: Option<&Value>,
root: &Value,
depth: usize,
) -> Option<Vec<JsonSchema>> {
let arr = value?.as_array()?;
let schemas: Vec<JsonSchema> = arr
.iter()
.filter_map(|v| parse_schema_with_root(v, root, depth + 1))
.collect();
if schemas.is_empty() {
None
} else {
Some(schemas)
}
}
fn parse_definitions(
value: Option<&Value>,
root: &Value,
depth: usize,
) -> Option<HashMap<String, JsonSchema>> {
let map = value?.as_object()?;
let result: HashMap<String, JsonSchema> = map
.iter()
.filter_map(|(k, v)| parse_schema_with_root(v, root, depth + 1).map(|s| (k.clone(), s)))
.collect();
if result.is_empty() {
None
} else {
Some(result)
}
}
/// Resolves a same-document reference (`#` or `#/json/pointer`) against `root`
/// and parses the target.
///
/// The fragment is first looked up verbatim. If that finds nothing and the
/// fragment contains percent-escapes, it is percent-decoded and looked up
/// again, because a JSON Pointer inside a URI fragment is percent-encoded
/// (e.g. `#/definitions/my%20type` addresses the key `my type`).
///
/// Returns `None` when `depth` exceeds `MAX_REF_DEPTH`, when `ref_str` does
/// not start with `#` (remote references are not followed), or when the
/// pointer has no target.
fn resolve_ref(ref_str: &str, root: &Value, depth: usize) -> Option<JsonSchema> {
    if depth > MAX_REF_DEPTH {
        return None;
    }
    let pointer = ref_str.strip_prefix('#')?;
    let target = if pointer.is_empty() {
        root
    } else {
        match root.pointer(pointer) {
            Some(found) => found,
            // Verbatim lookup wins, so previously working references resolve
            // exactly as before.
            None => root.pointer(&percent_decode_fragment(pointer)?)?,
        }
    };
    parse_schema_with_root(target, root, depth + 1)
}

/// Percent-decodes a URI fragment. Returns `None` when there is nothing to
/// decode, an escape is malformed, or the decoded bytes are not valid UTF-8.
fn percent_decode_fragment(fragment: &str) -> Option<String> {
    if !fragment.contains('%') {
        return None;
    }
    let hex_value = |byte: u8| {
        char::from(byte)
            .to_digit(16)
            .and_then(|digit| u8::try_from(digit).ok())
    };

    let mut decoded = Vec::with_capacity(fragment.len());
    let mut bytes = fragment.bytes();
    while let Some(byte) = bytes.next() {
        if byte == b'%' {
            let high = bytes.next().and_then(hex_value)?;
            let low = bytes.next().and_then(hex_value)?;
            decoded.push((high << 4) | low);
        } else {
            decoded.push(byte);
        }
    }
    String::from_utf8(decoded).ok()
}
/// Finds a `# yaml-language-server: $schema=<value>` modeline within the first
/// ten lines of `text` and returns the trimmed `<value>`.
///
/// Lines are trimmed before the prefix is compared. A modeline with an empty
/// value is skipped and the search continues. The value is returned as
/// written; it is not validated as a URL.
#[must_use]
pub fn extract_schema_url(text: &str) -> Option<String> {
    const PREFIX: &str = "# yaml-language-server: $schema=";

    text.lines().take(10).find_map(|line| {
        let value = line.trim().strip_prefix(PREFIX)?.trim();
        if value.is_empty() {
            None
        } else {
            Some(value.to_string())
        }
    })
}
/// Reads the comma-separated tag list from the first
/// `# yaml-language-server: $tags=<list>` modeline within the first ten lines
/// of `text`.
///
/// Each tag is trimmed and empty entries are dropped. Only the first matching
/// modeline is considered, even if its list is empty. Returns an empty vector
/// when there is no such modeline.
#[must_use]
pub fn extract_custom_tags(text: &str) -> Vec<String> {
    const PREFIX: &str = "# yaml-language-server: $tags=";

    let first_modeline = text
        .lines()
        .take(10)
        .find_map(|line| line.trim().strip_prefix(PREFIX));

    match first_modeline {
        Some(list) => list
            .split(',')
            .map(str::trim)
            .filter(|tag| !tag.is_empty())
            .map(str::to_string)
            .collect(),
        None => Vec::new(),
    }
}
/// Returns the schema URL of the first association whose glob pattern matches
/// `filename`, or `None` when no pattern matches. Associations are tried in
/// slice order.
#[must_use]
pub fn match_schema_by_filename(
    filename: &str,
    associations: &[SchemaAssociation],
) -> Option<String> {
    for association in associations {
        if glob_matches(&association.pattern, filename) {
            return Some(association.url.clone());
        }
    }
    None
}
/// Tests whether the whole of `text` matches the glob `pattern`; matching is
/// done byte-wise by `glob_matches_inner`.
fn glob_matches(pattern: &str, text: &str) -> bool {
    let (pattern_bytes, text_bytes) = (pattern.as_bytes(), text.as_bytes());
    glob_matches_inner(pattern_bytes, text_bytes)
}
/// Recursive byte-wise glob matcher; returns `true` when `pattern` matches all
/// of `text`.
///
/// Supported syntax:
/// - `*` matches any run of bytes that does not contain `/`;
/// - `**/` matches zero or more whole path segments, so whatever follows it
///   must start at the beginning of `text` or right after a `/`;
/// - `**` not followed by `/` matches any run of bytes, including `/`;
/// - every other byte matches itself.
///
/// The `**/` case is anchored to segment boundaries so that `**/foo.yaml`
/// matches `foo.yaml` and `a/foo.yaml` but not `barfoo.yaml`.
fn glob_matches_inner(pattern: &[u8], text: &[u8]) -> bool {
    match (pattern.first(), text.first()) {
        (None, None) => true,
        (Some(&b'*'), _) if pattern.get(1) == Some(&b'*') => {
            let after_stars = pattern.get(2..).unwrap_or(&[]);
            let (rest_pattern, anchored) = match after_stars.strip_prefix(b"/") {
                Some(rest) => (rest, true),
                None => (after_stars, false),
            };
            (0..=text.len()).any(|i| {
                // Offset 0 and every offset directly after a '/' start a segment.
                let at_segment_start = i
                    .checked_sub(1)
                    .map_or(true, |prev| text.get(prev) == Some(&b'/'));
                (at_segment_start || !anchored)
                    && glob_matches_inner(rest_pattern, text.get(i..).unwrap_or(&[]))
            })
        }
        (Some(&b'*'), _) => {
            let rest_pattern = pattern.get(1..).unwrap_or(&[]);
            (0..=text.len()).any(|i| {
                // A single star may only swallow bytes within one segment.
                text.get(..i).is_some_and(|prefix| !prefix.contains(&b'/'))
                    && glob_matches_inner(rest_pattern, text.get(i..).unwrap_or(&[]))
            })
        }
        (Some(&pc), Some(&tc)) => {
            pc == tc
                && glob_matches_inner(
                    pattern.get(1..).unwrap_or(&[]),
                    text.get(1..).unwrap_or(&[]),
                )
        }
        (None, Some(_)) | (Some(_), None) => false,
    }
}
// Unit tests for modeline extraction, glob association, schema parsing, the
// cache, URL validation and the size/depth guards. No test performs a real
// network request: the `fetch_schema` tests use loopback URLs that are
// rejected during validation.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Returns the single `type` string of `s`, or `None` for missing/multiple types.
    fn schema_type_str(s: &JsonSchema) -> Option<&str> {
        match s.schema_type.as_ref()? {
            SchemaType::Single(t) => Some(t.as_str()),
            SchemaType::Multiple(_) => None,
        }
    }

    #[test]
    fn should_extract_url_from_modeline_on_first_line() {
        let text = "# yaml-language-server: $schema=https://example.com/schema.json\nkey: value\n";
        assert_eq!(
            extract_schema_url(text),
            Some("https://example.com/schema.json".to_string())
        );
    }

    #[test]
    fn should_extract_url_from_modeline_on_second_line() {
        let text = "key: value\n# yaml-language-server: $schema=https://example.com/schema.json\n";
        assert_eq!(
            extract_schema_url(text),
            Some("https://example.com/schema.json".to_string())
        );
    }

    #[test]
    fn should_extract_url_from_modeline_on_tenth_line() {
        let mut text = String::new();
        for _ in 0..9 {
            text.push_str("key: value\n");
        }
        text.push_str("# yaml-language-server: $schema=https://example.com/schema.json\n");
        assert_eq!(
            extract_schema_url(&text),
            Some("https://example.com/schema.json".to_string())
        );
    }

    #[test]
    fn should_return_none_when_modeline_beyond_tenth_line() {
        let mut text = String::new();
        for _ in 0..10 {
            text.push_str("key: value\n");
        }
        text.push_str("# yaml-language-server: $schema=https://example.com/schema.json\n");
        assert_eq!(extract_schema_url(&text), None);
    }

    #[test]
    fn should_return_none_when_no_modeline_present() {
        let text = "key: value\nother: stuff\n";
        assert_eq!(extract_schema_url(text), None);
    }

    #[test]
    fn should_return_none_for_malformed_modeline_missing_equals() {
        let text = "# yaml-language-server: $schema https://example.com/schema.json\n";
        assert_eq!(extract_schema_url(text), None);
    }

    #[test]
    fn should_return_none_for_modeline_with_wrong_prefix() {
        let text = "# yaml-ls: $schema=https://example.com/schema.json\n";
        assert_eq!(extract_schema_url(text), None);
    }

    #[test]
    fn should_handle_modeline_with_extra_leading_whitespace_in_url() {
        let text = "# yaml-language-server: $schema= https://example.com/schema.json\n";
        assert_eq!(
            extract_schema_url(text),
            Some("https://example.com/schema.json".to_string())
        );
    }

    #[test]
    fn should_extract_http_url() {
        let text = "# yaml-language-server: $schema=http://example.com/schema.json\n";
        assert_eq!(
            extract_schema_url(text),
            Some("http://example.com/schema.json".to_string())
        );
    }

    #[test]
    fn should_extract_file_url() {
        let text = "# yaml-language-server: $schema=file:///path/to/schema.json\n";
        assert_eq!(
            extract_schema_url(text),
            Some("file:///path/to/schema.json".to_string())
        );
    }

    #[test]
    fn should_return_none_for_empty_input() {
        assert_eq!(extract_schema_url(""), None);
    }

    #[test]
    fn should_extract_none_sentinel_lowercase() {
        let text = "# yaml-language-server: $schema=none\nkey: value\n";
        assert_eq!(extract_schema_url(text), Some("none".to_string()));
    }

    #[test]
    fn should_extract_none_sentinel_mixed_case() {
        let text = "# yaml-language-server: $schema=None\nkey: value\n";
        assert_eq!(extract_schema_url(text), Some("None".to_string()));
    }

    #[test]
    fn should_extract_none_sentinel_uppercase() {
        let text = "# yaml-language-server: $schema=NONE\nkey: value\n";
        assert_eq!(extract_schema_url(text), Some("NONE".to_string()));
    }

    #[test]
    fn should_extract_single_tag_from_modeline() {
        let text = "# yaml-language-server: $tags=!include\nkey: value\n";
        assert_eq!(extract_custom_tags(text), vec!["!include"]);
    }

    #[test]
    fn should_extract_multiple_tags_from_modeline() {
        let text = "# yaml-language-server: $tags=!include,!ref,!Ref\nkey: value\n";
        assert_eq!(extract_custom_tags(text), vec!["!include", "!ref", "!Ref"]);
    }

    #[test]
    fn should_trim_whitespace_around_tags() {
        let text = "# yaml-language-server: $tags= !include , !ref \nkey: value\n";
        assert_eq!(extract_custom_tags(text), vec!["!include", "!ref"]);
    }

    #[test]
    fn should_return_empty_vec_when_no_tags_modeline() {
        let text = "key: value\nother: stuff\n";
        assert_eq!(extract_custom_tags(text), Vec::<String>::new());
    }

    #[test]
    fn should_return_empty_vec_when_tags_modeline_beyond_line_10() {
        let mut text = String::new();
        for _ in 0..10 {
            text.push_str("key: value\n");
        }
        text.push_str("# yaml-language-server: $tags=!include\n");
        assert_eq!(extract_custom_tags(&text), Vec::<String>::new());
    }

    #[test]
    fn should_return_empty_vec_for_empty_input() {
        assert_eq!(extract_custom_tags(""), Vec::<String>::new());
    }

    #[test]
    fn should_extract_tags_from_modeline_on_second_line() {
        let text = "key: value\n# yaml-language-server: $tags=!include,!ref\n";
        assert_eq!(extract_custom_tags(text), vec!["!include", "!ref"]);
    }

    /// Builds a `SchemaAssociation` from string slices.
    fn assoc(pattern: &str, url: &str) -> SchemaAssociation {
        SchemaAssociation {
            pattern: pattern.to_string(),
            url: url.to_string(),
        }
    }

    #[test]
    fn should_return_url_for_exact_filename_match() {
        let associations = [assoc(
            "config.yaml",
            "https://example.com/config-schema.json",
        )];
        assert_eq!(
            match_schema_by_filename("config.yaml", &associations),
            Some("https://example.com/config-schema.json".to_string())
        );
    }

    #[test]
    fn should_return_url_for_glob_wildcard_match() {
        let associations = [assoc("*.yaml", "https://example.com/generic.json")];
        assert_eq!(
            match_schema_by_filename("myfile.yaml", &associations),
            Some("https://example.com/generic.json".to_string())
        );
    }

    #[test]
    fn should_return_url_for_double_star_glob_match() {
        let associations = [assoc(
            "configs/**/*.yaml",
            "https://example.com/schema.json",
        )];
        assert_eq!(
            match_schema_by_filename("configs/nested/file.yaml", &associations),
            Some("https://example.com/schema.json".to_string())
        );
    }

    #[test]
    fn should_return_none_when_no_association_matches() {
        let associations = [assoc("*.json", "https://example.com/schema.json")];
        assert_eq!(match_schema_by_filename("myfile.yaml", &associations), None);
    }

    #[test]
    fn should_return_none_for_empty_associations() {
        assert_eq!(match_schema_by_filename("myfile.yaml", &[]), None);
    }

    #[test]
    fn should_return_first_matching_association_when_multiple_match() {
        let associations = [
            assoc("*.yaml", "https://example.com/first.json"),
            assoc("*.yaml", "https://example.com/second.json"),
        ];
        assert_eq!(
            match_schema_by_filename("test.yaml", &associations),
            Some("https://example.com/first.json".to_string())
        );
    }

    #[test]
    fn should_not_match_partial_filename() {
        let associations = [assoc("config.yaml", "https://example.com/schema.json")];
        assert_eq!(
            match_schema_by_filename("my-config.yaml", &associations),
            None
        );
    }

    #[test]
    fn should_parse_minimal_object_schema() {
        let v = json!({"type": "object"});
        let s = parse_schema(&v).expect("should parse");
        assert_eq!(schema_type_str(&s), Some("object"));
    }

    #[test]
    fn should_parse_schema_with_properties() {
        let v = json!({"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}});
        let s = parse_schema(&v).expect("should parse");
        let props = s.properties.as_ref().expect("should have properties");
        assert_eq!(
            schema_type_str(props.get("name").expect("name")),
            Some("string")
        );
        assert_eq!(
            schema_type_str(props.get("age").expect("age")),
            Some("integer")
        );
    }

    #[test]
    fn should_parse_required_fields() {
        let v = json!({"type": "object", "required": ["name", "age"]});
        let s = parse_schema(&v).expect("should parse");
        let req = s.required.as_ref().expect("should have required");
        assert!(req.contains(&"name".to_string()));
        assert!(req.contains(&"age".to_string()));
    }

    #[test]
    fn should_parse_enum_values() {
        let v = json!({"type": "string", "enum": ["alpha", "beta", "gamma"]});
        let s = parse_schema(&v).expect("should parse");
        let enums = s.enum_values.as_ref().expect("should have enum");
        assert_eq!(enums.len(), 3);
        assert!(enums.contains(&json!("alpha")));
        assert!(enums.contains(&json!("beta")));
        assert!(enums.contains(&json!("gamma")));
    }

    #[test]
    fn should_parse_description() {
        let v = json!({"type": "string", "description": "A human-readable name"});
        let s = parse_schema(&v).expect("should parse");
        assert_eq!(s.description.as_deref(), Some("A human-readable name"));
    }

    #[test]
    fn should_parse_default_value() {
        let v = json!({"type": "integer", "default": 42});
        let s = parse_schema(&v).expect("should parse");
        assert_eq!(s.default, Some(json!(42)));
    }

    #[test]
    fn should_parse_array_schema_with_items() {
        let v = json!({"type": "array", "items": {"type": "string"}});
        let s = parse_schema(&v).expect("should parse");
        let items = s.items.as_ref().expect("should have items");
        assert_eq!(schema_type_str(items), Some("string"));
    }

    #[test]
    fn should_parse_additional_properties_false() {
        let v = json!({"type": "object", "additionalProperties": false});
        let s = parse_schema(&v).expect("should parse");
        assert!(matches!(
            s.additional_properties,
            Some(AdditionalProperties::Denied)
        ));
    }

    #[test]
    fn should_parse_additional_properties_as_schema() {
        let v = json!({"type": "object", "additionalProperties": {"type": "string"}});
        let s = parse_schema(&v).expect("should parse");
        assert!(matches!(
            s.additional_properties,
            Some(AdditionalProperties::Schema(_))
        ));
    }

    #[test]
    fn should_parse_all_of() {
        let v = json!({"allOf": [{"type": "object"}, {"required": ["name"]}]});
        let s = parse_schema(&v).expect("should parse");
        assert_eq!(s.all_of.as_ref().map(Vec::len), Some(2));
    }

    #[test]
    fn should_parse_any_of() {
        let v = json!({"anyOf": [{"type": "string"}, {"type": "integer"}]});
        let s = parse_schema(&v).expect("should parse");
        assert_eq!(s.any_of.as_ref().map(Vec::len), Some(2));
    }

    #[test]
    fn should_parse_one_of() {
        let v = json!({"oneOf": [{"type": "string"}, {"type": "null"}]});
        let s = parse_schema(&v).expect("should parse");
        assert_eq!(s.one_of.as_ref().map(Vec::len), Some(2));
    }

    #[test]
    fn should_return_none_for_null_input() {
        assert!(parse_schema(&Value::Null).is_none());
    }

    #[test]
    fn should_return_none_for_non_object_json() {
        assert!(parse_schema(&Value::String("not a schema".into())).is_none());
    }

    #[test]
    fn should_parse_empty_object_as_permissive_schema() {
        let v = json!({});
        let s = parse_schema(&v).expect("should parse");
        assert!(s.schema_type.is_none());
        assert!(s.properties.is_none());
        assert!(s.required.is_none());
    }

    #[test]
    fn should_parse_boolean_true_schema() {
        let s = parse_schema(&Value::Bool(true)).expect("should return Some for true");
        assert!(s.schema_type.is_none());
    }

    #[test]
    fn should_parse_boolean_false_schema() {
        assert!(parse_schema(&Value::Bool(false)).is_none());
    }

    #[test]
    fn should_parse_draft04_definitions() {
        let v = json!({"definitions": {"addr": {"type": "string"}}});
        let s = parse_schema(&v).expect("should parse");
        let defs = s.definitions.as_ref().expect("should have definitions");
        assert!(defs.contains_key("addr"));
    }

    #[test]
    fn should_parse_draft07_defs() {
        let v = json!({"$defs": {"addr": {"type": "string"}}});
        let s = parse_schema(&v).expect("should parse");
        let defs = s.definitions.as_ref().expect("should have $defs");
        assert!(defs.contains_key("addr"));
    }

    #[test]
    fn should_parse_deprecated_true() {
        let v = json!({"type": "string", "deprecated": true});
        let s = parse_schema(&v).expect("should parse");
        assert_eq!(s.deprecated, Some(true));
    }

    #[test]
    fn should_resolve_simple_local_ref() {
        let v = json!({
            "$ref": "#/definitions/MyType",
            "definitions": {"MyType": {"type": "string"}}
        });
        let s = parse_schema(&v).expect("should resolve");
        assert_eq!(schema_type_str(&s), Some("string"));
    }

    // An unresolvable local `$ref` is not an error: the parser returns a
    // schema that carries only the raw reference.
    #[test]
    fn should_keep_ref_path_for_missing_ref_target() {
        let v = json!({"$ref": "#/definitions/Missing"});
        let s = parse_schema(&v).expect("unresolved ref still yields a schema");
        assert_eq!(s.ref_path.as_deref(), Some("#/definitions/Missing"));
        assert!(s.schema_type.is_none());
    }

    #[test]
    fn should_handle_nested_ref_resolution() {
        let v = json!({
            "type": "object",
            "properties": {
                "foo": {"$ref": "#/definitions/Bar"}
            },
            "definitions": {"Bar": {"type": "integer"}}
        });
        let s = parse_schema(&v).expect("should parse");
        let props = s.properties.as_ref().expect("should have properties");
        let foo = props.get("foo").expect("should have foo");
        assert_eq!(schema_type_str(foo), Some("integer"));
    }

    // Termination test: passing means the depth cap stopped the cycle.
    #[test]
    fn should_not_infinite_loop_on_circular_ref() {
        let v = json!({
            "$ref": "#/definitions/A",
            "definitions": {
                "A": {"$ref": "#/definitions/A"}
            }
        });
        let _ = parse_schema(&v);
    }

    #[test]
    fn should_return_none_on_cache_miss() {
        let cache = SchemaCache::new();
        assert!(cache.get("https://example.com/schema.json").is_none());
    }

    #[test]
    fn should_return_cached_schema_on_cache_hit() {
        let mut cache = SchemaCache::new();
        let mut schema = JsonSchema::default();
        schema.description = Some("test".to_string());
        cache.insert("https://example.com/schema.json".to_string(), schema);
        let result = cache
            .get("https://example.com/schema.json")
            .expect("should be cached");
        assert_eq!(result.description.as_deref(), Some("test"));
    }

    #[test]
    fn should_not_overwrite_existing_cache_entry() {
        let mut cache = SchemaCache::new();
        let mut schema_a = JsonSchema::default();
        schema_a.description = Some("first".to_string());
        let mut schema_b = JsonSchema::default();
        schema_b.description = Some("second".to_string());
        cache.insert("https://example.com/schema.json".to_string(), schema_a);
        cache.insert("https://example.com/schema.json".to_string(), schema_b);
        let result = cache
            .get("https://example.com/schema.json")
            .expect("should be cached");
        assert_eq!(result.description.as_deref(), Some("first"));
    }

    #[test]
    fn should_return_error_for_unreachable_url() {
        let result = fetch_schema("http://127.0.0.1:19999/nonexistent.json");
        assert!(result.is_err());
    }

    #[test]
    fn should_parse_fetched_schema_from_valid_response() {
        let body =
            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
        let buf = body.as_bytes();
        let value: Value = serde_json::from_slice(buf).expect("valid JSON should deserialise");
        check_json_depth(&value, 0).expect("shallow schema should pass depth check");
        let schema = parse_schema(&value).expect("should produce a schema");
        assert_eq!(schema_type_str(&schema), Some("object"));
        let props = schema.properties.as_ref().expect("should have properties");
        assert!(props.contains_key("name"));
        assert_eq!(
            schema_type_str(props.get("name").expect("name")),
            Some("string")
        );
        let req = schema.required.as_ref().expect("should have required");
        assert!(req.contains(&"name".to_string()));
    }

    #[test]
    fn should_reject_file_scheme_url() {
        let result = validate_and_normalize_url("file:///etc/passwd");
        assert!(result.is_err());
    }

    #[test]
    fn should_reject_localhost_url() {
        let result = validate_and_normalize_url("http://localhost/schema.json");
        assert!(result.is_err());
    }

    #[test]
    fn should_reject_link_local_ip_url() {
        let result = validate_and_normalize_url("http://169.254.169.254/latest/meta-data/");
        assert!(result.is_err());
    }

    #[test]
    fn should_reject_loopback_ip_in_fetch() {
        let result = fetch_schema("http://127.0.0.1:8080/schema.json");
        assert!(result.is_err());
    }

    #[test]
    fn should_reject_url_exceeding_max_length() {
        let long_url = format!("https://example.com/{}", "a".repeat(2050));
        let result = validate_and_normalize_url(&long_url);
        assert!(result.is_err());
    }

    #[test]
    fn should_normalize_cache_key_url() {
        let a = validate_and_normalize_url("https://example.com/schema").expect("valid");
        let b = validate_and_normalize_url("HTTPS://EXAMPLE.COM/schema").expect("valid");
        assert_eq!(a, b, "scheme+host should be normalized to lowercase");
    }

    // 100 schema levels nest the JSON far beyond MAX_JSON_DEPTH, so the depth
    // guard used by `fetch_schema` must reject it; `parse_schema` on its own
    // must still terminate without panicking.
    #[test]
    fn should_reject_excessively_nested_schema() {
        let mut v = json!({"type": "string"});
        for _ in 0..100 {
            v = json!({"type": "object", "properties": {"x": v}});
        }
        assert!(matches!(
            check_json_depth(&v, 0),
            Err(SchemaError::TooDeep)
        ));
        let _ = parse_schema(&v);
    }

    #[test]
    fn should_reject_file_scheme_url_47() {
        assert!(validate_and_normalize_url("file:///etc/passwd").is_err());
    }

    #[test]
    fn should_reject_localhost_url_48() {
        assert!(validate_and_normalize_url("http://localhost/schema.json").is_err());
    }

    #[test]
    fn should_reject_loopback_ip_url() {
        assert!(validate_and_normalize_url("http://127.0.0.1/schema.json").is_err());
    }

    #[test]
    fn should_reject_ipv6_loopback_url() {
        assert!(validate_and_normalize_url("http://[::1]/schema.json").is_err());
    }

    #[test]
    fn should_reject_link_local_aws_metadata_url() {
        assert!(validate_and_normalize_url("http://169.254.169.254/latest/meta-data/").is_err());
    }

    #[test]
    fn should_reject_url_exceeding_max_length_52() {
        let long_url = format!("https://example.com/{}", "a".repeat(2048));
        assert!(validate_and_normalize_url(&long_url).is_err());
    }

    #[test]
    fn should_accept_valid_https_url() {
        let result = validate_and_normalize_url(
            "https://schemastore.azurewebsites.net/schemas/json/package.json",
        );
        assert!(result.is_ok(), "valid https URL should be accepted");
    }

    #[test]
    fn should_accept_valid_http_url() {
        let result = validate_and_normalize_url("http://json.schemastore.org/package");
        assert!(result.is_ok(), "valid http URL should be accepted");
    }

    // Mirrors the bounded read in `fetch_schema`: a body of exactly
    // MAX_SCHEMA_BYTES must stay at or below the limit after the capped read.
    #[test]
    fn should_not_flag_response_at_exact_size_limit() {
        use std::io::Read as _;
        let body = vec![b'x'; MAX_SCHEMA_BYTES as usize];
        let cursor = std::io::Cursor::new(&body);
        let mut limited = cursor.take(MAX_SCHEMA_BYTES + 1);
        let mut buf = Vec::new();
        limited.read_to_end(&mut buf).expect("read succeeds");
        assert_eq!(buf.len() as u64, MAX_SCHEMA_BYTES);
        assert!(
            buf.len() as u64 <= MAX_SCHEMA_BYTES,
            "exactly MAX_SCHEMA_BYTES bytes must not trigger ResponseTooLarge"
        );
    }

    #[test]
    fn should_return_error_when_response_exceeds_size_limit_over() {
        use std::io::Read as _;
        let body = vec![b'x'; MAX_SCHEMA_BYTES as usize + 1];
        let cursor = std::io::Cursor::new(&body);
        let mut limited = cursor.take(MAX_SCHEMA_BYTES + 1);
        let mut buf = Vec::new();
        limited.read_to_end(&mut buf).expect("read succeeds");
        assert!(
            buf.len() as u64 > MAX_SCHEMA_BYTES,
            "over-limit read should trigger ResponseTooLarge condition"
        );
    }

    #[test]
    fn should_reject_schema_exceeding_nesting_depth() {
        let mut v = json!({"type": "string"});
        for _ in 0..60 {
            v = json!({"type": "object", "properties": {"child": v}});
        }
        assert!(matches!(
            check_json_depth(&v, 0),
            Err(SchemaError::TooDeep)
        ));
        let _ = parse_schema(&v);
    }

    #[test]
    fn should_accept_schema_within_nesting_depth() {
        let mut v = json!({"type": "string"});
        for _ in 0..10 {
            v = json!({"type": "object", "properties": {"child": v}});
        }
        let result = parse_schema(&v);
        assert!(
            result.is_some(),
            "schema within depth limit should be accepted"
        );
    }

    // Termination test: passing means the depth cap stopped the A -> B -> A cycle.
    #[test]
    fn should_not_hang_on_two_node_circular_ref() {
        let v = json!({
            "$ref": "#/definitions/A",
            "definitions": {
                "A": {"$ref": "#/definitions/B"},
                "B": {"$ref": "#/definitions/A"}
            }
        });
        let _ = parse_schema(&v);
    }

    #[test]
    fn should_normalize_cache_key_trailing_slash() {
        let key_no_slash = validate_and_normalize_url("https://example.com/schema").expect("valid");
        let key_with_slash =
            validate_and_normalize_url("https://example.com/schema/").expect("valid");
        assert_ne!(
            key_no_slash, key_with_slash,
            "trailing-slash variants are distinct paths and must not share a cache key"
        );
    }

    #[test]
    fn should_normalize_cache_key_host_case() {
        let key_upper = validate_and_normalize_url("https://EXAMPLE.COM/schema").expect("valid");
        let key_lower = validate_and_normalize_url("https://example.com/schema").expect("valid");
        assert_eq!(
            key_upper, key_lower,
            "host should be normalized to lowercase in cache key"
        );
    }

    #[test]
    #[ignore = "requires HTTP mock server; redirect blocking enforced by max_redirects(0) in fetch_schema"]
    fn should_reject_redirect_to_different_host() {
        unimplemented!()
    }

    #[test]
    fn schema_error_display_fetch_failed() {
        let e = SchemaError::FetchFailed("connection refused".to_string());
        let msg = e.to_string();
        assert!(msg.contains("fetch failed"), "got: {msg}");
        assert!(msg.contains("connection refused"), "got: {msg}");
    }

    #[test]
    fn schema_error_display_response_too_large() {
        let e = SchemaError::ResponseTooLarge;
        let msg = e.to_string();
        assert!(msg.contains("size limit"), "got: {msg}");
    }

    #[test]
    fn schema_error_display_parse_failed() {
        let e = SchemaError::ParseFailed("unexpected token".to_string());
        let msg = e.to_string();
        assert!(msg.contains("parse failed"), "got: {msg}");
        assert!(msg.contains("unexpected token"), "got: {msg}");
    }

    #[test]
    fn schema_error_display_too_deep() {
        let e = SchemaError::TooDeep;
        let msg = e.to_string();
        assert!(msg.contains("depth"), "got: {msg}");
    }

    #[test]
    fn schema_error_display_url_not_permitted() {
        let e = SchemaError::UrlNotPermitted("ftp://bad".to_string());
        let msg = e.to_string();
        assert!(msg.contains("not permitted"), "got: {msg}");
    }

    #[test]
    fn should_reject_private_ipv4_10_range() {
        let result = validate_and_normalize_url("http://10.0.0.1/schema.json");
        assert!(result.is_err(), "private 10.x.x.x must be rejected");
    }

    #[test]
    fn should_reject_private_ipv4_192_168_range() {
        let result = validate_and_normalize_url("http://192.168.1.1/schema.json");
        assert!(result.is_err(), "private 192.168.x.x must be rejected");
    }

    #[test]
    fn should_reject_private_ipv4_172_16_range() {
        let result = validate_and_normalize_url("http://172.16.0.1/schema.json");
        assert!(result.is_err(), "private 172.16.x.x must be rejected");
    }

    #[test]
    fn should_reject_unspecified_ipv4_0_0_0_0() {
        let result = validate_and_normalize_url("http://0.0.0.0/schema.json");
        assert!(result.is_err(), "unspecified 0.0.0.0 must be rejected");
    }

    #[test]
    fn should_reject_ipv6_unspecified_double_colon() {
        let result = validate_and_normalize_url("http://[::]/schema.json");
        assert!(result.is_err(), "IPv6 unspecified :: must be rejected");
    }

    #[test]
    fn should_reject_ipv6_link_local_fe80() {
        let result = validate_and_normalize_url("http://[fe80::1]/schema.json");
        assert!(result.is_err(), "IPv6 link-local fe80:: must be rejected");
    }

    #[test]
    fn should_reject_ftp_scheme() {
        let result = validate_and_normalize_url("ftp://example.com/schema.json");
        assert!(result.is_err(), "ftp:// scheme must be rejected");
        let msg = result.unwrap_err().to_string();
        assert!(
            msg.contains("ftp"),
            "error message should mention the scheme, got: {msg}"
        );
    }

    #[test]
    fn should_reject_unparseable_url() {
        let result = validate_and_normalize_url("not a url at all");
        assert!(result.is_err(), "unparseable string must be rejected");
    }

    #[test]
    fn parse_type_returns_none_for_non_string_non_array() {
        let v = json!({"type": 42});
        let s = parse_schema(&v).expect("should parse as object schema");
        assert!(
            s.schema_type.is_none(),
            "non-string/non-array type should yield None"
        );
    }

    #[test]
    fn parse_type_returns_none_for_empty_type_array() {
        let v = json!({"type": []});
        let s = parse_schema(&v).expect("should parse");
        assert!(
            s.schema_type.is_none(),
            "empty type array should yield None schema_type"
        );
    }

    #[test]
    fn parse_type_filters_non_string_items_from_array() {
        let v = json!({"type": [42, "string"]});
        let s = parse_schema(&v).expect("should parse");
        assert!(
            s.schema_type.is_some(),
            "string item should survive filtering"
        );
    }

    #[test]
    fn ref_pointing_to_root_returns_parsed_root() {
        let v = json!({
            "definitions": {
                "Root": {"$ref": "#"}
            },
            "type": "object"
        });
        let s = parse_schema(&v).expect("should parse");
        assert_eq!(schema_type_str(&s), Some("object"));
    }

    // Remote references are never followed; the raw reference is preserved.
    #[test]
    fn ref_without_hash_prefix_yields_ref_path_only() {
        let v = json!({"$ref": "http://example.com/other-schema.json"});
        let s = parse_schema(&v).expect("remote ref still yields a schema");
        assert_eq!(
            s.ref_path.as_deref(),
            Some("http://example.com/other-schema.json")
        );
        assert!(s.schema_type.is_none());
    }

    #[test]
    fn empty_all_of_array_yields_none() {
        let v = json!({"allOf": []});
        let s = parse_schema(&v).expect("should parse");
        assert!(s.all_of.is_none(), "empty allOf should yield None");
    }

    #[test]
    fn all_of_with_non_object_entries_filtered_out_yields_none() {
        let v = json!({"allOf": ["not a schema"]});
        let s = parse_schema(&v).expect("should parse");
        assert!(
            s.all_of.is_none(),
            "allOf with only invalid entries should yield None"
        );
    }

    #[test]
    fn empty_definitions_object_yields_none() {
        let v = json!({"definitions": {}});
        let s = parse_schema(&v).expect("should parse");
        assert!(
            s.definitions.is_none(),
            "empty definitions should yield None"
        );
    }

    #[test]
    fn both_definitions_and_defs_are_merged() {
        let v = json!({
            "definitions": {"TypeA": {"type": "string"}},
            "$defs": {"TypeB": {"type": "integer"}}
        });
        let s = parse_schema(&v).expect("should parse");
        let defs = s
            .definitions
            .as_ref()
            .expect("should have merged definitions");
        assert!(
            defs.contains_key("TypeA"),
            "TypeA from definitions should be present"
        );
        assert!(
            defs.contains_key("TypeB"),
            "TypeB from $defs should be present"
        );
    }

    #[test]
    fn additional_properties_true_parsed_as_permissive_schema() {
        let v = json!({"type": "object", "additionalProperties": true});
        let s = parse_schema(&v).expect("should parse");
        assert!(
            matches!(
                s.additional_properties,
                Some(AdditionalProperties::Schema(_))
            ),
            "additionalProperties: true should yield Schema variant"
        );
    }

    #[test]
    fn check_json_depth_rejects_deeply_nested_array() {
        let mut v = json!("leaf");
        for _ in 0..55 {
            v = json!([v]);
        }
        let result = check_json_depth(&v, 0);
        assert!(
            result.is_err(),
            "deeply nested array should exceed depth limit"
        );
    }

    #[test]
    fn check_json_depth_accepts_shallow_array() {
        let v = json!(["a", "b", "c"]);
        assert!(check_json_depth(&v, 0).is_ok());
    }

    #[test]
    fn required_with_non_string_values_filtered() {
        let v = json!({"required": [42, "name", true]});
        let s = parse_schema(&v).expect("should parse");
        let req = s.required.as_ref().expect("should have required");
        assert_eq!(req.len(), 1, "only string 'name' should survive filtering");
        assert!(req.contains(&"name".to_string()));
    }
}