use crate::error::JacsError;
use crate::storage::MultiStorage;
use jsonschema::Retrieve;
use phf::phf_map;
use serde_json::Value;
use std::collections::HashMap;
use std::error::Error;
use std::fmt;
use std::sync::{Arc, OnceLock, RwLock};
use tracing::{debug, warn};
/// Default TLS strictness: `true` means invalid certificates are rejected.
/// Used when `JACS_STRICT_TLS` is unset or holds an unrecognized value.
pub const STRICT_TLS_DEFAULT: bool = true;
/// Built-in domains from which remote schemas may be fetched. A host is
/// accepted when it equals one of these or is a subdomain of one; the list can
/// be extended at runtime via `JACS_SCHEMA_ALLOWED_DOMAINS`.
pub const DEFAULT_ALLOWED_SCHEMA_DOMAINS: &[&str] = &["hai.ai", "schema.hai.ai", "jacs.sh"];
/// Default upper bound on a document's size, in bytes (10 MiB).
pub const DEFAULT_MAX_DOCUMENT_SIZE: usize = 10 * 1024 * 1024;

/// Returns the maximum accepted document size in bytes.
///
/// Reads `JACS_MAX_DOCUMENT_SIZE` on every call. If the variable is unset, not
/// valid Unicode, or does not parse as a `usize` (surrounding whitespace is
/// not trimmed), [`DEFAULT_MAX_DOCUMENT_SIZE`] is returned instead.
pub fn max_document_size() -> usize {
    match std::env::var("JACS_MAX_DOCUMENT_SIZE") {
        Ok(raw) => raw.parse().unwrap_or(DEFAULT_MAX_DOCUMENT_SIZE),
        Err(_) => DEFAULT_MAX_DOCUMENT_SIZE,
    }
}
/// Verifies that `data` does not exceed the configured size limit.
///
/// The size is the byte length of `data` (`str::len`), compared against
/// [`max_document_size`].
///
/// # Errors
/// Returns `JacsError::DocumentTooLarge` carrying the actual and maximum sizes
/// when the document is strictly larger than the limit.
pub fn check_document_size(data: &str) -> Result<(), JacsError> {
    let max_size = max_document_size();
    match data.len() {
        size if size > max_size => Err(JacsError::DocumentTooLarge { size, max_size }),
        _ => Ok(()),
    }
}
/// Process-wide cache of the extra schema domains parsed from the environment.
static EXTRA_ALLOWED_SCHEMA_DOMAINS: OnceLock<Vec<String>> = OnceLock::new();

/// Returns the additional allowed schema domains from
/// `JACS_SCHEMA_ALLOWED_DOMAINS`.
///
/// The variable is read once, on first call: it is split on commas, each entry
/// is trimmed, and empty entries are dropped. Later changes to the environment
/// are not observed. An unset or unreadable variable yields an empty list.
fn get_extra_allowed_domains() -> &'static Vec<String> {
    EXTRA_ALLOWED_SCHEMA_DOMAINS.get_or_init(|| {
        match std::env::var("JACS_SCHEMA_ALLOWED_DOMAINS") {
            Ok(raw) => raw
                .split(',')
                .map(str::trim)
                .filter(|entry| !entry.is_empty())
                .map(String::from)
                .collect(),
            Err(_) => Vec::new(),
        }
    })
}
/// Checks that `url` points at a host permitted for remote schema fetches.
///
/// The allow-list is [`DEFAULT_ALLOWED_SCHEMA_DOMAINS`] followed by the
/// entries from [`get_extra_allowed_domains`]. A host passes when,
/// case-insensitively, it equals an allowed domain or is a subdomain of one
/// (i.e. ends with `.<domain>`). The URL scheme is not inspected here.
///
/// # Errors
/// Returns `JacsError::SchemaError` if the URL cannot be parsed, has no host,
/// or its host is not on the allow-list.
fn is_schema_url_allowed(url: &str) -> Result<(), JacsError> {
    let parsed = match url::Url::parse(url) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(JacsError::SchemaError(format!(
                "Invalid URL '{}': {}",
                url, e
            )));
        }
    };
    let Some(host) = parsed.host_str() else {
        return Err(JacsError::SchemaError(format!("URL '{}' has no host", url)));
    };

    let allowed_domains: Vec<&str> = DEFAULT_ALLOWED_SCHEMA_DOMAINS
        .iter()
        .copied()
        .chain(get_extra_allowed_domains().iter().map(String::as_str))
        .collect();

    let host_lower = host.to_lowercase();
    let host_is_allowed = allowed_domains.iter().any(|candidate| {
        let candidate_lower = candidate.to_lowercase();
        // Subdomain match: the host must end with the domain and the part
        // before it must end in a dot, so "evilhai.ai" does not match "hai.ai".
        host_lower == candidate_lower
            || host_lower
                .strip_suffix(candidate_lower.as_str())
                .is_some_and(|prefix| prefix.ends_with('.'))
    });
    if host_is_allowed {
        return Ok(());
    }

    Err(JacsError::SchemaError(format!(
        "Remote schema URL '{}' is not from an allowed domain. \
         Allowed domains: {:?}. \
         To add additional domains, set JACS_SCHEMA_ALLOWED_DOMAINS environment variable (comma-separated).",
        url, allowed_domains
    )))
}
/// Decides whether invalid TLS certificates should be accepted.
///
/// Driven by `JACS_STRICT_TLS` (trimmed, case-insensitive): `true`/`1`/`yes`
/// enable strict validation, `false`/`0`/`no` disable it. An unset variable
/// uses [`STRICT_TLS_DEFAULT`]; an unrecognized value logs a warning and also
/// uses the default. Returns `true` only when strict TLS is off, and logs a
/// security warning in that case.
#[cfg(not(target_arch = "wasm32"))]
fn should_accept_invalid_certs() -> bool {
    let strict_tls = std::env::var("JACS_STRICT_TLS")
        .ok()
        .map_or(STRICT_TLS_DEFAULT, |raw| {
            let normalized = raw.trim().to_ascii_lowercase();
            match normalized.as_str() {
                "true" | "1" | "yes" => true,
                "false" | "0" | "no" => false,
                unrecognized => {
                    warn!(
                        "Invalid JACS_STRICT_TLS value '{}'; defaulting to strict TLS validation.",
                        unrecognized
                    );
                    STRICT_TLS_DEFAULT
                }
            }
        });

    if strict_tls {
        return false;
    }
    warn!(
        "SECURITY WARNING: JACS_STRICT_TLS=false. Accepting invalid TLS certificates increases MITM risk."
    );
    true
}
/// Claim-aware variant of the invalid-certificate decision (non-WASM builds).
///
/// For the claims `"verified"`, `"verified-registry"` and `"verified-hai.ai"`
/// invalid certificates are never accepted, regardless of environment
/// settings. Any other claim (or none) defers to the `JACS_STRICT_TLS`-based
/// decision.
#[cfg(not(target_arch = "wasm32"))]
pub fn should_accept_invalid_certs_for_claim(claim: Option<&str>) -> bool {
    let is_verified_claim = matches!(
        claim,
        Some("verified" | "verified-registry" | "verified-hai.ai")
    );
    // Short-circuit so the environment is not consulted (and no warning is
    // logged) for verified claims.
    !is_verified_claim && should_accept_invalid_certs()
}
/// WASM build of the claim-aware certificate decision: the claim is ignored
/// and invalid certificates are never accepted.
#[cfg(target_arch = "wasm32")]
pub fn should_accept_invalid_certs_for_claim(_claim: Option<&str>) -> bool {
false
}
/// Schemas embedded in the binary at compile time via `include_str!`.
///
/// Keys are schema paths relative to the repository root (no leading slash);
/// values are the raw JSON text of the corresponding schema file.
/// `resolve_schema` consults this map before trying remote or filesystem
/// sources.
pub static DEFAULT_SCHEMA_STRINGS: phf::Map<&'static str, &'static str> = phf_map! {
"schemas/agent/v1/agent.schema.json" => include_str!("../../schemas/agent/v1/agent.schema.json"),
"schemas/header/v1/header.schema.json"=> include_str!("../../schemas/header/v1/header.schema.json"),
"schemas/components/signature/v1/signature.schema.json" => include_str!("../../schemas/components/signature/v1/signature.schema.json"),
"schemas/components/files/v1/files.schema.json" => include_str!("../../schemas/components/files/v1/files.schema.json"),
"schemas/components/agreement/v1/agreement.schema.json" => include_str!("../../schemas/components/agreement/v1/agreement.schema.json"),
"schemas/components/action/v1/action.schema.json" => include_str!("../../schemas/components/action/v1/action.schema.json"),
"schemas/components/unit/v1/unit.schema.json" => include_str!("../../schemas/components/unit/v1/unit.schema.json"),
"schemas/components/tool/v1/tool.schema.json" => include_str!("../../schemas/components/tool/v1/tool.schema.json"),
"schemas/components/service/v1/service.schema.json" => include_str!("../../schemas/components/service/v1/service.schema.json"),
"schemas/components/contact/v1/contact.schema.json" => include_str!("../../schemas/components/contact/v1/contact.schema.json"),
"schemas/task/v1/task.schema.json" => include_str!("../../schemas/task/v1/task.schema.json"),
"schemas/message/v1/message.schema.json" => include_str!("../../schemas/message/v1/message.schema.json"),
"schemas/eval/v1/eval.schema.json" => include_str!("../../schemas/eval/v1/eval.schema.json"),
"schemas/program/v1/program.schema.json" => include_str!("../../schemas/program/v1/program.schema.json"),
"schemas/node/v1/node.schema.json" => include_str!("../../schemas/node/v1/node.schema.json"),
"schemas/components/embedding/v1/embedding.schema.json" => include_str!("../../schemas/components/embedding/v1/embedding.schema.json"),
"schemas/agentstate/v1/agentstate.schema.json" => include_str!("../../schemas/agentstate/v1/agentstate.schema.json"),
"schemas/commitment/v1/commitment.schema.json" => include_str!("../../schemas/commitment/v1/commitment.schema.json"),
"schemas/todo/v1/todo.schema.json" => include_str!("../../schemas/todo/v1/todo.schema.json"),
"schemas/components/todoitem/v1/todoitem.schema.json" => include_str!("../../schemas/components/todoitem/v1/todoitem.schema.json"),
"schemas/attestation/v1/attestation.schema.json" => include_str!("../../schemas/attestation/v1/attestation.schema.json")
};
/// Maps a schema `$id` URL to a short, human-friendly schema name.
///
/// The literal key `"document"` maps to itself and doubles as the fallback
/// name in `get_short_name`.
///
/// NOTE(review): the embedded `program`, `embedding` and `todoitem` schemas
/// have no entry here, so documents with those `$id`s are reported as
/// "document" — confirm this is intentional.
pub static SCHEMA_SHORT_NAME: phf::Map<&'static str, &'static str> = phf_map! {
"https://hai.ai/schemas/agent/v1/agent.schema.json" => "agent" ,
"https://hai.ai/schemas/components/action/v1/action.schema.json" => "action" ,
"https://hai.ai/schemas/components/agreement/v1/agreement.schema.json" => "agreement" ,
"https://hai.ai/schemas/components/contact/v1/contact.schema.json" => "contact" ,
"https://hai.ai/schemas/components/files/v1/files.schema.json" => "files" ,
"https://hai.ai/schemas/components/service/v1/service.schema.json" => "service" ,
"https://hai.ai/schemas/components/signature/v1/signature.schema.json" => "signature" ,
"https://hai.ai/schemas/components/tool/v1/tool.schema.json" => "tool" ,
"https://hai.ai/schemas/components/unit/v1/unit.schema.json" => "unit" ,
"https://hai.ai/schemas/eval/v1/eval.schema.json" => "eval" ,
"https://hai.ai/schemas/header/v1/header.schema.json" => "header" ,
"https://hai.ai/schemas/message/v1/message.schema.json" => "message" ,
"https://hai.ai/schemas/node/v1/node.schema.json" => "node" ,
"https://hai.ai/schemas/task/v1/task.schema.json" => "task" ,
"document" => "document" ,
"https://hai.ai/schemas/agentstate/v1/agentstate.schema.json" => "agentstate" ,
"https://hai.ai/schemas/commitment/v1/commitment.schema.json" => "commitment" ,
"https://hai.ai/schemas/todo/v1/todo.schema.json" => "todo" ,
"https://hai.ai/schemas/attestation/v1/attestation.schema.json" => "attestation" ,
};
pub fn get_short_name(jacs_document: &Value) -> Result<String, JacsError> {
let id: String = jacs_document
.get_str("$id")
.unwrap_or((&"document").to_string());
Ok(SCHEMA_SHORT_NAME
.get(&id)
.unwrap_or(&"document")
.to_string())
}
/// Raw JSON text of the JACS configuration schema, embedded at compile time
/// from `schemas/jacs.config.schema.json`.
pub static CONFIG_SCHEMA_STRING: &str = include_str!("../../schemas/jacs.config.schema.json");
/// Minimal error type that carries a plain message string.
///
/// Its `Display` output is the wrapped message verbatim; formatter width and
/// alignment flags are not applied.
#[derive(Debug)]
// NOTE(review): no use of this type is visible in this section of the file;
// the allow keeps the build warning-free — confirm whether it is still needed.
#[allow(dead_code)]
struct SchemaResolverErrorWrapper(String);

impl Error for SchemaResolverErrorWrapper {}

impl fmt::Display for SchemaResolverErrorWrapper {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}
/// Convenience accessors for JSON values.
///
/// The behavior notes below describe the `serde_json::Value` implementation
/// provided in this file.
pub trait ValueExt {
/// Returns the string stored under the top-level key `field`, or `None` if
/// the key is absent or its value is not a string.
fn get_str(&self, field: &str) -> Option<String>;
/// Like `get_str`, but returns `default` when the key is absent or not a
/// string.
fn get_str_or(&self, field: &str, default: &str) -> String;
/// Like `get_str`, but returns `JacsError::DocumentMalformed` when the key
/// is absent or not a string.
fn get_str_required(&self, field: &str) -> Result<String, JacsError>;
/// Returns the value under `key` as an `i64`, if present and representable.
fn get_i64(&self, key: &str) -> Option<i64>;
/// Returns the value under `key` as a `bool`, if present and a boolean.
fn get_bool(&self, key: &str) -> Option<bool>;
/// Renders the value as pretty-printed JSON. If serialization fails, a
/// JSON-shaped string describing the error is returned instead.
fn as_string(&self) -> String;
/// Follows `path` key by key and returns the value reached, or `None` if
/// any step is missing. An empty path returns the value itself.
fn get_path(&self, path: &[&str]) -> Option<&Value>;
/// Returns the string found at `path`, or `None` if the path does not
/// resolve or the value there is not a string.
fn get_path_str(&self, path: &[&str]) -> Option<String>;
/// Like `get_path_str`, but falls back to `default`.
fn get_path_str_or(&self, path: &[&str], default: &str) -> String;
/// Like `get_path_str`, but returns `JacsError::DocumentMalformed` (with the
/// path joined by dots as the field name) on failure.
fn get_path_str_required(&self, path: &[&str]) -> Result<String, JacsError>;
/// Returns the array found at `path`, or `None` if the path does not
/// resolve or the value there is not an array.
fn get_path_array(&self, path: &[&str]) -> Option<&Vec<Value>>;
/// Like `get_path_array`, but returns `JacsError::DocumentMalformed` (with
/// the path joined by dots as the field name) on failure.
fn get_path_array_required(&self, path: &[&str]) -> Result<&Vec<Value>, JacsError>;
}
impl ValueExt for Value {
fn as_string(&self) -> String {
serde_json::to_string_pretty(self)
.unwrap_or_else(|e| format!("{{\"error\": \"Failed to serialize JSON: {}\"}}", e))
}
fn get_str(&self, field: &str) -> Option<String> {
self.get(field)?.as_str().map(String::from)
}
fn get_str_or(&self, field: &str, default: &str) -> String {
self.get(field)
.and_then(|v| v.as_str())
.unwrap_or(default)
.to_string()
}
fn get_str_required(&self, field: &str) -> Result<String, JacsError> {
self.get_str(field)
.ok_or_else(|| JacsError::DocumentMalformed {
field: field.to_string(),
reason: format!("Missing or invalid field: {}", field),
})
}
fn get_i64(&self, key: &str) -> Option<i64> {
self.get(key).and_then(|v| v.as_i64())
}
fn get_bool(&self, key: &str) -> Option<bool> {
self.get(key).and_then(|v| v.as_bool())
}
fn get_path(&self, path: &[&str]) -> Option<&Value> {
let mut current = self;
for key in path {
current = current.get(key)?;
}
Some(current)
}
fn get_path_str(&self, path: &[&str]) -> Option<String> {
self.get_path(path)?.as_str().map(String::from)
}
fn get_path_str_or(&self, path: &[&str], default: &str) -> String {
self.get_path_str(path)
.unwrap_or_else(|| default.to_string())
}
fn get_path_str_required(&self, path: &[&str]) -> Result<String, JacsError> {
let dotted_path = path.join(".");
self.get_path_str(path)
.ok_or_else(|| JacsError::DocumentMalformed {
field: dotted_path.clone(),
reason: format!("Missing or invalid field: {}", dotted_path),
})
}
fn get_path_array(&self, path: &[&str]) -> Option<&Vec<Value>> {
self.get_path(path)?.as_array()
}
fn get_path_array_required(&self, path: &[&str]) -> Result<&Vec<Value>, JacsError> {
let dotted_path = path.join(".");
self.get_path_array(path)
.ok_or_else(|| JacsError::DocumentMalformed {
field: dotted_path.clone(),
reason: format!("Missing or invalid array field: {}", dotted_path),
})
}
}
/// Stateless schema retriever; its `Retrieve` implementation resolves
/// references through `resolve_schema`.
pub struct EmbeddedSchemaResolver {}

impl EmbeddedSchemaResolver {
    /// Creates a resolver. The type holds no state, so this is free.
    pub fn new() -> Self {
        Self {}
    }
}

impl Default for EmbeddedSchemaResolver {
    /// Equivalent to [`EmbeddedSchemaResolver::new`].
    fn default() -> Self {
        EmbeddedSchemaResolver::new()
    }
}
impl Retrieve for EmbeddedSchemaResolver {
fn retrieve(
&self,
uri: &jsonschema::Uri<String>,
) -> Result<Value, Box<dyn Error + Send + Sync>> {
let path = uri.path().as_str();
resolve_schema(path).map(|arc| (*arc).clone()).map_err(|e| {
let err_msg = e.to_string();
Box::new(std::io::Error::other(err_msg)) as Box<dyn Error + Send + Sync>
})
}
}
/// Downloads and parses a JSON schema from `url` (non-WASM builds).
///
/// The URL's host must pass the domain allow-list check before any network
/// activity. Certificate validation follows the `JACS_STRICT_TLS` setting.
///
/// # Errors
/// * `JacsError::SchemaError` — host not allowed, non-success HTTP status, or
///   a response body that is not valid JSON.
/// * `JacsError::NetworkError` — the HTTP client could not be built or the
///   request failed.
#[cfg(not(target_arch = "wasm32"))]
fn get_remote_schema(url: &str) -> Result<Arc<Value>, JacsError> {
    is_schema_url_allowed(url)?;

    let accept_invalid = should_accept_invalid_certs();
    let client = match reqwest::blocking::Client::builder()
        .danger_accept_invalid_certs(accept_invalid)
        .build()
    {
        Ok(client) => client,
        Err(e) => {
            return Err(JacsError::NetworkError(format!(
                "Failed to build HTTP client: {}",
                e
            )));
        }
    };

    let response = match client.get(url).send() {
        Ok(response) => response,
        Err(e) => {
            return Err(JacsError::NetworkError(format!(
                "Failed to fetch schema from {}: {}",
                url, e
            )));
        }
    };

    if !response.status().is_success() {
        return Err(JacsError::SchemaError(format!(
            "Failed to get schema from URL {}",
            url
        )));
    }

    match response.json::<Value>() {
        Ok(schema_value) => Ok(Arc::new(schema_value)),
        Err(e) => Err(JacsError::SchemaError(format!(
            "Failed to parse schema JSON from {}: {}",
            url, e
        ))),
    }
}
/// WASM build: remote schema fetching is unavailable, so this always fails
/// with a `JacsError::SchemaError` naming the requested URL.
#[cfg(target_arch = "wasm32")]
fn get_remote_schema(url: &str) -> Result<Arc<Value>, JacsError> {
    let message = format!("Remote URL schemas disabled in WASM: {}", url);
    Err(JacsError::SchemaError(message))
}
/// Produces an absolute, normalized form of `path` for containment checks.
///
/// If the path can be canonicalized, the canonical path is returned.
/// Otherwise the path is made absolute (relative paths are joined onto the
/// current directory) and `.` / `..` components are resolved lexically,
/// without consulting the filesystem.
///
/// # Errors
/// Returns `JacsError::SchemaError` if the current directory is needed but
/// cannot be read.
fn normalize_access_path(path: &str) -> Result<std::path::PathBuf, JacsError> {
    use std::path::{Component, Path, PathBuf};

    let requested = Path::new(path);
    if let Ok(canonical) = requested.canonicalize() {
        return Ok(canonical);
    }

    let absolute = if requested.is_absolute() {
        requested.to_path_buf()
    } else {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(requested),
            Err(e) => {
                return Err(JacsError::SchemaError(format!(
                    "Failed to read current dir: {}",
                    e
                )));
            }
        }
    };

    let normalized = absolute
        .components()
        .fold(PathBuf::new(), |mut acc, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    // `pop` is a no-op at the root, so `..` cannot climb above it.
                    acc.pop();
                }
                other => acc.push(other.as_os_str()),
            }
            acc
        });
    Ok(normalized)
}
/// Gatekeeper for loading a schema from the local filesystem.
///
/// Checks, in order:
/// 1. `JACS_ALLOW_FILESYSTEM_SCHEMAS` must be `true` (any case) or `1`.
/// 2. `path` must not contain the substring `..`.
/// 3. If `JACS_DATA_DIRECTORY` and/or `JACS_SCHEMA_DIRECTORY` are set, the
///    normalized path must lie under at least one of them. If neither is set,
///    no directory restriction is applied.
///
/// # Errors
/// Returns `JacsError::SchemaError` describing whichever check failed.
fn check_filesystem_schema_access(path: &str) -> Result<(), JacsError> {
    let fs_enabled = matches!(
        std::env::var("JACS_ALLOW_FILESYSTEM_SCHEMAS").as_deref(),
        Ok(flag) if flag.eq_ignore_ascii_case("true") || flag == "1"
    );
    if !fs_enabled {
        return Err(JacsError::SchemaError(format!(
            "Filesystem schema access is disabled. Path '{}' cannot be loaded. \
             To enable filesystem schemas, set JACS_ALLOW_FILESYSTEM_SCHEMAS=true",
            path
        )));
    }

    if path.contains("..") {
        return Err(JacsError::SchemaError(format!(
            "Path traversal detected in schema path '{}'. \
             Schema paths must not contain '..' sequences.",
            path
        )));
    }

    let data_dir = std::env::var("JACS_DATA_DIRECTORY").ok();
    let schema_dir = std::env::var("JACS_SCHEMA_DIRECTORY").ok();
    if data_dir.is_none() && schema_dir.is_none() {
        return Ok(());
    }

    let candidate = normalize_access_path(path)?;
    let mut allowed = false;
    // Every configured root is normalized (data directory first), even once a
    // match has been found, so a normalization failure is always reported.
    for root in data_dir.iter().chain(schema_dir.iter()) {
        let allowed_root = normalize_access_path(root)?;
        if candidate.starts_with(&allowed_root) {
            allowed = true;
        }
    }

    if allowed {
        Ok(())
    } else {
        Err(JacsError::SchemaError(format!(
            "Schema path '{}' is outside allowed directories. \
             Schemas must be within JACS_DATA_DIRECTORY ({:?}) or JACS_SCHEMA_DIRECTORY ({:?}).",
            path, data_dir, schema_dir
        )))
    }
}
/// Resolves a schema by path or URL and returns it as a shared, cached value.
///
/// A single leading `/` on `rawpath` is ignored. Resolution order:
/// 1. The in-process cache, keyed by the path with any `https://hai.ai/` or
///    `http://hai.ai/` prefix removed.
/// 2. Schemas embedded in the binary, looked up by that same key.
/// 3. For `http://` / `https://` URLs: URLs starting with `https://hai.ai`
///    are served from embedded schemas only (no network); all other URLs are
///    fetched remotely, subject to the domain allow-list.
/// 4. Otherwise the local filesystem, subject to
///    `check_filesystem_schema_access`.
///
/// Successful results are stored in the cache before being returned.
///
/// # Errors
/// * `JacsError::SchemaError` — unknown `https://hai.ai` schema, remote fetch
///   rejected or failed to parse, filesystem access denied, storage failure,
///   or invalid UTF-8 in a schema file.
/// * `JacsError::FileNotFound` — the filesystem path does not exist.
/// * Whatever `JacsError` the `?` conversion produces for a JSON parse failure.
pub fn resolve_schema(rawpath: &str) -> Result<Arc<Value>, JacsError> {
    debug!("Entering resolve_schema function with path: {}", rawpath);
    let path = rawpath.strip_prefix('/').unwrap_or(rawpath);
    let cache_key = schema_cache_key(path);
    if let Some(cached) = get_cached_schema(&cache_key) {
        return Ok(cached);
    }
    // Look up embedded schemas by the cache key, not the raw path. The key has
    // the hai.ai URL prefix stripped, so whatever is cached under an embedded
    // schema's key is always the embedded content. Previously an
    // `http://hai.ai/<embedded path>` request missed this lookup, was fetched
    // over plaintext HTTP, and was then cached under the embedded schema's key.
    let resolved = if let Some(schema_json) = DEFAULT_SCHEMA_STRINGS.get(cache_key.as_str()) {
        let schema_value: Value = serde_json::from_str(schema_json)?;
        Arc::new(schema_value)
    } else if path.starts_with("http://") || path.starts_with("https://") {
        debug!("Attempting to fetch schema from URL: {}", path);
        if path.starts_with("https://hai.ai") {
            // hai.ai schemas are never fetched over the network; they must be
            // present in the embedded set.
            let relative_path = path.trim_start_matches("https://hai.ai/");
            if let Some(schema_json) = DEFAULT_SCHEMA_STRINGS.get(relative_path) {
                let schema_value: Value = serde_json::from_str(schema_json)?;
                Arc::new(schema_value)
            } else {
                return Err(JacsError::SchemaError(format!(
                    "Schema not found in embedded schemas: '{}' (relative path: '{}'). Available schemas: {:?}",
                    path,
                    relative_path,
                    DEFAULT_SCHEMA_STRINGS.keys().collect::<Vec<_>>()
                )));
            }
        } else {
            get_remote_schema(path)?
        }
    } else {
        // Not embedded and not a URL: treat as a filesystem path, which is
        // opt-in and directory-restricted.
        check_filesystem_schema_access(path)?;
        let storage = MultiStorage::default_new()
            .map_err(|e| JacsError::SchemaError(format!("Failed to initialize storage: {}", e)))?;
        if storage.file_exists(path, None).map_err(|e| {
            JacsError::SchemaError(format!("Failed to check schema file existence: {}", e))
        })? {
            let file_bytes = storage.get_file(path, None).map_err(|e| {
                JacsError::SchemaError(format!("Failed to read schema file '{}': {}", path, e))
            })?;
            let schema_json = String::from_utf8(file_bytes).map_err(|e| {
                JacsError::SchemaError(format!(
                    "Schema file '{}' contains invalid UTF-8: {}",
                    path, e
                ))
            })?;
            let schema_value: Value = serde_json::from_str(&schema_json)?;
            Arc::new(schema_value)
        } else {
            return Err(JacsError::FileNotFound {
                path: path.to_string(),
            });
        }
    };
    // `cache_schema` returns the entry that ends up in the cache, so concurrent
    // resolvers of the same key converge on one shared `Arc`.
    Ok(cache_schema(cache_key, resolved))
}
/// Derives the cache key for a schema path.
///
/// A leading `https://hai.ai/` (checked first) or `http://hai.ai/` is removed
/// once, so hai.ai URLs and their relative paths share one cache entry. Any
/// other input is used unchanged.
fn schema_cache_key(path: &str) -> String {
    for prefix in ["https://hai.ai/", "http://hai.ai/"] {
        if let Some(relative) = path.strip_prefix(prefix) {
            return relative.to_string();
        }
    }
    path.to_string()
}
/// Returns the process-wide schema cache, creating it empty on first use.
fn schema_cache() -> &'static RwLock<HashMap<String, Arc<Value>>> {
    type SchemaMap = HashMap<String, Arc<Value>>;
    static SCHEMA_CACHE: OnceLock<RwLock<SchemaMap>> = OnceLock::new();
    SCHEMA_CACHE.get_or_init(RwLock::default)
}
/// Looks up `key` in the schema cache.
///
/// Returns `None` on a cache miss, and also when the cache lock is poisoned.
fn get_cached_schema(key: &str) -> Option<Arc<Value>> {
    let guard = schema_cache().read().ok()?;
    guard.get(key).map(Arc::clone)
}
/// Stores `schema` under `key` unless an entry already exists, and returns the
/// schema that is in the cache afterwards.
///
/// If another entry is already present it wins and is returned, so all callers
/// for a key share one `Arc`. If the cache lock is poisoned nothing is stored
/// and `schema` is returned as-is.
fn cache_schema(key: String, schema: Arc<Value>) -> Arc<Value> {
    match schema_cache().write() {
        Ok(mut cache) => Arc::clone(cache.entry(key).or_insert(schema)),
        Err(_) => schema,
    }
}
#[cfg(test)]
mod tests {
    use super::resolve_schema;
    use std::sync::Arc;

    /// Resolving the same embedded path twice must hand back the same `Arc`.
    #[test]
    fn resolve_schema_embedded_path_is_cached() {
        const AGENT_SCHEMA: &str = "schemas/agent/v1/agent.schema.json";

        let initial = resolve_schema(AGENT_SCHEMA).expect("first resolve");
        let repeated = resolve_schema(AGENT_SCHEMA).expect("second resolve");

        let same_allocation = Arc::ptr_eq(&initial, &repeated);
        assert!(
            same_allocation,
            "embedded schema should be returned from cache"
        );
    }

    /// A relative path and its `https://hai.ai/` URL form must map to one
    /// cache entry.
    #[test]
    fn resolve_schema_hai_url_and_relative_path_share_cache_entry() {
        let by_path = resolve_schema("schemas/header/v1/header.schema.json")
            .expect("relative path resolve should succeed");
        let by_url = resolve_schema("https://hai.ai/schemas/header/v1/header.schema.json")
            .expect("hai url resolve should succeed");

        let same_allocation = Arc::ptr_eq(&by_path, &by_url);
        assert!(
            same_allocation,
            "relative and hai URL lookups should share cached schema"
        );
    }
}