pub mod diff;
pub mod editor;
pub mod schema;
pub mod v2;
pub mod version;
pub use crate::generated::Manifest;
pub use v2::{load_provider_tree, ResolvedBinding, ResolvedManifest, ResolvedTool};
use std::path::Path;
use crate::error::{
OlError, OL_4210_SCHEMA_MISMATCH, OL_4211_UNKNOWN_THREAT_CATEGORY,
OL_4212_INVALID_ENDPOINT_URL, OL_4213_INVALID_AGENTS_SUPPORTED,
OL_4214_INVALID_HOOKS_SUPPORTED, OL_4216_CAPABILITY_MISSING_FIELD, OL_4273_MANIFEST_UNREADABLE,
OL_4300_PROCESS_SPEC_INVALID,
};
/// Largest manifest file, in bytes, that `load` accepts (256 * 1024).
/// Files longer than this are rejected with `OL_4273_MANIFEST_UNREADABLE`.
const MAX_MANIFEST_BYTES: usize = 256 * 1024;
/// Threat-category names recognised for a tool capability.
///
/// `semantic_check` rejects a capability whose serialized `threat_category`
/// is non-empty and absent from this list, and uses the list to suggest the
/// closest spelling. The `known_threat_categories_match_schema` test compares
/// this list with the `threat_category` enum in
/// `manifest-capability.schema.json`, so both must be updated together.
pub const KNOWN_THREAT_CATEGORIES: &[&str] = &[
"pii_outbound",
"pii_inbound",
"credential_detection",
"injection_tool_response",
"injection_user_input",
"shell_dangerous",
"shell_exfiltration",
"tool_hash_verification",
"tool_poison_detection",
"tool_typosquatting",
"attack_path_analysis",
"configuration_threat",
];
/// Hook event names recognised in a tool's `hooks_supported` list.
///
/// The list is advisory: `semantic_check` only logs a warning for a value
/// that is not listed here; it does not reject the manifest.
pub const KNOWN_HOOK_EVENTS: &[&str] = &[
"pre_tool_use",
"post_tool_use",
"user_prompt_submit",
"before_shell_execution",
"before_mcp_execution",
"before_tool",
"after_tool",
"command_new",
"agent_bootstrap",
];
/// Agent platform names recognised in a tool's `agents_supported` list.
///
/// The list is advisory: `semantic_check` only logs a warning for a value
/// that is not listed here; it does not reject the manifest.
pub const KNOWN_AGENT_PLATFORMS: &[&str] = &[
"claude-code",
"cursor",
"windsurf",
"github-copilot",
"codex-cli",
"gemini-cli",
"cline",
"openclaw",
];
/// Reads the manifest file at `path` and returns it as a [`Manifest`].
///
/// After the size cap is enforced, `v2::peek_kind` is consulted and the file
/// is routed as follows:
///
/// * schema version 2, kind `Provider`: delegated to `v2::load_provider_tree`;
///   the `synth` manifest of the resolved tree is returned.
/// * schema version 2, kind `Tool`: parsed by `v2::parse_tool_v2`, then
///   re-shaped into a schema-version-1 document with empty `providers` and
///   `bindings`, validated with `schema::validate` and deserialized.
/// * anything else, including a failed peek: handed to [`parse`].
///
/// # Errors
///
/// * `OL_4273_MANIFEST_UNREADABLE` when the file cannot be read or is larger
///   than `MAX_MANIFEST_BYTES`.
/// * `OL_4210_SCHEMA_MISMATCH` when a v2 tool or editor cannot be serialized,
///   or the synthesized tool-only manifest cannot be deserialized.
/// * Whatever the delegated `v2`, `schema::validate` or [`parse`] calls return.
pub fn load(path: &Path) -> Result<Manifest, OlError> {
    // NOTE(review): the cap is checked only after the whole file has been read
    // into memory.
    let bytes = std::fs::read(path).map_err(|e| {
        OlError::new(
            OL_4273_MANIFEST_UNREADABLE,
            format!("cannot read '{}': {e}", path.display()),
        )
    })?;
    if bytes.len() > MAX_MANIFEST_BYTES {
        return Err(OlError::new(
            OL_4273_MANIFEST_UNREADABLE,
            format!(
                "manifest at '{}' is {} bytes (cap {} KB)",
                path.display(),
                bytes.len(),
                MAX_MANIFEST_BYTES / 1024
            ),
        ));
    }

    // A failed peek is not an error here: the file falls through to `parse`.
    if let Ok((schema_version, kind)) = v2::peek_kind(&bytes) {
        match (schema_version, kind.as_deref()) {
            (Some(2), Some("Provider")) => {
                let resolved = v2::load_provider_tree(path)?;
                return Ok(resolved.synth);
            }
            (Some(2), Some("Tool")) => {
                // NOTE(review): unlike `parse`, this path does not call
                // `semantic_check`; presumably `v2::parse_tool_v2` covers the
                // equivalent checks. Confirm.
                let parsed = v2::parse_tool_v2(&bytes, path)?;

                // Serialize every tool and drop its `process` key. A
                // serialization failure is reported as such instead of being
                // replaced by `null` and resurfacing as a confusing
                // schema-validation error further down.
                let tools_json = parsed
                    .tools
                    .iter()
                    .map(|t| -> Result<serde_json::Value, OlError> {
                        let mut v = serde_json::to_value(t).map_err(|e| {
                            OlError::new(
                                OL_4210_SCHEMA_MISMATCH,
                                format!("internal: serialise v2 tool: {e}"),
                            )
                        })?;
                        if let Some(m) = v.as_object_mut() {
                            m.remove("process");
                        }
                        Ok(v)
                    })
                    .collect::<Result<Vec<serde_json::Value>, OlError>>()?;
                let editor_json = serde_json::to_value(&parsed.editor).map_err(|e| {
                    OlError::new(
                        OL_4210_SCHEMA_MISMATCH,
                        format!("internal: serialise v2 editor: {e}"),
                    )
                })?;

                let synth_json = serde_json::json!({
                    "schema_version": 1,
                    "editor": editor_json,
                    "tools": tools_json,
                    "providers": [],
                    "bindings": [],
                });
                schema::validate(&synth_json)?;
                let m: Manifest = serde_json::from_value(synth_json).map_err(|e| {
                    OlError::new(
                        OL_4210_SCHEMA_MISMATCH,
                        format!("internal: synth tool-only manifest: {e}"),
                    )
                })?;
                return Ok(m);
            }
            _ => {}
        }
    }
    parse(&bytes)
}
/// Turns raw YAML manifest bytes into a checked [`Manifest`].
///
/// The stages run in this order: YAML decode, conversion to a JSON value,
/// `schema::validate`, deserialization into `Manifest`, and finally
/// [`semantic_check`].
///
/// # Errors
///
/// Decode, conversion and deserialization failures are reported as
/// `OL_4210_SCHEMA_MISMATCH`. Errors from `schema::validate` and
/// [`semantic_check`] are returned unchanged.
pub fn parse(bytes: &[u8]) -> Result<Manifest, OlError> {
    // Every failure produced directly by this function carries the same code.
    let schema_mismatch = |detail: String| OlError::new(OL_4210_SCHEMA_MISMATCH, detail);

    let document: serde_yaml::Value = serde_yaml::from_slice(bytes)
        .map_err(|e| schema_mismatch(format!("YAML parse: {e}")))?;
    let as_json = serde_json::to_value(&document)
        .map_err(|e| schema_mismatch(format!("YAML→JSON conversion: {e}")))?;

    schema::validate(&as_json)?;

    let checked = serde_json::from_value::<Manifest>(as_json)
        .map_err(|e| schema_mismatch(format!("typify deserialize: {e}")))?;
    semantic_check(&checked)?;
    Ok(checked)
}
/// Applies the cross-field rules that run after schema validation in `parse`.
///
/// Checks run in this order and stop at the first failure:
///
/// 1. Tools: slugs are unique; `agents_supported`, `hooks_supported` and
///    `capabilities` are each non-empty; every capability's serialized
///    `threat_category`, when non-empty, is in `KNOWN_THREAT_CATEGORIES`.
///    Agent platforms and hook events missing from the known lists only
///    produce a `tracing` warning.
/// 2. Providers: slugs are unique and `endpoint_url` starts with `https://`.
/// 3. Bindings: `tool` and `provider` name declared entries, and no two
///    bindings share a `process.health_check.http.port`.
///
/// # Errors
///
/// Returns the `OlError` for the first violated rule. An unknown threat
/// category carries a "Did you mean" suggestion when a close match exists.
pub fn semantic_check(m: &Manifest) -> Result<(), OlError> {
    use std::collections::BTreeSet;

    // Most failures below share this error code.
    let mismatch = |detail: String| OlError::new(OL_4210_SCHEMA_MISMATCH, detail);

    // Tools
    let mut declared_tools: BTreeSet<String> = BTreeSet::new();
    for tool in &m.tools {
        let slug: &str = &tool.slug;
        if !declared_tools.insert(slug.to_string()) {
            return Err(mismatch(format!("duplicate tool slug `{slug}`")));
        }

        // Unrecognised platforms and hook events are logged, never rejected.
        for agent in &tool.agents_supported {
            if KNOWN_AGENT_PLATFORMS.contains(&agent.as_str()) {
                continue;
            }
            tracing::warn!(agent = %agent, "unknown agent.platform value (allowed but tagged)");
        }
        for hook in &tool.hooks_supported {
            if KNOWN_HOOK_EVENTS.contains(&hook.as_str()) {
                continue;
            }
            tracing::warn!(hook = %hook, "unknown hook event value (allowed but tagged)");
        }

        if tool.agents_supported.is_empty() {
            return Err(OlError::new(
                OL_4213_INVALID_AGENTS_SUPPORTED,
                format!("tool `{slug}` has empty agents_supported"),
            ));
        }
        if tool.hooks_supported.is_empty() {
            return Err(OlError::new(
                OL_4214_INVALID_HOOKS_SUPPORTED,
                format!("tool `{slug}` has empty hooks_supported"),
            ));
        }
        if tool.capabilities.is_empty() {
            return Err(OlError::new(
                OL_4216_CAPABILITY_MISSING_FIELD,
                format!("tool `{slug}` has no capabilities"),
            ));
        }

        for capability in &tool.capabilities {
            // A category that does not serialize to a string is treated as
            // empty and therefore skipped.
            let serialized = match serde_json::to_value(capability.threat_category) {
                Ok(serde_json::Value::String(text)) => text,
                _ => String::new(),
            };
            if serialized.is_empty() || KNOWN_THREAT_CATEGORIES.contains(&serialized.as_str()) {
                continue;
            }
            let unknown = OlError::new(
                OL_4211_UNKNOWN_THREAT_CATEGORY,
                format!("tool `{slug}`: unknown threat_category `{serialized}`"),
            );
            return Err(match closest_match(&serialized, KNOWN_THREAT_CATEGORIES) {
                Some(suggestion) => unknown.with_suggestion(format!("Did you mean `{suggestion}`?")),
                None => unknown,
            });
        }
    }

    // Providers
    let mut declared_providers: BTreeSet<String> = BTreeSet::new();
    for provider in &m.providers {
        let slug: &str = &provider.slug;
        if !declared_providers.insert(slug.to_string()) {
            return Err(mismatch(format!("duplicate provider slug `{slug}`")));
        }
        if provider.endpoint_url.starts_with("https://") {
            continue;
        }
        let plaintext = OlError::new(
            OL_4212_INVALID_ENDPOINT_URL,
            format!(
                "provider `{}` endpoint_url must be HTTPS, got `{}`",
                slug, provider.endpoint_url
            ),
        );
        return Err(plaintext.with_suggestion(
            "Use https:// scheme; the platform never connects to plaintext endpoints.",
        ));
    }

    // Bindings
    let mut claimed_ports: BTreeSet<u64> = BTreeSet::new();
    for binding in &m.bindings {
        if !declared_tools.contains(&binding.tool) {
            return Err(mismatch(format!(
                "binding references tool `{}` which is not declared in tools[]",
                binding.tool
            )));
        }
        if !declared_providers.contains(&binding.provider) {
            return Err(mismatch(format!(
                "binding references provider `{}` which is not declared in providers[]",
                binding.provider
            )));
        }

        // The health-check port is read from the binding's JSON form; a
        // binding without one takes no part in the uniqueness check.
        let rendered = serde_json::to_value(binding).map_err(|e| {
            mismatch(format!(
                "serialise binding {}/{}: {e}",
                binding.tool, binding.provider
            ))
        })?;
        let port = match rendered
            .pointer("/process/health_check/http/port")
            .and_then(serde_json::Value::as_u64)
        {
            Some(declared) => declared,
            None => continue,
        };
        if !claimed_ports.insert(port) {
            return Err(OlError::new(
                OL_4300_PROCESS_SPEC_INVALID,
                format!(
                    "two or more bindings declare process.health_check.http.port = {port}; \
                     each binding spawns its own process and must own its port"
                ),
            ));
        }
    }

    Ok(())
}
pub fn strip_local_only_binding_fields(value: &mut serde_json::Value) {
if let serde_json::Value::Object(map) = value {
map.remove("process");
}
}
/// Picks the entry of `haystack` with the smallest Levenshtein distance to
/// `needle`, considering only entries at distance 4 or less.
///
/// When several entries tie for the smallest distance, the one that appears
/// first in `haystack` wins. Returns `None` when no entry is close enough.
fn closest_match(needle: &str, haystack: &[&str]) -> Option<String> {
    // Candidates further away than this are never suggested.
    const MAX_DISTANCE: usize = 4;

    haystack
        .iter()
        .map(|candidate| (strsim::levenshtein(needle, candidate), *candidate))
        .filter(|(distance, _)| *distance <= MAX_DISTANCE)
        // `min_by_key` keeps the first of several equal minima.
        .min_by_key(|(distance, _)| *distance)
        .map(|(_, candidate)| candidate.to_string())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A misspelled category resolves to the intended known category.
    #[test]
    fn closest_match_finds_typo() {
        assert_eq!(
            closest_match("pii_outbond", KNOWN_THREAT_CATEGORIES),
            Some("pii_outbound".to_string())
        );
    }

    /// `KNOWN_THREAT_CATEGORIES` and the `threat_category` enum in the
    /// capability schema must contain exactly the same names.
    #[test]
    fn known_threat_categories_match_schema() {
        use std::collections::BTreeSet;

        let schema_text: &str = include_str!("../../schemas/manifest-capability.schema.json");
        let schema: serde_json::Value =
            serde_json::from_str(schema_text).expect("capability schema must parse");

        let declared: BTreeSet<&str> = schema
            .pointer("/properties/threat_category/enum")
            .and_then(serde_json::Value::as_array)
            .expect("schema.properties.threat_category.enum must exist")
            .iter()
            .map(|entry| entry.as_str().expect("threat_category enum must be strings"))
            .collect();
        let listed: BTreeSet<&str> = KNOWN_THREAT_CATEGORIES.iter().copied().collect();

        assert_eq!(
            listed, declared,
            "KNOWN_THREAT_CATEGORIES drifted from manifest-capability.schema.json"
        );
    }
}