Skip to main content

ai_lib_rust/protocol/
validator.rs

1//! Protocol validator using JSON Schema
2
3use crate::protocol::{ProtocolError, ProtocolManifest};
4use jsonschema::{Draft, JSONSchema};
5
/// Protocol validator that validates manifests against JSON Schema.
///
/// The schema is compiled once in [`ProtocolValidator::new`] and then reused by every
/// call to [`ProtocolValidator::validate`].
pub struct ProtocolValidator {
    /// Compiled JSON Schema that manifests are checked against.
    schema: JSONSchema,
}
10
11impl ProtocolValidator {
    /// Standard GitHub URL for the official AI-Protocol schema.
    /// This is the canonical source of truth for schema validation; it is the URL
    /// requested by `fetch_schema_from_github`.
    const SCHEMA_GITHUB_URL: &'static str =
        "https://raw.githubusercontent.com/hiddenpath/ai-protocol/main/schemas/v1.json";
16
17    /// Create a new validator with the v1 schema.
18    ///
19    /// Schema loading strategy (in order):
20    /// 1. GitHub URL (canonical source) - priority, used in production and CI
21    /// 2. AI_PROTOCOL_DIR as GitHub URL (if set and is a URL)
22    /// 3. Local file system (for offline development) - fallback if GitHub unavailable
23    ///
24    /// This ensures all validation uses the same standard schema, while allowing
25    /// local development when network is unavailable.
26    pub fn new() -> Result<Self, ProtocolError> {
27        // Priority 1: Try local file system first (for development)
28        // This allows developers to test schema changes locally before pushing to GitHub
29        let schema_content = Self::load_schema_from_local()
30            .or_else(|| {
31                // Priority 2: Try GitHub URL (canonical source)
32                Self::fetch_schema_from_github().ok()
33            })
34            .or_else(|| {
35                // Priority 3: Try AI_PROTOCOL_DIR as GitHub URL (if it's a URL)
36                if let Ok(root) =
37                    std::env::var("AI_PROTOCOL_DIR").or_else(|_| std::env::var("AI_PROTOCOL_PATH"))
38                {
39                    if root.starts_with("http://") || root.starts_with("https://") {
40                        let schema_url = if root.ends_with('/') {
41                            format!("{}schemas/v1.json", root)
42                        } else {
43                            format!("{}/schemas/v1.json", root)
44                        };
45                        Self::fetch_schema_from_url(&schema_url).ok()
46                    } else {
47                        None
48                    }
49                } else {
50                    None
51                }
52            })
53            .or_else(|| {
54                // Priority 4: Embedded canonical schema (offline-safe for published crates).
55                Some(Self::embedded_schema_v1().to_string())
56            })
57            .unwrap_or_else(|| {
58                // Final fallback (offline-safe): use a minimal built-in schema so the runtime
59                // can still operate, and rely on basic validation + runtime checks.
60                tracing::warn!(
61                    "AI-Protocol JSON Schema not found (offline). Falling back to built-in minimal schema. \
62                     Tip: set AI_PROTOCOL_PATH to your local ai-protocol checkout or a GitHub raw URL."
63                );
64                Self::builtin_minimal_schema()
65            });
66
67        let schema_value: serde_json::Value = serde_json::from_str(&schema_content)
68            .map_err(|e| ProtocolError::Internal(format!("Invalid JSON Schema: {}", e)))?;
69
70        let schema = JSONSchema::options()
71            .with_draft(Draft::Draft7)
72            .compile(&schema_value)
73            .map_err(|e| ProtocolError::Internal(format!("Failed to compile schema: {}", e)))?;
74
75        Ok(Self { schema })
76    }
77
78    /// Minimal schema used as an offline fallback when the canonical schema cannot be loaded.
79    ///
80    /// This schema is intentionally conservative: it checks for presence of the most critical
81    /// top-level fields, but does not attempt to fully validate all nested shapes.
82    fn builtin_minimal_schema() -> String {
83        // Draft7 is used by the `jsonschema` crate defaults we compile with.
84        // We keep this small to avoid embedding large schema assets in the runtime crate.
85        r#"{
86  "$schema": "http://json-schema.org/draft-07/schema#",
87  "type": "object",
88  "required": [
89    "id",
90    "protocol_version",
91    "endpoint",
92    "availability",
93    "capabilities",
94    "auth",
95    "status",
96    "category",
97    "official_url",
98    "support_contact",
99    "parameter_mappings"
100  ],
101  "properties": {
102    "id": { "type": "string", "minLength": 1 },
103    "protocol_version": { "type": "string", "minLength": 1 },
104    "endpoint": {
105      "type": "object",
106      "required": ["base_url"],
107      "properties": { "base_url": { "type": "string", "minLength": 1 } }
108    },
109    "availability": { "type": "object" },
110    "capabilities": { "type": "object" },
111    "auth": { "type": "object" },
112    "parameter_mappings": { "type": "object" }
113  },
114  "additionalProperties": true
115}"#
116        .to_string()
117    }
118
119    /// Embedded canonical AI-Protocol schema (v1.json) shipped with the crate.
120    ///
121    /// This guarantees schema validation works for published crates even when:
122    /// - GitHub is unreachable
123    /// - the user does not have a local ai-protocol checkout
124    fn embedded_schema_v1() -> &'static str {
125        include_str!("schema_v1.json")
126    }
127
128    /// Fetch schema from a specific URL.
129    fn fetch_schema_from_url(
130        url: &str,
131    ) -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
132        // Use a separate thread to avoid tokio runtime nesting issues
133        // This ensures the blocking client runs in its own thread context
134        let url = url.to_string();
135        let (tx, rx) = std::sync::mpsc::channel();
136
137        std::thread::spawn(move || {
138            let result = (|| -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
139                let client = reqwest::blocking::Client::builder()
140                    .timeout(std::time::Duration::from_secs(10))
141                    .build()
142                    .map_err(|e| format!("Failed to create HTTP client: {}", e))?;
143
144                let response = client
145                    .get(&url)
146                    .send()
147                    .map_err(|e| format!("HTTP request failed: {}", e))?;
148
149                if !response.status().is_success() {
150                    return Err(format!(
151                        "HTTP {}: {}",
152                        response.status(),
153                        response.text().unwrap_or_default()
154                    )
155                    .into());
156                }
157
158                Ok(response
159                    .text()
160                    .map_err(|e| format!("Failed to read response: {}", e))?)
161            })();
162
163            let _ = tx.send(result);
164        });
165
166        rx.recv()
167            .map_err(|e| format!("Failed to receive result from thread: {}", e))?
168    }
169
170    /// Fetch schema from GitHub (canonical source).
171    fn fetch_schema_from_github() -> Result<String, Box<dyn std::error::Error + Send + Sync>> {
172        Self::fetch_schema_from_url(Self::SCHEMA_GITHUB_URL)
173    }
174
175    /// Load schema from local file system (fallback for offline development).
176    fn load_schema_from_local() -> Option<String> {
177        use std::path::PathBuf;
178
179        let mut schema_paths: Vec<PathBuf> = Vec::new();
180
181        // If AI_PROTOCOL_DIR/AI_PROTOCOL_PATH is set and is a local path, try resolving it in a
182        // few robust ways (tests often set relative paths, and the test binary cwd is not crate root).
183        if let Ok(root) =
184            std::env::var("AI_PROTOCOL_DIR").or_else(|_| std::env::var("AI_PROTOCOL_PATH"))
185        {
186            if !root.starts_with("http://") && !root.starts_with("https://") {
187                let root_pb = PathBuf::from(&root);
188
189                // Candidate bases to resolve relative paths:
190                // - as-is (if absolute)
191                // - relative to current_dir
192                // - relative to current_exe dir
193                // - relative to crate root (compile-time)
194                let mut bases: Vec<PathBuf> = Vec::new();
195                bases.push(root_pb.clone());
196                if root_pb.is_relative() {
197                    if let Ok(cd) = std::env::current_dir() {
198                        bases.push(cd.join(&root_pb));
199                    }
200                    if let Ok(exe) = std::env::current_exe() {
201                        if let Some(dir) = exe.parent() {
202                            bases.push(dir.join(&root_pb));
203                        }
204                    }
205                    let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
206                    bases.push(crate_dir.join(&root_pb));
207                }
208
209                for base in bases {
210                    // Allow env var to point either to repo root or directly to schema file.
211                    if base.extension().and_then(|s| s.to_str()) == Some("json") {
212                        schema_paths.push(base.clone());
213                    } else {
214                        schema_paths.push(base.join("schemas").join("v1.json"));
215                    }
216                }
217            }
218        }
219
220        // Priority 2: Windows development convenience path (always check, add if exists)
221        let win_dev = PathBuf::from(r"D:\ai-protocol\schemas\v1.json");
222        schema_paths.push(win_dev);
223
224        // Priority 3: Common development paths (relative to crate root for determinism).
225        let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
226        schema_paths.push(
227            crate_dir
228                .join("ai-protocol")
229                .join("schemas")
230                .join("v1.json"),
231        );
232        schema_paths.push(
233            crate_dir
234                .join("..")
235                .join("ai-protocol")
236                .join("schemas")
237                .join("v1.json"),
238        );
239        schema_paths.push(
240            crate_dir
241                .join("..")
242                .join("..")
243                .join("ai-protocol")
244                .join("schemas")
245                .join("v1.json"),
246        );
247
248        // Try all paths in order
249        for path in &schema_paths {
250            if path.exists() {
251                if let Ok(content) = std::fs::read_to_string(path) {
252                    return Some(content);
253                }
254            }
255        }
256
257        None
258    }
259
260    /// Validate a protocol manifest using the compiled JSON Schema
261    pub fn validate(&self, manifest: &ProtocolManifest) -> Result<(), ProtocolError> {
262        // Convert manifest to JSON for validation
263        let manifest_json = serde_json::to_value(manifest)
264            .map_err(|e| ProtocolError::ValidationError(format!("Serialization error: {}", e)))?;
265
266        // 1. JSON Schema validation
267        if let Err(errors) = self.schema.validate(&manifest_json) {
268            let error_msgs: Vec<String> = errors.map(|e| e.to_string()).collect();
269            return Err(ProtocolError::ValidationError(format!(
270                "JSON Schema validation failed:\n  - {}",
271                error_msgs.join("\n  - ")
272            ))
273            .with_hint(
274                "Check the official AI-Protocol documentation for the required file structure.",
275            ));
276        }
277
278        // 2. Perform basic logic validation
279        Self::validate_basic(manifest)?;
280
281        Ok(())
282    }
283
284    /// Basic validation without JSON Schema (fallback)
285    fn validate_basic(manifest: &ProtocolManifest) -> Result<(), ProtocolError> {
286        // Check required fields
287        if manifest.id.is_empty() {
288            return Err(ProtocolError::ValidationError(
289                "Protocol id is required".to_string(),
290            ));
291        }
292
293        if manifest.protocol_version.is_empty() {
294            return Err(ProtocolError::ValidationError(
295                "Protocol version is required".to_string(),
296            ));
297        }
298
299        if manifest.endpoint.base_url.is_empty() {
300            return Err(ProtocolError::ValidationError(
301                "Base URL is required".to_string(),
302            ));
303        }
304
305        // Validate protocol version
306        if !manifest.protocol_version.starts_with("1.") {
307            return Err(ProtocolError::InvalidVersion {
308                version: manifest.protocol_version.clone(),
309                max_supported: "1.x".to_string(),
310                hint: Some(
311                    "This version of the library only supports AI-Protocol v1.x manifests."
312                        .to_string(),
313                ),
314            });
315        }
316
317        Ok(())
318    }
319}
320
impl Default for ProtocolValidator {
    /// Builds a validator by calling [`ProtocolValidator::new`].
    ///
    /// # Panics
    ///
    /// Panics with the message "Failed to initialize validator" if `new` returns an
    /// error. Call `new` directly to handle that case without panicking.
    fn default() -> Self {
        Self::new().expect("Failed to initialize validator")
    }
}
325}