acp/
schema.rs

1//! @acp:module "Schema Validation"
2//! @acp:summary "JSON schema validation for ACP files using jsonschema crate"
3//! @acp:domain cli
4//! @acp:layer utility
5//!
6//! Provides comprehensive validation of ACP JSON files against their schemas.
7//! Uses embedded schemas compiled once at runtime for efficient repeated validation.
8
9use crate::attempts::AttemptTracker;
10use crate::error::{AcpError, Result};
11use jsonschema::{Draft, Retrieve, Uri, Validator};
12use std::sync::OnceLock;
13
// Embed schemas at compile time (copied from acp-spec submodule by build.rs).
// `include_str!` stores each file's text in the binary, so these schemas are
// not read from disk at runtime. One static per supported ACP file type.
static CACHE_SCHEMA_STR: &str = include_str!("../schemas/v1/cache.schema.json");
static VARS_SCHEMA_STR: &str = include_str!("../schemas/v1/vars.schema.json");
static CONFIG_SCHEMA_STR: &str = include_str!("../schemas/v1/config.schema.json");
static ATTEMPTS_SCHEMA_STR: &str = include_str!("../schemas/v1/attempts.schema.json");
static SYNC_SCHEMA_STR: &str = include_str!("../schemas/v1/sync.schema.json");
static PRIMER_SCHEMA_STR: &str = include_str!("../schemas/v1/primer.schema.json");
21
// Compiled schema validators (lazy initialization).
// Each cell starts empty and is filled by the matching `get_*_validator`
// function the first time that validator is requested.
static CACHE_VALIDATOR: OnceLock<Validator> = OnceLock::new();
static VARS_VALIDATOR: OnceLock<Validator> = OnceLock::new();
static CONFIG_VALIDATOR: OnceLock<Validator> = OnceLock::new();
static ATTEMPTS_VALIDATOR: OnceLock<Validator> = OnceLock::new();
static SYNC_VALIDATOR: OnceLock<Validator> = OnceLock::new();
static PRIMER_VALIDATOR: OnceLock<Validator> = OnceLock::new();
29
30/// A retriever that returns embedded schemas for ACP URLs and fails for others
31struct AcpRetriever;
32
33impl Retrieve for AcpRetriever {
34    fn retrieve(
35        &self,
36        uri: &Uri<String>,
37    ) -> std::result::Result<serde_json::Value, Box<dyn std::error::Error + Send + Sync>> {
38        let uri_str = uri.as_str();
39
40        // Return embedded schemas for known ACP URLs
41        if uri_str == "https://acp-protocol.dev/schemas/v1/sync.schema.json" {
42            let mut schema: serde_json::Value = serde_json::from_str(SYNC_SCHEMA_STR)
43                .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;
44            // Remove $schema to prevent recursive meta-schema lookup
45            if let Some(obj) = schema.as_object_mut() {
46                obj.remove("$schema");
47            }
48            return Ok(schema);
49        }
50
51        // For meta-schemas, return a permissive empty schema
52        // This allows validation without fetching external resources
53        if uri_str.starts_with("https://json-schema.org/") {
54            return Ok(serde_json::json!({}));
55        }
56
57        Err(format!("Unknown schema URI: {}", uri_str).into())
58    }
59}
60
61/// Compile a JSON Schema from a string using Draft 7 with embedded retriever
62fn compile_schema(schema_str: &str, name: &str) -> Validator {
63    let mut schema: serde_json::Value = serde_json::from_str(schema_str)
64        .unwrap_or_else(|e| panic!("Invalid {} schema JSON: {}", name, e));
65
66    // Remove $schema field to prevent meta-schema lookup
67    // We explicitly set Draft7 below
68    if let Some(obj) = schema.as_object_mut() {
69        obj.remove("$schema");
70    }
71
72    // Build validator with custom retriever to avoid external fetches
73    jsonschema::options()
74        .with_draft(Draft::Draft7)
75        .with_retriever(AcpRetriever)
76        .build(&schema)
77        .unwrap_or_else(|e| panic!("Failed to compile {} schema: {}", name, e))
78}
79
80fn get_cache_validator() -> &'static Validator {
81    CACHE_VALIDATOR.get_or_init(|| compile_schema(CACHE_SCHEMA_STR, "cache"))
82}
83
84fn get_vars_validator() -> &'static Validator {
85    VARS_VALIDATOR.get_or_init(|| compile_schema(VARS_SCHEMA_STR, "vars"))
86}
87
88fn get_config_validator() -> &'static Validator {
89    CONFIG_VALIDATOR.get_or_init(|| compile_schema(CONFIG_SCHEMA_STR, "config"))
90}
91
92fn get_attempts_validator() -> &'static Validator {
93    ATTEMPTS_VALIDATOR.get_or_init(|| compile_schema(ATTEMPTS_SCHEMA_STR, "attempts"))
94}
95
96fn get_sync_validator() -> &'static Validator {
97    SYNC_VALIDATOR.get_or_init(|| compile_schema(SYNC_SCHEMA_STR, "sync"))
98}
99
100fn get_primer_validator() -> &'static Validator {
101    PRIMER_VALIDATOR.get_or_init(|| compile_schema(PRIMER_SCHEMA_STR, "primer"))
102}
103
104/// Collect validation errors into a formatted string
105fn collect_errors<'a>(
106    errors: impl Iterator<Item = jsonschema::error::ValidationError<'a>>,
107) -> String {
108    errors
109        .map(|e| format!("{}", e))
110        .collect::<Vec<_>>()
111        .join("; ")
112}
113
114/// @acp:summary "Validate cache file against schema"
115pub fn validate_cache(json: &str) -> Result<()> {
116    let value: serde_json::Value = serde_json::from_str(json)?;
117    let validator = get_cache_validator();
118
119    let errors: Vec<_> = validator.iter_errors(&value).collect();
120    if !errors.is_empty() {
121        return Err(AcpError::SchemaValidation(collect_errors(
122            errors.into_iter(),
123        )));
124    }
125
126    // Also validate with serde for type checking
127    let _: crate::cache::Cache = serde_json::from_value(value)?;
128    Ok(())
129}
130
131/// @acp:summary "Validate vars file against schema"
132pub fn validate_vars(json: &str) -> Result<()> {
133    let value: serde_json::Value = serde_json::from_str(json)?;
134    let validator = get_vars_validator();
135
136    let errors: Vec<_> = validator.iter_errors(&value).collect();
137    if !errors.is_empty() {
138        return Err(AcpError::SchemaValidation(collect_errors(
139            errors.into_iter(),
140        )));
141    }
142
143    // Also validate with serde for type checking
144    let _: crate::vars::VarsFile = serde_json::from_value(value)?;
145    Ok(())
146}
147
148/// @acp:summary "Validate config file against schema"
149pub fn validate_config(json: &str) -> Result<()> {
150    let value: serde_json::Value = serde_json::from_str(json)?;
151    let validator = get_config_validator();
152
153    let errors: Vec<_> = validator.iter_errors(&value).collect();
154    if !errors.is_empty() {
155        return Err(AcpError::SchemaValidation(collect_errors(
156            errors.into_iter(),
157        )));
158    }
159
160    // Also validate with serde for type checking
161    let _: crate::config::Config = serde_json::from_value(value)?;
162    Ok(())
163}
164
165/// @acp:summary "Validate attempts file against schema"
166pub fn validate_attempts(json: &str) -> Result<()> {
167    let value: serde_json::Value = serde_json::from_str(json)?;
168    let validator = get_attempts_validator();
169
170    let errors: Vec<_> = validator.iter_errors(&value).collect();
171    if !errors.is_empty() {
172        return Err(AcpError::SchemaValidation(collect_errors(
173            errors.into_iter(),
174        )));
175    }
176
177    // Also validate with serde for type checking
178    let tracker: AttemptTracker = serde_json::from_value(value)?;
179
180    // Semantic validation
181    validate_attempts_semantic(&tracker)?;
182
183    Ok(())
184}
185
186/// @acp:summary "Validate sync config against schema"
187pub fn validate_sync(json: &str) -> Result<()> {
188    let value: serde_json::Value = serde_json::from_str(json)?;
189    let validator = get_sync_validator();
190
191    let errors: Vec<_> = validator.iter_errors(&value).collect();
192    if !errors.is_empty() {
193        return Err(AcpError::SchemaValidation(collect_errors(
194            errors.into_iter(),
195        )));
196    }
197
198    // Semantic validation via JSON value (no Rust struct yet)
199    validate_sync_semantic(&value)?;
200
201    Ok(())
202}
203
204/// @acp:summary "Validate primer definition against schema"
205pub fn validate_primer(json: &str) -> Result<()> {
206    let value: serde_json::Value = serde_json::from_str(json)?;
207    let validator = get_primer_validator();
208
209    let errors: Vec<_> = validator.iter_errors(&value).collect();
210    if !errors.is_empty() {
211        return Err(AcpError::SchemaValidation(collect_errors(
212            errors.into_iter(),
213        )));
214    }
215
216    // Semantic validation via JSON value (no Rust struct yet)
217    validate_primer_semantic(&value)?;
218
219    Ok(())
220}
221
222// ============================================================================
223// Semantic Validation Functions
224// ============================================================================
225
226/// @acp:summary "Semantic validation for attempts that cannot be expressed in JSON Schema"
227fn validate_attempts_semantic(tracker: &AttemptTracker) -> Result<()> {
228    // Check lines_changed order: start_line <= end_line
229    for (attempt_id, attempt) in &tracker.attempts {
230        for file in &attempt.files {
231            if let Some([start, end]) = file.lines_changed {
232                if start > end {
233                    return Err(AcpError::SemanticValidation(format!(
234                        "In attempt '{}', file '{}': lines_changed start ({}) > end ({})",
235                        attempt_id, file.path, start, end
236                    )));
237                }
238            }
239        }
240    }
241
242    // Check history entries have valid ordering
243    for (i, entry) in tracker.history.iter().enumerate() {
244        if entry.started_at > entry.ended_at {
245            return Err(AcpError::SemanticValidation(format!(
246                "History entry {} ({}): started_at > ended_at",
247                i, entry.id
248            )));
249        }
250    }
251
252    Ok(())
253}
254
255/// @acp:summary "Semantic validation for sync config"
256fn validate_sync_semantic(value: &serde_json::Value) -> Result<()> {
257    // Warn (but don't error) on tool overlap between tools and exclude arrays
258    if let (Some(tools), Some(exclude)) = (
259        value.get("tools").and_then(|v| v.as_array()),
260        value.get("exclude").and_then(|v| v.as_array()),
261    ) {
262        let tool_set: std::collections::HashSet<_> =
263            tools.iter().filter_map(|v| v.as_str()).collect();
264
265        let overlap: Vec<_> = exclude
266            .iter()
267            .filter_map(|v| v.as_str())
268            .filter(|t| tool_set.contains(t))
269            .collect();
270
271        if !overlap.is_empty() {
272            tracing::warn!(
273                "Tools appear in both 'tools' and 'exclude' arrays: {:?}. Behavior is undefined.",
274                overlap
275            );
276        }
277    }
278
279    Ok(())
280}
281
282/// @acp:summary "Semantic validation for primer definitions"
283fn validate_primer_semantic(value: &serde_json::Value) -> Result<()> {
284    if let Some(sections) = value.get("sections").and_then(|v| v.as_array()) {
285        let section_ids: std::collections::HashSet<_> = sections
286            .iter()
287            .filter_map(|s| s.get("id"))
288            .filter_map(|id| id.as_str())
289            .collect();
290
291        for section in sections {
292            let section_id = section
293                .get("id")
294                .and_then(|id| id.as_str())
295                .unwrap_or("unknown");
296
297            // Check for self-references in conflictsWith
298            if let Some(conflicts) = section.get("conflictsWith").and_then(|v| v.as_array()) {
299                for conflict in conflicts {
300                    if let Some(conflict_id) = conflict.as_str() {
301                        if conflict_id == section_id {
302                            return Err(AcpError::SemanticValidation(format!(
303                                "Section '{}' has self-reference in conflictsWith",
304                                section_id
305                            )));
306                        }
307                    }
308                }
309            }
310
311            // Check for circular dependencies (simple check - warns if dependsOn references non-existent sections)
312            if let Some(depends) = section.get("dependsOn").and_then(|v| v.as_array()) {
313                for dep in depends {
314                    if let Some(dep_id) = dep.as_str() {
315                        if !section_ids.contains(dep_id) {
316                            tracing::warn!(
317                                "Section '{}' depends on '{}' which does not exist",
318                                section_id,
319                                dep_id
320                            );
321                        }
322                    }
323                }
324            }
325        }
326
327        // Full cycle detection using DFS
328        if let Err(cycle) = detect_dependency_cycles(sections) {
329            return Err(AcpError::SemanticValidation(format!(
330                "Circular dependency detected in primer sections: {}",
331                cycle
332            )));
333        }
334    }
335
336    Ok(())
337}
338
339/// Detect cycles in section dependencies using DFS
340fn detect_dependency_cycles(sections: &[serde_json::Value]) -> std::result::Result<(), String> {
341    use std::collections::{HashMap, HashSet};
342
343    // Build adjacency list
344    let mut deps: HashMap<&str, Vec<&str>> = HashMap::new();
345
346    for section in sections {
347        if let Some(id) = section.get("id").and_then(|v| v.as_str()) {
348            let dep_list = section
349                .get("dependsOn")
350                .and_then(|v| v.as_array())
351                .map(|arr| arr.iter().filter_map(|d| d.as_str()).collect())
352                .unwrap_or_default();
353            deps.insert(id, dep_list);
354        }
355    }
356
357    // DFS for cycle detection
358    let mut visited: HashSet<&str> = HashSet::new();
359    let mut rec_stack: HashSet<&str> = HashSet::new();
360    let mut path: Vec<&str> = Vec::new();
361
362    fn dfs<'a>(
363        node: &'a str,
364        deps: &HashMap<&'a str, Vec<&'a str>>,
365        visited: &mut HashSet<&'a str>,
366        rec_stack: &mut HashSet<&'a str>,
367        path: &mut Vec<&'a str>,
368    ) -> std::result::Result<(), String> {
369        visited.insert(node);
370        rec_stack.insert(node);
371        path.push(node);
372
373        if let Some(neighbors) = deps.get(node) {
374            for &neighbor in neighbors {
375                if !visited.contains(neighbor) {
376                    dfs(neighbor, deps, visited, rec_stack, path)?;
377                } else if rec_stack.contains(neighbor) {
378                    // Found cycle - construct path
379                    let cycle_start = path.iter().position(|&n| n == neighbor).unwrap();
380                    let cycle: Vec<_> = path[cycle_start..].to_vec();
381                    return Err(format!("{} -> {}", cycle.join(" -> "), neighbor));
382                }
383            }
384        }
385
386        path.pop();
387        rec_stack.remove(node);
388        Ok(())
389    }
390
391    for &node in deps.keys() {
392        if !visited.contains(node) {
393            dfs(node, &deps, &mut visited, &mut rec_stack, &mut path)?;
394        }
395    }
396
397    Ok(())
398}
399
/// @acp:summary "Detect schema type from filename"
///
/// Matching is case-insensitive and keyword based. The keywords, in
/// priority order, are `cache`, `vars`, `config`, `attempts`, `sync` and
/// `primer`.
///
/// The final path component (after the last `/` or `\`) is checked first,
/// so a directory name cannot override the file's own name: for example
/// `my-cache/acp.config.json` is detected as `config`. If the file name
/// contains no keyword, the whole path is checked as a fallback.
///
/// Returns `None` when no keyword is found anywhere in `filename`.
pub fn detect_schema_type(filename: &str) -> Option<&'static str> {
    // Priority order matters when one name contains several keywords.
    const KINDS: [&str; 6] = ["cache", "vars", "config", "attempts", "sync", "primer"];

    /// First keyword (in priority order) contained in `haystack`, if any.
    fn classify(haystack: &str) -> Option<&'static str> {
        KINDS.iter().copied().find(|kind| haystack.contains(*kind))
    }

    let lower = filename.to_lowercase();

    // `rsplit` always yields at least one item, so the fallback is never
    // actually taken; it only avoids an `unwrap`.
    let base_name = lower
        .rsplit(|c: char| c == '/' || c == '\\')
        .next()
        .unwrap_or(lower.as_str());

    classify(base_name).or_else(|| classify(&lower))
}
420
421/// @acp:summary "Validate JSON against a specific schema type"
422pub fn validate_by_type(json: &str, schema_type: &str) -> Result<()> {
423    match schema_type {
424        "cache" => validate_cache(json),
425        "vars" => validate_vars(json),
426        "config" => validate_config(json),
427        "attempts" => validate_attempts(json),
428        "sync" => validate_sync(json),
429        "primer" => validate_primer(json),
430        _ => Err(AcpError::Other(format!(
431            "Unknown schema type: {}",
432            schema_type
433        ))),
434    }
435}
436
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse a JSON array literal into a list of section values.
    fn parse_sections(raw: &str) -> Vec<serde_json::Value> {
        serde_json::from_str(raw).unwrap()
    }

    #[test]
    fn test_detect_schema_type() {
        // (input filename, expected schema type)
        let cases = [
            (".acp.cache.json", Some("cache")),
            (".acp/acp.vars.json", Some("vars")),
            (".acp.config.json", Some("config")),
            (".acp/acp.attempts.json", Some("attempts")),
            ("acp.sync.json", Some("sync")),
            ("primer.defaults.json", Some("primer")),
            ("random.json", None),
        ];

        for (filename, expected) in cases {
            assert_eq!(detect_schema_type(filename), expected);
        }
    }

    #[test]
    fn test_dependency_cycle_detection() {
        // Linear chain a -> b -> c: no cycle.
        let acyclic = parse_sections(
            r#"[
            {"id": "a", "dependsOn": ["b"]},
            {"id": "b", "dependsOn": ["c"]},
            {"id": "c"}
        ]"#,
        );
        assert!(detect_dependency_cycles(&acyclic).is_ok());

        // Closing the chain with c -> a creates a cycle.
        let cyclic = parse_sections(
            r#"[
            {"id": "a", "dependsOn": ["b"]},
            {"id": "b", "dependsOn": ["c"]},
            {"id": "c", "dependsOn": ["a"]}
        ]"#,
        );
        assert!(detect_dependency_cycles(&cyclic).is_err());
    }
}