Skip to main content

chant/
validation.rs

1//! Output schema validation for spec agent outputs.
2//!
3//! This module provides JSON Schema validation for agent outputs when specs
4//! define an `output_schema` field in their frontmatter.
5//!
6//! # Doc Audit
7//! - audited: 2026-01-29
8//! - docs: reference/schema.md
9//! - ignore: false
10
11use anyhow::{Context, Result};
12use std::fs;
13use std::path::Path;
14
15/// Result of validating agent output against a JSON schema
16#[derive(Debug)]
17pub struct ValidationResult {
18    /// Whether the validation passed
19    pub is_valid: bool,
20    /// List of validation errors (empty if valid)
21    pub errors: Vec<String>,
22    /// The JSON that was extracted and validated (if any)
23    pub extracted_json: Option<serde_json::Value>,
24}
25
26/// Extract JSON from agent output text.
27///
28/// Tries multiple strategies:
29/// 1. Look for ```json code blocks
30/// 2. Look for bare ``` code blocks that contain JSON
31/// 3. Try parsing the entire output as JSON
32/// 4. Find JSON object/array patterns in the text
33pub fn extract_json_from_output(output: &str) -> Option<serde_json::Value> {
34    // Strategy 1: Look for ```json code blocks
35    if let Some(json) = extract_json_code_block(output, "json") {
36        return Some(json);
37    }
38
39    // Strategy 2: Look for bare ``` code blocks
40    if let Some(json) = extract_json_code_block(output, "") {
41        return Some(json);
42    }
43
44    // Strategy 3: Try parsing the entire output as JSON
45    if let Ok(json) = serde_json::from_str::<serde_json::Value>(output.trim()) {
46        return Some(json);
47    }
48
49    // Strategy 4: Find JSON object/array patterns
50    if let Some(json) = find_json_in_text(output) {
51        return Some(json);
52    }
53
54    None
55}
56
57/// Extract JSON from a fenced code block with optional language specifier
58fn extract_json_code_block(output: &str, lang: &str) -> Option<serde_json::Value> {
59    let mut in_fence = false;
60    let mut fence_content = String::new();
61    let mut fence_lang = String::new();
62
63    for line in output.lines() {
64        let trimmed = line.trim_start();
65        if let Some(after_fence) = trimmed.strip_prefix("```") {
66            if in_fence {
67                // End of fence
68                in_fence = false;
69                // Check if this is the right language (or any if lang is empty)
70                if lang.is_empty()
71                    || fence_lang.is_empty()
72                    || fence_lang.to_lowercase() == lang.to_lowercase()
73                {
74                    if let Ok(json) = serde_json::from_str::<serde_json::Value>(&fence_content) {
75                        return Some(json);
76                    }
77                }
78                fence_content.clear();
79                fence_lang.clear();
80            } else {
81                // Start of fence
82                in_fence = true;
83                fence_lang = after_fence.trim().to_string();
84            }
85        } else if in_fence {
86            if !fence_content.is_empty() {
87                fence_content.push('\n');
88            }
89            fence_content.push_str(line);
90        }
91    }
92
93    // Handle unclosed fence (try what we have)
94    if in_fence
95        && !fence_content.is_empty()
96        && (lang.is_empty()
97            || fence_lang.is_empty()
98            || fence_lang.to_lowercase() == lang.to_lowercase())
99    {
100        if let Ok(json) = serde_json::from_str::<serde_json::Value>(&fence_content) {
101            return Some(json);
102        }
103    }
104
105    None
106}
107
108/// Find JSON object or array patterns in text
109fn find_json_in_text(text: &str) -> Option<serde_json::Value> {
110    // Look for { ... } patterns (objects)
111    let mut brace_depth = 0;
112    let mut start_idx = None;
113
114    for (idx, ch) in text.char_indices() {
115        match ch {
116            '{' => {
117                if brace_depth == 0 {
118                    start_idx = Some(idx);
119                }
120                brace_depth += 1;
121            }
122            '}' => {
123                brace_depth -= 1;
124                if brace_depth == 0 {
125                    if let Some(start) = start_idx {
126                        let candidate = &text[start..=idx];
127                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(candidate) {
128                            return Some(json);
129                        }
130                    }
131                }
132            }
133            _ => {}
134        }
135    }
136
137    // Look for [ ... ] patterns (arrays)
138    let mut bracket_depth = 0;
139    start_idx = None;
140
141    for (idx, ch) in text.char_indices() {
142        match ch {
143            '[' => {
144                if bracket_depth == 0 {
145                    start_idx = Some(idx);
146                }
147                bracket_depth += 1;
148            }
149            ']' => {
150                bracket_depth -= 1;
151                if bracket_depth == 0 {
152                    if let Some(start) = start_idx {
153                        let candidate = &text[start..=idx];
154                        if let Ok(json) = serde_json::from_str::<serde_json::Value>(candidate) {
155                            return Some(json);
156                        }
157                    }
158                }
159            }
160            _ => {}
161        }
162    }
163
164    None
165}
166
167/// Load and compile a JSON schema from a file path
168pub fn load_schema(schema_path: &Path) -> Result<jsonschema::Validator> {
169    let schema_content = fs::read_to_string(schema_path)
170        .with_context(|| format!("Failed to read schema file: {}", schema_path.display()))?;
171
172    let schema: serde_json::Value = serde_json::from_str(&schema_content)
173        .with_context(|| format!("Failed to parse schema as JSON: {}", schema_path.display()))?;
174
175    jsonschema::validator_for(&schema)
176        .map_err(|e| anyhow::anyhow!("Failed to compile JSON schema: {}", e))
177}
178
179/// Validate agent output against a JSON schema file.
180///
181/// # Arguments
182/// * `spec_id` - The spec ID (for error messages)
183/// * `schema_path` - Path to the JSON schema file
184/// * `agent_output` - The raw output from the agent
185///
186/// # Returns
187/// * `ValidationResult` with validation status and any errors
188pub fn validate_agent_output(
189    spec_id: &str,
190    schema_path: &Path,
191    agent_output: &str,
192) -> Result<ValidationResult> {
193    // Load and compile the schema
194    let validator = load_schema(schema_path)?;
195
196    // Extract JSON from the agent output
197    let extracted_json = match extract_json_from_output(agent_output) {
198        Some(json) => json,
199        None => {
200            return Ok(ValidationResult {
201                is_valid: false,
202                errors: vec![format!(
203                    "No JSON found in agent output for spec '{}'",
204                    spec_id
205                )],
206                extracted_json: None,
207            });
208        }
209    };
210
211    // Validate the extracted JSON against the schema using iter_errors
212    // to collect all validation errors
213    let error_iter = validator.iter_errors(&extracted_json);
214    let error_messages: Vec<String> = error_iter
215        .map(|e| {
216            let path = e.instance_path.to_string();
217            if path.is_empty() {
218                e.to_string()
219            } else {
220                format!("at '{}': {}", path, e)
221            }
222        })
223        .collect();
224
225    if error_messages.is_empty() {
226        Ok(ValidationResult {
227            is_valid: true,
228            errors: vec![],
229            extracted_json: Some(extracted_json),
230        })
231    } else {
232        Ok(ValidationResult {
233            is_valid: false,
234            errors: error_messages,
235            extracted_json: Some(extracted_json),
236        })
237    }
238}
239
240/// Read the agent log file for a spec and validate its output against the schema.
241///
242/// This is useful for batch validation (e.g., in `chant lint`).
243///
244/// # Arguments
245/// * `spec_id` - The spec ID
246/// * `schema_path` - Path to the JSON schema file
247/// * `logs_dir` - Path to the logs directory (typically `.chant/logs`)
248///
249/// # Returns
250/// * `Ok(Some(ValidationResult))` if log exists and validation was attempted
251/// * `Ok(None)` if no log file exists for this spec
252/// * `Err` if there was an error reading the log or schema
253pub fn validate_spec_output_from_log(
254    spec_id: &str,
255    schema_path: &Path,
256    logs_dir: &Path,
257) -> Result<Option<ValidationResult>> {
258    let log_path = logs_dir.join(format!("{}.log", spec_id));
259
260    if !log_path.exists() {
261        return Ok(None);
262    }
263
264    let log_content = fs::read_to_string(&log_path)
265        .with_context(|| format!("Failed to read log file: {}", log_path.display()))?;
266
267    let result = validate_agent_output(spec_id, schema_path, &log_content)?;
268    Ok(Some(result))
269}
270
271/// Generate an "Output Format" prompt section from a JSON schema.
272///
273/// This is injected into agent prompts when a spec has an `output_schema` field.
274pub fn generate_schema_prompt_section(schema_path: &Path) -> Result<String> {
275    let schema_content = fs::read_to_string(schema_path)
276        .with_context(|| format!("Failed to read schema file: {}", schema_path.display()))?;
277
278    let schema: serde_json::Value = serde_json::from_str(&schema_content)
279        .with_context(|| format!("Failed to parse schema as JSON: {}", schema_path.display()))?;
280
281    let mut section = String::new();
282    section.push_str("\n## Output Format\n\n");
283    section.push_str("Your output MUST include valid JSON matching this schema:\n\n");
284    section.push_str("```json\n");
285    section.push_str(&serde_json::to_string_pretty(&schema)?);
286    section.push_str("\n```\n\n");
287
288    // Extract required fields if present
289    if let Some(required) = schema.get("required").and_then(|r| r.as_array()) {
290        let required_fields: Vec<&str> = required.iter().filter_map(|v| v.as_str()).collect();
291        if !required_fields.is_empty() {
292            section.push_str(&format!(
293                "**Required fields:** {}\n\n",
294                required_fields.join(", ")
295            ));
296        }
297    }
298
299    // Generate an example if properties are defined
300    if let Some(properties) = schema.get("properties").and_then(|p| p.as_object()) {
301        section.push_str("**Example:**\n\n```json\n");
302        let example = generate_example_from_properties(properties, &schema);
303        section.push_str(&serde_json::to_string_pretty(&example)?);
304        section.push_str("\n```\n");
305    }
306
307    Ok(section)
308}
309
310/// Generate an example JSON object from schema properties
311fn generate_example_from_properties(
312    properties: &serde_json::Map<String, serde_json::Value>,
313    schema: &serde_json::Value,
314) -> serde_json::Value {
315    let required: Vec<&str> = schema
316        .get("required")
317        .and_then(|r| r.as_array())
318        .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
319        .unwrap_or_default();
320
321    let mut example = serde_json::Map::new();
322
323    for (key, prop_schema) in properties {
324        // Only include required fields in example, or first 3 properties
325        if !required.contains(&key.as_str()) && example.len() >= 3 {
326            continue;
327        }
328
329        let value = generate_example_value(prop_schema);
330        example.insert(key.clone(), value);
331    }
332
333    serde_json::Value::Object(example)
334}
335
336/// Generate an example value for a schema property
337fn generate_example_value(prop_schema: &serde_json::Value) -> serde_json::Value {
338    let prop_type = prop_schema.get("type").and_then(|t| t.as_str());
339
340    match prop_type {
341        Some("string") => {
342            // Check for pattern or enum
343            if let Some(enum_values) = prop_schema.get("enum").and_then(|e| e.as_array()) {
344                if let Some(first) = enum_values.first() {
345                    return first.clone();
346                }
347            }
348            serde_json::Value::String("...".to_string())
349        }
350        Some("number") | Some("integer") => serde_json::Value::Number(0.into()),
351        Some("boolean") => serde_json::Value::Bool(true),
352        Some("array") => {
353            let items_schema = prop_schema.get("items");
354            let item_example = items_schema
355                .map(generate_example_value)
356                .unwrap_or(serde_json::Value::String("...".to_string()));
357            serde_json::Value::Array(vec![item_example])
358        }
359        Some("object") => {
360            if let Some(props) = prop_schema.get("properties").and_then(|p| p.as_object()) {
361                generate_example_from_properties(props, prop_schema)
362            } else {
363                serde_json::Value::Object(serde_json::Map::new())
364            }
365        }
366        _ => serde_json::Value::Null,
367    }
368}
369
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Write `contents` to `schema.json` inside a fresh temporary directory.
    ///
    /// The `TempDir` handle is returned together with the path so that callers
    /// keep the directory alive for as long as they use the schema file.
    fn write_schema(contents: &str) -> (TempDir, std::path::PathBuf) {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("schema.json");
        fs::write(&path, contents).unwrap();
        (dir, path)
    }

    /// JSON inside a fence tagged `json` is picked out of surrounding prose.
    #[test]
    fn test_extract_json_from_code_block() {
        let output = r#"
Here is the analysis:

```json
{
  "spec_id": "C.1.1",
  "findings": ["Found issue A", "Found issue B"],
  "recommendation": "Fix the bug"
}
```

That's my report.
"#;

        let extracted = extract_json_from_output(output).unwrap();
        assert_eq!(extracted["spec_id"], "C.1.1");
        assert!(extracted["findings"].is_array());
    }

    /// Output that is nothing but a JSON document is parsed as-is.
    #[test]
    fn test_extract_json_bare_output() {
        let extracted = extract_json_from_output(r#"{"spec_id": "test", "value": 42}"#).unwrap();

        assert_eq!(extracted["spec_id"], "test");
        assert_eq!(extracted["value"], 42);
    }

    /// An unfenced JSON object in the middle of prose is still found.
    #[test]
    fn test_extract_json_embedded_in_text() {
        let output = r#"
The analysis shows that the result is:
{"status": "success", "count": 5}
End of report.
"#;

        let extracted = extract_json_from_output(output).unwrap();
        assert_eq!(extracted["status"], "success");
        assert_eq!(extracted["count"], 5);
    }

    /// Plain prose yields no JSON at all.
    #[test]
    fn test_extract_json_no_json() {
        let extracted = extract_json_from_output("This is just plain text without any JSON content.");
        assert!(extracted.is_none());
    }

    /// Output satisfying the schema is reported as valid with no errors.
    #[test]
    fn test_validate_valid_output() {
        let (_dir, schema_path) = write_schema(
            r#"{
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "type": "object",
            "required": ["spec_id", "status"],
            "properties": {
                "spec_id": {"type": "string"},
                "status": {"type": "string", "enum": ["success", "failure"]}
            }
        }"#,
        );

        let agent_output = r#"
Here is my report:
```json
{"spec_id": "test-001", "status": "success"}
```
"#;

        let outcome = validate_agent_output("test-001", &schema_path, agent_output).unwrap();
        assert!(outcome.is_valid);
        assert!(outcome.errors.is_empty());
    }

    /// A missing required field makes the result invalid and produces errors.
    #[test]
    fn test_validate_invalid_output() {
        let (_dir, schema_path) = write_schema(
            r#"{
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "type": "object",
            "required": ["spec_id"],
            "properties": {
                "spec_id": {"type": "string"}
            }
        }"#,
        );

        // `spec_id` is required by the schema but absent here.
        let agent_output = r#"{"status": "done"}"#;

        let outcome = validate_agent_output("test-001", &schema_path, agent_output).unwrap();
        assert!(!outcome.is_valid);
        assert!(!outcome.errors.is_empty());
    }

    /// Output without any JSON is reported through the result, not as an `Err`.
    #[test]
    fn test_validate_no_json_in_output() {
        let (_dir, schema_path) = write_schema(
            r#"{
            "type": "object",
            "properties": {"x": {"type": "string"}}
        }"#,
        );

        let outcome = validate_agent_output(
            "test-001",
            &schema_path,
            "Just some plain text, no JSON here.",
        )
        .unwrap();
        assert!(!outcome.is_valid);
        assert!(outcome.errors[0].contains("No JSON found"));
    }

    /// The generated prompt section carries the heading, schema, required
    /// field list and example.
    #[test]
    fn test_generate_schema_prompt_section() {
        let (_dir, schema_path) = write_schema(
            r#"{
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "type": "object",
            "required": ["spec_id", "findings"],
            "properties": {
                "spec_id": {"type": "string"},
                "findings": {"type": "array", "items": {"type": "string"}}
            }
        }"#,
        );

        let section = generate_schema_prompt_section(&schema_path).unwrap();

        for expected in ["## Output Format", "spec_id", "Required fields", "Example"] {
            assert!(section.contains(expected));
        }
    }
}