Skip to main content

nika_engine/ast/
schema_validator.rs

//! Workflow Schema Validator
//!
//! Validates workflow YAML against the Nika JSON Schema before serde parsing.
//!
//! ## Design
//!
//! - Uses an embedded schema (included at build time, compiled once on first use)
//! - Validates the YAML structure via JSON Schema
//! - Returns detailed errors with instance paths and an error classification
//!
//! ## Usage
//!
//! ```rust,ignore
//! use nika::ast::schema_validator::WorkflowSchemaValidator;
//!
//! let validator = WorkflowSchemaValidator::new()?;
//! validator.validate_yaml(yaml_str)?;
//! ```
19
20use crate::error::NikaError;
21use crate::serde_yaml;
22use jsonschema::Validator;
23use serde_json::Value;
24use std::sync::OnceLock;
25
26/// Embedded schema JSON (compiled at build time)
27const SCHEMA_JSON: &str = include_str!("../../schemas/nika-workflow.schema.json");
28
29/// Global schema validator instance (lazy initialization)
30static VALIDATOR: OnceLock<Result<Validator, String>> = OnceLock::new();
31
32/// Workflow schema validator
33///
34/// Validates workflow YAML against the Nika JSON Schema.
35pub struct WorkflowSchemaValidator {
36    /// Compiled JSON Schema validator
37    validator: &'static Validator,
38}
39
40impl WorkflowSchemaValidator {
41    /// Create a new workflow schema validator
42    ///
43    /// Uses a cached global validator for efficiency.
44    pub fn new() -> Result<Self, NikaError> {
45        let validator_result = VALIDATOR.get_or_init(|| {
46            let schema: Value = serde_json::from_str(SCHEMA_JSON)
47                .map_err(|e| format!("Failed to parse schema JSON: {}", e))?;
48            Validator::new(&schema).map_err(|e| format!("Failed to compile schema: {}", e))
49        });
50
51        match validator_result {
52            Ok(validator) => Ok(Self { validator }),
53            Err(e) => Err(NikaError::ValidationError { reason: e.clone() }),
54        }
55    }
56
57    /// Validate YAML string against the workflow schema
58    ///
59    /// # Arguments
60    ///
61    /// * `yaml` - YAML content to validate
62    ///
63    /// # Returns
64    ///
65    /// * `Ok(())` if valid
66    /// * `Err(NikaError::SchemaValidationFailed)` with detailed errors if invalid
67    pub fn validate_yaml(&self, yaml: &str) -> Result<(), NikaError> {
68        // Parse YAML to JSON Value (serde_yaml can handle this)
69        let value: Value = serde_yaml::from_str(yaml).map_err(|e| NikaError::ParseError {
70            details: format!("YAML parse error: {}", e),
71        })?;
72
73        self.validate_value(&value)
74    }
75
76    /// Validate a JSON Value against the workflow schema
77    ///
78    /// # Arguments
79    ///
80    /// * `value` - JSON value to validate
81    ///
82    /// # Returns
83    ///
84    /// * `Ok(())` if valid
85    /// * `Err(NikaError::SchemaValidationFailed)` with detailed errors if invalid
86    pub fn validate_value(&self, value: &Value) -> Result<(), NikaError> {
87        let errors: Vec<SchemaError> = self
88            .validator
89            .iter_errors(value)
90            .map(|e| SchemaError {
91                path: e.instance_path.to_string(),
92                message: e.to_string(),
93                kind: classify_error(&e),
94            })
95            .collect();
96
97        if errors.is_empty() {
98            Ok(())
99        } else {
100            Err(NikaError::SchemaValidationFailed { errors })
101        }
102    }
103}
104
105/// Schema validation error details
106#[derive(Debug, Clone)]
107pub struct SchemaError {
108    /// JSON pointer path to the error (e.g., "/tasks/0/invoke/params")
109    pub path: String,
110    /// Human-readable error message
111    pub message: String,
112    /// Error classification
113    pub kind: SchemaErrorKind,
114}
115
/// Schema error classification.
///
/// Every payload is a `String` or `Vec<String>`, so the type is `Eq` as well
/// as `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaErrorKind {
    /// Missing required field
    MissingRequired { field: String },
    /// Unknown field (not in schema)
    UnknownField { field: String },
    /// Type mismatch
    TypeMismatch { expected: String, actual: String },
    /// Invalid enum value
    InvalidEnum { value: String, allowed: Vec<String> },
    /// Generic validation error
    Other,
}
130
131/// Classify a JSON Schema error into a SchemaErrorKind
132fn classify_error(error: &jsonschema::ValidationError) -> SchemaErrorKind {
133    let error_str = format!("{:?}", error.kind);
134    let message = error.to_string();
135
136    if error_str.contains("Required") {
137        // Extract field name from message
138        let field = extract_quoted(&message).unwrap_or_else(|| "unknown".to_string());
139        SchemaErrorKind::MissingRequired { field }
140    } else if error_str.contains("AdditionalProperties") {
141        // Extract field from path
142        let path = error.instance_path.to_string();
143        let field = path
144            .rsplit('/')
145            .next()
146            .filter(|s| !s.is_empty())
147            .unwrap_or("unknown")
148            .to_string();
149        SchemaErrorKind::UnknownField { field }
150    } else if error_str.contains("Type") {
151        SchemaErrorKind::TypeMismatch {
152            expected: extract_type(&message).unwrap_or_else(|| "expected".to_string()),
153            actual: "actual".to_string(),
154        }
155    } else if error_str.contains("Enum") || error_str.contains("Pattern") {
156        // Pattern failures on constrained fields (like schema version) are semantically enums
157        SchemaErrorKind::InvalidEnum {
158            value: error.instance.to_string(),
159            allowed: vec![],
160        }
161    } else {
162        SchemaErrorKind::Other
163    }
164}
165
/// Pull the first quoted fragment out of an error message.
///
/// A complete double-quoted fragment takes priority; a single-quoted fragment
/// is used only when no complete double-quoted one exists. Returns `None` when
/// the message has no complete quoted fragment of either kind.
fn extract_quoted(msg: &str) -> Option<String> {
    // Splitting into at most three pieces yields: text before the opening
    // delimiter, the quoted text, and everything after the closing delimiter.
    // The third piece exists only when the fragment is actually closed.
    let between = |delim: char| -> Option<String> {
        let mut pieces = msg.splitn(3, delim);
        let _before = pieces.next()?;
        let inside = pieces.next()?;
        pieces.next().map(|_| inside.to_string())
    };

    between('"').or_else(|| between('\''))
}
181
/// Extract the expected JSON type name from a type-mismatch message.
///
/// When the message contains the phrase "of type", only the text after its
/// last occurrence is searched, so type words that appear inside the offending
/// value (e.g. `"a string value" is not of type "integer"`) are not mistaken
/// for the expected type. Messages without that phrase are searched in full.
///
/// Names are tried in a fixed order and the first one found is returned;
/// `None` means the searched text mentions no JSON type name.
fn extract_type(msg: &str) -> Option<String> {
    const TYPE_NAMES: [&str; 7] = [
        "string", "integer", "number", "boolean", "array", "object", "null",
    ];

    let haystack = msg.rsplit_once("of type").map_or(msg, |(_, tail)| tail);

    TYPE_NAMES
        .iter()
        .find(|name| haystack.contains(**name))
        .map(|name| name.to_string())
}
191
192// ============================================================================
193// TESTS (TDD)
194// ============================================================================
195
196#[cfg(test)]
197mod tests {
198    use super::*;
199    use serde_json::json;
200
201    // ========================================================================
202    // Test: Validator creation succeeds
203    // ========================================================================
204    #[test]
205    fn test_validator_creation_succeeds() {
206        let validator = WorkflowSchemaValidator::new();
207        assert!(
208            validator.is_ok(),
209            "Validator should be created successfully"
210        );
211    }
212
213    // ========================================================================
214    // Test: Valid minimal workflow passes
215    // ========================================================================
216    #[test]
217    fn test_valid_minimal_workflow_passes() {
218        let validator = WorkflowSchemaValidator::new().unwrap();
219        let yaml = r#"
220schema: "nika/workflow@0.12"
221tasks:
222  - id: step1
223    infer: "Hello world"
224"#;
225        let result = validator.validate_yaml(yaml);
226        assert!(result.is_ok(), "Valid workflow should pass: {:?}", result);
227    }
228
229    // ========================================================================
230    // Test: Missing schema field fails
231    // ========================================================================
232    #[test]
233    fn test_missing_schema_field_fails() {
234        let validator = WorkflowSchemaValidator::new().unwrap();
235        let yaml = r#"
236tasks:
237  - id: step1
238    infer: "Hello"
239"#;
240        let result = validator.validate_yaml(yaml);
241        assert!(result.is_err(), "Missing schema should fail");
242
243        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
244            assert!(!errors.is_empty());
245            assert!(matches!(
246                errors[0].kind,
247                SchemaErrorKind::MissingRequired { ref field } if field == "schema"
248            ));
249        } else {
250            panic!("Expected SchemaValidationFailed error");
251        }
252    }
253
254    // ========================================================================
255    // Test: Invalid schema version fails
256    // ========================================================================
257    #[test]
258    fn test_invalid_schema_version_fails() {
259        let validator = WorkflowSchemaValidator::new().unwrap();
260        let yaml = r#"
261schema: "nika/workflow@9.9"
262tasks:
263  - id: step1
264    infer: "Hello"
265"#;
266        let result = validator.validate_yaml(yaml);
267        assert!(result.is_err(), "Invalid schema version should fail");
268
269        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
270            assert!(!errors.is_empty());
271            assert!(matches!(
272                errors[0].kind,
273                SchemaErrorKind::InvalidEnum { .. }
274            ));
275        } else {
276            panic!("Expected SchemaValidationFailed error");
277        }
278    }
279
280    // ========================================================================
281    // Test: Unknown field in invoke params fails
282    // ========================================================================
283    #[test]
284    fn test_unknown_field_in_invoke_params_fails() {
285        let validator = WorkflowSchemaValidator::new().unwrap();
286        let yaml = r#"
287schema: "nika/workflow@0.12"
288mcp:
289  novanet:
290    command: cargo
291    args: [run]
292tasks:
293  - id: describe
294    invoke:
295      mcp: novanet
296      tool: novanet_describe
297      params:
298        unknown_field: "value"
299"#;
300        let result = validator.validate_yaml(yaml);
301        // Note: params is not additionalProperties: false, so this may pass
302        // But the key insight is we can validate the overall structure
303        // The user's original issue was about invoke.params structure
304        // Actually looking at the schema, params has additionalProperties: true
305        // So this test should pass (params can have any fields)
306        assert!(
307            result.is_ok(),
308            "Params can have any fields (additionalProperties: true)"
309        );
310    }
311
312    // ========================================================================
313    // Test: Missing required invoke.mcp or server fails
314    // ========================================================================
315    #[test]
316    fn test_missing_required_invoke_mcp_fails() {
317        let validator = WorkflowSchemaValidator::new().unwrap();
318        let yaml = r#"
319schema: "nika/workflow@0.12"
320tasks:
321  - id: describe
322    invoke:
323      tool: novanet_describe
324"#;
325        let result = validator.validate_yaml(yaml);
326        assert!(result.is_err(), "Missing invoke.mcp/server should fail");
327
328        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
329            assert!(!errors.is_empty());
330            // With oneOf schema (mcp+tool | mcp+resource | server+tool | server+resource),
331            // missing server identifier triggers oneOf validation failure or MissingRequired
332            // We just need to ensure validation fails appropriately
333            let has_invoke_error = errors
334                .iter()
335                .any(|e| e.path.contains("invoke") || e.path.contains("tasks"));
336            assert!(
337                has_invoke_error,
338                "Should have error related to invoke params: {:?}",
339                errors
340            );
341        } else {
342            panic!("Expected SchemaValidationFailed error");
343        }
344    }
345
346    // ========================================================================
347    // Test: Unknown field at workflow level fails
348    // ========================================================================
349    #[test]
350    fn test_unknown_field_at_workflow_level_fails() {
351        let validator = WorkflowSchemaValidator::new().unwrap();
352        let yaml = r#"
353schema: "nika/workflow@0.12"
354unknown_field: "should fail"
355tasks:
356  - id: step1
357    infer: "Hello"
358"#;
359        let result = validator.validate_yaml(yaml);
360        assert!(
361            result.is_err(),
362            "Unknown field at workflow level should fail"
363        );
364
365        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
366            assert!(!errors.is_empty());
367            let has_unknown_error = errors
368                .iter()
369                .any(|e| matches!(&e.kind, SchemaErrorKind::UnknownField { .. }));
370            assert!(has_unknown_error, "Should have UnknownField error");
371        } else {
372            panic!("Expected SchemaValidationFailed error");
373        }
374    }
375
376    // ========================================================================
377    // Test: Valid invoke workflow passes
378    // ========================================================================
379    #[test]
380    fn test_valid_invoke_workflow_passes() {
381        let validator = WorkflowSchemaValidator::new().unwrap();
382        let yaml = r#"
383schema: "nika/workflow@0.12"
384provider: claude
385mcp:
386  novanet:
387    command: cargo
388    args: [run, -p, novanet-mcp]
389    env:
390      NEO4J_URI: bolt://localhost:7687
391tasks:
392  - id: describe
393    invoke:
394      mcp: novanet
395      tool: novanet_describe
396      params: {}
397    output:
398      format: json
399
400  - id: generate
401    with:
402      schema: describe
403    invoke:
404      mcp: novanet
405      tool: novanet_context
406      params:
407        entity: qr-code
408        locale: fr-FR
409        forms:
410          - text
411          - title
412    output:
413      format: json
414"#;
415        let result = validator.validate_yaml(yaml);
416        assert!(
417            result.is_ok(),
418            "Valid invoke workflow should pass: {:?}",
419            result
420        );
421    }
422
423    // ========================================================================
424    // Test: Task without any verb fails
425    // ========================================================================
426    #[test]
427    fn test_task_without_verb_fails() {
428        let validator = WorkflowSchemaValidator::new().unwrap();
429        let yaml = r#"
430schema: "nika/workflow@0.12"
431tasks:
432  - id: step1
433    output:
434      format: json
435"#;
436        let result = validator.validate_yaml(yaml);
437        assert!(result.is_err(), "Task without verb should fail");
438    }
439
440    // ========================================================================
441    // Test: Multiple verbs in task fails (oneOf)
442    // ========================================================================
443    #[test]
444    fn test_multiple_verbs_in_task_fails() {
445        let validator = WorkflowSchemaValidator::new().unwrap();
446        let yaml = r#"
447schema: "nika/workflow@0.12"
448tasks:
449  - id: step1
450    infer: "Hello"
451    exec: "echo done"
452"#;
453        let result = validator.validate_yaml(yaml);
454        assert!(result.is_err(), "Multiple verbs should fail");
455    }
456
457    // ========================================================================
458    // Test: Valid agent params passes
459    // ========================================================================
460    #[test]
461    fn test_valid_agent_params_passes() {
462        let validator = WorkflowSchemaValidator::new().unwrap();
463        let yaml = r#"
464schema: "nika/workflow@0.12"
465mcp:
466  novanet:
467    command: cargo
468tasks:
469  - id: orchestrator
470    agent:
471      prompt: "Generate content"
472      mcp:
473        - novanet
474      max_turns: 5
475      depth_limit: 3
476      extended_thinking: true
477      thinking_budget: 8192
478"#;
479        let result = validator.validate_yaml(yaml);
480        assert!(
481            result.is_ok(),
482            "Valid agent params should pass: {:?}",
483            result
484        );
485    }
486
487    // ========================================================================
488    // Test: Invalid depth_limit fails
489    // ========================================================================
490    #[test]
491    fn test_invalid_depth_limit_fails() {
492        let validator = WorkflowSchemaValidator::new().unwrap();
493        let yaml = r#"
494schema: "nika/workflow@0.12"
495tasks:
496  - id: orchestrator
497    agent:
498      prompt: "Generate content"
499      depth_limit: 100
500"#;
501        let result = validator.validate_yaml(yaml);
502        assert!(result.is_err(), "depth_limit > 10 should fail");
503    }
504
505    // ========================================================================
506    // Test: Valid decompose spec passes
507    // ========================================================================
508    #[test]
509    fn test_valid_decompose_spec_passes() {
510        let validator = WorkflowSchemaValidator::new().unwrap();
511        let yaml = r#"
512schema: "nika/workflow@0.12"
513tasks:
514  - id: expand_entities
515    decompose:
516      strategy: semantic
517      traverse: HAS_CHILD
518      source: "$entity"
519      max_items: 10
520    infer: "Generate for {{with.item}}"
521"#;
522        let result = validator.validate_yaml(yaml);
523        assert!(
524            result.is_ok(),
525            "Valid decompose spec should pass: {:?}",
526            result
527        );
528    }
529
530    // ========================================================================
531    // Test: Invalid decompose strategy fails
532    // ========================================================================
533    #[test]
534    fn test_invalid_decompose_strategy_fails() {
535        let validator = WorkflowSchemaValidator::new().unwrap();
536        let yaml = r#"
537schema: "nika/workflow@0.12"
538tasks:
539  - id: expand_entities
540    decompose:
541      strategy: invalid_strategy
542      traverse: HAS_CHILD
543      source: "$entity"
544    infer: "Generate for {{with.item}}"
545"#;
546        let result = validator.validate_yaml(yaml);
547        assert!(result.is_err(), "Invalid decompose strategy should fail");
548    }
549
550    // ========================================================================
551    // Test: Valid lazy binding passes
552    // ========================================================================
553    #[test]
554    fn test_valid_lazy_binding_passes() {
555        let validator = WorkflowSchemaValidator::new().unwrap();
556        let yaml = r#"
557schema: "nika/workflow@0.12"
558tasks:
559  - id: step1
560    infer: "Hello"
561
562  - id: step2
563    with:
564      eager: step1
565      lazy_val:
566        path: step1.result
567        lazy: true
568        default: "fallback"
569    infer: "Using {{with.eager}} and {{with.lazy_val}}"
570"#;
571        let result = validator.validate_yaml(yaml);
572        assert!(
573            result.is_ok(),
574            "Valid lazy binding should pass: {:?}",
575            result
576        );
577    }
578
579    // ========================================================================
580    // Test: for_each with binding expression passes
581    // ========================================================================
582    #[test]
583    fn test_for_each_binding_expression_passes() {
584        let validator = WorkflowSchemaValidator::new().unwrap();
585        let yaml = r#"
586schema: "nika/workflow@0.12"
587tasks:
588  - id: process
589    for_each: "{{with.items}}"
590    as: item
591    concurrency: 5
592    infer: "Process {{with.item}}"
593"#;
594        let result = validator.validate_yaml(yaml);
595        assert!(
596            result.is_ok(),
597            "for_each binding expression should pass: {:?}",
598            result
599        );
600    }
601
602    // ========================================================================
603    // Test: Error message includes path
604    // ========================================================================
605    #[test]
606    fn test_error_message_includes_path() {
607        let validator = WorkflowSchemaValidator::new().unwrap();
608        let yaml = r#"
609schema: "nika/workflow@0.12"
610tasks:
611  - id: step1
612    invoke:
613      tool: novanet_describe
614"#;
615        let result = validator.validate_yaml(yaml);
616        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
617            // Should have path pointing to the invoke object
618            let has_path = errors.iter().any(|e| e.path.contains("invoke"));
619            assert!(has_path, "Error should include path to invoke");
620        } else {
621            panic!("Expected SchemaValidationFailed error");
622        }
623    }
624
625    // ========================================================================
626    // Test: JSON value validation works
627    // ========================================================================
628    #[test]
629    fn test_validate_value_works() {
630        let validator = WorkflowSchemaValidator::new().unwrap();
631        let value = json!({
632            "schema": "nika/workflow@0.12",
633            "tasks": [
634                {
635                    "id": "step1",
636                    "infer": "Hello"
637                }
638            ]
639        });
640        let result = validator.validate_value(&value);
641        assert!(result.is_ok(), "JSON value validation should work");
642    }
643
644    // ========================================================================
645    // Test: Artifact format: binary passes schema validation
646    // Binary format was added for CAS media pipeline support
647    // ========================================================================
648    #[test]
649    fn test_artifact_format_binary_passes_schema() {
650        let validator = WorkflowSchemaValidator::new().unwrap();
651
652        // Task-level artifact with format: binary
653        let yaml = r#"
654schema: "nika/workflow@0.12"
655tasks:
656  - id: download_image
657    exec: "curl -o /tmp/img.png https://example.com/img.png"
658    artifact:
659      path: ./output/image.bin
660      format: binary
661"#;
662        let result = validator.validate_yaml(yaml);
663        assert!(
664            result.is_ok(),
665            "Artifact format: binary should pass schema validation: {:?}",
666            result
667        );
668
669        // Workflow-level artifacts config with format: binary
670        let yaml2 = r#"
671schema: "nika/workflow@0.12"
672artifacts:
673  dir: ./output
674  format: binary
675tasks:
676  - id: step1
677    exec: "echo hello"
678    artifact: true
679"#;
680        let result2 = validator.validate_yaml(yaml2);
681        assert!(
682            result2.is_ok(),
683            "Workflow-level artifacts format: binary should pass: {:?}",
684            result2
685        );
686
687        // Multiple artifacts array with format: binary
688        let yaml3 = r#"
689schema: "nika/workflow@0.12"
690tasks:
691  - id: multi_output
692    exec: "echo done"
693    artifact:
694      - path: ./output/data.json
695        format: json
696      - path: ./output/image.bin
697        format: binary
698"#;
699        let result3 = validator.validate_yaml(yaml3);
700        assert!(
701            result3.is_ok(),
702            "Multiple artifacts with format: binary should pass: {:?}",
703            result3
704        );
705    }
706
707    // ========================================================================
708    // Test: Valid infer with guardrails passes
709    // ========================================================================
710    #[test]
711    fn test_valid_infer_with_guardrails_passes() {
712        let yaml = r#"
713schema: nika/workflow@0.12
714provider: mock
715tasks:
716  - id: guarded
717    infer:
718      prompt: "Generate content"
719      guardrails:
720        - type: length
721          min_words: 100
722          max_words: 500
723          on_failure: retry
724        - type: regex
725          pattern: "(?i)conclusion"
726          on_failure: fail
727"#;
728        let validator = WorkflowSchemaValidator::new().unwrap();
729        assert!(validator.validate_yaml(yaml).is_ok());
730    }
731
732    // ========================================================================
733    // Test: Valid agent with completion + limits + guardrails passes
734    // ========================================================================
735    #[test]
736    fn test_valid_agent_with_completion_limits_guardrails_passes() {
737        let yaml = r#"
738schema: nika/workflow@0.12
739provider: mock
740tasks:
741  - id: advanced_agent
742    agent:
743      prompt: "Research topic"
744      tools: [builtin]
745      max_turns: 10
746      completion:
747        mode: explicit
748      limits:
749        max_cost_usd: 1.0
750        max_duration_secs: 120
751        on_limit_reached:
752          action: complete_partial
753      guardrails:
754        - type: length
755          min_words: 200
756          on_failure: retry
757"#;
758        let validator = WorkflowSchemaValidator::new().unwrap();
759        assert!(validator.validate_yaml(yaml).is_ok());
760    }
761
762    // ========================================================================
763    // Test: Valid invoke with resource passes
764    // ========================================================================
765    #[test]
766    fn test_valid_invoke_with_resource_passes() {
767        let yaml = r#"
768schema: nika/workflow@0.12
769provider: mock
770tasks:
771  - id: read_resource
772    invoke:
773      mcp: novanet
774      resource: "schema://entities"
775"#;
776        let validator = WorkflowSchemaValidator::new().unwrap();
777        assert!(validator.validate_yaml(yaml).is_ok());
778    }
779}
780
// ========================================================================
// Test: A builtin invoke tool (nika:*) is accepted without an mcp server
// The JSON schema supports tool-only invoke for nika:* builtins
// ========================================================================
#[test]
fn test_builtin_invoke_tool_without_mcp_passes() {
    let yaml = r#"
schema: "nika/workflow@0.12"
tasks:
  - id: write_file
    invoke:
      tool: nika:write
      params:
        file_path: /tmp/test.txt
        content: "Hello"
"#;
    let outcome = WorkflowSchemaValidator::new().unwrap().validate_yaml(yaml);
    assert!(
        outcome.is_ok(),
        "Builtin tool (nika:write) should work without mcp: {:?}",
        outcome
    );
}