nika 0.20.0

Semantic YAML workflow engine for AI tasks - DAG execution, MCP integration, multi-provider LLM support
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
//! Workflow Schema Validator
//!
//! Validates workflow YAML against the Nika JSON Schema before serde parsing.
//!
//! ## Design
//!
//! - Uses embedded schema (compiled at build time)
//! - Validates YAML structure via JSON Schema
//! - Returns detailed errors with paths and suggestions
//!
//! ## Usage
//!
//! ```rust,ignore
//! use nika::ast::schema_validator::WorkflowSchemaValidator;
//!
//! let validator = WorkflowSchemaValidator::new()?;
//! validator.validate_yaml(yaml_str)?;
//! ```

use crate::error::NikaError;
use crate::serde_yaml;
use jsonschema::Validator;
use serde_json::Value;
use std::sync::OnceLock;

/// Text of the Nika workflow JSON Schema, embedded into the binary via `include_str!`
/// (the schema file is read at compile time, not at run time).
const SCHEMA_JSON: &str = include_str!("../../schemas/nika-workflow.schema.json");

/// Process-wide cache for the compiled schema validator.
///
/// Populated on first use by `WorkflowSchemaValidator::new`. The cell stores a `Result`
/// so that a parse/compile failure is remembered (as its error message) instead of being
/// retried on every call.
static VALIDATOR: OnceLock<Result<Validator, String>> = OnceLock::new();

/// Workflow schema validator
///
/// Validates workflow YAML (or an already-parsed JSON value) against the Nika JSON Schema.
/// Instances only hold a reference to the shared compiled validator stored in `VALIDATOR`.
pub struct WorkflowSchemaValidator {
    /// Compiled JSON Schema validator, borrowed from the global `VALIDATOR` cache
    validator: &'static Validator,
}

impl WorkflowSchemaValidator {
    /// Build a validator handle backed by the process-wide compiled schema.
    ///
    /// The embedded schema is parsed and compiled on the first call only; the outcome
    /// (success or failure message) is cached in `VALIDATOR` and reused afterwards.
    ///
    /// # Errors
    ///
    /// Returns `NikaError::ValidationError` when the embedded schema could not be parsed
    /// as JSON or could not be compiled.
    pub fn new() -> Result<Self, NikaError> {
        let cached = VALIDATOR.get_or_init(|| {
            let parsed: Result<Value, String> = serde_json::from_str(SCHEMA_JSON)
                .map_err(|err| format!("Failed to parse schema JSON: {}", err));
            parsed.and_then(|schema| {
                Validator::new(&schema).map_err(|err| format!("Failed to compile schema: {}", err))
            })
        });

        cached
            .as_ref()
            .map(|validator| Self { validator })
            .map_err(|reason| NikaError::ValidationError {
                reason: reason.clone(),
            })
    }

    /// Check a YAML document against the workflow schema.
    ///
    /// The text is first deserialized into a `serde_json::Value` and then handed to
    /// [`Self::validate_value`].
    ///
    /// # Arguments
    ///
    /// * `yaml` - YAML content to validate
    ///
    /// # Errors
    ///
    /// * `NikaError::ParseError` if the text cannot be deserialized as YAML
    /// * `NikaError::SchemaValidationFailed` with one entry per schema violation
    pub fn validate_yaml(&self, yaml: &str) -> Result<(), NikaError> {
        let parsed: Result<Value, _> = serde_yaml::from_str(yaml);
        match parsed {
            Ok(document) => self.validate_value(&document),
            Err(err) => Err(NikaError::ParseError {
                details: format!("YAML parse error: {}", err),
            }),
        }
    }

    /// Check an already-parsed JSON value against the workflow schema.
    ///
    /// Every violation reported by the compiled validator is converted into a
    /// [`SchemaError`] carrying the instance path, the validator's message and a
    /// classification from `classify_error`.
    ///
    /// # Arguments
    ///
    /// * `value` - JSON value to validate
    ///
    /// # Errors
    ///
    /// Returns `NikaError::SchemaValidationFailed` holding all collected violations when
    /// at least one is found.
    pub fn validate_value(&self, value: &Value) -> Result<(), NikaError> {
        let mut errors: Vec<SchemaError> = Vec::new();
        for violation in self.validator.iter_errors(value) {
            let path = violation.instance_path.to_string();
            let message = violation.to_string();
            let kind = classify_error(&violation);
            errors.push(SchemaError {
                path,
                message,
                kind,
            });
        }

        if !errors.is_empty() {
            return Err(NikaError::SchemaValidationFailed { errors });
        }
        Ok(())
    }
}

/// Schema validation error details
///
/// One value is produced per violation reported by the JSON Schema validator
/// (see `WorkflowSchemaValidator::validate_value`).
#[derive(Debug, Clone)]
pub struct SchemaError {
    /// JSON pointer path to the offending instance (e.g., "/tasks/0/invoke/params"),
    /// taken from the validator error's `instance_path`
    pub path: String,
    /// Human-readable error message (the validator error rendered via `Display`)
    pub message: String,
    /// Error classification computed by `classify_error`
    pub kind: SchemaErrorKind,
}

/// Schema error classification
///
/// Coarse category assigned to each validator error by `classify_error`. The payload
/// fields are best-effort: they are recovered from the validator's error text and may
/// hold fallback values when nothing could be extracted.
#[derive(Debug, Clone, PartialEq)]
pub enum SchemaErrorKind {
    /// Missing required field (`field` is the property name, or "unknown" if not recoverable)
    MissingRequired { field: String },
    /// Unknown field (not in schema)
    UnknownField { field: String },
    /// Type mismatch (`expected`/`actual` are type descriptions; either may be a placeholder)
    TypeMismatch { expected: String, actual: String },
    /// Invalid enum value; also used for pattern failures. `allowed` may be empty.
    InvalidEnum { value: String, allowed: Vec<String> },
    /// Generic validation error (anything not covered by the variants above)
    Other,
}

/// Classify a JSON Schema error into a SchemaErrorKind
fn classify_error(error: &jsonschema::ValidationError) -> SchemaErrorKind {
    let error_str = format!("{:?}", error.kind);
    let message = error.to_string();

    if error_str.contains("Required") {
        // Extract field name from message
        let field = extract_quoted(&message).unwrap_or_else(|| "unknown".to_string());
        SchemaErrorKind::MissingRequired { field }
    } else if error_str.contains("AdditionalProperties") {
        // Extract field from path
        let path = error.instance_path.to_string();
        let field = path
            .rsplit('/')
            .next()
            .filter(|s| !s.is_empty())
            .unwrap_or("unknown")
            .to_string();
        SchemaErrorKind::UnknownField { field }
    } else if error_str.contains("Type") {
        SchemaErrorKind::TypeMismatch {
            expected: extract_type(&message).unwrap_or_else(|| "expected".to_string()),
            actual: "actual".to_string(),
        }
    } else if error_str.contains("Enum") || error_str.contains("Pattern") {
        // Pattern failures on constrained fields (like schema version) are semantically enums
        SchemaErrorKind::InvalidEnum {
            value: error.instance.to_string(),
            allowed: vec![],
        }
    } else {
        SchemaErrorKind::Other
    }
}

/// Return the first quoted token found in `msg`.
///
/// Double quotes are tried first: the text between the first `"` and the next `"` is
/// returned. If no complete double-quoted token exists, the same search is repeated with
/// single quotes. Yields `None` when neither quote style has both an opening and a
/// closing delimiter.
fn extract_quoted(msg: &str) -> Option<String> {
    ['"', '\''].iter().find_map(|&quote| {
        // Everything after the opening quote...
        let (_, after_open) = msg.split_once(quote)?;
        // ...up to (not including) the closing quote.
        let (inner, _) = after_open.split_once(quote)?;
        Some(inner.to_string())
    })
}

/// Extract the expected JSON type name from a type-mismatch error message.
///
/// Type-mismatch messages have the shape `<instance> is not of type "<type>"`, so the
/// offending instance is rendered *before* the expected type. To avoid picking up a type
/// word that merely occurs inside the instance (e.g. the string value `"a string"`), only
/// the text after the last `is not of type` marker is searched when that marker is
/// present; otherwise the whole message is searched.
///
/// Returns the first match among the JSON type names, checked in the fixed order
/// `string`, `integer`, `number`, `boolean`, `array`, `object`, `null`, or `None` when
/// no type name is found.
fn extract_type(msg: &str) -> Option<String> {
    const TYPE_MARKER: &str = "is not of type";
    const TYPE_NAMES: [&str; 7] = [
        "string", "integer", "number", "boolean", "array", "object", "null",
    ];

    // `rfind` so that a marker occurring inside the rendered instance is skipped.
    let haystack = msg
        .rfind(TYPE_MARKER)
        .map_or(msg, |idx| &msg[idx + TYPE_MARKER.len()..]);

    TYPE_NAMES
        .iter()
        .copied()
        .find(|name| haystack.contains(*name))
        .map(String::from)
}

// ============================================================================
// TESTS (TDD)
// ============================================================================

#[cfg(test)]
mod tests {
    // Unit tests for `WorkflowSchemaValidator`. Every test builds the validator through
    // `WorkflowSchemaValidator::new()`, i.e. against the schema embedded in `SCHEMA_JSON`,
    // and feeds it a small inline workflow document.
    use super::*;
    use serde_json::json;

    // ========================================================================
    // Test: Validator creation succeeds
    // ========================================================================
    #[test]
    fn test_validator_creation_succeeds() {
        let validator = WorkflowSchemaValidator::new();
        assert!(
            validator.is_ok(),
            "Validator should be created successfully"
        );
    }

    // ========================================================================
    // Test: Valid minimal workflow passes
    // ========================================================================
    #[test]
    fn test_valid_minimal_workflow_passes() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: step1
    infer: "Hello world"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(result.is_ok(), "Valid workflow should pass: {:?}", result);
    }

    // ========================================================================
    // Test: Missing schema field fails
    // ========================================================================
    #[test]
    fn test_missing_schema_field_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
tasks:
  - id: step1
    infer: "Hello"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(result.is_err(), "Missing schema should fail");

        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
            assert!(!errors.is_empty());
            // The first reported error must be classified as a missing `schema` field.
            assert!(matches!(
                errors[0].kind,
                SchemaErrorKind::MissingRequired { ref field } if field == "schema"
            ));
        } else {
            panic!("Expected SchemaValidationFailed error");
        }
    }

    // ========================================================================
    // Test: Invalid schema version fails
    // ========================================================================
    #[test]
    fn test_invalid_schema_version_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@9.9"
tasks:
  - id: step1
    infer: "Hello"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(result.is_err(), "Invalid schema version should fail");

        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
            assert!(!errors.is_empty());
            // `classify_error` maps both enum and pattern failures to `InvalidEnum`.
            assert!(matches!(
                errors[0].kind,
                SchemaErrorKind::InvalidEnum { .. }
            ));
        } else {
            panic!("Expected SchemaValidationFailed error");
        }
    }

    // ========================================================================
    // Test: Arbitrary fields inside invoke.params are accepted
    // (despite the `_fails` suffix in the function name, the assertion below
    // expects validation to succeed)
    // ========================================================================
    #[test]
    fn test_unknown_field_in_invoke_params_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
mcp:
  novanet:
    command: cargo
    args: [run]
tasks:
  - id: describe
    invoke:
      mcp: novanet
      tool: novanet_describe
      params:
        unknown_field: "value"
"#;
        let result = validator.validate_yaml(yaml);
        // Per the assertion message, the schema leaves `params` open
        // (additionalProperties: true), so keys the schema does not know about
        // must not be rejected inside it.
        assert!(
            result.is_ok(),
            "Params can have any fields (additionalProperties: true)"
        );
    }

    // ========================================================================
    // Test: Missing required invoke.mcp or server fails
    // ========================================================================
    #[test]
    fn test_missing_required_invoke_mcp_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: describe
    invoke:
      tool: novanet_describe
"#;
        let result = validator.validate_yaml(yaml);
        assert!(result.is_err(), "Missing invoke.mcp/server should fail");

        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
            assert!(!errors.is_empty());
            // With oneOf schema (mcp+tool | mcp+resource | server+tool | server+resource),
            // missing server identifier triggers oneOf validation failure or MissingRequired.
            // The exact error kind is therefore not asserted; it is enough that some
            // error points at the invoke/tasks part of the document.
            let has_invoke_error = errors
                .iter()
                .any(|e| e.path.contains("invoke") || e.path.contains("tasks"));
            assert!(
                has_invoke_error,
                "Should have error related to invoke params: {:?}",
                errors
            );
        } else {
            panic!("Expected SchemaValidationFailed error");
        }
    }

    // ========================================================================
    // Test: Unknown field at workflow level fails
    // ========================================================================
    #[test]
    fn test_unknown_field_at_workflow_level_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
unknown_field: "should fail"
tasks:
  - id: step1
    infer: "Hello"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(
            result.is_err(),
            "Unknown field at workflow level should fail"
        );

        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
            assert!(!errors.is_empty());
            // Only the classification is checked here, not the extracted field name.
            let has_unknown_error = errors
                .iter()
                .any(|e| matches!(&e.kind, SchemaErrorKind::UnknownField { .. }));
            assert!(has_unknown_error, "Should have UnknownField error");
        } else {
            panic!("Expected SchemaValidationFailed error");
        }
    }

    // ========================================================================
    // Test: Valid invoke workflow passes
    // ========================================================================
    #[test]
    fn test_valid_invoke_workflow_passes() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
provider: claude
mcp:
  novanet:
    command: cargo
    args: [run, -p, novanet-mcp]
    env:
      NEO4J_URI: bolt://localhost:7687
tasks:
  - id: describe
    invoke:
      mcp: novanet
      tool: novanet_describe
      params: {}
    output:
      format: json

  - id: generate
    use:
      schema: describe
    invoke:
      mcp: novanet
      tool: novanet_generate
      params:
        entity: qr-code
        locale: fr-FR
        forms:
          - text
          - title
    output:
      format: json
flows:
  - source: describe
    target: generate
"#;
        let result = validator.validate_yaml(yaml);
        assert!(
            result.is_ok(),
            "Valid invoke workflow should pass: {:?}",
            result
        );
    }

    // ========================================================================
    // Test: Task without any verb fails
    // ========================================================================
    #[test]
    fn test_task_without_verb_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: step1
    output:
      format: json
"#;
        let result = validator.validate_yaml(yaml);
        assert!(result.is_err(), "Task without verb should fail");
    }

    // ========================================================================
    // Test: Multiple verbs in task fails (oneOf)
    // ========================================================================
    #[test]
    fn test_multiple_verbs_in_task_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: step1
    infer: "Hello"
    exec: "echo done"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(result.is_err(), "Multiple verbs should fail");
    }

    // ========================================================================
    // Test: Valid agent params passes
    // ========================================================================
    #[test]
    fn test_valid_agent_params_passes() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
mcp:
  novanet:
    command: cargo
tasks:
  - id: orchestrator
    agent:
      prompt: "Generate content"
      mcp:
        - novanet
      max_turns: 5
      depth_limit: 3
      extended_thinking: true
      thinking_budget: 8192
"#;
        let result = validator.validate_yaml(yaml);
        assert!(
            result.is_ok(),
            "Valid agent params should pass: {:?}",
            result
        );
    }

    // ========================================================================
    // Test: Invalid depth_limit fails
    // ========================================================================
    #[test]
    fn test_invalid_depth_limit_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: orchestrator
    agent:
      prompt: "Generate content"
      depth_limit: 100
"#;
        let result = validator.validate_yaml(yaml);
        assert!(result.is_err(), "depth_limit > 10 should fail");
    }

    // ========================================================================
    // Test: Valid decompose spec passes
    // ========================================================================
    #[test]
    fn test_valid_decompose_spec_passes() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: expand_entities
    decompose:
      strategy: semantic
      traverse: HAS_CHILD
      source: "$entity"
      max_items: 10
    infer: "Generate for {{use.item}}"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(
            result.is_ok(),
            "Valid decompose spec should pass: {:?}",
            result
        );
    }

    // ========================================================================
    // Test: Invalid decompose strategy fails
    // ========================================================================
    #[test]
    fn test_invalid_decompose_strategy_fails() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: expand_entities
    decompose:
      strategy: invalid_strategy
      traverse: HAS_CHILD
      source: "$entity"
    infer: "Generate for {{use.item}}"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(result.is_err(), "Invalid decompose strategy should fail");
    }

    // ========================================================================
    // Test: Valid lazy binding passes
    // ========================================================================
    #[test]
    fn test_valid_lazy_binding_passes() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: step1
    infer: "Hello"

  - id: step2
    use:
      eager: step1
      lazy_val:
        path: step1.result
        lazy: true
        default: "fallback"
    infer: "Using {{use.eager}} and {{use.lazy_val}}"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(
            result.is_ok(),
            "Valid lazy binding should pass: {:?}",
            result
        );
    }

    // ========================================================================
    // Test: for_each with binding expression passes
    // ========================================================================
    #[test]
    fn test_for_each_binding_expression_passes() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: process
    for_each: "{{use.items}}"
    as: item
    concurrency: 5
    infer: "Process {{use.item}}"
"#;
        let result = validator.validate_yaml(yaml);
        assert!(
            result.is_ok(),
            "for_each binding expression should pass: {:?}",
            result
        );
    }

    // ========================================================================
    // Test: Error message includes path
    // ========================================================================
    #[test]
    fn test_error_message_includes_path() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let yaml = r#"
schema: "nika/workflow@0.5"
tasks:
  - id: step1
    invoke:
      tool: novanet_describe
"#;
        let result = validator.validate_yaml(yaml);
        if let Err(NikaError::SchemaValidationFailed { errors }) = result {
            // At least one error's instance path must mention the invoke object
            let has_path = errors.iter().any(|e| e.path.contains("invoke"));
            assert!(has_path, "Error should include path to invoke");
        } else {
            panic!("Expected SchemaValidationFailed error");
        }
    }

    // ========================================================================
    // Test: JSON value validation works
    // (exercises `validate_value` directly, bypassing the YAML parsing step)
    // ========================================================================
    #[test]
    fn test_validate_value_works() {
        let validator = WorkflowSchemaValidator::new().unwrap();
        let value = json!({
            "schema": "nika/workflow@0.5",
            "tasks": [
                {
                    "id": "step1",
                    "infer": "Hello"
                }
            ]
        });
        let result = validator.validate_value(&value);
        assert!(result.is_ok(), "JSON value validation should work");
    }
}