oxify_model/
validation.rs

1//! Comprehensive workflow validation
2//!
3//! This module provides detailed validation for workflows to catch errors
4//! before execution, including cycle detection, orphan nodes, and structural issues.
5
6use crate::{NodeId, NodeKind, Workflow};
7use std::collections::{HashMap, HashSet, VecDeque};
8use thiserror::Error;
9
/// Result alias used throughout this module; the error type is fixed to
/// [`ValidationError`].
pub type Result<T> = std::result::Result<T, ValidationError>;
11
12#[derive(Error, Debug, Clone)]
13pub enum ValidationError {
14    #[error("Workflow has no start node")]
15    NoStartNode,
16
17    #[error("Workflow has multiple start nodes: {0}")]
18    MultipleStartNodes(String),
19
20    #[error("Workflow has no end node")]
21    NoEndNode,
22
23    #[error("Workflow has multiple end nodes: {0}")]
24    MultipleEndNodes(String),
25
26    #[error("Workflow contains a cycle")]
27    CycleDetected,
28
29    #[error("Node {0} is unreachable from start")]
30    UnreachableNode(NodeId),
31
32    #[error("Node {0} cannot reach end")]
33    DeadEndNode(NodeId),
34
35    #[error("Edge references non-existent node: {0}")]
36    InvalidNodeReference(NodeId),
37
38    #[error("Conditional node {0} missing true branch")]
39    MissingTrueBranch(NodeId),
40
41    #[error("Conditional node {0} missing false branch")]
42    MissingFalseBranch(NodeId),
43
44    #[error("Conditional node {0} has invalid branch: {1}")]
45    InvalidConditionalBranch(NodeId, NodeId),
46
47    #[error("Duplicate edge from {0} to {1}")]
48    DuplicateEdge(NodeId, NodeId),
49
50    #[error("Workflow has no nodes")]
51    EmptyWorkflow,
52
53    #[error("Workflow has no edges")]
54    NoEdges,
55
56    #[error("Switch node {0} has no cases defined")]
57    SwitchNodeNoCases(NodeId),
58
59    #[error("Switch node {0} has empty switch expression")]
60    SwitchNodeEmptyExpression(NodeId),
61
62    #[error("Switch node {0} case has empty match value")]
63    SwitchCaseEmptyMatch(NodeId),
64
65    #[error("Parallel node {0} has no tasks defined")]
66    ParallelNodeNoTasks(NodeId),
67
68    #[error("Parallel node {0} task '{1}' has empty expression")]
69    ParallelTaskEmptyExpression(NodeId, String),
70
71    #[error("Parallel node {0} has duplicate task ID: {1}")]
72    ParallelDuplicateTaskId(NodeId, String),
73
74    #[error("Approval node {0} has empty message")]
75    ApprovalEmptyMessage(NodeId),
76
77    #[error("Form node {0} has no fields defined")]
78    FormNoFields(NodeId),
79
80    #[error("Form node {0} has duplicate field ID: {1}")]
81    FormDuplicateFieldId(NodeId, String),
82
83    #[error("Form node {0} field '{1}' has empty label")]
84    FormFieldEmptyLabel(NodeId, String),
85
86    #[error("Loop node {0} has empty collection path")]
87    LoopEmptyCollectionPath(NodeId),
88
89    #[error("Loop node {0} has empty body expression")]
90    LoopEmptyBodyExpression(NodeId),
91
92    #[error("TryCatch node {0} has empty try expression")]
93    TryCatchEmptyTryExpression(NodeId),
94
95    #[error("SubWorkflow node {0} has empty workflow path")]
96    SubWorkflowEmptyPath(NodeId),
97}
98
/// Comprehensive workflow validator
///
/// A stateless unit type: all functionality is exposed through associated
/// functions such as [`WorkflowValidator::validate`]. The derives make the
/// public type printable and freely copyable.
#[derive(Debug, Clone, Copy, Default)]
pub struct WorkflowValidator;
101
102impl WorkflowValidator {
103    /// Validate a workflow and return all errors
104    pub fn validate(workflow: &Workflow) -> Result<ValidationReport> {
105        let mut errors = Vec::new();
106        let mut warnings = Vec::new();
107
108        // Basic structure validation
109        if workflow.nodes.is_empty() {
110            return Err(ValidationError::EmptyWorkflow);
111        }
112
113        // Validate start/end nodes
114        if let Err(e) = Self::validate_start_end_nodes(workflow) {
115            errors.push(e);
116        }
117
118        // Validate node references in edges (must be valid before cycle detection)
119        let edges_valid = if let Err(errs) = Self::validate_edge_references(workflow) {
120            errors.extend(errs);
121            false
122        } else {
123            true
124        };
125
126        // Validate conditional nodes
127        if let Err(errs) = Self::validate_conditional_nodes(workflow) {
128            errors.extend(errs);
129        }
130
131        // Validate new node types (Switch, Parallel, Approval, Form, etc.)
132        if let Err(errs) = Self::validate_advanced_nodes(workflow) {
133            errors.extend(errs);
134        }
135
136        // Only run cycle detection if all edges are valid
137        if edges_valid {
138            // Detect cycles
139            if let Err(e) = Self::detect_cycles(workflow) {
140                errors.push(e);
141            }
142
143            // Check for unreachable nodes
144            if let Err(errs) = Self::find_unreachable_nodes(workflow) {
145                warnings.extend(errs);
146            }
147
148            // Check for dead-end nodes
149            if let Err(errs) = Self::find_dead_end_nodes(workflow) {
150                warnings.extend(errs);
151            }
152
153            // Check for duplicate edges
154            if let Err(errs) = Self::find_duplicate_edges(workflow) {
155                warnings.extend(errs);
156            }
157        }
158
159        if !errors.is_empty() {
160            return Err(errors.into_iter().next().unwrap());
161        }
162
163        // Calculate stats
164        let stats = Self::calculate_stats(workflow);
165
166        Ok(ValidationReport {
167            valid: true,
168            warnings,
169            stats,
170        })
171    }
172
173    fn validate_start_end_nodes(workflow: &Workflow) -> Result<()> {
174        let start_nodes: Vec<_> = workflow
175            .nodes
176            .iter()
177            .filter(|n| matches!(n.kind, NodeKind::Start))
178            .collect();
179
180        let end_nodes: Vec<_> = workflow
181            .nodes
182            .iter()
183            .filter(|n| matches!(n.kind, NodeKind::End))
184            .collect();
185
186        if start_nodes.is_empty() {
187            return Err(ValidationError::NoStartNode);
188        }
189
190        if start_nodes.len() > 1 {
191            let ids = start_nodes
192                .iter()
193                .map(|n| n.id.to_string())
194                .collect::<Vec<_>>()
195                .join(", ");
196            return Err(ValidationError::MultipleStartNodes(ids));
197        }
198
199        if end_nodes.is_empty() {
200            return Err(ValidationError::NoEndNode);
201        }
202
203        if end_nodes.len() > 1 {
204            let ids = end_nodes
205                .iter()
206                .map(|n| n.id.to_string())
207                .collect::<Vec<_>>()
208                .join(", ");
209            return Err(ValidationError::MultipleEndNodes(ids));
210        }
211
212        Ok(())
213    }
214
215    fn validate_edge_references(
216        workflow: &Workflow,
217    ) -> std::result::Result<(), Vec<ValidationError>> {
218        let node_ids: HashSet<_> = workflow.nodes.iter().map(|n| n.id).collect();
219        let mut errors = Vec::new();
220
221        for edge in &workflow.edges {
222            if !node_ids.contains(&edge.from) {
223                errors.push(ValidationError::InvalidNodeReference(edge.from));
224            }
225            if !node_ids.contains(&edge.to) {
226                errors.push(ValidationError::InvalidNodeReference(edge.to));
227            }
228        }
229
230        if errors.is_empty() {
231            Ok(())
232        } else {
233            Err(errors)
234        }
235    }
236
237    fn validate_conditional_nodes(
238        workflow: &Workflow,
239    ) -> std::result::Result<(), Vec<ValidationError>> {
240        let node_ids: HashSet<_> = workflow.nodes.iter().map(|n| n.id).collect();
241        let mut errors = Vec::new();
242
243        for node in &workflow.nodes {
244            if let NodeKind::IfElse(condition) = &node.kind {
245                if !node_ids.contains(&condition.true_branch) {
246                    errors.push(ValidationError::InvalidConditionalBranch(
247                        node.id,
248                        condition.true_branch,
249                    ));
250                }
251                if !node_ids.contains(&condition.false_branch) {
252                    errors.push(ValidationError::InvalidConditionalBranch(
253                        node.id,
254                        condition.false_branch,
255                    ));
256                }
257            }
258        }
259
260        if errors.is_empty() {
261            Ok(())
262        } else {
263            Err(errors)
264        }
265    }
266
267    fn validate_advanced_nodes(
268        workflow: &Workflow,
269    ) -> std::result::Result<(), Vec<ValidationError>> {
270        let mut errors = Vec::new();
271
272        for node in &workflow.nodes {
273            match &node.kind {
274                // Validate Switch nodes
275                NodeKind::Switch(config) => {
276                    if config.switch_on.trim().is_empty() {
277                        errors.push(ValidationError::SwitchNodeEmptyExpression(node.id));
278                    }
279                    if config.cases.is_empty() && config.default_case.is_none() {
280                        errors.push(ValidationError::SwitchNodeNoCases(node.id));
281                    }
282                    for case in &config.cases {
283                        if case.match_value.trim().is_empty() {
284                            errors.push(ValidationError::SwitchCaseEmptyMatch(node.id));
285                        }
286                    }
287                }
288
289                // Validate Parallel nodes
290                NodeKind::Parallel(config) => {
291                    if config.tasks.is_empty() {
292                        errors.push(ValidationError::ParallelNodeNoTasks(node.id));
293                    }
294                    let mut seen_ids = HashSet::new();
295                    for task in &config.tasks {
296                        if task.expression.trim().is_empty() {
297                            errors.push(ValidationError::ParallelTaskEmptyExpression(
298                                node.id,
299                                task.id.clone(),
300                            ));
301                        }
302                        if !seen_ids.insert(&task.id) {
303                            errors.push(ValidationError::ParallelDuplicateTaskId(
304                                node.id,
305                                task.id.clone(),
306                            ));
307                        }
308                    }
309                }
310
311                // Validate Approval nodes
312                NodeKind::Approval(config) => {
313                    if config.message.trim().is_empty() {
314                        errors.push(ValidationError::ApprovalEmptyMessage(node.id));
315                    }
316                }
317
318                // Validate Form nodes
319                NodeKind::Form(config) => {
320                    if config.fields.is_empty() {
321                        errors.push(ValidationError::FormNoFields(node.id));
322                    }
323                    let mut seen_ids = HashSet::new();
324                    for field in &config.fields {
325                        if field.label.trim().is_empty() {
326                            errors.push(ValidationError::FormFieldEmptyLabel(
327                                node.id,
328                                field.id.clone(),
329                            ));
330                        }
331                        if !seen_ids.insert(&field.id) {
332                            errors.push(ValidationError::FormDuplicateFieldId(
333                                node.id,
334                                field.id.clone(),
335                            ));
336                        }
337                    }
338                }
339
340                // Validate Loop nodes
341                NodeKind::Loop(config) => match &config.loop_type {
342                    crate::LoopType::ForEach {
343                        collection_path,
344                        body_expression,
345                        ..
346                    } => {
347                        if collection_path.trim().is_empty() {
348                            errors.push(ValidationError::LoopEmptyCollectionPath(node.id));
349                        }
350                        if body_expression.trim().is_empty() {
351                            errors.push(ValidationError::LoopEmptyBodyExpression(node.id));
352                        }
353                    }
354                    crate::LoopType::While {
355                        body_expression, ..
356                    } => {
357                        if body_expression.trim().is_empty() {
358                            errors.push(ValidationError::LoopEmptyBodyExpression(node.id));
359                        }
360                    }
361                    crate::LoopType::Repeat {
362                        body_expression, ..
363                    } => {
364                        if body_expression.trim().is_empty() {
365                            errors.push(ValidationError::LoopEmptyBodyExpression(node.id));
366                        }
367                    }
368                },
369
370                // Validate TryCatch nodes
371                NodeKind::TryCatch(config) => {
372                    if config.try_expression.trim().is_empty() {
373                        errors.push(ValidationError::TryCatchEmptyTryExpression(node.id));
374                    }
375                }
376
377                // Validate SubWorkflow nodes
378                NodeKind::SubWorkflow(config) => {
379                    if config.workflow_path.trim().is_empty() {
380                        errors.push(ValidationError::SubWorkflowEmptyPath(node.id));
381                    }
382                }
383
384                // Other node types don't need advanced validation here
385                _ => {}
386            }
387        }
388
389        if errors.is_empty() {
390            Ok(())
391        } else {
392            Err(errors)
393        }
394    }
395
396    fn detect_cycles(workflow: &Workflow) -> Result<()> {
397        let mut in_degree: HashMap<NodeId, usize> = HashMap::new();
398        let mut adj_list: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
399
400        // Initialize
401        for node in &workflow.nodes {
402            in_degree.insert(node.id, 0);
403            adj_list.insert(node.id, Vec::new());
404        }
405
406        // Build adjacency list
407        for edge in &workflow.edges {
408            adj_list.get_mut(&edge.from).unwrap().push(edge.to);
409            *in_degree.get_mut(&edge.to).unwrap() += 1;
410        }
411
412        // Kahn's algorithm
413        let mut queue: VecDeque<_> = in_degree
414            .iter()
415            .filter(|(_, &deg)| deg == 0)
416            .map(|(&id, _)| id)
417            .collect();
418
419        let mut processed = 0;
420
421        while let Some(node_id) = queue.pop_front() {
422            processed += 1;
423
424            if let Some(neighbors) = adj_list.get(&node_id) {
425                for &neighbor in neighbors {
426                    let deg = in_degree.get_mut(&neighbor).unwrap();
427                    *deg -= 1;
428                    if *deg == 0 {
429                        queue.push_back(neighbor);
430                    }
431                }
432            }
433        }
434
435        if processed != workflow.nodes.len() {
436            return Err(ValidationError::CycleDetected);
437        }
438
439        Ok(())
440    }
441
442    fn find_unreachable_nodes(
443        workflow: &Workflow,
444    ) -> std::result::Result<(), Vec<ValidationError>> {
445        let start_node = workflow
446            .nodes
447            .iter()
448            .find(|n| matches!(n.kind, NodeKind::Start));
449
450        if start_node.is_none() {
451            return Ok(()); // Already caught by start/end validation
452        }
453
454        let start_id = start_node.unwrap().id;
455
456        // Build adjacency list
457        let mut adj_list: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
458        for node in &workflow.nodes {
459            adj_list.insert(node.id, Vec::new());
460        }
461        for edge in &workflow.edges {
462            adj_list.get_mut(&edge.from).unwrap().push(edge.to);
463        }
464
465        // BFS from start
466        let mut visited = HashSet::new();
467        let mut queue = VecDeque::new();
468        queue.push_back(start_id);
469        visited.insert(start_id);
470
471        while let Some(node_id) = queue.pop_front() {
472            if let Some(neighbors) = adj_list.get(&node_id) {
473                for &neighbor in neighbors {
474                    if visited.insert(neighbor) {
475                        queue.push_back(neighbor);
476                    }
477                }
478            }
479        }
480
481        // Find unreachable nodes
482        let errors: Vec<_> = workflow
483            .nodes
484            .iter()
485            .filter(|n| !visited.contains(&n.id))
486            .map(|n| ValidationError::UnreachableNode(n.id))
487            .collect();
488
489        if errors.is_empty() {
490            Ok(())
491        } else {
492            Err(errors)
493        }
494    }
495
496    fn find_dead_end_nodes(workflow: &Workflow) -> std::result::Result<(), Vec<ValidationError>> {
497        let end_node = workflow
498            .nodes
499            .iter()
500            .find(|n| matches!(n.kind, NodeKind::End));
501
502        if end_node.is_none() {
503            return Ok(()); // Already caught by start/end validation
504        }
505
506        let end_id = end_node.unwrap().id;
507
508        // Build reverse adjacency list
509        let mut reverse_adj: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
510        for node in &workflow.nodes {
511            reverse_adj.insert(node.id, Vec::new());
512        }
513        for edge in &workflow.edges {
514            reverse_adj.get_mut(&edge.to).unwrap().push(edge.from);
515        }
516
517        // BFS from end (backwards)
518        let mut visited = HashSet::new();
519        let mut queue = VecDeque::new();
520        queue.push_back(end_id);
521        visited.insert(end_id);
522
523        while let Some(node_id) = queue.pop_front() {
524            if let Some(predecessors) = reverse_adj.get(&node_id) {
525                for &pred in predecessors {
526                    if visited.insert(pred) {
527                        queue.push_back(pred);
528                    }
529                }
530            }
531        }
532
533        // Find dead-end nodes
534        let errors: Vec<_> = workflow
535            .nodes
536            .iter()
537            .filter(|n| !visited.contains(&n.id))
538            .map(|n| ValidationError::DeadEndNode(n.id))
539            .collect();
540
541        if errors.is_empty() {
542            Ok(())
543        } else {
544            Err(errors)
545        }
546    }
547
548    fn find_duplicate_edges(workflow: &Workflow) -> std::result::Result<(), Vec<ValidationError>> {
549        let mut seen = HashSet::new();
550        let mut errors = Vec::new();
551
552        for edge in &workflow.edges {
553            let pair = (edge.from, edge.to);
554            if !seen.insert(pair) {
555                errors.push(ValidationError::DuplicateEdge(edge.from, edge.to));
556            }
557        }
558
559        if errors.is_empty() {
560            Ok(())
561        } else {
562            Err(errors)
563        }
564    }
565
566    fn calculate_stats(workflow: &Workflow) -> ValidationStats {
567        let total_nodes = workflow.nodes.len();
568        let total_edges = workflow.edges.len();
569
570        let start_nodes = workflow
571            .nodes
572            .iter()
573            .filter(|n| matches!(n.kind, NodeKind::Start))
574            .count();
575
576        let end_nodes = workflow
577            .nodes
578            .iter()
579            .filter(|n| matches!(n.kind, NodeKind::End))
580            .count();
581
582        // Calculate max depth using BFS
583        let max_depth = Self::calculate_max_depth_bfs(workflow);
584
585        // Count node types
586        let mut node_type_counts = HashMap::new();
587        for node in &workflow.nodes {
588            let type_name = match &node.kind {
589                NodeKind::Start => "Start",
590                NodeKind::End => "End",
591                NodeKind::LLM(_) => "LLM",
592                NodeKind::Retriever(_) => "Retriever",
593                NodeKind::Code(_) => "Code",
594                NodeKind::IfElse(_) => "IfElse",
595                NodeKind::Tool(_) => "Tool",
596                NodeKind::Loop(_) => "Loop",
597                NodeKind::TryCatch(_) => "TryCatch",
598                NodeKind::SubWorkflow(_) => "SubWorkflow",
599                NodeKind::Switch(_) => "Switch",
600                NodeKind::Parallel(_) => "Parallel",
601                NodeKind::Approval(_) => "Approval",
602                NodeKind::Form(_) => "Form",
603                NodeKind::Vision(_) => "Vision",
604            };
605            *node_type_counts.entry(type_name.to_string()).or_insert(0) += 1;
606        }
607
608        ValidationStats {
609            total_nodes,
610            total_edges,
611            start_nodes,
612            end_nodes,
613            max_depth,
614            node_type_counts,
615        }
616    }
617
618    fn calculate_max_depth_bfs(workflow: &Workflow) -> usize {
619        let start_node = workflow
620            .nodes
621            .iter()
622            .find(|n| matches!(n.kind, NodeKind::Start));
623
624        if start_node.is_none() {
625            return 0;
626        }
627
628        let start_id = start_node.unwrap().id;
629
630        // Build adjacency list
631        let mut adj_list: HashMap<NodeId, Vec<NodeId>> = HashMap::new();
632        for node in &workflow.nodes {
633            adj_list.insert(node.id, Vec::new());
634        }
635        for edge in &workflow.edges {
636            adj_list.get_mut(&edge.from).unwrap().push(edge.to);
637        }
638
639        // BFS to calculate depth
640        let mut queue = VecDeque::new();
641        let mut depths = HashMap::new();
642
643        queue.push_back(start_id);
644        depths.insert(start_id, 0);
645
646        let mut max_depth = 0;
647
648        while let Some(node_id) = queue.pop_front() {
649            let depth = *depths.get(&node_id).unwrap();
650            max_depth = max_depth.max(depth);
651
652            if let Some(neighbors) = adj_list.get(&node_id) {
653                for &neighbor in neighbors {
654                    use std::collections::hash_map::Entry;
655                    if let Entry::Vacant(e) = depths.entry(neighbor) {
656                        e.insert(depth + 1);
657                        queue.push_back(neighbor);
658                    }
659                }
660            }
661        }
662
663        max_depth
664    }
665}
666
/// Validation report with detailed statistics
///
/// Returned by [`WorkflowValidator::validate`] when no hard error was found.
#[derive(Debug, Clone)]
pub struct ValidationReport {
    /// Whether the workflow passed validation; `WorkflowValidator::validate`
    /// only builds reports with this set to `true`.
    pub valid: bool,
    /// Non-fatal findings: unreachable nodes, dead-end nodes and duplicate edges.
    pub warnings: Vec<ValidationError>,
    /// Size and shape figures for the validated workflow.
    pub stats: ValidationStats,
}
674
/// Validation statistics
///
/// Plain counters describing a workflow. `PartialEq`/`Eq` allow two sets of
/// statistics to be compared directly, and `Default` yields all-zero counters
/// with an empty per-kind tally.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct ValidationStats {
    /// Number of nodes in the workflow.
    pub total_nodes: usize,
    /// Number of edges in the workflow.
    pub total_edges: usize,
    /// Number of nodes of the start kind.
    pub start_nodes: usize,
    /// Number of nodes of the end kind.
    pub end_nodes: usize,
    /// Largest breadth-first depth (edges from the start node) among reachable nodes.
    pub max_depth: usize,
    /// Node count per node kind, keyed by the kind's display name (e.g. `"LLM"`).
    pub node_type_counts: std::collections::HashMap<String, usize>,
}
685
686#[cfg(test)]
687mod tests {
688    use super::*;
689    use crate::{Edge, Node, WorkflowMetadata};
690    use proptest::prelude::*;
691
692    #[test]
693    fn test_valid_workflow() {
694        let start = Node::new("Start".to_string(), NodeKind::Start);
695        let end = Node::new("End".to_string(), NodeKind::End);
696
697        let workflow = Workflow {
698            metadata: WorkflowMetadata::new("Test".to_string()),
699            nodes: vec![start.clone(), end.clone()],
700            edges: vec![Edge::new(start.id, end.id)],
701        };
702
703        let result = WorkflowValidator::validate(&workflow);
704        assert!(result.is_ok());
705    }
706
707    #[test]
708    fn test_no_start_node() {
709        let end = Node::new("End".to_string(), NodeKind::End);
710
711        let workflow = Workflow {
712            metadata: WorkflowMetadata::new("Test".to_string()),
713            nodes: vec![end],
714            edges: vec![],
715        };
716
717        let result = WorkflowValidator::validate(&workflow);
718        assert!(matches!(result, Err(ValidationError::NoStartNode)));
719    }
720
721    #[test]
722    fn test_cycle_detection() {
723        use crate::LlmConfig;
724
725        let start = Node::new("Start".to_string(), NodeKind::Start);
726        let llm1 = Node::new(
727            "LLM1".to_string(),
728            NodeKind::LLM(LlmConfig {
729                provider: "openai".to_string(),
730                model: "gpt-4".to_string(),
731                system_prompt: None,
732                prompt_template: "test".to_string(),
733                temperature: None,
734                max_tokens: None,
735                tools: vec![],
736                images: vec![],
737                extra_params: serde_json::Value::Null,
738            }),
739        );
740        let llm2 = Node::new(
741            "LLM2".to_string(),
742            NodeKind::LLM(LlmConfig {
743                provider: "openai".to_string(),
744                model: "gpt-4".to_string(),
745                system_prompt: None,
746                prompt_template: "test".to_string(),
747                temperature: None,
748                max_tokens: None,
749                tools: vec![],
750                images: vec![],
751                extra_params: serde_json::Value::Null,
752            }),
753        );
754        let end = Node::new("End".to_string(), NodeKind::End);
755
756        let workflow = Workflow {
757            metadata: WorkflowMetadata::new("Test".to_string()),
758            nodes: vec![start.clone(), llm1.clone(), llm2.clone(), end.clone()],
759            edges: vec![
760                Edge::new(start.id, llm1.id),
761                Edge::new(llm1.id, llm2.id),
762                Edge::new(llm2.id, llm1.id), // Creates cycle
763                Edge::new(llm2.id, end.id),
764            ],
765        };
766
767        let result = WorkflowValidator::validate(&workflow);
768        assert!(matches!(result, Err(ValidationError::CycleDetected)));
769    }
770
771    // Property-based tests
772    proptest! {
773        /// Property: A valid DAG with start and end should always pass validation
774        #[test]
775        fn prop_valid_dag_passes_validation(node_count in 2usize..10) {
776            use crate::LlmConfig;
777
778            // Generate a simple linear workflow (guaranteed to be a DAG)
779            let start = Node::new("Start".to_string(), NodeKind::Start);
780            let end = Node::new("End".to_string(), NodeKind::End);
781
782            let mut nodes = vec![start.clone()];
783            let mut edges = Vec::new();
784
785            // Create intermediate nodes
786            let mut prev_id = start.id;
787            for i in 0..node_count - 2 {
788                let llm = Node::new(
789                    format!("LLM{}", i),
790                    NodeKind::LLM(LlmConfig {
791                        provider: "openai".to_string(),
792                        model: "gpt-4".to_string(),
793                        system_prompt: None,
794                        prompt_template: "test".to_string(),
795                        temperature: None,
796                        max_tokens: None,
797                        tools: vec![],
798                        images: vec![],
799                        extra_params: serde_json::Value::Null,
800                    }),
801                );
802                edges.push(Edge::new(prev_id, llm.id));
803                prev_id = llm.id;
804                nodes.push(llm);
805            }
806
807            nodes.push(end.clone());
808            edges.push(Edge::new(prev_id, end.id));
809
810            let workflow = Workflow {
811                metadata: WorkflowMetadata::new("Test".to_string()),
812                nodes,
813                edges,
814            };
815
816            let result = WorkflowValidator::validate(&workflow);
817            prop_assert!(result.is_ok(), "Linear DAG should be valid: {:?}", result);
818        }
819
820        /// Property: Workflow without start node always fails
821        #[test]
822        fn prop_no_start_fails(node_count in 1usize..5) {
823            let mut nodes = Vec::new();
824            for i in 0..node_count {
825                nodes.push(Node::new(format!("Node{}", i), NodeKind::End));
826            }
827
828            let workflow = Workflow {
829                metadata: WorkflowMetadata::new("Test".to_string()),
830                nodes,
831                edges: vec![],
832            };
833
834            let result = WorkflowValidator::validate(&workflow);
835            prop_assert!(matches!(result, Err(ValidationError::NoStartNode)));
836        }
837
838        /// Property: Workflow without end node always fails
839        #[test]
840        fn prop_no_end_fails(node_count in 1usize..5) {
841            let mut nodes = Vec::new();
842            for i in 0..node_count {
843                nodes.push(Node::new(format!("Node{}", i), NodeKind::Start));
844            }
845
846            let workflow = Workflow {
847                metadata: WorkflowMetadata::new("Test".to_string()),
848                nodes,
849                edges: vec![],
850            };
851
852            let result = WorkflowValidator::validate(&workflow);
853            prop_assert!(
854                matches!(result, Err(ValidationError::NoEndNode) | Err(ValidationError::MultipleStartNodes(_)))
855            );
856        }
857
858        /// Property: Edge referencing non-existent node always fails
859        #[test]
860        fn prop_invalid_edge_fails(_dummy in 0..10) {
861            let start = Node::new("Start".to_string(), NodeKind::Start);
862            let end = Node::new("End".to_string(), NodeKind::End);
863            let invalid_id = uuid::Uuid::new_v4(); // Random non-existent ID
864
865            let workflow = Workflow {
866                metadata: WorkflowMetadata::new("Test".to_string()),
867                nodes: vec![start.clone(), end.clone()],
868                edges: vec![
869                    Edge::new(start.id, invalid_id), // Invalid edge
870                ],
871            };
872
873            let result = WorkflowValidator::validate(&workflow);
874            prop_assert!(matches!(result, Err(ValidationError::InvalidNodeReference(_))));
875        }
876
877        /// Property: Stats are always non-negative and consistent
878        #[test]
879        fn prop_stats_consistency(node_count in 2usize..20) {
880            use crate::LlmConfig;
881
882            let start = Node::new("Start".to_string(), NodeKind::Start);
883            let end = Node::new("End".to_string(), NodeKind::End);
884
885            let mut nodes = vec![start.clone()];
886            let mut edges = Vec::new();
887            let mut prev_id = start.id;
888
889            for i in 0..node_count - 2 {
890                let llm = Node::new(
891                    format!("LLM{}", i),
892                    NodeKind::LLM(LlmConfig {
893                        provider: "openai".to_string(),
894                        model: "gpt-4".to_string(),
895                        system_prompt: None,
896                        prompt_template: "test".to_string(),
897                        temperature: None,
898                        max_tokens: None,
899                        tools: vec![],
900                        images: vec![],
901                        extra_params: serde_json::Value::Null,
902                    }),
903                );
904                edges.push(Edge::new(prev_id, llm.id));
905                prev_id = llm.id;
906                nodes.push(llm);
907            }
908
909            nodes.push(end.clone());
910            edges.push(Edge::new(prev_id, end.id));
911
912            let workflow = Workflow {
913                metadata: WorkflowMetadata::new("Test".to_string()),
914                nodes: nodes.clone(),
915                edges: edges.clone(),
916            };
917
918            if let Ok(report) = WorkflowValidator::validate(&workflow) {
919                prop_assert_eq!(report.stats.total_nodes, nodes.len());
920                prop_assert_eq!(report.stats.total_edges, edges.len());
921                prop_assert_eq!(report.stats.start_nodes, 1);
922                prop_assert_eq!(report.stats.end_nodes, 1);
923                prop_assert!(report.stats.max_depth >= node_count - 1);
924            }
925        }
926
927        /// Property: Duplicate edges are detected as warnings
928        #[test]
929        fn prop_duplicate_edges_detected(_dummy in 0..10) {
930            let start = Node::new("Start".to_string(), NodeKind::Start);
931            let end = Node::new("End".to_string(), NodeKind::End);
932
933            let workflow = Workflow {
934                metadata: WorkflowMetadata::new("Test".to_string()),
935                nodes: vec![start.clone(), end.clone()],
936                edges: vec![
937                    Edge::new(start.id, end.id),
938                    Edge::new(start.id, end.id), // Duplicate
939                ],
940            };
941
942            let result = WorkflowValidator::validate(&workflow);
943            prop_assert!(result.is_ok(), "Duplicate edges should be a warning, not error");
944            if let Ok(report) = result {
945                let has_duplicate_warning = report.warnings.iter().any(|w| {
946                    matches!(w, ValidationError::DuplicateEdge(_, _))
947                });
948                prop_assert!(has_duplicate_warning, "Should have duplicate edge warning");
949            }
950        }
951    }
952
953    // Validation tests for advanced node types
954
955    #[test]
956    fn test_switch_node_empty_expression() {
957        use crate::{SwitchCase, SwitchConfig};
958
959        let start = Node::new("Start".to_string(), NodeKind::Start);
960        let switch = Node::new(
961            "Switch".to_string(),
962            NodeKind::Switch(SwitchConfig {
963                switch_on: "".to_string(), // Empty expression
964                cases: vec![SwitchCase {
965                    match_value: "success".to_string(),
966                    action: "action1".to_string(),
967                }],
968                default_case: None,
969            }),
970        );
971        let end = Node::new("End".to_string(), NodeKind::End);
972
973        let workflow = Workflow {
974            metadata: WorkflowMetadata::new("Test".to_string()),
975            nodes: vec![start.clone(), switch.clone(), end.clone()],
976            edges: vec![Edge::new(start.id, switch.id), Edge::new(switch.id, end.id)],
977        };
978
979        let result = WorkflowValidator::validate(&workflow);
980        assert!(matches!(
981            result,
982            Err(ValidationError::SwitchNodeEmptyExpression(_))
983        ));
984    }
985
986    #[test]
987    fn test_switch_node_no_cases() {
988        use crate::SwitchConfig;
989
990        let start = Node::new("Start".to_string(), NodeKind::Start);
991        let switch = Node::new(
992            "Switch".to_string(),
993            NodeKind::Switch(SwitchConfig {
994                switch_on: "{{status}}".to_string(),
995                cases: vec![], // No cases
996                default_case: None,
997            }),
998        );
999        let end = Node::new("End".to_string(), NodeKind::End);
1000
1001        let workflow = Workflow {
1002            metadata: WorkflowMetadata::new("Test".to_string()),
1003            nodes: vec![start.clone(), switch.clone(), end.clone()],
1004            edges: vec![Edge::new(start.id, switch.id), Edge::new(switch.id, end.id)],
1005        };
1006
1007        let result = WorkflowValidator::validate(&workflow);
1008        assert!(matches!(result, Err(ValidationError::SwitchNodeNoCases(_))));
1009    }
1010
1011    #[test]
1012    fn test_parallel_node_no_tasks() {
1013        use crate::{ParallelConfig, ParallelStrategy};
1014
1015        let start = Node::new("Start".to_string(), NodeKind::Start);
1016        let parallel = Node::new(
1017            "Parallel".to_string(),
1018            NodeKind::Parallel(ParallelConfig {
1019                strategy: ParallelStrategy::WaitAll,
1020                tasks: vec![], // No tasks
1021                max_concurrency: None,
1022                timeout_ms: None,
1023            }),
1024        );
1025        let end = Node::new("End".to_string(), NodeKind::End);
1026
1027        let workflow = Workflow {
1028            metadata: WorkflowMetadata::new("Test".to_string()),
1029            nodes: vec![start.clone(), parallel.clone(), end.clone()],
1030            edges: vec![
1031                Edge::new(start.id, parallel.id),
1032                Edge::new(parallel.id, end.id),
1033            ],
1034        };
1035
1036        let result = WorkflowValidator::validate(&workflow);
1037        assert!(matches!(
1038            result,
1039            Err(ValidationError::ParallelNodeNoTasks(_))
1040        ));
1041    }
1042
1043    #[test]
1044    fn test_parallel_node_empty_expression() {
1045        use crate::{ParallelConfig, ParallelStrategy, ParallelTask};
1046
1047        let start = Node::new("Start".to_string(), NodeKind::Start);
1048        let parallel = Node::new(
1049            "Parallel".to_string(),
1050            NodeKind::Parallel(ParallelConfig {
1051                strategy: ParallelStrategy::WaitAll,
1052                tasks: vec![ParallelTask {
1053                    id: "task1".to_string(),
1054                    expression: "".to_string(), // Empty expression
1055                    description: None,
1056                }],
1057                max_concurrency: None,
1058                timeout_ms: None,
1059            }),
1060        );
1061        let end = Node::new("End".to_string(), NodeKind::End);
1062
1063        let workflow = Workflow {
1064            metadata: WorkflowMetadata::new("Test".to_string()),
1065            nodes: vec![start.clone(), parallel.clone(), end.clone()],
1066            edges: vec![
1067                Edge::new(start.id, parallel.id),
1068                Edge::new(parallel.id, end.id),
1069            ],
1070        };
1071
1072        let result = WorkflowValidator::validate(&workflow);
1073        assert!(matches!(
1074            result,
1075            Err(ValidationError::ParallelTaskEmptyExpression(_, _))
1076        ));
1077    }
1078
1079    #[test]
1080    fn test_parallel_node_duplicate_task_id() {
1081        use crate::{ParallelConfig, ParallelStrategy, ParallelTask};
1082
1083        let start = Node::new("Start".to_string(), NodeKind::Start);
1084        let parallel = Node::new(
1085            "Parallel".to_string(),
1086            NodeKind::Parallel(ParallelConfig {
1087                strategy: ParallelStrategy::WaitAll,
1088                tasks: vec![
1089                    ParallelTask {
1090                        id: "task1".to_string(),
1091                        expression: "{{expr1}}".to_string(),
1092                        description: None,
1093                    },
1094                    ParallelTask {
1095                        id: "task1".to_string(), // Duplicate ID
1096                        expression: "{{expr2}}".to_string(),
1097                        description: None,
1098                    },
1099                ],
1100                max_concurrency: None,
1101                timeout_ms: None,
1102            }),
1103        );
1104        let end = Node::new("End".to_string(), NodeKind::End);
1105
1106        let workflow = Workflow {
1107            metadata: WorkflowMetadata::new("Test".to_string()),
1108            nodes: vec![start.clone(), parallel.clone(), end.clone()],
1109            edges: vec![
1110                Edge::new(start.id, parallel.id),
1111                Edge::new(parallel.id, end.id),
1112            ],
1113        };
1114
1115        let result = WorkflowValidator::validate(&workflow);
1116        assert!(matches!(
1117            result,
1118            Err(ValidationError::ParallelDuplicateTaskId(_, _))
1119        ));
1120    }
1121
1122    #[test]
1123    fn test_approval_node_empty_message() {
1124        use crate::ApprovalConfig;
1125
1126        let start = Node::new("Start".to_string(), NodeKind::Start);
1127        let approval = Node::new(
1128            "Approval".to_string(),
1129            NodeKind::Approval(ApprovalConfig {
1130                message: "".to_string(), // Empty message
1131                description: None,
1132                approvers: vec![],
1133                timeout_seconds: None,
1134                context_data: serde_json::Value::Null,
1135            }),
1136        );
1137        let end = Node::new("End".to_string(), NodeKind::End);
1138
1139        let workflow = Workflow {
1140            metadata: WorkflowMetadata::new("Test".to_string()),
1141            nodes: vec![start.clone(), approval.clone(), end.clone()],
1142            edges: vec![
1143                Edge::new(start.id, approval.id),
1144                Edge::new(approval.id, end.id),
1145            ],
1146        };
1147
1148        let result = WorkflowValidator::validate(&workflow);
1149        assert!(matches!(
1150            result,
1151            Err(ValidationError::ApprovalEmptyMessage(_))
1152        ));
1153    }
1154
1155    #[test]
1156    fn test_form_node_no_fields() {
1157        use crate::FormConfig;
1158
1159        let start = Node::new("Start".to_string(), NodeKind::Start);
1160        let form = Node::new(
1161            "Form".to_string(),
1162            NodeKind::Form(FormConfig {
1163                title: "Test Form".to_string(),
1164                description: None,
1165                fields: vec![], // No fields
1166                timeout_seconds: None,
1167                allowed_submitters: vec![],
1168            }),
1169        );
1170        let end = Node::new("End".to_string(), NodeKind::End);
1171
1172        let workflow = Workflow {
1173            metadata: WorkflowMetadata::new("Test".to_string()),
1174            nodes: vec![start.clone(), form.clone(), end.clone()],
1175            edges: vec![Edge::new(start.id, form.id), Edge::new(form.id, end.id)],
1176        };
1177
1178        let result = WorkflowValidator::validate(&workflow);
1179        assert!(matches!(result, Err(ValidationError::FormNoFields(_))));
1180    }
1181
1182    #[test]
1183    fn test_form_node_duplicate_field_id() {
1184        use crate::{FormConfig, FormField, FormFieldType};
1185
1186        let start = Node::new("Start".to_string(), NodeKind::Start);
1187        let form = Node::new(
1188            "Form".to_string(),
1189            NodeKind::Form(FormConfig {
1190                title: "Test Form".to_string(),
1191                description: None,
1192                fields: vec![
1193                    FormField {
1194                        id: "field1".to_string(),
1195                        label: "Field 1".to_string(),
1196                        field_type: FormFieldType::Text,
1197                        required: false,
1198                        default_value: None,
1199                        validation: None,
1200                        options: vec![],
1201                    },
1202                    FormField {
1203                        id: "field1".to_string(), // Duplicate ID
1204                        label: "Field 2".to_string(),
1205                        field_type: FormFieldType::Text,
1206                        required: false,
1207                        default_value: None,
1208                        validation: None,
1209                        options: vec![],
1210                    },
1211                ],
1212                timeout_seconds: None,
1213                allowed_submitters: vec![],
1214            }),
1215        );
1216        let end = Node::new("End".to_string(), NodeKind::End);
1217
1218        let workflow = Workflow {
1219            metadata: WorkflowMetadata::new("Test".to_string()),
1220            nodes: vec![start.clone(), form.clone(), end.clone()],
1221            edges: vec![Edge::new(start.id, form.id), Edge::new(form.id, end.id)],
1222        };
1223
1224        let result = WorkflowValidator::validate(&workflow);
1225        assert!(matches!(
1226            result,
1227            Err(ValidationError::FormDuplicateFieldId(_, _))
1228        ));
1229    }
1230
1231    #[test]
1232    fn test_valid_switch_node() {
1233        use crate::{SwitchCase, SwitchConfig};
1234
1235        let start = Node::new("Start".to_string(), NodeKind::Start);
1236        let switch = Node::new(
1237            "Switch".to_string(),
1238            NodeKind::Switch(SwitchConfig {
1239                switch_on: "{{status}}".to_string(),
1240                cases: vec![
1241                    SwitchCase {
1242                        match_value: "success".to_string(),
1243                        action: "action1".to_string(),
1244                    },
1245                    SwitchCase {
1246                        match_value: "error".to_string(),
1247                        action: "action2".to_string(),
1248                    },
1249                ],
1250                default_case: Some("default_action".to_string()),
1251            }),
1252        );
1253        let end = Node::new("End".to_string(), NodeKind::End);
1254
1255        let workflow = Workflow {
1256            metadata: WorkflowMetadata::new("Test".to_string()),
1257            nodes: vec![start.clone(), switch.clone(), end.clone()],
1258            edges: vec![Edge::new(start.id, switch.id), Edge::new(switch.id, end.id)],
1259        };
1260
1261        let result = WorkflowValidator::validate(&workflow);
1262        assert!(result.is_ok());
1263    }
1264
1265    #[test]
1266    fn test_valid_parallel_node() {
1267        use crate::{ParallelConfig, ParallelStrategy, ParallelTask};
1268
1269        let start = Node::new("Start".to_string(), NodeKind::Start);
1270        let parallel = Node::new(
1271            "Parallel".to_string(),
1272            NodeKind::Parallel(ParallelConfig {
1273                strategy: ParallelStrategy::Race,
1274                tasks: vec![
1275                    ParallelTask {
1276                        id: "task1".to_string(),
1277                        expression: "{{query1}}".to_string(),
1278                        description: Some("First task".to_string()),
1279                    },
1280                    ParallelTask {
1281                        id: "task2".to_string(),
1282                        expression: "{{query2}}".to_string(),
1283                        description: Some("Second task".to_string()),
1284                    },
1285                ],
1286                max_concurrency: Some(2),
1287                timeout_ms: Some(10000),
1288            }),
1289        );
1290        let end = Node::new("End".to_string(), NodeKind::End);
1291
1292        let workflow = Workflow {
1293            metadata: WorkflowMetadata::new("Test".to_string()),
1294            nodes: vec![start.clone(), parallel.clone(), end.clone()],
1295            edges: vec![
1296                Edge::new(start.id, parallel.id),
1297                Edge::new(parallel.id, end.id),
1298            ],
1299        };
1300
1301        let result = WorkflowValidator::validate(&workflow);
1302        assert!(result.is_ok());
1303    }
1304}