Skip to main content

punch_kernel/
workflow.rs

1//! Multi-step agent workflow engine with DAG execution.
2//!
3//! The [`WorkflowEngine`] allows registering named workflows composed of
4//! sequential steps or DAG-structured steps with parallel fan-out, conditional
5//! branching, loops, and advanced error handling.
6//!
7//! ## Variable substitution
8//!
9//! Prompt templates support:
10//! - `{{input}}` / `{{previous_output}}` — current pipeline input
//! - `{{step_name}}` — name of the current step (literal token; in the dotted forms below, `step_name` stands for the name of an actual step)
12//! - `{{step_N}}` — output of step N (1-indexed, sequential mode)
13//! - `{{some_step_name}}` — output of a step by name
14//! - `{{step_name.output}}` — explicit step output reference
15//! - `{{step_name.status}}` — step completion status
16//! - `{{step_name.duration_ms}}` — step duration
17//! - `{{loop.index}}` — current loop iteration
18//! - `{{loop.item}}` — current loop item (ForEach)
19//! - `{{step_name.output.field.nested}}` — JSON path into step output
20//! - `{{step_name.output | uppercase}}` — data transformation
21
22use std::collections::HashMap;
23use std::sync::Arc;
24use std::time::Instant;
25
26use chrono::{DateTime, Utc};
27use dashmap::DashMap;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, error, info, instrument, warn};
30use uuid::Uuid;
31
32use punch_memory::MemorySubstrate;
33use punch_runtime::{FighterLoopParams, LlmDriver, run_fighter_loop, tools_for_capabilities};
34use punch_types::{FighterId, FighterManifest, ModelConfig, PunchError, PunchResult, WeightClass};
35
36use crate::workflow_conditions::{Condition, evaluate_condition};
37use crate::workflow_loops::{LoopConfig, LoopState, calculate_backoff, parse_foreach_items};
38use crate::workflow_validation::{ValidationError, topological_sort, validate_workflow};
39
40// ---------------------------------------------------------------------------
41// ID types
42// ---------------------------------------------------------------------------
43
/// Unique identifier for a workflow definition.
///
/// Newtype over [`Uuid`]. `#[serde(transparent)]` makes it serialize and
/// deserialize as the bare inner UUID rather than as a wrapper.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct WorkflowId(pub Uuid);
48
49impl WorkflowId {
50    pub fn new() -> Self {
51        Self(Uuid::new_v4())
52    }
53}
54
55impl Default for WorkflowId {
56    fn default() -> Self {
57        Self::new()
58    }
59}
60
61impl std::fmt::Display for WorkflowId {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        write!(f, "{}", self.0)
64    }
65}
66
/// Unique identifier for a workflow run (execution instance).
///
/// Newtype over [`Uuid`]. `#[serde(transparent)]` makes it serialize and
/// deserialize as the bare inner UUID rather than as a wrapper.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct WorkflowRunId(pub Uuid);
71
72impl WorkflowRunId {
73    pub fn new() -> Self {
74        Self(Uuid::new_v4())
75    }
76}
77
78impl Default for WorkflowRunId {
79    fn default() -> Self {
80        Self::new()
81    }
82}
83
84impl std::fmt::Display for WorkflowRunId {
85    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86        write!(f, "{}", self.0)
87    }
88}
89
90// ---------------------------------------------------------------------------
91// Workflow types
92// ---------------------------------------------------------------------------
93
94/// What to do when a workflow step fails.
95#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
96#[serde(rename_all = "snake_case")]
97#[derive(Default)]
98pub enum OnError {
99    /// Abort the entire workflow.
100    #[default]
101    FailWorkflow,
102    /// Skip the failed step and continue.
103    SkipStep,
104    /// Retry the step once, then fail if it fails again.
105    RetryOnce,
106    /// On error, run a fallback step instead.
107    Fallback { step: String },
108    /// Run an error handler step, then continue the workflow.
109    CatchAndContinue { error_handler: String },
110    /// Stop trying after N consecutive failures, with a cooldown.
111    CircuitBreaker {
112        max_failures: usize,
113        cooldown_secs: u64,
114    },
115}
116
/// Per-step execution status.
///
/// Serialized in snake_case (for example `"completed"`), which matches the
/// labels written by the `Display` impl.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StepStatus {
    /// Value used for [`StepResult::status`] when the field is missing during
    /// deserialization.
    Pending,
    Running,
    Completed,
    /// Recorded by the DAG executor for steps that errored or were blocked by
    /// an open circuit breaker.
    Failed,
    /// Recorded by the DAG executor when a step's condition evaluates to false.
    Skipped,
    /// Recorded by the DAG executor for steps that never became runnable
    /// because their dependencies were not met.
    Cancelled,
}
128
129impl std::fmt::Display for StepStatus {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        match self {
132            Self::Pending => write!(f, "pending"),
133            Self::Running => write!(f, "running"),
134            Self::Completed => write!(f, "completed"),
135            Self::Failed => write!(f, "failed"),
136            Self::Skipped => write!(f, "skipped"),
137            Self::Cancelled => write!(f, "cancelled"),
138        }
139    }
140}
141
/// A single step within a sequential workflow (legacy format, still supported).
///
/// Steps of this kind live in [`Workflow::steps`] and carry no dependency,
/// condition, or loop information; see [`DagWorkflowStep`] for the DAG form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowStep {
    /// Human-readable name for this step.
    pub name: String,
    /// The fighter name to use for this step.
    pub fighter_name: String,
    /// Prompt template with variable substitution (`{{...}}` placeholders;
    /// see the module-level docs for the supported variables).
    pub prompt_template: String,
    /// Maximum time in seconds for this step (default 120).
    pub timeout_secs: Option<u64>,
    /// Error handling strategy. Falls back to [`OnError::FailWorkflow`] when
    /// the field is omitted from the serialized form.
    #[serde(default)]
    pub on_error: OnError,
}
157
/// A single step within a DAG workflow.
///
/// Every field after `timeout_secs` is `#[serde(default)]`, so a serialized
/// step may omit them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DagWorkflowStep {
    /// Human-readable name for this step (must be unique within the workflow).
    /// The DAG executor keys its step lookup and result map by this name.
    pub name: String,
    /// The fighter name to use for this step.
    pub fighter_name: String,
    /// Prompt template with variable substitution (`{{...}}` placeholders;
    /// see the module-level docs for the supported variables).
    pub prompt_template: String,
    /// Maximum time in seconds for this step (default 120).
    pub timeout_secs: Option<u64>,
    /// Error handling strategy. Falls back to [`OnError::FailWorkflow`] when
    /// the field is omitted from the serialized form.
    #[serde(default)]
    pub on_error: OnError,
    /// Steps that must complete before this one runs.
    ///
    /// In `execute_dag` a dependency counts as satisfied once it has a
    /// recorded result or was skipped, and is not in the failed set.
    #[serde(default)]
    pub depends_on: Vec<String>,
    /// Optional condition — step is skipped if condition evaluates to false.
    #[serde(default)]
    pub condition: Option<Condition>,
    /// If condition is false, run this step instead (if/else branching).
    ///
    /// NOTE(review): in the visible part of `execute_dag` the else-step name
    /// is carried alongside the skipped result but not acted on — confirm it
    /// is executed elsewhere.
    #[serde(default)]
    pub else_step: Option<String>,
    /// Optional loop configuration.
    #[serde(default)]
    pub loop_config: Option<LoopConfig>,
}
185
186impl DagWorkflowStep {
187    /// Extract the fallback step name from the on_error strategy, if any.
188    pub fn fallback_step(&self) -> Option<String> {
189        match &self.on_error {
190            OnError::Fallback { step } => Some(step.clone()),
191            OnError::CatchAndContinue { error_handler } => Some(error_handler.clone()),
192            _ => None,
193        }
194    }
195}
196
/// A workflow definition composed of sequential steps (legacy).
///
/// See [`DagWorkflow`] for the dependency-driven form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Workflow {
    /// Unique identifier.
    pub id: WorkflowId,
    /// Human-readable name.
    pub name: String,
    /// Ordered steps to execute.
    pub steps: Vec<WorkflowStep>,
}
207
/// A DAG workflow definition with parallel execution support.
///
/// See [`Workflow`] for the legacy sequential form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DagWorkflow {
    /// Unique identifier.
    pub id: WorkflowId,
    /// Human-readable name.
    pub name: String,
    /// DAG steps (order in vec doesn't matter — execution order is determined by dependencies).
    /// Each step lists its prerequisites in [`DagWorkflowStep::depends_on`].
    pub steps: Vec<DagWorkflowStep>,
}
218
/// Status of a workflow run.
///
/// Serialized in snake_case (for example `"partially_completed"`), which
/// matches the labels written by the `Display` impl.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WorkflowRunStatus {
    Pending,
    Running,
    Completed,
    /// Also the status `execute_dag` reports when workflow validation or the
    /// topological sort fails before any step runs.
    Failed,
    /// Some branches succeeded, some failed.
    PartiallyCompleted,
}
230
231impl std::fmt::Display for WorkflowRunStatus {
232    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233        match self {
234            Self::Pending => write!(f, "pending"),
235            Self::Running => write!(f, "running"),
236            Self::Completed => write!(f, "completed"),
237            Self::Failed => write!(f, "failed"),
238            Self::PartiallyCompleted => write!(f, "partially_completed"),
239        }
240    }
241}
242
/// Result of executing a single workflow step.
///
/// `status`, `started_at` and `completed_at` carry serde defaults, so
/// serialized results that lack them still deserialize.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StepResult {
    /// Name of the step.
    pub step_name: String,
    /// The response text from the fighter. This is what `{{name}}` and
    /// `{{name.output}}` template references expand to.
    pub response: String,
    /// Tokens consumed.
    pub tokens_used: u64,
    /// Duration in milliseconds.
    pub duration_ms: u64,
    /// Error message, if any. The DAG executor treats a non-skipped result
    /// with `error` set as a failed step.
    pub error: Option<String>,
    /// Per-step status. Defaults to [`StepStatus::Pending`] when missing
    /// from the serialized form.
    #[serde(default = "default_step_status")]
    pub status: StepStatus,
    /// When the step started executing.
    #[serde(default)]
    pub started_at: Option<DateTime<Utc>>,
    /// When the step finished executing.
    #[serde(default)]
    pub completed_at: Option<DateTime<Utc>>,
}
266
/// Serde default for [`StepResult::status`]: results deserialized without a
/// `status` field are treated as [`StepStatus::Pending`].
fn default_step_status() -> StepStatus {
    StepStatus::Pending
}
270
/// A failed step result stored in the dead letter queue.
///
/// Entries are collected in [`WorkflowRun::dead_letters`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeadLetterEntry {
    /// The step name that failed.
    pub step_name: String,
    /// The error message.
    pub error: String,
    /// The input that was provided to the step.
    pub input: String,
    /// When the failure occurred.
    pub failed_at: DateTime<Utc>,
}
283
/// A single execution of a workflow.
///
/// `dead_letters` and `execution_trace` are `#[serde(default)]`, so serialized
/// runs that lack them deserialize with empty lists.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WorkflowRun {
    /// Unique run identifier.
    pub id: WorkflowRunId,
    /// The workflow that was executed.
    pub workflow_id: WorkflowId,
    /// Current status.
    pub status: WorkflowRunStatus,
    /// Results of each completed step.
    pub step_results: Vec<StepResult>,
    /// When the run started.
    pub started_at: DateTime<Utc>,
    /// When the run completed (or failed).
    pub completed_at: Option<DateTime<Utc>>,
    /// Dead letter queue for failed steps.
    #[serde(default)]
    pub dead_letters: Vec<DeadLetterEntry>,
    /// Execution trace showing which steps ran in parallel.
    #[serde(default)]
    pub execution_trace: Vec<ExecutionTraceEntry>,
}
306
/// An entry in the execution trace showing what happened at each "wave" of execution.
///
/// A wave is the set of steps whose dependencies were all satisfied at the
/// same point of the DAG run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionTraceEntry {
    /// Steps that executed in this wave (parallel batch).
    pub steps: Vec<String>,
    /// When this wave started.
    pub started_at: DateTime<Utc>,
    /// When this wave completed.
    pub completed_at: Option<DateTime<Utc>>,
}
317
318// ---------------------------------------------------------------------------
319// Variable substitution
320// ---------------------------------------------------------------------------
321
322/// Replace template variables in a prompt string (sequential mode).
323///
324/// Supported variables:
325/// - `{{input}}` — the current input (original input or previous step's output)
326/// - `{{previous_output}}` — alias for `{{input}}`
327/// - `{{step_name}}` — the name of the current step
328/// - `{{step_1}}` / `{{step_N}}` — output of step N (1-indexed)
329/// - `{{some_step_name}}` — output of a step referenced by its name
330fn expand_variables(
331    template: &str,
332    current_input: &str,
333    step_name: &str,
334    step_results: &[StepResult],
335) -> String {
336    let mut result = template.to_string();
337
338    // {{input}} and {{previous_output}} both resolve to the current pipeline input
339    result = result.replace("{{input}}", current_input);
340    result = result.replace("{{previous_output}}", current_input);
341
342    // {{step_name}} resolves to the current step's name
343    result = result.replace("{{step_name}}", step_name);
344
345    // {{step_N}} resolves to the output of the Nth step (1-indexed)
346    for (i, sr) in step_results.iter().enumerate() {
347        let var = format!("{{{{step_{}}}}}", i + 1);
348        result = result.replace(&var, &sr.response);
349    }
350
351    // {{step_result_name}} resolves to the output of a step by name
352    for sr in step_results {
353        let var = format!("{{{{{}}}}}", sr.step_name);
354        result = result.replace(&var, &sr.response);
355    }
356
357    result
358}
359
360/// Replace template variables in a prompt string (DAG mode).
361///
362/// Supports all the sequential variables plus:
363/// - `{{step_name.output}}` — explicit output reference
364/// - `{{step_name.status}}` — step status
365/// - `{{step_name.duration_ms}}` — step duration
366/// - `{{loop.index}}` — current loop iteration
367/// - `{{loop.item}}` — current loop item
368/// - `{{step_name.output.field.nested}}` — JSON path
369/// - `{{step_name.output | uppercase}}` — transformations
370pub fn expand_dag_variables(
371    template: &str,
372    current_input: &str,
373    step_name: &str,
374    step_results: &HashMap<String, StepResult>,
375    loop_state: Option<&LoopState>,
376) -> String {
377    let mut result = template.to_string();
378
379    // Basic variables
380    result = result.replace("{{input}}", current_input);
381    result = result.replace("{{previous_output}}", current_input);
382    result = result.replace("{{step_name}}", step_name);
383
384    // Loop variables
385    if let Some(ls) = loop_state {
386        result = result.replace("{{loop.index}}", &ls.index.to_string());
387        if let Some(ref item) = ls.item {
388            result = result.replace("{{loop.item}}", item);
389        }
390    }
391
392    // Process {{name.property}} and {{name.output.path}} patterns
393    // We need to find all {{...}} patterns and resolve them
394    let mut output = String::with_capacity(result.len());
395    let mut remaining = result.as_str();
396
397    while let Some(start) = remaining.find("{{") {
398        output.push_str(&remaining[..start]);
399        let after_start = &remaining[start + 2..];
400        if let Some(end) = after_start.find("}}") {
401            let var_content = &after_start[..end];
402            let resolved = resolve_dag_variable(var_content, step_results);
403            output.push_str(&resolved);
404            remaining = &after_start[end + 2..];
405        } else {
406            output.push_str("{{");
407            remaining = after_start;
408        }
409    }
410    output.push_str(remaining);
411
412    output
413}
414
415/// Resolve a single variable expression like `step_name.output` or `step_name.output | uppercase`.
416fn resolve_dag_variable(var: &str, step_results: &HashMap<String, StepResult>) -> String {
417    // Check for pipe transformation: `expr | transform`
418    let (expr, transform) = if let Some(pipe_pos) = var.find(" | ") {
419        let expr = var[..pipe_pos].trim();
420        let transform = var[pipe_pos + 3..].trim();
421        (expr, Some(transform))
422    } else {
423        (var.trim(), None)
424    };
425
426    // Resolve the expression
427    let value = resolve_dag_expression(expr, step_results);
428
429    // Apply transformation if present
430    match transform {
431        Some("uppercase") => value.to_uppercase(),
432        Some("lowercase") => value.to_lowercase(),
433        Some("trim") => value.trim().to_string(),
434        Some("len") | Some("length") => value.len().to_string(),
435        Some(t) if t.starts_with("json_extract ") => {
436            let path = t
437                .strip_prefix("json_extract ")
438                .unwrap_or("")
439                .trim_matches('"');
440            json_path_extract(&value, path)
441        }
442        _ => value,
443    }
444}
445
446/// Resolve a dotted expression like `step_name.output.field.nested`.
447fn resolve_dag_expression(expr: &str, step_results: &HashMap<String, StepResult>) -> String {
448    let parts: Vec<&str> = expr.splitn(2, '.').collect();
449    if parts.len() < 2 {
450        // Plain step name reference
451        return step_results
452            .get(parts[0])
453            .map(|r| r.response.clone())
454            .unwrap_or_else(|| format!("{{{{{expr}}}}}"));
455    }
456
457    let step_name = parts[0];
458    let property = parts[1];
459
460    let step_result = match step_results.get(step_name) {
461        Some(r) => r,
462        None => return format!("{{{{{expr}}}}}"),
463    };
464
465    match property {
466        "output" => step_result.response.clone(),
467        "status" => step_result.status.to_string(),
468        "duration_ms" => step_result.duration_ms.to_string(),
469        "error" => step_result
470            .error
471            .clone()
472            .unwrap_or_else(|| "none".to_string()),
473        _ if property.starts_with("output.") => {
474            let json_path = property.strip_prefix("output.").unwrap_or("");
475            json_path_extract(&step_result.response, json_path)
476        }
477        _ => format!("{{{{{expr}}}}}"),
478    }
479}
480
481/// Extract a value from a JSON string using a dot-separated path.
482///
483/// Supports paths like `field`, `field.nested`, `$.key` (strips leading `$.`).
484fn json_path_extract(json_str: &str, path: &str) -> String {
485    let path = path.strip_prefix("$.").unwrap_or(path);
486    let parsed: serde_json::Value = match serde_json::from_str(json_str) {
487        Ok(v) => v,
488        Err(_) => return json_str.to_string(),
489    };
490
491    let mut current = &parsed;
492    for segment in path.split('.') {
493        if segment.is_empty() {
494            continue;
495        }
496        match current.get(segment) {
497            Some(v) => current = v,
498            None => return String::new(),
499        }
500    }
501
502    match current {
503        serde_json::Value::String(s) => s.clone(),
504        other => other.to_string(),
505    }
506}
507
508// ---------------------------------------------------------------------------
509// Circuit breaker state
510// ---------------------------------------------------------------------------
511
/// Per-step circuit breaker bookkeeping: a failure streak plus the time of
/// the latest failure.
#[derive(Debug, Clone, Default)]
pub struct CircuitBreakerState {
    /// Length of the current run of failures; reset to zero by a success.
    pub consecutive_failures: usize,
    /// Time of the most recent recorded failure; cleared by a success.
    pub last_trip_time: Option<Instant>,
}

impl CircuitBreakerState {
    /// Report whether the circuit is open, i.e. execution should be blocked.
    ///
    /// The circuit is open when the failure streak has reached `max_failures`
    /// and fewer than `cooldown_secs` whole seconds have passed since the
    /// latest failure. With the streak at the threshold but no failure time
    /// recorded, the circuit counts as open.
    pub fn is_open(&self, max_failures: usize, cooldown_secs: u64) -> bool {
        let threshold_reached = self.consecutive_failures >= max_failures;
        threshold_reached
            && self
                .last_trip_time
                .map_or(true, |tripped_at| tripped_at.elapsed().as_secs() < cooldown_secs)
    }

    /// Record a failure: extend the streak and restart the cooldown clock.
    pub fn record_failure(&mut self) {
        *self = Self {
            consecutive_failures: self.consecutive_failures + 1,
            last_trip_time: Some(Instant::now()),
        };
    }

    /// Record a success: clear both the streak and the failure time.
    pub fn record_success(&mut self) {
        *self = Self::default();
    }
}
546
547// ---------------------------------------------------------------------------
548// DAG Executor (testable without LLM)
549// ---------------------------------------------------------------------------
550
/// A step executor trait that allows testing the DAG engine without real LLM calls.
///
/// `execute_dag` receives the executor as `Arc<dyn StepExecutor>` and clones
/// that handle into every task it spawns for a wave, hence the `Send + Sync`
/// bound.
#[async_trait::async_trait]
pub trait StepExecutor: Send + Sync {
    /// Execute a single step and return its result.
    ///
    /// * `step` — definition of the step to run.
    /// * `input` — the workflow input string.
    /// * `step_results` — results recorded so far, keyed by step name.
    /// * `loop_state` — loop context for the call; the fallback and
    ///   error-handler invocations in `execute_dag` pass `None`.
    ///
    /// Returns the step's [`StepResult`], or `Err` with an error message.
    /// `execute_dag` records an `Err` as a failed step carrying that message.
    async fn execute(
        &self,
        step: &DagWorkflowStep,
        input: &str,
        step_results: &HashMap<String, StepResult>,
        loop_state: Option<&LoopState>,
    ) -> Result<StepResult, String>;
}
563
564/// Execute a DAG workflow using the provided step executor.
565///
566/// This is the core DAG execution engine. Steps with no dependencies (roots) run
567/// first. When a step completes, any step whose dependencies are now all satisfied
568/// is scheduled. Steps with no mutual dependencies run concurrently using
569/// `tokio::task::JoinSet` for true multi-threaded parallelism.
570pub async fn execute_dag(
571    workflow_name: &str,
572    steps: &[DagWorkflowStep],
573    input: &str,
574    executor: Arc<dyn StepExecutor>,
575) -> DagExecutionResult {
576    // Validate first
577    let validation_errors = validate_workflow(steps);
578    if !validation_errors.is_empty() {
579        return DagExecutionResult {
580            status: WorkflowRunStatus::Failed,
581            step_results: HashMap::new(),
582            dead_letters: Vec::new(),
583            execution_trace: Vec::new(),
584            validation_errors,
585        };
586    }
587
588    // Get topological order
589    let topo_order = match topological_sort(steps) {
590        Ok(order) => order,
591        Err(_) => {
592            return DagExecutionResult {
593                status: WorkflowRunStatus::Failed,
594                step_results: HashMap::new(),
595                dead_letters: Vec::new(),
596                execution_trace: Vec::new(),
597                validation_errors: vec![ValidationError::CycleDetected {
598                    steps: steps.iter().map(|s| s.name.clone()).collect(),
599                }],
600            };
601        }
602    };
603
604    let step_map: HashMap<&str, &DagWorkflowStep> =
605        steps.iter().map(|s| (s.name.as_str(), s)).collect();
606
607    let mut completed: HashMap<String, StepResult> = HashMap::new();
608    let mut dead_letters: Vec<DeadLetterEntry> = Vec::new();
609    let mut execution_trace: Vec<ExecutionTraceEntry> = Vec::new();
610    let mut circuit_breakers: HashMap<String, CircuitBreakerState> = HashMap::new();
611    let mut skipped_steps: std::collections::HashSet<String> = std::collections::HashSet::new();
612    let mut failed_steps: std::collections::HashSet<String> = std::collections::HashSet::new();
613
614    // Process in waves: each wave contains steps whose dependencies are all satisfied
615    let mut remaining: Vec<String> = topo_order;
616
617    while !remaining.is_empty() {
618        // Find all steps that can run now (all deps satisfied)
619        let (ready, not_ready): (Vec<String>, Vec<String>) =
620            remaining.into_iter().partition(|name| {
621                let step = match step_map.get(name.as_str()) {
622                    Some(s) => s,
623                    None => return false,
624                };
625                step.depends_on.iter().all(|dep| {
626                    // A dependency is satisfied if it completed (not in failed_steps)
627                    // or was explicitly skipped/handled
628                    let is_done = completed.contains_key(dep) || skipped_steps.contains(dep);
629                    let is_blocking_failure = failed_steps.contains(dep);
630                    is_done && !is_blocking_failure
631                })
632            });
633
634        if ready.is_empty() {
635            // No progress possible — remaining steps have unmet deps (likely due to failures)
636            for name in &not_ready {
637                skipped_steps.insert(name.clone());
638                completed.insert(
639                    name.clone(),
640                    StepResult {
641                        step_name: name.clone(),
642                        response: String::new(),
643                        tokens_used: 0,
644                        duration_ms: 0,
645                        error: Some("cancelled: unmet dependencies".to_string()),
646                        status: StepStatus::Cancelled,
647                        started_at: None,
648                        completed_at: None,
649                    },
650                );
651            }
652            break;
653        }
654
655        remaining = not_ready;
656
657        let wave_start = Utc::now();
658        let wave_step_names: Vec<String> = ready.to_vec();
659
660        // Execute all ready steps concurrently using tokio::task::JoinSet
661        // for true multi-threaded parallelism.
662        let mut wave_results: Vec<(String, Result<StepResult, String>, Option<String>)> =
663            Vec::new();
664        let mut join_set: tokio::task::JoinSet<(
665            String,
666            Result<StepResult, String>,
667            Option<String>,
668        )> = tokio::task::JoinSet::new();
669
670        for step_name in &wave_step_names {
671            let step = match step_map.get(step_name.as_str()) {
672                Some(s) => (*s).clone(),
673                None => continue,
674            };
675
676            // Check condition
677            let should_run = match &step.condition {
678                Some(cond) => evaluate_condition(cond, &completed),
679                None => true,
680            };
681
682            if !should_run {
683                let else_step_name = step.else_step.clone();
684                wave_results.push((
685                    step_name.clone(),
686                    Ok(StepResult {
687                        step_name: step_name.clone(),
688                        response: String::new(),
689                        tokens_used: 0,
690                        duration_ms: 0,
691                        error: None,
692                        status: StepStatus::Skipped,
693                        started_at: Some(Utc::now()),
694                        completed_at: Some(Utc::now()),
695                    }),
696                    else_step_name,
697                ));
698                continue;
699            }
700
701            // Check circuit breaker
702            let cb_state = circuit_breakers
703                .entry(step_name.clone())
704                .or_default()
705                .clone();
706            if let OnError::CircuitBreaker {
707                max_failures,
708                cooldown_secs,
709            } = &step.on_error
710                && cb_state.is_open(*max_failures, *cooldown_secs)
711            {
712                wave_results.push((
713                    step_name.clone(),
714                    Ok(StepResult {
715                        step_name: step_name.clone(),
716                        response: String::new(),
717                        tokens_used: 0,
718                        duration_ms: 0,
719                        error: Some("circuit breaker open".to_string()),
720                        status: StepStatus::Failed,
721                        started_at: Some(Utc::now()),
722                        completed_at: Some(Utc::now()),
723                    }),
724                    None,
725                ));
726                continue;
727            }
728
729            let sn = step_name.clone();
730            let completed_snapshot = completed.clone();
731            let input_clone = input.to_string();
732            let executor_clone = Arc::clone(&executor);
733
734            join_set.spawn(async move {
735                let result = execute_step_with_loops(
736                    &step,
737                    &input_clone,
738                    &completed_snapshot,
739                    executor_clone.as_ref(),
740                )
741                .await;
742                (sn, result, None::<String>)
743            });
744        }
745
746        // Wait for all spawned tasks to complete
747        while let Some(join_result) = join_set.join_next().await {
748            match join_result {
749                Ok(task_result) => wave_results.push(task_result),
750                Err(join_err) => {
751                    // A JoinError means the task panicked or was cancelled
752                    error!(error = %join_err, "spawned step task failed unexpectedly");
753                }
754            }
755        }
756
757        // Process results
758        for (step_name, result, _else_step) in wave_results {
759            match result {
760                Ok(mut step_result) => {
761                    if step_result.status == StepStatus::Skipped {
762                        skipped_steps.insert(step_name.clone());
763                        debug!(step = %step_name, workflow = %workflow_name, "step skipped (condition false)");
764                    } else if step_result.error.is_some() {
765                        failed_steps.insert(step_name.clone());
766                        // Update circuit breaker
767                        circuit_breakers
768                            .entry(step_name.clone())
769                            .or_default()
770                            .record_failure();
771
772                        let step = step_map.get(step_name.as_str());
773                        if let Some(step) = step {
774                            match &step.on_error {
775                                OnError::Fallback { step: fb_step } => {
776                                    // Try to execute fallback
777                                    if let Some(fb) = step_map.get(fb_step.as_str()) {
778                                        let fb_result =
779                                            executor.execute(fb, input, &completed, None).await;
780                                        match fb_result {
781                                            Ok(fb_res) => {
782                                                step_result = fb_res;
783                                                step_result.step_name = step_name.clone();
784                                                failed_steps.remove(&step_name);
785                                            }
786                                            Err(fb_err) => {
787                                                dead_letters.push(DeadLetterEntry {
788                                                    step_name: step_name.clone(),
789                                                    error: fb_err,
790                                                    input: input.to_string(),
791                                                    failed_at: Utc::now(),
792                                                });
793                                            }
794                                        }
795                                    }
796                                }
797                                OnError::CatchAndContinue { error_handler } => {
798                                    // Run the error handler
799                                    if let Some(handler) = step_map.get(error_handler.as_str()) {
800                                        let _ = executor
801                                            .execute(handler, input, &completed, None)
802                                            .await;
803                                    }
804                                    // Continue anyway — mark as handled
805                                    failed_steps.remove(&step_name);
806                                }
807                                OnError::SkipStep => {
808                                    skipped_steps.insert(step_name.clone());
809                                    failed_steps.remove(&step_name);
810                                }
811                                OnError::FailWorkflow => {
812                                    dead_letters.push(DeadLetterEntry {
813                                        step_name: step_name.clone(),
814                                        error: step_result.error.clone().unwrap_or_default(),
815                                        input: input.to_string(),
816                                        failed_at: Utc::now(),
817                                    });
818                                }
819                                _ => {}
820                            }
821                        }
822                    } else {
823                        // Success
824                        circuit_breakers
825                            .entry(step_name.clone())
826                            .or_default()
827                            .record_success();
828                        info!(step = %step_name, workflow = %workflow_name, "DAG step completed");
829                    }
830                    completed.insert(step_name, step_result);
831                }
832                Err(e) => {
833                    failed_steps.insert(step_name.clone());
834                    circuit_breakers
835                        .entry(step_name.clone())
836                        .or_default()
837                        .record_failure();
838
839                    let mut step_result = StepResult {
840                        step_name: step_name.clone(),
841                        response: String::new(),
842                        tokens_used: 0,
843                        duration_ms: 0,
844                        error: Some(e.clone()),
845                        status: StepStatus::Failed,
846                        started_at: Some(Utc::now()),
847                        completed_at: Some(Utc::now()),
848                    };
849
850                    // Try error recovery strategies
851                    let step = step_map.get(step_name.as_str());
852                    if let Some(step) = step {
853                        match &step.on_error {
854                            OnError::Fallback { step: fb_step } => {
855                                if let Some(fb) = step_map.get(fb_step.as_str())
856                                    && let Ok(fb_res) =
857                                        executor.execute(fb, input, &completed, None).await
858                                {
859                                    step_result = fb_res;
860                                    step_result.step_name = step_name.clone();
861                                    step_result.error = None;
862                                    step_result.status = StepStatus::Completed;
863                                    failed_steps.remove(&step_name);
864                                }
865                            }
866                            OnError::CatchAndContinue { error_handler } => {
867                                if let Some(handler) = step_map.get(error_handler.as_str()) {
868                                    let _ =
869                                        executor.execute(handler, input, &completed, None).await;
870                                }
871                                failed_steps.remove(&step_name);
872                            }
873                            OnError::SkipStep => {
874                                step_result.status = StepStatus::Skipped;
875                                skipped_steps.insert(step_name.clone());
876                                failed_steps.remove(&step_name);
877                            }
878                            OnError::FailWorkflow => {
879                                dead_letters.push(DeadLetterEntry {
880                                    step_name: step_name.clone(),
881                                    error: e,
882                                    input: input.to_string(),
883                                    failed_at: Utc::now(),
884                                });
885                            }
886                            _ => {
887                                dead_letters.push(DeadLetterEntry {
888                                    step_name: step_name.clone(),
889                                    error: e,
890                                    input: input.to_string(),
891                                    failed_at: Utc::now(),
892                                });
893                            }
894                        }
895                    } else {
896                        dead_letters.push(DeadLetterEntry {
897                            step_name: step_name.clone(),
898                            error: e,
899                            input: input.to_string(),
900                            failed_at: Utc::now(),
901                        });
902                    }
903
904                    completed.insert(step_name, step_result);
905                }
906            }
907        }
908
909        execution_trace.push(ExecutionTraceEntry {
910            steps: wave_step_names,
911            started_at: wave_start,
912            completed_at: Some(Utc::now()),
913        });
914    }
915
916    // Determine final status
917    let has_failures = completed.values().any(|r| r.status == StepStatus::Failed);
918    let has_successes = completed
919        .values()
920        .any(|r| r.status == StepStatus::Completed);
921
922    let status = if has_failures && has_successes {
923        WorkflowRunStatus::PartiallyCompleted
924    } else if has_failures {
925        WorkflowRunStatus::Failed
926    } else {
927        WorkflowRunStatus::Completed
928    };
929
930    DagExecutionResult {
931        status,
932        step_results: completed,
933        dead_letters,
934        execution_trace,
935        validation_errors: Vec::new(),
936    }
937}
938
939/// Execute a step, handling loop configurations.
940async fn execute_step_with_loops(
941    step: &DagWorkflowStep,
942    input: &str,
943    completed: &HashMap<String, StepResult>,
944    executor: &dyn StepExecutor,
945) -> Result<StepResult, String> {
946    match &step.loop_config {
947        None => executor.execute(step, input, completed, None).await,
948        Some(LoopConfig::ForEach {
949            source_step,
950            max_iterations,
951        }) => {
952            let source_output = completed
953                .get(source_step)
954                .map(|r| r.response.as_str())
955                .unwrap_or("[]");
956            let items = parse_foreach_items(source_output)?;
957            let max = (*max_iterations).min(items.len());
958
959            let mut loop_state = LoopState::new();
960            let start = Utc::now();
961            let instant = Instant::now();
962
963            for (i, item) in items.into_iter().take(max).enumerate() {
964                loop_state.index = i;
965                loop_state.item = Some(item);
966
967                let result = executor
968                    .execute(step, input, completed, Some(&loop_state))
969                    .await;
970
971                match result {
972                    Ok(r) => {
973                        // Check for break/continue signals in output
974                        if r.response.contains("__BREAK__") {
975                            loop_state.push_result(r.response.replace("__BREAK__", ""));
976                            break;
977                        }
978                        if r.response.contains("__CONTINUE__") {
979                            continue;
980                        }
981                        loop_state.push_result(r.response);
982                    }
983                    Err(e) => return Err(e),
984                }
985            }
986
987            let combined = loop_state.accumulated_results.join("\n");
988            Ok(StepResult {
989                step_name: step.name.clone(),
990                response: combined,
991                tokens_used: 0,
992                duration_ms: instant.elapsed().as_millis() as u64,
993                error: None,
994                status: StepStatus::Completed,
995                started_at: Some(start),
996                completed_at: Some(Utc::now()),
997            })
998        }
999        Some(LoopConfig::While {
1000            condition,
1001            max_iterations,
1002        }) => {
1003            let mut loop_state = LoopState::new();
1004            let start = Utc::now();
1005            let instant = Instant::now();
1006
1007            for i in 0..*max_iterations {
1008                // Evaluate the condition with current completed results
1009                // For while loops, we add the accumulated results as a synthetic step
1010                let mut extended = completed.clone();
1011                if !loop_state.accumulated_results.is_empty() {
1012                    extended.insert(
1013                        step.name.clone(),
1014                        StepResult {
1015                            step_name: step.name.clone(),
1016                            response: loop_state
1017                                .accumulated_results
1018                                .last()
1019                                .cloned()
1020                                .unwrap_or_default(),
1021                            tokens_used: 0,
1022                            duration_ms: 0,
1023                            error: None,
1024                            status: StepStatus::Completed,
1025                            started_at: None,
1026                            completed_at: None,
1027                        },
1028                    );
1029                }
1030
1031                if !evaluate_condition(condition, &extended) {
1032                    break;
1033                }
1034
1035                loop_state.index = i;
1036                let result = executor
1037                    .execute(step, input, &extended, Some(&loop_state))
1038                    .await;
1039
1040                match result {
1041                    Ok(r) => {
1042                        if r.response.contains("__BREAK__") {
1043                            loop_state.push_result(r.response.replace("__BREAK__", ""));
1044                            break;
1045                        }
1046                        loop_state.push_result(r.response);
1047                    }
1048                    Err(e) => return Err(e),
1049                }
1050            }
1051
1052            let combined = loop_state.accumulated_results.join("\n");
1053            Ok(StepResult {
1054                step_name: step.name.clone(),
1055                response: combined,
1056                tokens_used: 0,
1057                duration_ms: instant.elapsed().as_millis() as u64,
1058                error: None,
1059                status: StepStatus::Completed,
1060                started_at: Some(start),
1061                completed_at: Some(Utc::now()),
1062            })
1063        }
1064        Some(LoopConfig::Retry {
1065            max_retries,
1066            backoff_ms,
1067            backoff_multiplier,
1068        }) => {
1069            let start = Utc::now();
1070            let instant = Instant::now();
1071            let mut last_error = String::new();
1072
1073            for attempt in 0..=*max_retries {
1074                if attempt > 0 {
1075                    let wait = calculate_backoff(attempt - 1, *backoff_ms, *backoff_multiplier);
1076                    tokio::time::sleep(std::time::Duration::from_millis(wait)).await;
1077                }
1078
1079                match executor.execute(step, input, completed, None).await {
1080                    Ok(r) => return Ok(r),
1081                    Err(e) => {
1082                        last_error = e;
1083                        warn!(step = %step.name, attempt = attempt + 1, "retry attempt failed");
1084                    }
1085                }
1086            }
1087
1088            Ok(StepResult {
1089                step_name: step.name.clone(),
1090                response: String::new(),
1091                tokens_used: 0,
1092                duration_ms: instant.elapsed().as_millis() as u64,
1093                error: Some(last_error),
1094                status: StepStatus::Failed,
1095                started_at: Some(start),
1096                completed_at: Some(Utc::now()),
1097            })
1098        }
1099    }
1100}
1101
/// Result of executing a DAG workflow.
///
/// Bundles the overall outcome with the per-step results and the diagnostic
/// records collected while the DAG ran.
#[derive(Debug, Clone)]
pub struct DagExecutionResult {
    /// Overall workflow status.
    pub status: WorkflowRunStatus,
    /// Per-step results keyed by step name.
    pub step_results: HashMap<String, StepResult>,
    /// Dead letter entries for failed steps: the step name, the error, the
    /// workflow input and the time of failure.
    pub dead_letters: Vec<DeadLetterEntry>,
    /// Execution trace: one entry per executed wave, listing the steps it
    /// contained and when it started and completed.
    pub execution_trace: Vec<ExecutionTraceEntry>,
    /// Validation errors (if any — non-empty means workflow didn't execute).
    pub validation_errors: Vec<ValidationError>,
}
1116
1117// ---------------------------------------------------------------------------
1118// WorkflowEngine
1119// ---------------------------------------------------------------------------
1120
/// Engine for registering and executing multi-step agent workflows.
///
/// All state lives in `DashMap`s, so registration, execution and lookup are
/// available through a shared `&self` reference.
pub struct WorkflowEngine {
    /// Registered workflow definitions (sequential), keyed by workflow ID.
    workflows: DashMap<WorkflowId, Workflow>,
    /// Registered DAG workflow definitions, keyed by workflow ID.
    dag_workflows: DashMap<WorkflowId, DagWorkflow>,
    /// Workflow execution runs, keyed by run ID.
    runs: DashMap<WorkflowRunId, WorkflowRun>,
}
1130
1131impl WorkflowEngine {
1132    /// Create a new workflow engine.
1133    pub fn new() -> Self {
1134        Self {
1135            workflows: DashMap::new(),
1136            dag_workflows: DashMap::new(),
1137            runs: DashMap::new(),
1138        }
1139    }
1140
1141    /// Register a sequential workflow definition and return its ID.
1142    pub fn register_workflow(&self, workflow: Workflow) -> WorkflowId {
1143        let id = workflow.id;
1144        info!(workflow_id = %id, name = %workflow.name, "workflow registered");
1145        self.workflows.insert(id, workflow);
1146        id
1147    }
1148
1149    /// Register a DAG workflow definition and return its ID.
1150    ///
1151    /// Validates the workflow before registering. Returns an error with
1152    /// validation details if the workflow is invalid.
1153    pub fn register_dag_workflow(
1154        &self,
1155        workflow: DagWorkflow,
1156    ) -> Result<WorkflowId, Vec<ValidationError>> {
1157        let errors = validate_workflow(&workflow.steps);
1158        if !errors.is_empty() {
1159            return Err(errors);
1160        }
1161        let id = workflow.id;
1162        info!(workflow_id = %id, name = %workflow.name, "DAG workflow registered");
1163        self.dag_workflows.insert(id, workflow);
1164        Ok(id)
1165    }
1166
1167    /// Execute a sequential workflow with the given input string.
1168    #[instrument(skip(self, input, memory, driver, model_config), fields(%workflow_id))]
1169    pub async fn execute_workflow(
1170        &self,
1171        workflow_id: &WorkflowId,
1172        input: String,
1173        memory: Arc<MemorySubstrate>,
1174        driver: Arc<dyn LlmDriver>,
1175        model_config: &ModelConfig,
1176    ) -> PunchResult<WorkflowRunId> {
1177        let workflow = self
1178            .workflows
1179            .get(workflow_id)
1180            .ok_or_else(|| PunchError::Internal(format!("workflow {} not found", workflow_id)))?
1181            .clone();
1182
1183        let run_id = WorkflowRunId::new();
1184        let run = WorkflowRun {
1185            id: run_id,
1186            workflow_id: *workflow_id,
1187            status: WorkflowRunStatus::Running,
1188            step_results: Vec::new(),
1189            started_at: Utc::now(),
1190            completed_at: None,
1191            dead_letters: Vec::new(),
1192            execution_trace: Vec::new(),
1193        };
1194        self.runs.insert(run_id, run);
1195
1196        let mut current_input = input.clone();
1197        let mut step_results: Vec<StepResult> = Vec::new();
1198        let mut failed = false;
1199
1200        for step in &workflow.steps {
1201            let result = self
1202                .execute_single_step(
1203                    step,
1204                    &workflow.name,
1205                    &current_input,
1206                    &step_results,
1207                    &memory,
1208                    &driver,
1209                    model_config,
1210                )
1211                .await;
1212
1213            match result {
1214                Ok(step_result) => {
1215                    current_input = step_result.response.clone();
1216                    step_results.push(step_result);
1217                }
1218                Err(e) => {
1219                    let error_msg = format!("{e}");
1220                    match step.on_error {
1221                        OnError::SkipStep => {
1222                            warn!(step = %step.name, error = %error_msg, "step failed, skipping");
1223                            let skip_result = StepResult {
1224                                step_name: step.name.clone(),
1225                                response: String::new(),
1226                                tokens_used: 0,
1227                                duration_ms: 0,
1228                                error: Some(error_msg),
1229                                status: StepStatus::Skipped,
1230                                started_at: None,
1231                                completed_at: None,
1232                            };
1233                            step_results.push(skip_result);
1234                            continue;
1235                        }
1236                        OnError::RetryOnce => {
1237                            warn!(step = %step.name, error = %error_msg, "step failed, retrying once");
1238                            let retry_result = self
1239                                .execute_single_step(
1240                                    step,
1241                                    &workflow.name,
1242                                    &current_input,
1243                                    &step_results,
1244                                    &memory,
1245                                    &driver,
1246                                    model_config,
1247                                )
1248                                .await;
1249
1250                            match retry_result {
1251                                Ok(step_result) => {
1252                                    current_input = step_result.response.clone();
1253                                    step_results.push(step_result);
1254                                }
1255                                Err(retry_err) => {
1256                                    error!(step = %step.name, error = %retry_err, "step failed on retry");
1257                                    let fail_result = StepResult {
1258                                        step_name: step.name.clone(),
1259                                        response: String::new(),
1260                                        tokens_used: 0,
1261                                        duration_ms: 0,
1262                                        error: Some(format!("{retry_err}")),
1263                                        status: StepStatus::Failed,
1264                                        started_at: None,
1265                                        completed_at: None,
1266                                    };
1267                                    step_results.push(fail_result);
1268                                    failed = true;
1269                                    break;
1270                                }
1271                            }
1272                        }
1273                        OnError::FailWorkflow => {
1274                            error!(step = %step.name, error = %error_msg, "step failed, aborting workflow");
1275                            let fail_result = StepResult {
1276                                step_name: step.name.clone(),
1277                                response: String::new(),
1278                                tokens_used: 0,
1279                                duration_ms: 0,
1280                                error: Some(error_msg),
1281                                status: StepStatus::Failed,
1282                                started_at: None,
1283                                completed_at: None,
1284                            };
1285                            step_results.push(fail_result);
1286                            failed = true;
1287                            break;
1288                        }
1289                        _ => {
1290                            // Fallback/CatchAndContinue/CircuitBreaker in sequential mode
1291                            // just fail the workflow for now
1292                            let fail_result = StepResult {
1293                                step_name: step.name.clone(),
1294                                response: String::new(),
1295                                tokens_used: 0,
1296                                duration_ms: 0,
1297                                error: Some(error_msg),
1298                                status: StepStatus::Failed,
1299                                started_at: None,
1300                                completed_at: None,
1301                            };
1302                            step_results.push(fail_result);
1303                            failed = true;
1304                            break;
1305                        }
1306                    }
1307                }
1308            }
1309        }
1310
1311        // Update the run with results.
1312        if let Some(mut run) = self.runs.get_mut(&run_id) {
1313            run.step_results = step_results;
1314            run.status = if failed {
1315                WorkflowRunStatus::Failed
1316            } else {
1317                WorkflowRunStatus::Completed
1318            };
1319            run.completed_at = Some(Utc::now());
1320        }
1321
1322        Ok(run_id)
1323    }
1324
1325    /// Execute a single workflow step, creating a temporary fighter and running
1326    /// it through the fighter loop.
1327    #[allow(clippy::too_many_arguments)]
1328    async fn execute_single_step(
1329        &self,
1330        step: &WorkflowStep,
1331        workflow_name: &str,
1332        current_input: &str,
1333        step_results: &[StepResult],
1334        memory: &Arc<MemorySubstrate>,
1335        driver: &Arc<dyn LlmDriver>,
1336        model_config: &ModelConfig,
1337    ) -> PunchResult<StepResult> {
1338        let step_start = Instant::now();
1339        let started_at = Utc::now();
1340
1341        // Substitute variables in the prompt template.
1342        let prompt = expand_variables(
1343            &step.prompt_template,
1344            current_input,
1345            &step.name,
1346            step_results,
1347        );
1348
1349        // Create a temporary fighter for this step.
1350        let fighter_id = FighterId::new();
1351        let fighter_manifest = FighterManifest {
1352            name: step.fighter_name.clone(),
1353            description: format!("Workflow step: {}", step.name),
1354            model: model_config.clone(),
1355            system_prompt: format!(
1356                "You are executing step '{}' of workflow '{}'.",
1357                step.name, workflow_name
1358            ),
1359            capabilities: Vec::new(),
1360            weight_class: WeightClass::Middleweight,
1361            tenant_id: None,
1362        };
1363
1364        // Save the fighter and create a bout.
1365        if let Err(e) = memory
1366            .save_fighter(
1367                &fighter_id,
1368                &fighter_manifest,
1369                punch_types::FighterStatus::Idle,
1370            )
1371            .await
1372        {
1373            error!(error = %e, "failed to persist workflow fighter");
1374        }
1375
1376        let bout_id = memory.create_bout(&fighter_id).await.map_err(|e| {
1377            PunchError::Internal(format!(
1378                "failed to create bout for step '{}': {e}",
1379                step.name
1380            ))
1381        })?;
1382
1383        let available_tools = tools_for_capabilities(&fighter_manifest.capabilities);
1384        let timeout_secs = step.timeout_secs.unwrap_or(120);
1385
1386        let params = FighterLoopParams {
1387            manifest: fighter_manifest,
1388            user_message: prompt,
1389            bout_id,
1390            fighter_id,
1391            memory: Arc::clone(memory),
1392            driver: Arc::clone(driver),
1393            available_tools,
1394            max_iterations: Some(20),
1395            context_window: None,
1396            tool_timeout_secs: Some(timeout_secs),
1397            coordinator: None,
1398            approval_engine: None,
1399            sandbox: None,
1400            mcp_clients: None,
1401        };
1402
1403        let loop_result = tokio::time::timeout(
1404            std::time::Duration::from_secs(timeout_secs),
1405            run_fighter_loop(params),
1406        )
1407        .await;
1408
1409        match loop_result {
1410            Ok(Ok(result)) => {
1411                let step_result = StepResult {
1412                    step_name: step.name.clone(),
1413                    response: result.response,
1414                    tokens_used: result.usage.total(),
1415                    duration_ms: step_start.elapsed().as_millis() as u64,
1416                    error: None,
1417                    status: StepStatus::Completed,
1418                    started_at: Some(started_at),
1419                    completed_at: Some(Utc::now()),
1420                };
1421                info!(step = %step.name, tokens = step_result.tokens_used, "workflow step completed");
1422                Ok(step_result)
1423            }
1424            Ok(Err(e)) => Err(e),
1425            Err(_) => Err(PunchError::Internal(format!(
1426                "step '{}' timed out after {}s",
1427                step.name, timeout_secs
1428            ))),
1429        }
1430    }
1431
1432    /// Get a workflow run by its ID.
1433    pub fn get_run(&self, run_id: &WorkflowRunId) -> Option<WorkflowRun> {
1434        self.runs.get(run_id).map(|r| r.clone())
1435    }
1436
1437    /// List all registered sequential workflows.
1438    pub fn list_workflows(&self) -> Vec<Workflow> {
1439        self.workflows.iter().map(|w| w.value().clone()).collect()
1440    }
1441
1442    /// List all registered DAG workflows.
1443    pub fn list_dag_workflows(&self) -> Vec<DagWorkflow> {
1444        self.dag_workflows
1445            .iter()
1446            .map(|w| w.value().clone())
1447            .collect()
1448    }
1449
1450    /// List all workflow runs.
1451    pub fn list_runs(&self) -> Vec<WorkflowRun> {
1452        self.runs.iter().map(|r| r.value().clone()).collect()
1453    }
1454
1455    /// List workflow runs filtered by workflow ID.
1456    pub fn list_runs_for_workflow(&self, workflow_id: &WorkflowId) -> Vec<WorkflowRun> {
1457        self.runs
1458            .iter()
1459            .filter(|r| r.value().workflow_id == *workflow_id)
1460            .map(|r| r.value().clone())
1461            .collect()
1462    }
1463
1464    /// Get a sequential workflow by its ID.
1465    pub fn get_workflow(&self, id: &WorkflowId) -> Option<Workflow> {
1466        self.workflows.get(id).map(|w| w.clone())
1467    }
1468
1469    /// Get a DAG workflow by its ID.
1470    pub fn get_dag_workflow(&self, id: &WorkflowId) -> Option<DagWorkflow> {
1471        self.dag_workflows.get(id).map(|w| w.clone())
1472    }
1473}
1474
1475impl Default for WorkflowEngine {
1476    fn default() -> Self {
1477        Self::new()
1478    }
1479}
1480
1481// ---------------------------------------------------------------------------
1482// Tests
1483// ---------------------------------------------------------------------------
1484
1485#[cfg(test)]
1486mod tests {
1487    use super::*;
1488    use std::sync::atomic::{AtomicUsize, Ordering};
1489    use std::time::Duration;
1490
    /// A mock step executor for testing.
    ///
    /// Steps listed in `failing_steps` fail with the configured error; every
    /// other step succeeds with its canned response from `responses`, or with
    /// its expanded prompt when no response was configured.
    struct MockExecutor {
        /// Map of step name -> canned response.
        responses: HashMap<String, String>,
        /// Steps that should fail: step name -> error message.
        failing_steps: HashMap<String, String>,
        /// Number of `execute` calls seen per step name.
        execution_counts: DashMap<String, AtomicUsize>,
    }
1500
1501    impl MockExecutor {
1502        fn new() -> Self {
1503            Self {
1504                responses: HashMap::new(),
1505                failing_steps: HashMap::new(),
1506                execution_counts: DashMap::new(),
1507            }
1508        }
1509
1510        fn with_response(mut self, step: &str, response: &str) -> Self {
1511            self.responses
1512                .insert(step.to_string(), response.to_string());
1513            self
1514        }
1515
1516        fn with_failure(mut self, step: &str, error: &str) -> Self {
1517            self.failing_steps
1518                .insert(step.to_string(), error.to_string());
1519            self
1520        }
1521
1522        #[allow(dead_code)]
1523        fn execution_count(&self, step: &str) -> usize {
1524            self.execution_counts
1525                .get(step)
1526                .map(|c| c.load(Ordering::Relaxed))
1527                .unwrap_or(0)
1528        }
1529    }
1530
1531    #[async_trait::async_trait]
1532    impl StepExecutor for MockExecutor {
1533        async fn execute(
1534            &self,
1535            step: &DagWorkflowStep,
1536            input: &str,
1537            step_results: &HashMap<String, StepResult>,
1538            loop_state: Option<&LoopState>,
1539        ) -> Result<StepResult, String> {
1540            // Track execution
1541            self.execution_counts
1542                .entry(step.name.clone())
1543                .or_insert_with(|| AtomicUsize::new(0))
1544                .fetch_add(1, Ordering::Relaxed);
1545
1546            // Check if step should fail
1547            if let Some(err) = self.failing_steps.get(&step.name) {
1548                return Err(err.clone());
1549            }
1550
1551            let prompt = expand_dag_variables(
1552                &step.prompt_template,
1553                input,
1554                &step.name,
1555                step_results,
1556                loop_state,
1557            );
1558
1559            let response = self.responses.get(&step.name).cloned().unwrap_or(prompt);
1560
1561            Ok(StepResult {
1562                step_name: step.name.clone(),
1563                response,
1564                tokens_used: 10,
1565                duration_ms: 5,
1566                error: None,
1567                status: StepStatus::Completed,
1568                started_at: Some(Utc::now()),
1569                completed_at: Some(Utc::now()),
1570            })
1571        }
1572    }
1573
    /// A mock executor that adds a delay to simulate real execution time.
    ///
    /// Every step succeeds after the delay with the response `done-<step name>`.
    struct TimedMockExecutor {
        /// Time to sleep before each step completes, in milliseconds.
        delay_ms: u64,
    }
1578
1579    #[async_trait::async_trait]
1580    impl StepExecutor for TimedMockExecutor {
1581        async fn execute(
1582            &self,
1583            step: &DagWorkflowStep,
1584            _input: &str,
1585            _step_results: &HashMap<String, StepResult>,
1586            _loop_state: Option<&LoopState>,
1587        ) -> Result<StepResult, String> {
1588            tokio::time::sleep(Duration::from_millis(self.delay_ms)).await;
1589            Ok(StepResult {
1590                step_name: step.name.clone(),
1591                response: format!("done-{}", step.name),
1592                tokens_used: 10,
1593                duration_ms: self.delay_ms,
1594                error: None,
1595                status: StepStatus::Completed,
1596                started_at: Some(Utc::now()),
1597                completed_at: Some(Utc::now()),
1598            })
1599        }
1600    }
1601
1602    /// A mock executor that fails the first N attempts for a step.
1603    struct FailNTimesMockExecutor {
1604        fail_count: usize,
1605        attempts: DashMap<String, AtomicUsize>,
1606    }
1607
1608    impl FailNTimesMockExecutor {
1609        fn new(fail_count: usize) -> Self {
1610            Self {
1611                fail_count,
1612                attempts: DashMap::new(),
1613            }
1614        }
1615    }
1616
1617    #[async_trait::async_trait]
1618    impl StepExecutor for FailNTimesMockExecutor {
1619        async fn execute(
1620            &self,
1621            step: &DagWorkflowStep,
1622            _input: &str,
1623            _step_results: &HashMap<String, StepResult>,
1624            _loop_state: Option<&LoopState>,
1625        ) -> Result<StepResult, String> {
1626            let attempt = self
1627                .attempts
1628                .entry(step.name.clone())
1629                .or_insert_with(|| AtomicUsize::new(0))
1630                .fetch_add(1, Ordering::Relaxed);
1631
1632            if attempt < self.fail_count {
1633                return Err(format!("failure attempt {}", attempt + 1));
1634            }
1635
1636            Ok(StepResult {
1637                step_name: step.name.clone(),
1638                response: format!("success on attempt {}", attempt + 1),
1639                tokens_used: 10,
1640                duration_ms: 5,
1641                error: None,
1642                status: StepStatus::Completed,
1643                started_at: Some(Utc::now()),
1644                completed_at: Some(Utc::now()),
1645            })
1646        }
1647    }
1648
1649    fn dag_step(name: &str, deps: &[&str]) -> DagWorkflowStep {
1650        DagWorkflowStep {
1651            name: name.to_string(),
1652            fighter_name: "test".to_string(),
1653            prompt_template: "{{input}}".to_string(),
1654            timeout_secs: None,
1655            on_error: OnError::FailWorkflow,
1656            depends_on: deps.iter().map(|d| d.to_string()).collect(),
1657            condition: None,
1658            else_step: None,
1659            loop_config: None,
1660        }
1661    }
1662
1663    // ---- Existing sequential tests (preserved) ----
1664
1665    #[test]
1666    fn register_and_list_workflows() {
1667        let engine = WorkflowEngine::new();
1668
1669        let workflow = Workflow {
1670            id: WorkflowId::new(),
1671            name: "test-workflow".to_string(),
1672            steps: vec![
1673                WorkflowStep {
1674                    name: "step1".to_string(),
1675                    fighter_name: "analyzer".to_string(),
1676                    prompt_template: "Analyze: {{input}}".to_string(),
1677                    timeout_secs: None,
1678                    on_error: OnError::FailWorkflow,
1679                },
1680                WorkflowStep {
1681                    name: "step2".to_string(),
1682                    fighter_name: "summarizer".to_string(),
1683                    prompt_template: "Summarize the analysis: {{step1}}".to_string(),
1684                    timeout_secs: Some(60),
1685                    on_error: OnError::SkipStep,
1686                },
1687            ],
1688        };
1689
1690        let id = engine.register_workflow(workflow);
1691        let workflows = engine.list_workflows();
1692        assert_eq!(workflows.len(), 1);
1693        assert_eq!(workflows[0].name, "test-workflow");
1694        assert_eq!(workflows[0].steps.len(), 2);
1695
1696        let fetched = engine.get_workflow(&id).expect("workflow should exist");
1697        assert_eq!(fetched.name, "test-workflow");
1698    }
1699
1700    #[test]
1701    fn variable_substitution_basic() {
1702        let result = expand_variables(
1703            "Analyze {{input}} for step {{step_name}}",
1704            "hello world",
1705            "analysis",
1706            &[],
1707        );
1708        assert_eq!(result, "Analyze hello world for step analysis");
1709    }
1710
1711    #[test]
1712    fn variable_substitution_previous_output() {
1713        let result = expand_variables(
1714            "Continue from: {{previous_output}}",
1715            "step 1 output",
1716            "step2",
1717            &[],
1718        );
1719        assert_eq!(result, "Continue from: step 1 output");
1720    }
1721
1722    #[test]
1723    fn variable_substitution_step_refs() {
1724        let step_results = vec![
1725            StepResult {
1726                step_name: "analyze".to_string(),
1727                response: "analysis result".to_string(),
1728                tokens_used: 100,
1729                duration_ms: 500,
1730                error: None,
1731                status: StepStatus::Completed,
1732                started_at: None,
1733                completed_at: None,
1734            },
1735            StepResult {
1736                step_name: "review".to_string(),
1737                response: "review result".to_string(),
1738                tokens_used: 80,
1739                duration_ms: 400,
1740                error: None,
1741                status: StepStatus::Completed,
1742                started_at: None,
1743                completed_at: None,
1744            },
1745        ];
1746
1747        let result = expand_variables(
1748            "Step 1 said: {{step_1}}, Step 2 said: {{step_2}}",
1749            "current",
1750            "step3",
1751            &step_results,
1752        );
1753        assert_eq!(
1754            result,
1755            "Step 1 said: analysis result, Step 2 said: review result"
1756        );
1757
1758        let result = expand_variables(
1759            "Analysis: {{analyze}}, Review: {{review}}",
1760            "current",
1761            "step3",
1762            &step_results,
1763        );
1764        assert_eq!(result, "Analysis: analysis result, Review: review result");
1765    }
1766
1767    #[test]
1768    fn workflow_run_status_display() {
1769        assert_eq!(WorkflowRunStatus::Pending.to_string(), "pending");
1770        assert_eq!(WorkflowRunStatus::Running.to_string(), "running");
1771        assert_eq!(WorkflowRunStatus::Completed.to_string(), "completed");
1772        assert_eq!(WorkflowRunStatus::Failed.to_string(), "failed");
1773        assert_eq!(
1774            WorkflowRunStatus::PartiallyCompleted.to_string(),
1775            "partially_completed"
1776        );
1777    }
1778
1779    #[test]
1780    fn get_nonexistent_run_returns_none() {
1781        let engine = WorkflowEngine::new();
1782        let run_id = WorkflowRunId::new();
1783        assert!(engine.get_run(&run_id).is_none());
1784    }
1785
1786    #[test]
1787    fn get_nonexistent_workflow_returns_none() {
1788        let engine = WorkflowEngine::new();
1789        let id = WorkflowId::new();
1790        assert!(engine.get_workflow(&id).is_none());
1791    }
1792
1793    #[test]
1794    fn workflow_engine_default() {
1795        let engine = WorkflowEngine::default();
1796        assert!(engine.list_workflows().is_empty());
1797        assert!(engine.list_runs().is_empty());
1798    }
1799
1800    #[test]
1801    fn register_multiple_workflows() {
1802        let engine = WorkflowEngine::new();
1803
1804        for i in 0..5 {
1805            let workflow = Workflow {
1806                id: WorkflowId::new(),
1807                name: format!("workflow-{}", i),
1808                steps: vec![],
1809            };
1810            engine.register_workflow(workflow);
1811        }
1812
1813        assert_eq!(engine.list_workflows().len(), 5);
1814    }
1815
1816    #[test]
1817    fn register_workflow_returns_correct_id() {
1818        let engine = WorkflowEngine::new();
1819        let wf_id = WorkflowId::new();
1820        let workflow = Workflow {
1821            id: wf_id,
1822            name: "id-test".to_string(),
1823            steps: vec![],
1824        };
1825        let returned_id = engine.register_workflow(workflow);
1826        assert_eq!(returned_id, wf_id);
1827    }
1828
1829    #[test]
1830    fn workflow_id_display() {
1831        let id = WorkflowId::new();
1832        let s = format!("{}", id);
1833        assert!(!s.is_empty());
1834    }
1835
1836    #[test]
1837    fn workflow_run_id_display() {
1838        let id = WorkflowRunId::new();
1839        let s = format!("{}", id);
1840        assert!(!s.is_empty());
1841    }
1842
1843    #[test]
1844    fn workflow_id_default() {
1845        let id = WorkflowId::default();
1846        assert!(!id.0.is_nil());
1847    }
1848
1849    #[test]
1850    fn workflow_run_id_default() {
1851        let id = WorkflowRunId::default();
1852        assert!(!id.0.is_nil());
1853    }
1854
1855    #[test]
1856    fn variable_substitution_no_variables() {
1857        let result = expand_variables("plain text with no vars", "input", "step", &[]);
1858        assert_eq!(result, "plain text with no vars");
1859    }
1860
1861    #[test]
1862    fn variable_substitution_all_variables_at_once() {
1863        let step_results = vec![StepResult {
1864            step_name: "analysis".to_string(),
1865            response: "analyzed data".to_string(),
1866            tokens_used: 50,
1867            duration_ms: 100,
1868            error: None,
1869            status: StepStatus::Completed,
1870            started_at: None,
1871            completed_at: None,
1872        }];
1873
1874        let result = expand_variables(
1875            "Input: {{input}}, Prev: {{previous_output}}, Step: {{step_name}}, S1: {{step_1}}, Named: {{analysis}}",
1876            "my input",
1877            "current_step",
1878            &step_results,
1879        );
1880        assert_eq!(
1881            result,
1882            "Input: my input, Prev: my input, Step: current_step, S1: analyzed data, Named: analyzed data"
1883        );
1884    }
1885
1886    #[test]
1887    fn variable_substitution_empty_input() {
1888        let result = expand_variables("{{input}} is here", "", "step", &[]);
1889        assert_eq!(result, " is here");
1890    }
1891
1892    #[test]
1893    fn variable_substitution_multiple_same_var() {
1894        let result = expand_variables("{{input}} and {{input}} again", "hello", "step", &[]);
1895        assert_eq!(result, "hello and hello again");
1896    }
1897
1898    #[test]
1899    fn on_error_default_is_fail_workflow() {
1900        let on_error = OnError::default();
1901        assert!(matches!(on_error, OnError::FailWorkflow));
1902    }
1903
1904    #[test]
1905    fn list_runs_for_workflow_filters_correctly() {
1906        let engine = WorkflowEngine::new();
1907        let wf_id_1 = WorkflowId::new();
1908        let wf_id_2 = WorkflowId::new();
1909
1910        assert!(engine.list_runs_for_workflow(&wf_id_1).is_empty());
1911        assert!(engine.list_runs_for_workflow(&wf_id_2).is_empty());
1912    }
1913
1914    #[test]
1915    fn workflow_step_serialization() {
1916        let step = WorkflowStep {
1917            name: "test".to_string(),
1918            fighter_name: "fighter".to_string(),
1919            prompt_template: "Do {{input}}".to_string(),
1920            timeout_secs: Some(30),
1921            on_error: OnError::SkipStep,
1922        };
1923        let json = serde_json::to_string(&step).expect("serialize");
1924        let deserialized: WorkflowStep = serde_json::from_str(&json).expect("deserialize");
1925        assert_eq!(deserialized.name, "test");
1926        assert_eq!(deserialized.timeout_secs, Some(30));
1927    }
1928
1929    #[test]
1930    fn workflow_serialization_roundtrip() {
1931        let workflow = Workflow {
1932            id: WorkflowId::new(),
1933            name: "roundtrip".to_string(),
1934            steps: vec![WorkflowStep {
1935                name: "s1".to_string(),
1936                fighter_name: "f1".to_string(),
1937                prompt_template: "{{input}}".to_string(),
1938                timeout_secs: None,
1939                on_error: OnError::RetryOnce,
1940            }],
1941        };
1942        let json = serde_json::to_string(&workflow).expect("serialize");
1943        let deserialized: Workflow = serde_json::from_str(&json).expect("deserialize");
1944        assert_eq!(deserialized.name, "roundtrip");
1945        assert_eq!(deserialized.steps.len(), 1);
1946    }
1947
1948    #[test]
1949    fn step_result_with_error() {
1950        let sr = StepResult {
1951            step_name: "failing".to_string(),
1952            response: String::new(),
1953            tokens_used: 0,
1954            duration_ms: 0,
1955            error: Some("timeout".to_string()),
1956            status: StepStatus::Failed,
1957            started_at: None,
1958            completed_at: None,
1959        };
1960        assert!(sr.error.is_some());
1961        assert_eq!(sr.error.expect("error"), "timeout");
1962    }
1963
1964    #[test]
1965    fn variable_substitution_step_ref_by_number_out_of_range() {
1966        let step_results = vec![
1967            StepResult {
1968                step_name: "a".to_string(),
1969                response: "r1".to_string(),
1970                tokens_used: 0,
1971                duration_ms: 0,
1972                error: None,
1973                status: StepStatus::Completed,
1974                started_at: None,
1975                completed_at: None,
1976            },
1977            StepResult {
1978                step_name: "b".to_string(),
1979                response: "r2".to_string(),
1980                tokens_used: 0,
1981                duration_ms: 0,
1982                error: None,
1983                status: StepStatus::Completed,
1984                started_at: None,
1985                completed_at: None,
1986            },
1987        ];
1988        let result = expand_variables("{{step_5}}", "input", "step", &step_results);
1989        assert_eq!(result, "{{step_5}}");
1990    }
1991
1992    // ---- New DAG tests ----
1993
1994    #[tokio::test]
1995    async fn dag_linear_execution() {
1996        let steps = vec![
1997            dag_step("a", &[]),
1998            dag_step("b", &["a"]),
1999            dag_step("c", &["b"]),
2000        ];
2001        let executor = MockExecutor::new()
2002            .with_response("a", "result_a")
2003            .with_response("b", "result_b")
2004            .with_response("c", "result_c");
2005
2006        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2007        assert_eq!(result.status, WorkflowRunStatus::Completed);
2008        assert_eq!(result.step_results.len(), 3);
2009        assert_eq!(result.step_results["a"].response, "result_a");
2010        assert_eq!(result.step_results["b"].response, "result_b");
2011        assert_eq!(result.step_results["c"].response, "result_c");
2012    }
2013
2014    #[tokio::test]
2015    async fn dag_fan_out_execution() {
2016        let steps = vec![
2017            dag_step("root", &[]),
2018            dag_step("branch1", &["root"]),
2019            dag_step("branch2", &["root"]),
2020            dag_step("branch3", &["root"]),
2021        ];
2022        let executor = MockExecutor::new()
2023            .with_response("root", "root_out")
2024            .with_response("branch1", "b1_out")
2025            .with_response("branch2", "b2_out")
2026            .with_response("branch3", "b3_out");
2027
2028        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2029        assert_eq!(result.status, WorkflowRunStatus::Completed);
2030        assert_eq!(result.step_results.len(), 4);
2031        // All branches should have completed
2032        assert_eq!(result.step_results["branch1"].response, "b1_out");
2033        assert_eq!(result.step_results["branch2"].response, "b2_out");
2034        assert_eq!(result.step_results["branch3"].response, "b3_out");
2035    }
2036
2037    #[tokio::test]
2038    async fn dag_fan_in_execution() {
2039        let steps = vec![
2040            dag_step("a", &[]),
2041            dag_step("b", &[]),
2042            dag_step("c", &[]),
2043            dag_step("join", &["a", "b", "c"]),
2044        ];
2045        let executor = MockExecutor::new()
2046            .with_response("a", "ra")
2047            .with_response("b", "rb")
2048            .with_response("c", "rc")
2049            .with_response("join", "joined");
2050
2051        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2052        assert_eq!(result.status, WorkflowRunStatus::Completed);
2053        assert_eq!(result.step_results["join"].response, "joined");
2054        // a, b, c should have run in the same wave (first trace entry)
2055        assert_eq!(result.execution_trace.len(), 2);
2056        let first_wave = &result.execution_trace[0].steps;
2057        assert!(first_wave.contains(&"a".to_string()));
2058        assert!(first_wave.contains(&"b".to_string()));
2059        assert!(first_wave.contains(&"c".to_string()));
2060    }
2061
2062    #[tokio::test]
2063    async fn dag_diamond_execution() {
2064        let steps = vec![
2065            dag_step("root", &[]),
2066            dag_step("left", &["root"]),
2067            dag_step("right", &["root"]),
2068            dag_step("join", &["left", "right"]),
2069        ];
2070        let executor = MockExecutor::new()
2071            .with_response("root", "root_out")
2072            .with_response("left", "left_out")
2073            .with_response("right", "right_out")
2074            .with_response("join", "joined");
2075
2076        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2077        assert_eq!(result.status, WorkflowRunStatus::Completed);
2078        assert_eq!(result.step_results.len(), 4);
2079        // left and right should be in same wave
2080        let wave2 = &result.execution_trace[1].steps;
2081        assert!(wave2.contains(&"left".to_string()));
2082        assert!(wave2.contains(&"right".to_string()));
2083    }
2084
2085    #[tokio::test]
2086    async fn dag_parallel_actually_concurrent() {
2087        // Steps a, b, c have no deps, each takes 50ms.
2088        // If run sequentially: ~150ms. If parallel: ~50ms.
2089        let steps = vec![dag_step("a", &[]), dag_step("b", &[]), dag_step("c", &[])];
2090        let executor = TimedMockExecutor { delay_ms: 50 };
2091
2092        let start = Instant::now();
2093        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2094        let elapsed = start.elapsed();
2095
2096        assert_eq!(result.status, WorkflowRunStatus::Completed);
2097        assert_eq!(result.step_results.len(), 3);
2098        // Should complete in roughly 50ms (parallel), not 150ms (sequential)
2099        // Use generous bound to avoid flakiness
2100        assert!(
2101            elapsed.as_millis() < 120,
2102            "parallel execution took {}ms, expected ~50ms",
2103            elapsed.as_millis()
2104        );
2105    }
2106
2107    #[tokio::test]
2108    async fn dag_condition_if_success() {
2109        let mut steps = vec![dag_step("a", &[]), dag_step("b", &["a"])];
2110        steps[1].condition = Some(Condition::IfSuccess {
2111            step: "a".to_string(),
2112        });
2113        let executor = MockExecutor::new()
2114            .with_response("a", "ok")
2115            .with_response("b", "b_ran");
2116
2117        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2118        assert_eq!(result.step_results["b"].status, StepStatus::Completed);
2119        assert_eq!(result.step_results["b"].response, "b_ran");
2120    }
2121
2122    #[tokio::test]
2123    async fn dag_condition_skips_step() {
2124        let mut steps = vec![dag_step("a", &[]), dag_step("b", &["a"])];
2125        steps[1].condition = Some(Condition::IfFailure {
2126            step: "a".to_string(),
2127        });
2128        let executor = MockExecutor::new()
2129            .with_response("a", "ok")
2130            .with_response("b", "should_not_run");
2131
2132        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2133        assert_eq!(result.step_results["b"].status, StepStatus::Skipped);
2134    }
2135
2136    #[tokio::test]
2137    async fn dag_condition_if_output() {
2138        let mut steps = vec![dag_step("a", &[]), dag_step("b", &["a"])];
2139        steps[1].condition = Some(Condition::IfOutput {
2140            step: "a".to_string(),
2141            contains: "magic".to_string(),
2142        });
2143        let executor = MockExecutor::new()
2144            .with_response("a", "this has magic inside")
2145            .with_response("b", "b_ran");
2146
2147        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2148        assert_eq!(result.step_results["b"].status, StepStatus::Completed);
2149    }
2150
2151    #[tokio::test]
2152    async fn dag_condition_if_output_no_match() {
2153        let mut steps = vec![dag_step("a", &[]), dag_step("b", &["a"])];
2154        steps[1].condition = Some(Condition::IfOutput {
2155            step: "a".to_string(),
2156            contains: "magic".to_string(),
2157        });
2158        let executor = MockExecutor::new()
2159            .with_response("a", "no special word here")
2160            .with_response("b", "should_not_run");
2161
2162        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2163        assert_eq!(result.step_results["b"].status, StepStatus::Skipped);
2164    }
2165
2166    #[tokio::test]
2167    async fn dag_foreach_loop() {
2168        let mut steps = vec![dag_step("source", &[]), dag_step("process", &["source"])];
2169        steps[0].prompt_template = "{{input}}".to_string();
2170        steps[1].loop_config = Some(LoopConfig::ForEach {
2171            source_step: "source".to_string(),
2172            max_iterations: 100,
2173        });
2174        steps[1].prompt_template = "process item: {{loop.item}}".to_string();
2175
2176        let executor =
2177            MockExecutor::new().with_response("source", r#"["apple", "banana", "cherry"]"#);
2178
2179        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2180        assert_eq!(result.status, WorkflowRunStatus::Completed);
2181        let process_result = &result.step_results["process"];
2182        // Should have processed all 3 items
2183        assert!(
2184            process_result.response.contains("process item: apple"),
2185            "response: {}",
2186            process_result.response
2187        );
2188    }
2189
2190    #[tokio::test]
2191    async fn dag_while_loop() {
2192        let mut steps = vec![dag_step("counter", &[])];
2193        steps[0].loop_config = Some(LoopConfig::While {
2194            condition: Condition::Expression("true".to_string()),
2195            max_iterations: 5,
2196        });
2197
2198        let executor = MockExecutor::new().with_response("counter", "tick");
2199
2200        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2201        assert_eq!(result.status, WorkflowRunStatus::Completed);
2202        let counter_result = &result.step_results["counter"];
2203        // Should have 5 "tick" entries
2204        let ticks: Vec<&str> = counter_result.response.split('\n').collect();
2205        assert_eq!(ticks.len(), 5);
2206    }
2207
2208    #[tokio::test]
2209    async fn dag_retry_loop_succeeds_eventually() {
2210        let mut steps = vec![dag_step("flaky", &[])];
2211        steps[0].loop_config = Some(LoopConfig::Retry {
2212            max_retries: 3,
2213            backoff_ms: 1, // minimal backoff for tests
2214            backoff_multiplier: 1.0,
2215        });
2216
2217        // Fails first 2 times, succeeds on 3rd
2218        let executor = FailNTimesMockExecutor::new(2);
2219
2220        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2221        assert_eq!(result.status, WorkflowRunStatus::Completed);
2222        assert!(result.step_results["flaky"].error.is_none());
2223        assert!(
2224            result.step_results["flaky"]
2225                .response
2226                .contains("success on attempt 3")
2227        );
2228    }
2229
2230    #[tokio::test]
2231    async fn dag_retry_loop_exhausts_retries() {
2232        let mut steps = vec![dag_step("flaky", &[])];
2233        steps[0].loop_config = Some(LoopConfig::Retry {
2234            max_retries: 2,
2235            backoff_ms: 1,
2236            backoff_multiplier: 1.0,
2237        });
2238
2239        // Fails all attempts (need 4 failures to exhaust 1 attempt + 2 retries + 1 more)
2240        let executor = FailNTimesMockExecutor::new(10);
2241
2242        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2243        assert!(result.step_results["flaky"].error.is_some());
2244    }
2245
2246    #[tokio::test]
2247    async fn dag_step_failure_with_skip() {
2248        let mut steps = vec![
2249            dag_step("a", &[]),
2250            dag_step("b", &["a"]),
2251            dag_step("c", &["b"]),
2252        ];
2253        steps[1].on_error = OnError::SkipStep;
2254
2255        let executor = MockExecutor::new()
2256            .with_response("a", "ok")
2257            .with_failure("b", "b failed")
2258            .with_response("c", "c_ran");
2259
2260        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2261        // b failed but was skipped, c should still run
2262        // since b is in step_results (as skipped/failed), c's deps are met
2263        assert!(result.step_results.contains_key("c"));
2264    }
2265
2266    #[tokio::test]
2267    async fn dag_step_failure_cascades() {
2268        let steps = vec![
2269            dag_step("a", &[]),
2270            dag_step("b", &["a"]),
2271            dag_step("c", &["b"]),
2272        ];
2273
2274        let executor = MockExecutor::new()
2275            .with_response("a", "ok")
2276            .with_failure("b", "b failed")
2277            .with_response("c", "should_not_run");
2278
2279        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2280        assert!(result.step_results["b"].error.is_some());
2281        // c should be cancelled since b failed (FailWorkflow is default)
2282        assert_eq!(result.step_results["c"].status, StepStatus::Cancelled);
2283    }
2284
2285    #[tokio::test]
2286    async fn dag_empty_workflow() {
2287        let executor = MockExecutor::new();
2288        let result = execute_dag("test", &[], "input", Arc::new(executor)).await;
2289        assert_eq!(result.status, WorkflowRunStatus::Failed);
2290        assert!(!result.validation_errors.is_empty());
2291    }
2292
2293    #[tokio::test]
2294    async fn dag_single_step() {
2295        let steps = vec![dag_step("only", &[])];
2296        let executor = MockExecutor::new().with_response("only", "done");
2297
2298        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2299        assert_eq!(result.status, WorkflowRunStatus::Completed);
2300        assert_eq!(result.step_results.len(), 1);
2301        assert_eq!(result.step_results["only"].response, "done");
2302    }
2303
2304    #[tokio::test]
2305    async fn dag_all_steps_fail() {
2306        let steps = vec![dag_step("a", &[]), dag_step("b", &[])];
2307
2308        let executor = MockExecutor::new()
2309            .with_failure("a", "a failed")
2310            .with_failure("b", "b failed");
2311
2312        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2313        assert_eq!(result.status, WorkflowRunStatus::Failed);
2314        assert!(!result.dead_letters.is_empty());
2315    }
2316
2317    #[tokio::test]
2318    async fn dag_partial_completion() {
2319        let steps = vec![dag_step("good", &[]), dag_step("bad", &[])];
2320
2321        let executor = MockExecutor::new()
2322            .with_response("good", "ok")
2323            .with_failure("bad", "nope");
2324
2325        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2326        assert_eq!(result.status, WorkflowRunStatus::PartiallyCompleted);
2327    }
2328
2329    #[tokio::test]
2330    async fn dag_validation_rejects_cycle() {
2331        let steps = vec![dag_step("a", &["b"]), dag_step("b", &["a"])];
2332        let executor = MockExecutor::new();
2333        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2334        assert_eq!(result.status, WorkflowRunStatus::Failed);
2335        assert!(!result.validation_errors.is_empty());
2336    }
2337
2338    #[tokio::test]
2339    async fn dag_all_steps_skipped() {
2340        let mut steps = vec![dag_step("a", &[]), dag_step("b", &[])];
2341        steps[0].condition = Some(Condition::Expression("false".to_string()));
2342        steps[1].condition = Some(Condition::Expression("false".to_string()));
2343
2344        let executor = MockExecutor::new();
2345        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2346        // All skipped = no failures, no successes -> Completed
2347        assert_eq!(result.status, WorkflowRunStatus::Completed);
2348        assert_eq!(result.step_results["a"].status, StepStatus::Skipped);
2349        assert_eq!(result.step_results["b"].status, StepStatus::Skipped);
2350    }
2351
2352    // ---- DAG variable substitution tests ----
2353
2354    #[test]
2355    fn dag_variables_step_output() {
2356        let mut results = HashMap::new();
2357        results.insert(
2358            "analyze".to_string(),
2359            StepResult {
2360                step_name: "analyze".to_string(),
2361                response: "found 3 bugs".to_string(),
2362                tokens_used: 100,
2363                duration_ms: 500,
2364                error: None,
2365                status: StepStatus::Completed,
2366                started_at: None,
2367                completed_at: None,
2368            },
2369        );
2370
2371        let expanded = expand_dag_variables(
2372            "Result: {{analyze.output}}",
2373            "input",
2374            "next",
2375            &results,
2376            None,
2377        );
2378        assert_eq!(expanded, "Result: found 3 bugs");
2379    }
2380
2381    #[test]
2382    fn dag_variables_step_status() {
2383        let mut results = HashMap::new();
2384        results.insert(
2385            "build".to_string(),
2386            StepResult {
2387                step_name: "build".to_string(),
2388                response: "ok".to_string(),
2389                tokens_used: 50,
2390                duration_ms: 300,
2391                error: None,
2392                status: StepStatus::Completed,
2393                started_at: None,
2394                completed_at: None,
2395            },
2396        );
2397
2398        let expanded = expand_dag_variables(
2399            "Build status: {{build.status}}",
2400            "input",
2401            "deploy",
2402            &results,
2403            None,
2404        );
2405        assert_eq!(expanded, "Build status: completed");
2406    }
2407
2408    #[test]
2409    fn dag_variables_step_duration() {
2410        let mut results = HashMap::new();
2411        results.insert(
2412            "fetch".to_string(),
2413            StepResult {
2414                step_name: "fetch".to_string(),
2415                response: "data".to_string(),
2416                tokens_used: 10,
2417                duration_ms: 1234,
2418                error: None,
2419                status: StepStatus::Completed,
2420                started_at: None,
2421                completed_at: None,
2422            },
2423        );
2424
2425        let expanded = expand_dag_variables(
2426            "Fetch took {{fetch.duration_ms}}ms",
2427            "input",
2428            "next",
2429            &results,
2430            None,
2431        );
2432        assert_eq!(expanded, "Fetch took 1234ms");
2433    }
2434
2435    #[test]
2436    fn dag_variables_loop_state() {
2437        let results = HashMap::new();
2438        let mut loop_state = LoopState::new();
2439        loop_state.index = 2;
2440        loop_state.item = Some("banana".to_string());
2441
2442        let expanded = expand_dag_variables(
2443            "Item {{loop.index}}: {{loop.item}}",
2444            "input",
2445            "process",
2446            &results,
2447            Some(&loop_state),
2448        );
2449        assert_eq!(expanded, "Item 2: banana");
2450    }
2451
2452    #[test]
2453    fn dag_variables_json_path() {
2454        let mut results = HashMap::new();
2455        results.insert(
2456            "api".to_string(),
2457            StepResult {
2458                step_name: "api".to_string(),
2459                response: r#"{"user": {"name": "Alice", "age": 30}}"#.to_string(),
2460                tokens_used: 10,
2461                duration_ms: 100,
2462                error: None,
2463                status: StepStatus::Completed,
2464                started_at: None,
2465                completed_at: None,
2466            },
2467        );
2468
2469        let expanded = expand_dag_variables(
2470            "Name: {{api.output.user.name}}",
2471            "input",
2472            "next",
2473            &results,
2474            None,
2475        );
2476        assert_eq!(expanded, "Name: Alice");
2477    }
2478
2479    #[test]
2480    fn dag_variables_transform_uppercase() {
2481        let mut results = HashMap::new();
2482        results.insert(
2483            "greet".to_string(),
2484            StepResult {
2485                step_name: "greet".to_string(),
2486                response: "hello world".to_string(),
2487                tokens_used: 10,
2488                duration_ms: 50,
2489                error: None,
2490                status: StepStatus::Completed,
2491                started_at: None,
2492                completed_at: None,
2493            },
2494        );
2495
2496        let expanded = expand_dag_variables(
2497            "{{greet.output | uppercase}}",
2498            "input",
2499            "next",
2500            &results,
2501            None,
2502        );
2503        assert_eq!(expanded, "HELLO WORLD");
2504    }
2505
2506    #[test]
2507    fn dag_variables_transform_lowercase() {
2508        let mut results = HashMap::new();
2509        results.insert(
2510            "shout".to_string(),
2511            StepResult {
2512                step_name: "shout".to_string(),
2513                response: "LOUD NOISE".to_string(),
2514                tokens_used: 10,
2515                duration_ms: 50,
2516                error: None,
2517                status: StepStatus::Completed,
2518                started_at: None,
2519                completed_at: None,
2520            },
2521        );
2522
2523        let expanded = expand_dag_variables(
2524            "{{shout.output | lowercase}}",
2525            "input",
2526            "next",
2527            &results,
2528            None,
2529        );
2530        assert_eq!(expanded, "loud noise");
2531    }
2532
2533    #[test]
2534    fn dag_variables_transform_json_extract() {
2535        let mut results = HashMap::new();
2536        results.insert(
2537            "data".to_string(),
2538            StepResult {
2539                step_name: "data".to_string(),
2540                response: r#"{"key": "value123"}"#.to_string(),
2541                tokens_used: 10,
2542                duration_ms: 50,
2543                error: None,
2544                status: StepStatus::Completed,
2545                started_at: None,
2546                completed_at: None,
2547            },
2548        );
2549
2550        let expanded = expand_dag_variables(
2551            "{{data.output | json_extract \"$.key\"}}",
2552            "input",
2553            "next",
2554            &results,
2555            None,
2556        );
2557        assert_eq!(expanded, "value123");
2558    }
2559
2560    #[test]
2561    fn json_path_extract_simple() {
2562        let result = json_path_extract(r#"{"name": "Bob"}"#, "name");
2563        assert_eq!(result, "Bob");
2564    }
2565
2566    #[test]
2567    fn json_path_extract_nested() {
2568        let result = json_path_extract(r#"{"a": {"b": {"c": 42}}}"#, "a.b.c");
2569        assert_eq!(result, "42");
2570    }
2571
2572    #[test]
2573    fn json_path_extract_dollar_prefix() {
2574        let result = json_path_extract(r#"{"key": "val"}"#, "$.key");
2575        assert_eq!(result, "val");
2576    }
2577
2578    #[test]
2579    fn json_path_extract_missing_key() {
2580        let result = json_path_extract(r#"{"key": "val"}"#, "missing");
2581        assert_eq!(result, "");
2582    }
2583
2584    #[test]
2585    fn json_path_extract_invalid_json() {
2586        let result = json_path_extract("not json", "key");
2587        assert_eq!(result, "not json");
2588    }
2589
2590    // ---- Step status tests ----
2591
2592    #[test]
2593    fn step_status_display() {
2594        assert_eq!(StepStatus::Pending.to_string(), "pending");
2595        assert_eq!(StepStatus::Running.to_string(), "running");
2596        assert_eq!(StepStatus::Completed.to_string(), "completed");
2597        assert_eq!(StepStatus::Failed.to_string(), "failed");
2598        assert_eq!(StepStatus::Skipped.to_string(), "skipped");
2599        assert_eq!(StepStatus::Cancelled.to_string(), "cancelled");
2600    }
2601
2602    // ---- On error variant tests ----
2603
2604    #[test]
2605    fn on_error_fallback_serialization() {
2606        let on_error = OnError::Fallback {
2607            step: "backup".to_string(),
2608        };
2609        let json = serde_json::to_string(&on_error).expect("serialize");
2610        let deser: OnError = serde_json::from_str(&json).expect("deserialize");
2611        assert!(matches!(deser, OnError::Fallback { step } if step == "backup"));
2612    }
2613
2614    #[test]
2615    fn on_error_catch_and_continue_serialization() {
2616        let on_error = OnError::CatchAndContinue {
2617            error_handler: "handler".to_string(),
2618        };
2619        let json = serde_json::to_string(&on_error).expect("serialize");
2620        let deser: OnError = serde_json::from_str(&json).expect("deserialize");
2621        assert!(
2622            matches!(deser, OnError::CatchAndContinue { error_handler } if error_handler == "handler")
2623        );
2624    }
2625
2626    #[test]
2627    fn on_error_circuit_breaker_serialization() {
2628        let on_error = OnError::CircuitBreaker {
2629            max_failures: 5,
2630            cooldown_secs: 60,
2631        };
2632        let json = serde_json::to_string(&on_error).expect("serialize");
2633        let deser: OnError = serde_json::from_str(&json).expect("deserialize");
2634        assert!(matches!(
2635            deser,
2636            OnError::CircuitBreaker {
2637                max_failures: 5,
2638                cooldown_secs: 60
2639            }
2640        ));
2641    }
2642
2643    // ---- Circuit breaker tests ----
2644
2645    #[test]
2646    fn circuit_breaker_default_closed() {
2647        let cb = CircuitBreakerState::default();
2648        assert!(!cb.is_open(3, 60));
2649    }
2650
2651    #[test]
2652    fn circuit_breaker_opens_after_max_failures() {
2653        let mut cb = CircuitBreakerState::default();
2654        cb.record_failure();
2655        cb.record_failure();
2656        cb.record_failure();
2657        assert!(cb.is_open(3, 60));
2658    }
2659
2660    #[test]
2661    fn circuit_breaker_resets_on_success() {
2662        let mut cb = CircuitBreakerState::default();
2663        cb.record_failure();
2664        cb.record_failure();
2665        cb.record_success();
2666        assert!(!cb.is_open(3, 60));
2667        assert_eq!(cb.consecutive_failures, 0);
2668    }
2669
2670    // ---- DAG workflow registration tests ----
2671
2672    #[test]
2673    fn register_dag_workflow_valid() {
2674        let engine = WorkflowEngine::new();
2675        let wf = DagWorkflow {
2676            id: WorkflowId::new(),
2677            name: "test-dag".to_string(),
2678            steps: vec![dag_step("a", &[]), dag_step("b", &["a"])],
2679        };
2680        let result = engine.register_dag_workflow(wf);
2681        assert!(result.is_ok());
2682    }
2683
2684    #[test]
2685    fn register_dag_workflow_with_cycle_fails() {
2686        let engine = WorkflowEngine::new();
2687        let wf = DagWorkflow {
2688            id: WorkflowId::new(),
2689            name: "bad-dag".to_string(),
2690            steps: vec![dag_step("a", &["b"]), dag_step("b", &["a"])],
2691        };
2692        let result = engine.register_dag_workflow(wf);
2693        assert!(result.is_err());
2694    }
2695
2696    #[test]
2697    fn list_dag_workflows() {
2698        let engine = WorkflowEngine::new();
2699        let wf = DagWorkflow {
2700            id: WorkflowId::new(),
2701            name: "dag1".to_string(),
2702            steps: vec![dag_step("a", &[])],
2703        };
2704        engine.register_dag_workflow(wf).expect("should register");
2705        assert_eq!(engine.list_dag_workflows().len(), 1);
2706    }
2707
2708    #[test]
2709    fn get_dag_workflow() {
2710        let engine = WorkflowEngine::new();
2711        let id = WorkflowId::new();
2712        let wf = DagWorkflow {
2713            id,
2714            name: "dag1".to_string(),
2715            steps: vec![dag_step("a", &[])],
2716        };
2717        engine.register_dag_workflow(wf).expect("should register");
2718        let fetched = engine.get_dag_workflow(&id).expect("should exist");
2719        assert_eq!(fetched.name, "dag1");
2720    }
2721
2722    #[test]
2723    fn get_nonexistent_dag_workflow() {
2724        let engine = WorkflowEngine::new();
2725        assert!(engine.get_dag_workflow(&WorkflowId::new()).is_none());
2726    }
2727
2728    // ---- Dead letter queue tests ----
2729
2730    #[tokio::test]
2731    async fn dag_dead_letters_populated_on_failure() {
2732        let steps = vec![dag_step("a", &[])];
2733        let executor = MockExecutor::new().with_failure("a", "catastrophic failure");
2734
2735        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2736        assert!(!result.dead_letters.is_empty());
2737        assert_eq!(result.dead_letters[0].step_name, "a");
2738        assert_eq!(result.dead_letters[0].error, "catastrophic failure");
2739    }
2740
2741    // ---- Execution trace tests ----
2742
2743    #[tokio::test]
2744    async fn dag_execution_trace_records_waves() {
2745        let steps = vec![
2746            dag_step("a", &[]),
2747            dag_step("b", &["a"]),
2748            dag_step("c", &["b"]),
2749        ];
2750        let executor = MockExecutor::new()
2751            .with_response("a", "ok")
2752            .with_response("b", "ok")
2753            .with_response("c", "ok");
2754
2755        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2756        // 3 waves for a linear chain
2757        assert_eq!(result.execution_trace.len(), 3);
2758        assert_eq!(result.execution_trace[0].steps, vec!["a"]);
2759        assert_eq!(result.execution_trace[1].steps, vec!["b"]);
2760        assert_eq!(result.execution_trace[2].steps, vec!["c"]);
2761    }
2762
2763    // ---- DagWorkflowStep helper tests ----
2764
2765    #[test]
2766    fn dag_step_fallback_step_extraction() {
2767        let mut step = dag_step("test", &[]);
2768        assert!(step.fallback_step().is_none());
2769
2770        step.on_error = OnError::Fallback {
2771            step: "backup".to_string(),
2772        };
2773        assert_eq!(step.fallback_step(), Some("backup".to_string()));
2774
2775        step.on_error = OnError::CatchAndContinue {
2776            error_handler: "handler".to_string(),
2777        };
2778        assert_eq!(step.fallback_step(), Some("handler".to_string()));
2779    }
2780
2781    // ---- Serialization tests for new types ----
2782
2783    #[test]
2784    fn dag_workflow_serialization_roundtrip() {
2785        let wf = DagWorkflow {
2786            id: WorkflowId::new(),
2787            name: "test-dag".to_string(),
2788            steps: vec![dag_step("a", &[]), dag_step("b", &["a"])],
2789        };
2790        let json = serde_json::to_string(&wf).expect("serialize");
2791        let deser: DagWorkflow = serde_json::from_str(&json).expect("deserialize");
2792        assert_eq!(deser.name, "test-dag");
2793        assert_eq!(deser.steps.len(), 2);
2794    }
2795
2796    #[test]
2797    fn dag_workflow_step_with_condition_serialization() {
2798        let mut step = dag_step("test", &["dep1"]);
2799        step.condition = Some(Condition::IfSuccess {
2800            step: "dep1".to_string(),
2801        });
2802        step.else_step = Some("fallback".to_string());
2803        let json = serde_json::to_string(&step).expect("serialize");
2804        let deser: DagWorkflowStep = serde_json::from_str(&json).expect("deserialize");
2805        assert!(deser.condition.is_some());
2806        assert_eq!(deser.else_step, Some("fallback".to_string()));
2807    }
2808
2809    #[test]
2810    fn dead_letter_entry_serialization() {
2811        let entry = DeadLetterEntry {
2812            step_name: "failed_step".to_string(),
2813            error: "boom".to_string(),
2814            input: "test input".to_string(),
2815            failed_at: Utc::now(),
2816        };
2817        let json = serde_json::to_string(&entry).expect("serialize");
2818        let deser: DeadLetterEntry = serde_json::from_str(&json).expect("deserialize");
2819        assert_eq!(deser.step_name, "failed_step");
2820        assert_eq!(deser.error, "boom");
2821    }
2822
2823    #[test]
2824    fn execution_trace_entry_serialization() {
2825        let entry = ExecutionTraceEntry {
2826            steps: vec!["a".to_string(), "b".to_string()],
2827            started_at: Utc::now(),
2828            completed_at: Some(Utc::now()),
2829        };
2830        let json = serde_json::to_string(&entry).expect("serialize");
2831        let deser: ExecutionTraceEntry = serde_json::from_str(&json).expect("deserialize");
2832        assert_eq!(deser.steps.len(), 2);
2833    }
2834
2835    #[test]
2836    fn workflow_run_with_new_fields_serialization() {
2837        let run = WorkflowRun {
2838            id: WorkflowRunId::new(),
2839            workflow_id: WorkflowId::new(),
2840            status: WorkflowRunStatus::PartiallyCompleted,
2841            step_results: Vec::new(),
2842            started_at: Utc::now(),
2843            completed_at: None,
2844            dead_letters: vec![DeadLetterEntry {
2845                step_name: "x".to_string(),
2846                error: "err".to_string(),
2847                input: "in".to_string(),
2848                failed_at: Utc::now(),
2849            }],
2850            execution_trace: Vec::new(),
2851        };
2852        let json = serde_json::to_string(&run).expect("serialize");
2853        let deser: WorkflowRun = serde_json::from_str(&json).expect("deserialize");
2854        assert_eq!(deser.status, WorkflowRunStatus::PartiallyCompleted);
2855        assert_eq!(deser.dead_letters.len(), 1);
2856    }
2857
2858    #[test]
2859    fn step_result_with_new_fields() {
2860        let sr = StepResult {
2861            step_name: "test".to_string(),
2862            response: "ok".to_string(),
2863            tokens_used: 10,
2864            duration_ms: 100,
2865            error: None,
2866            status: StepStatus::Completed,
2867            started_at: Some(Utc::now()),
2868            completed_at: Some(Utc::now()),
2869        };
2870        let json = serde_json::to_string(&sr).expect("serialize");
2871        let deser: StepResult = serde_json::from_str(&json).expect("deserialize");
2872        assert_eq!(deser.status, StepStatus::Completed);
2873        assert!(deser.started_at.is_some());
2874    }
2875
2876    // ---- Fallback error handling test ----
2877
2878    #[tokio::test]
2879    async fn dag_fallback_on_error() {
2880        let mut steps = vec![dag_step("main", &[]), dag_step("backup", &[])];
2881        steps[0].on_error = OnError::Fallback {
2882            step: "backup".to_string(),
2883        };
2884
2885        let executor = MockExecutor::new()
2886            .with_failure("main", "main failed")
2887            .with_response("backup", "backup result");
2888
2889        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2890        // The main step should have used backup's result
2891        // In our implementation, the step result gets the backup response
2892        let main_result = &result.step_results["main"];
2893        assert_eq!(main_result.response, "backup result");
2894    }
2895
2896    #[tokio::test]
2897    async fn dag_catch_and_continue() {
2898        let mut steps = vec![
2899            dag_step("risky", &[]),
2900            dag_step("handler", &[]),
2901            dag_step("next", &["risky"]),
2902        ];
2903        steps[0].on_error = OnError::CatchAndContinue {
2904            error_handler: "handler".to_string(),
2905        };
2906
2907        let executor = MockExecutor::new()
2908            .with_failure("risky", "oops")
2909            .with_response("handler", "handled")
2910            .with_response("next", "continued");
2911
2912        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2913        // "next" should have run because CatchAndContinue removes the failure
2914        assert!(result.step_results.contains_key("next"));
2915    }
2916
2917    // ---- Parallel execution proof tests ----
2918
2919    /// A timed executor that records start/end times to prove concurrency.
2920    struct ConcurrencyProofExecutor {
2921        delay_ms: u64,
2922        /// Track (step_name, start_instant, end_instant) for each execution.
2923        timings: Arc<tokio::sync::Mutex<Vec<(String, Instant, Instant)>>>,
2924    }
2925
2926    impl ConcurrencyProofExecutor {
2927        fn new(delay_ms: u64) -> Self {
2928            Self {
2929                delay_ms,
2930                timings: Arc::new(tokio::sync::Mutex::new(Vec::new())),
2931            }
2932        }
2933    }
2934
2935    #[async_trait::async_trait]
2936    impl StepExecutor for ConcurrencyProofExecutor {
2937        async fn execute(
2938            &self,
2939            step: &DagWorkflowStep,
2940            _input: &str,
2941            _step_results: &HashMap<String, StepResult>,
2942            _loop_state: Option<&LoopState>,
2943        ) -> Result<StepResult, String> {
2944            let start = Instant::now();
2945            tokio::time::sleep(Duration::from_millis(self.delay_ms)).await;
2946            let end = Instant::now();
2947
2948            self.timings
2949                .lock()
2950                .await
2951                .push((step.name.clone(), start, end));
2952
2953            Ok(StepResult {
2954                step_name: step.name.clone(),
2955                response: format!("done-{}", step.name),
2956                tokens_used: 10,
2957                duration_ms: self.delay_ms,
2958                error: None,
2959                status: StepStatus::Completed,
2960                started_at: Some(Utc::now()),
2961                completed_at: Some(Utc::now()),
2962            })
2963        }
2964    }
2965
2966    /// Prove 3 independent steps with 50ms sleep each complete in ~50-70ms (not 150ms).
2967    #[tokio::test]
2968    async fn dag_three_independent_steps_parallel_timing() {
2969        let steps = vec![dag_step("x", &[]), dag_step("y", &[]), dag_step("z", &[])];
2970        let executor = ConcurrencyProofExecutor::new(50);
2971        let timings = Arc::clone(&executor.timings);
2972
2973        let start = Instant::now();
2974        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2975        let elapsed = start.elapsed();
2976
2977        assert_eq!(result.status, WorkflowRunStatus::Completed);
2978        assert_eq!(result.step_results.len(), 3);
2979        // Parallel: should finish in ~50ms, not 150ms
2980        assert!(
2981            elapsed.as_millis() < 100,
2982            "3 independent 50ms steps took {}ms, should be ~50ms for parallel execution",
2983            elapsed.as_millis()
2984        );
2985
2986        // Verify that the steps overlapped in time
2987        let recorded = timings.lock().await;
2988        assert_eq!(recorded.len(), 3);
2989        // All should have started within a few ms of each other
2990        let starts: Vec<_> = recorded.iter().map(|(_, s, _)| *s).collect();
2991        let earliest = starts.iter().min().copied().expect("should have starts");
2992        for s in &starts {
2993            let diff = s.duration_since(earliest).as_millis();
2994            assert!(
2995                diff < 20,
2996                "start time spread {}ms too large for parallel execution",
2997                diff
2998            );
2999        }
3000    }
3001
3002    /// Fan-out: step A -> steps B,C,D in parallel -> step E waits for all.
3003    #[tokio::test]
3004    async fn dag_fan_out_fan_in_timing() {
3005        let steps = vec![
3006            dag_step("a", &[]),
3007            dag_step("b", &["a"]),
3008            dag_step("c", &["a"]),
3009            dag_step("d", &["a"]),
3010            dag_step("e", &["b", "c", "d"]),
3011        ];
3012        let executor = TimedMockExecutor { delay_ms: 30 };
3013
3014        let start = Instant::now();
3015        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3016        let elapsed = start.elapsed();
3017
3018        assert_eq!(result.status, WorkflowRunStatus::Completed);
3019        assert_eq!(result.step_results.len(), 5);
3020
3021        // 3 waves: A (30ms) + B,C,D parallel (30ms) + E (30ms) = ~90ms
3022        // Sequential would be 5*30 = 150ms
3023        assert!(
3024            elapsed.as_millis() < 130,
3025            "fan-out/fan-in took {}ms, expected ~90ms",
3026            elapsed.as_millis()
3027        );
3028
3029        // Verify execution trace shows 3 waves
3030        assert_eq!(result.execution_trace.len(), 3);
3031        // Wave 2 should have B, C, D
3032        let wave2 = &result.execution_trace[1].steps;
3033        assert_eq!(wave2.len(), 3);
3034    }
3035
3036    /// Fan-in: multiple parallel roots feed into one join step.
3037    #[tokio::test]
3038    async fn dag_fan_in_parallel_roots() {
3039        let steps = vec![
3040            dag_step("r1", &[]),
3041            dag_step("r2", &[]),
3042            dag_step("r3", &[]),
3043            dag_step("join", &["r1", "r2", "r3"]),
3044        ];
3045        let executor = MockExecutor::new()
3046            .with_response("r1", "out1")
3047            .with_response("r2", "out2")
3048            .with_response("r3", "out3")
3049            .with_response("join", "merged");
3050
3051        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3052        assert_eq!(result.status, WorkflowRunStatus::Completed);
3053        assert_eq!(result.step_results["join"].response, "merged");
3054        // r1, r2, r3 in wave 1, join in wave 2
3055        assert_eq!(result.execution_trace.len(), 2);
3056        assert_eq!(result.execution_trace[0].steps.len(), 3);
3057    }
3058
3059    /// Diamond dependency: A -> B,C -> D (D depends on both B and C).
3060    #[tokio::test]
3061    async fn dag_diamond_dependency_parallel() {
3062        let steps = vec![
3063            dag_step("a", &[]),
3064            dag_step("b", &["a"]),
3065            dag_step("c", &["a"]),
3066            dag_step("d", &["b", "c"]),
3067        ];
3068        let executor = TimedMockExecutor { delay_ms: 30 };
3069
3070        let start = Instant::now();
3071        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3072        let elapsed = start.elapsed();
3073
3074        assert_eq!(result.status, WorkflowRunStatus::Completed);
3075        // 3 waves: A, B+C parallel, D
3076        assert_eq!(result.execution_trace.len(), 3);
3077        // B and C should be in the same wave
3078        let wave2 = &result.execution_trace[1].steps;
3079        assert!(wave2.contains(&"b".to_string()));
3080        assert!(wave2.contains(&"c".to_string()));
3081        // Total should be ~90ms (3 waves * 30ms), not 120ms (4 sequential)
3082        assert!(
3083            elapsed.as_millis() < 120,
3084            "diamond took {}ms, expected ~90ms",
3085            elapsed.as_millis()
3086        );
3087    }
3088
3089    /// Conditional skipping in a DAG.
3090    #[tokio::test]
3091    async fn dag_conditional_skip_in_dag() {
3092        let mut steps = vec![
3093            dag_step("check", &[]),
3094            dag_step("true_branch", &["check"]),
3095            dag_step("false_branch", &["check"]),
3096        ];
3097        // true_branch runs only if check succeeds (it will)
3098        steps[1].condition = Some(Condition::IfSuccess {
3099            step: "check".to_string(),
3100        });
3101        // false_branch runs only if check fails (it won't)
3102        steps[2].condition = Some(Condition::IfFailure {
3103            step: "check".to_string(),
3104        });
3105
3106        let executor = MockExecutor::new()
3107            .with_response("check", "all good")
3108            .with_response("true_branch", "ran")
3109            .with_response("false_branch", "should_not_run");
3110
3111        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3112        assert_eq!(
3113            result.step_results["true_branch"].status,
3114            StepStatus::Completed
3115        );
3116        assert_eq!(
3117            result.step_results["false_branch"].status,
3118            StepStatus::Skipped
3119        );
3120    }
3121
3122    /// Loop execution within a DAG step (ForEach).
3123    #[tokio::test]
3124    async fn dag_loop_foreach_within_dag() {
3125        let mut steps = vec![
3126            dag_step("data", &[]),
3127            dag_step("process", &["data"]),
3128            dag_step("summary", &["process"]),
3129        ];
3130        steps[1].loop_config = Some(LoopConfig::ForEach {
3131            source_step: "data".to_string(),
3132            max_iterations: 10,
3133        });
3134        steps[1].prompt_template = "process: {{loop.item}}".to_string();
3135
3136        let executor = MockExecutor::new()
3137            .with_response("data", r#"["red", "green", "blue"]"#)
3138            .with_response("summary", "done");
3139
3140        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3141        assert_eq!(result.status, WorkflowRunStatus::Completed);
3142        let process_out = &result.step_results["process"].response;
3143        // Should contain output from all 3 loop iterations
3144        assert!(process_out.contains("process: red"));
3145        assert!(process_out.contains("process: green"));
3146        assert!(process_out.contains("process: blue"));
3147    }
3148
3149    /// Partial failure: one parallel branch fails, others succeed.
3150    #[tokio::test]
3151    async fn dag_partial_failure_parallel_branches() {
3152        let steps = vec![
3153            dag_step("root", &[]),
3154            dag_step("ok_branch", &["root"]),
3155            dag_step("fail_branch", &["root"]),
3156            dag_step("ok_branch2", &["root"]),
3157        ];
3158
3159        let executor = MockExecutor::new()
3160            .with_response("root", "start")
3161            .with_response("ok_branch", "success1")
3162            .with_failure("fail_branch", "branch failed")
3163            .with_response("ok_branch2", "success2");
3164
3165        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3166        assert_eq!(result.status, WorkflowRunStatus::PartiallyCompleted);
3167        assert_eq!(
3168            result.step_results["ok_branch"].status,
3169            StepStatus::Completed
3170        );
3171        assert_eq!(
3172            result.step_results["ok_branch2"].status,
3173            StepStatus::Completed
3174        );
3175        assert!(result.step_results["fail_branch"].error.is_some());
3176    }
3177
3178    /// Fallback step execution on failure.
3179    #[tokio::test]
3180    async fn dag_fallback_step_runs_on_failure() {
3181        let mut steps = vec![
3182            dag_step("primary", &[]),
3183            dag_step("fallback_handler", &[]),
3184            dag_step("downstream", &["primary"]),
3185        ];
3186        steps[0].on_error = OnError::Fallback {
3187            step: "fallback_handler".to_string(),
3188        };
3189
3190        let executor = MockExecutor::new()
3191            .with_failure("primary", "primary broke")
3192            .with_response("fallback_handler", "recovered via fallback")
3193            .with_response("downstream", "downstream ran");
3194
3195        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3196        // primary should have the fallback result
3197        let primary_result = &result.step_results["primary"];
3198        assert_eq!(primary_result.response, "recovered via fallback");
3199        // downstream should have run since fallback recovered
3200        assert!(result.step_results.contains_key("downstream"));
3201    }
3202
3203    /// Circuit breaker triggering after N failures.
3204    #[tokio::test]
3205    async fn dag_circuit_breaker_triggers() {
3206        let mut steps = vec![dag_step("cb_step", &[])];
3207        steps[0].on_error = OnError::CircuitBreaker {
3208            max_failures: 2,
3209            cooldown_secs: 300,
3210        };
3211
3212        // First run: fail twice to trip the breaker
3213        let executor1 = MockExecutor::new().with_failure("cb_step", "fail1");
3214        let result1 = execute_dag("test", &steps, "input", Arc::new(executor1)).await;
3215        assert!(result1.step_results["cb_step"].error.is_some());
3216
3217        // The circuit breaker state is per-run, so we test within a single run
3218        // with a step that has CircuitBreaker and fails. The breaker opens internally
3219        // after max_failures. Let's verify the circuit breaker state logic directly.
3220        let mut cb = CircuitBreakerState::default();
3221        cb.record_failure();
3222        assert!(!cb.is_open(2, 300), "should not be open after 1 failure");
3223        cb.record_failure();
3224        assert!(cb.is_open(2, 300), "should be open after 2 failures");
3225        // After cooldown, it should close — but since cooldown is 300s, it's still open
3226        assert!(cb.is_open(2, 300));
3227    }
3228
3229    /// Variable substitution works across parallel branches.
3230    #[tokio::test]
3231    async fn dag_variable_substitution_across_parallel_branches() {
3232        let mut steps = vec![
3233            dag_step("source_a", &[]),
3234            dag_step("source_b", &[]),
3235            dag_step("consumer", &["source_a", "source_b"]),
3236        ];
3237        steps[2].prompt_template = "A={{source_a.output}}, B={{source_b.output}}".to_string();
3238
3239        let executor = MockExecutor::new()
3240            .with_response("source_a", "value_from_a")
3241            .with_response("source_b", "value_from_b");
3242        // consumer doesn't have a fixed response, so it will use the expanded prompt
3243
3244        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3245        assert_eq!(result.status, WorkflowRunStatus::Completed);
3246        let consumer_out = &result.step_results["consumer"].response;
3247        assert!(
3248            consumer_out.contains("value_from_a"),
3249            "consumer should see source_a output, got: {consumer_out}"
3250        );
3251        assert!(
3252            consumer_out.contains("value_from_b"),
3253            "consumer should see source_b output, got: {consumer_out}"
3254        );
3255    }
3256
3257    /// Wide parallel fan-out with timing proof.
3258    #[tokio::test]
3259    async fn dag_wide_parallel_fan_out_timing() {
3260        // 10 independent steps each taking 30ms
3261        let steps: Vec<DagWorkflowStep> =
3262            (0..10).map(|i| dag_step(&format!("s{i}"), &[])).collect();
3263        let executor = TimedMockExecutor { delay_ms: 30 };
3264
3265        let start = Instant::now();
3266        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3267        let elapsed = start.elapsed();
3268
3269        assert_eq!(result.status, WorkflowRunStatus::Completed);
3270        assert_eq!(result.step_results.len(), 10);
3271        // All 10 should run in one wave (~30ms), not sequentially (~300ms)
3272        assert!(
3273            elapsed.as_millis() < 80,
3274            "10 parallel 30ms steps took {}ms, expected ~30ms",
3275            elapsed.as_millis()
3276        );
3277        assert_eq!(result.execution_trace.len(), 1);
3278        assert_eq!(result.execution_trace[0].steps.len(), 10);
3279    }
3280
3281    /// While loop with condition that eventually terminates.
3282    #[tokio::test]
3283    async fn dag_while_loop_with_condition() {
3284        let mut steps = vec![dag_step("looper", &[])];
3285        steps[0].loop_config = Some(LoopConfig::While {
3286            condition: Condition::Expression("true".to_string()),
3287            max_iterations: 3,
3288        });
3289
3290        let executor = MockExecutor::new().with_response("looper", "iteration");
3291
3292        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3293        assert_eq!(result.status, WorkflowRunStatus::Completed);
3294        let output = &result.step_results["looper"].response;
3295        // Should have 3 iterations
3296        let lines: Vec<&str> = output.split('\n').collect();
3297        assert_eq!(lines.len(), 3);
3298    }
3299
3300    /// Retry loop succeeds on second attempt.
3301    #[tokio::test]
3302    async fn dag_retry_succeeds_on_retry() {
3303        let mut steps = vec![dag_step("retry_step", &[])];
3304        steps[0].loop_config = Some(LoopConfig::Retry {
3305            max_retries: 2,
3306            backoff_ms: 1,
3307            backoff_multiplier: 1.0,
3308        });
3309
3310        let executor = FailNTimesMockExecutor::new(1);
3311
3312        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3313        assert_eq!(result.status, WorkflowRunStatus::Completed);
3314        assert!(result.step_results["retry_step"].error.is_none());
3315        assert!(
3316            result.step_results["retry_step"]
3317                .response
3318                .contains("success on attempt 2")
3319        );
3320    }
3321}