punch_kernel/
workflow.rs

1//! Multi-step agent workflow engine with DAG execution.
2//!
3//! The [`WorkflowEngine`] allows registering named workflows composed of
4//! sequential steps or DAG-structured steps with parallel fan-out, conditional
5//! branching, loops, and advanced error handling.
6//!
7//! ## Variable substitution
8//!
9//! Prompt templates support:
10//! - `{{input}}` / `{{previous_output}}` — current pipeline input
11//! - `{{step_name}}` — name of the current step
12//! - `{{step_N}}` — output of step N (1-indexed, sequential mode)
13//! - `{{some_step_name}}` — output of a step by name
14//! - `{{step_name.output}}` — explicit step output reference
15//! - `{{step_name.status}}` — step completion status
16//! - `{{step_name.duration_ms}}` — step duration
17//! - `{{loop.index}}` — current loop iteration
18//! - `{{loop.item}}` — current loop item (ForEach)
19//! - `{{step_name.output.field.nested}}` — JSON path into step output
20//! - `{{step_name.output | uppercase}}` — data transformation
21
22use std::collections::HashMap;
23use std::sync::Arc;
24use std::time::Instant;
25
26use chrono::{DateTime, Utc};
27use dashmap::DashMap;
28use serde::{Deserialize, Serialize};
29use tracing::{debug, error, info, instrument, warn};
30use uuid::Uuid;
31
32use punch_memory::MemorySubstrate;
33use punch_runtime::{FighterLoopParams, LlmDriver, run_fighter_loop, tools_for_capabilities};
34use punch_types::{FighterId, FighterManifest, ModelConfig, PunchError, PunchResult, WeightClass};
35
36use crate::workflow_conditions::{Condition, evaluate_condition};
37use crate::workflow_loops::{LoopConfig, LoopState, calculate_backoff, parse_foreach_items};
38use crate::workflow_validation::{ValidationError, topological_sort, validate_workflow};
39
40// ---------------------------------------------------------------------------
41// ID types
42// ---------------------------------------------------------------------------
43
44/// Unique identifier for a workflow definition.
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
46#[serde(transparent)]
47pub struct WorkflowId(pub Uuid);
48
49impl WorkflowId {
50    pub fn new() -> Self {
51        Self(Uuid::new_v4())
52    }
53}
54
55impl Default for WorkflowId {
56    fn default() -> Self {
57        Self::new()
58    }
59}
60
61impl std::fmt::Display for WorkflowId {
62    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
63        write!(f, "{}", self.0)
64    }
65}
66
67/// Unique identifier for a workflow run (execution instance).
68#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
69#[serde(transparent)]
70pub struct WorkflowRunId(pub Uuid);
71
72impl WorkflowRunId {
73    pub fn new() -> Self {
74        Self(Uuid::new_v4())
75    }
76}
77
78impl Default for WorkflowRunId {
79    fn default() -> Self {
80        Self::new()
81    }
82}
83
84impl std::fmt::Display for WorkflowRunId {
85    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86        write!(f, "{}", self.0)
87    }
88}
89
90// ---------------------------------------------------------------------------
91// Workflow types
92// ---------------------------------------------------------------------------
93
94/// What to do when a workflow step fails.
95#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
96#[serde(rename_all = "snake_case")]
97#[derive(Default)]
98pub enum OnError {
99    /// Abort the entire workflow.
100    #[default]
101    FailWorkflow,
102    /// Skip the failed step and continue.
103    SkipStep,
104    /// Retry the step once, then fail if it fails again.
105    RetryOnce,
106    /// On error, run a fallback step instead.
107    Fallback { step: String },
108    /// Run an error handler step, then continue the workflow.
109    CatchAndContinue { error_handler: String },
110    /// Stop trying after N consecutive failures, with a cooldown.
111    CircuitBreaker {
112        max_failures: usize,
113        cooldown_secs: u64,
114    },
115}
116
117/// Per-step execution status.
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
119#[serde(rename_all = "snake_case")]
120pub enum StepStatus {
121    Pending,
122    Running,
123    Completed,
124    Failed,
125    Skipped,
126    Cancelled,
127}
128
129impl std::fmt::Display for StepStatus {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        match self {
132            Self::Pending => write!(f, "pending"),
133            Self::Running => write!(f, "running"),
134            Self::Completed => write!(f, "completed"),
135            Self::Failed => write!(f, "failed"),
136            Self::Skipped => write!(f, "skipped"),
137            Self::Cancelled => write!(f, "cancelled"),
138        }
139    }
140}
141
142/// A single step within a sequential workflow (legacy format, still supported).
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct WorkflowStep {
145    /// Human-readable name for this step.
146    pub name: String,
147    /// The fighter name to use for this step.
148    pub fighter_name: String,
149    /// Prompt template with variable substitution.
150    pub prompt_template: String,
151    /// Maximum time in seconds for this step (default 120).
152    pub timeout_secs: Option<u64>,
153    /// Error handling strategy.
154    #[serde(default)]
155    pub on_error: OnError,
156}
157
158/// A single step within a DAG workflow.
159#[derive(Debug, Clone, Serialize, Deserialize)]
160pub struct DagWorkflowStep {
161    /// Human-readable name for this step (must be unique within the workflow).
162    pub name: String,
163    /// The fighter name to use for this step.
164    pub fighter_name: String,
165    /// Prompt template with variable substitution.
166    pub prompt_template: String,
167    /// Maximum time in seconds for this step (default 120).
168    pub timeout_secs: Option<u64>,
169    /// Error handling strategy.
170    #[serde(default)]
171    pub on_error: OnError,
172    /// Steps that must complete before this one runs.
173    #[serde(default)]
174    pub depends_on: Vec<String>,
175    /// Optional condition — step is skipped if condition evaluates to false.
176    #[serde(default)]
177    pub condition: Option<Condition>,
178    /// If condition is false, run this step instead (if/else branching).
179    #[serde(default)]
180    pub else_step: Option<String>,
181    /// Optional loop configuration.
182    #[serde(default)]
183    pub loop_config: Option<LoopConfig>,
184}
185
186impl DagWorkflowStep {
187    /// Extract the fallback step name from the on_error strategy, if any.
188    pub fn fallback_step(&self) -> Option<String> {
189        match &self.on_error {
190            OnError::Fallback { step } => Some(step.clone()),
191            OnError::CatchAndContinue { error_handler } => Some(error_handler.clone()),
192            _ => None,
193        }
194    }
195}
196
197/// A workflow definition composed of sequential steps (legacy).
198#[derive(Debug, Clone, Serialize, Deserialize)]
199pub struct Workflow {
200    /// Unique identifier.
201    pub id: WorkflowId,
202    /// Human-readable name.
203    pub name: String,
204    /// Ordered steps to execute.
205    pub steps: Vec<WorkflowStep>,
206}
207
208/// A DAG workflow definition with parallel execution support.
209#[derive(Debug, Clone, Serialize, Deserialize)]
210pub struct DagWorkflow {
211    /// Unique identifier.
212    pub id: WorkflowId,
213    /// Human-readable name.
214    pub name: String,
215    /// DAG steps (order in vec doesn't matter — execution order is determined by dependencies).
216    pub steps: Vec<DagWorkflowStep>,
217}
218
219/// Status of a workflow run.
220#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
221#[serde(rename_all = "snake_case")]
222pub enum WorkflowRunStatus {
223    Pending,
224    Running,
225    Completed,
226    Failed,
227    /// Some branches succeeded, some failed.
228    PartiallyCompleted,
229}
230
231impl std::fmt::Display for WorkflowRunStatus {
232    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
233        match self {
234            Self::Pending => write!(f, "pending"),
235            Self::Running => write!(f, "running"),
236            Self::Completed => write!(f, "completed"),
237            Self::Failed => write!(f, "failed"),
238            Self::PartiallyCompleted => write!(f, "partially_completed"),
239        }
240    }
241}
242
243/// Result of executing a single workflow step.
244#[derive(Debug, Clone, Serialize, Deserialize)]
245pub struct StepResult {
246    /// Name of the step.
247    pub step_name: String,
248    /// The response text from the fighter.
249    pub response: String,
250    /// Tokens consumed.
251    pub tokens_used: u64,
252    /// Duration in milliseconds.
253    pub duration_ms: u64,
254    /// Error message, if any.
255    pub error: Option<String>,
256    /// Per-step status.
257    #[serde(default = "default_step_status")]
258    pub status: StepStatus,
259    /// When the step started executing.
260    #[serde(default)]
261    pub started_at: Option<DateTime<Utc>>,
262    /// When the step finished executing.
263    #[serde(default)]
264    pub completed_at: Option<DateTime<Utc>>,
265}
266
267fn default_step_status() -> StepStatus {
268    StepStatus::Pending
269}
270
271/// A failed step result stored in the dead letter queue.
272#[derive(Debug, Clone, Serialize, Deserialize)]
273pub struct DeadLetterEntry {
274    /// The step name that failed.
275    pub step_name: String,
276    /// The error message.
277    pub error: String,
278    /// The input that was provided to the step.
279    pub input: String,
280    /// When the failure occurred.
281    pub failed_at: DateTime<Utc>,
282}
283
284/// A single execution of a workflow.
285#[derive(Debug, Clone, Serialize, Deserialize)]
286pub struct WorkflowRun {
287    /// Unique run identifier.
288    pub id: WorkflowRunId,
289    /// The workflow that was executed.
290    pub workflow_id: WorkflowId,
291    /// Current status.
292    pub status: WorkflowRunStatus,
293    /// Results of each completed step.
294    pub step_results: Vec<StepResult>,
295    /// When the run started.
296    pub started_at: DateTime<Utc>,
297    /// When the run completed (or failed).
298    pub completed_at: Option<DateTime<Utc>>,
299    /// Dead letter queue for failed steps.
300    #[serde(default)]
301    pub dead_letters: Vec<DeadLetterEntry>,
302    /// Execution trace showing which steps ran in parallel.
303    #[serde(default)]
304    pub execution_trace: Vec<ExecutionTraceEntry>,
305}
306
307/// An entry in the execution trace showing what happened at each "wave" of execution.
308#[derive(Debug, Clone, Serialize, Deserialize)]
309pub struct ExecutionTraceEntry {
310    /// Steps that executed in this wave (parallel batch).
311    pub steps: Vec<String>,
312    /// When this wave started.
313    pub started_at: DateTime<Utc>,
314    /// When this wave completed.
315    pub completed_at: Option<DateTime<Utc>>,
316}
317
318// ---------------------------------------------------------------------------
319// Variable substitution
320// ---------------------------------------------------------------------------
321
322/// Replace template variables in a prompt string (sequential mode).
323///
324/// Supported variables:
325/// - `{{input}}` — the current input (original input or previous step's output)
326/// - `{{previous_output}}` — alias for `{{input}}`
327/// - `{{step_name}}` — the name of the current step
328/// - `{{step_1}}` / `{{step_N}}` — output of step N (1-indexed)
329/// - `{{some_step_name}}` — output of a step referenced by its name
330fn expand_variables(
331    template: &str,
332    current_input: &str,
333    step_name: &str,
334    step_results: &[StepResult],
335) -> String {
336    let mut result = template.to_string();
337
338    // {{input}} and {{previous_output}} both resolve to the current pipeline input
339    result = result.replace("{{input}}", current_input);
340    result = result.replace("{{previous_output}}", current_input);
341
342    // {{step_name}} resolves to the current step's name
343    result = result.replace("{{step_name}}", step_name);
344
345    // {{step_N}} resolves to the output of the Nth step (1-indexed)
346    for (i, sr) in step_results.iter().enumerate() {
347        let var = format!("{{{{step_{}}}}}", i + 1);
348        result = result.replace(&var, &sr.response);
349    }
350
351    // {{step_result_name}} resolves to the output of a step by name
352    for sr in step_results {
353        let var = format!("{{{{{}}}}}", sr.step_name);
354        result = result.replace(&var, &sr.response);
355    }
356
357    result
358}
359
360/// Replace template variables in a prompt string (DAG mode).
361///
362/// Supports all the sequential variables plus:
363/// - `{{step_name.output}}` — explicit output reference
364/// - `{{step_name.status}}` — step status
365/// - `{{step_name.duration_ms}}` — step duration
366/// - `{{loop.index}}` — current loop iteration
367/// - `{{loop.item}}` — current loop item
368/// - `{{step_name.output.field.nested}}` — JSON path
369/// - `{{step_name.output | uppercase}}` — transformations
370pub fn expand_dag_variables(
371    template: &str,
372    current_input: &str,
373    step_name: &str,
374    step_results: &HashMap<String, StepResult>,
375    loop_state: Option<&LoopState>,
376) -> String {
377    let mut result = template.to_string();
378
379    // Basic variables
380    result = result.replace("{{input}}", current_input);
381    result = result.replace("{{previous_output}}", current_input);
382    result = result.replace("{{step_name}}", step_name);
383
384    // Loop variables
385    if let Some(ls) = loop_state {
386        result = result.replace("{{loop.index}}", &ls.index.to_string());
387        if let Some(ref item) = ls.item {
388            result = result.replace("{{loop.item}}", item);
389        }
390    }
391
392    // Process {{name.property}} and {{name.output.path}} patterns
393    // We need to find all {{...}} patterns and resolve them
394    let mut output = String::with_capacity(result.len());
395    let mut remaining = result.as_str();
396
397    while let Some(start) = remaining.find("{{") {
398        output.push_str(&remaining[..start]);
399        let after_start = &remaining[start + 2..];
400        if let Some(end) = after_start.find("}}") {
401            let var_content = &after_start[..end];
402            let resolved = resolve_dag_variable(var_content, step_results);
403            output.push_str(&resolved);
404            remaining = &after_start[end + 2..];
405        } else {
406            output.push_str("{{");
407            remaining = after_start;
408        }
409    }
410    output.push_str(remaining);
411
412    output
413}
414
415/// Resolve a single variable expression like `step_name.output` or `step_name.output | uppercase`.
416fn resolve_dag_variable(var: &str, step_results: &HashMap<String, StepResult>) -> String {
417    // Check for pipe transformation: `expr | transform`
418    let (expr, transform) = if let Some(pipe_pos) = var.find(" | ") {
419        let expr = var[..pipe_pos].trim();
420        let transform = var[pipe_pos + 3..].trim();
421        (expr, Some(transform))
422    } else {
423        (var.trim(), None)
424    };
425
426    // Resolve the expression
427    let value = resolve_dag_expression(expr, step_results);
428
429    // Apply transformation if present
430    match transform {
431        Some("uppercase") => value.to_uppercase(),
432        Some("lowercase") => value.to_lowercase(),
433        Some("trim") => value.trim().to_string(),
434        Some("len") | Some("length") => value.len().to_string(),
435        Some(t) if t.starts_with("json_extract ") => {
436            let path = t
437                .strip_prefix("json_extract ")
438                .unwrap_or("")
439                .trim_matches('"');
440            json_path_extract(&value, path)
441        }
442        _ => value,
443    }
444}
445
446/// Resolve a dotted expression like `step_name.output.field.nested`.
447fn resolve_dag_expression(expr: &str, step_results: &HashMap<String, StepResult>) -> String {
448    let parts: Vec<&str> = expr.splitn(2, '.').collect();
449    if parts.len() < 2 {
450        // Plain step name reference
451        return step_results
452            .get(parts[0])
453            .map(|r| r.response.clone())
454            .unwrap_or_else(|| format!("{{{{{expr}}}}}"));
455    }
456
457    let step_name = parts[0];
458    let property = parts[1];
459
460    let step_result = match step_results.get(step_name) {
461        Some(r) => r,
462        None => return format!("{{{{{expr}}}}}"),
463    };
464
465    match property {
466        "output" => step_result.response.clone(),
467        "status" => step_result.status.to_string(),
468        "duration_ms" => step_result.duration_ms.to_string(),
469        "error" => step_result
470            .error
471            .clone()
472            .unwrap_or_else(|| "none".to_string()),
473        _ if property.starts_with("output.") => {
474            let json_path = property.strip_prefix("output.").unwrap_or("");
475            json_path_extract(&step_result.response, json_path)
476        }
477        _ => format!("{{{{{expr}}}}}"),
478    }
479}
480
481/// Extract a value from a JSON string using a dot-separated path.
482///
483/// Supports paths like `field`, `field.nested`, `$.key` (strips leading `$.`).
484fn json_path_extract(json_str: &str, path: &str) -> String {
485    let path = path.strip_prefix("$.").unwrap_or(path);
486    let parsed: serde_json::Value = match serde_json::from_str(json_str) {
487        Ok(v) => v,
488        Err(_) => return json_str.to_string(),
489    };
490
491    let mut current = &parsed;
492    for segment in path.split('.') {
493        if segment.is_empty() {
494            continue;
495        }
496        match current.get(segment) {
497            Some(v) => current = v,
498            None => return String::new(),
499        }
500    }
501
502    match current {
503        serde_json::Value::String(s) => s.clone(),
504        other => other.to_string(),
505    }
506}
507
508// ---------------------------------------------------------------------------
509// Circuit breaker state
510// ---------------------------------------------------------------------------
511
/// Failure bookkeeping for one step guarded by a circuit breaker.
#[derive(Clone, Debug, Default)]
pub struct CircuitBreakerState {
    /// Failures recorded since the last success.
    pub consecutive_failures: usize,
    /// Time of the most recently recorded failure; `None` before any failure
    /// and after a success.
    pub last_trip_time: Option<Instant>,
}

impl CircuitBreakerState {
    /// Reports whether the circuit is open, i.e. execution should be blocked.
    ///
    /// The circuit is open once at least `max_failures` consecutive failures
    /// have been recorded and fewer than `cooldown_secs` whole seconds have
    /// passed since the last one. With the threshold reached but no failure
    /// timestamp available, the circuit counts as open.
    pub fn is_open(&self, max_failures: usize, cooldown_secs: u64) -> bool {
        let threshold_reached = self.consecutive_failures >= max_failures;
        threshold_reached
            && self
                .last_trip_time
                .map_or(true, |tripped_at| tripped_at.elapsed().as_secs() < cooldown_secs)
    }

    /// Records a failure: bumps the counter and stamps the current time.
    pub fn record_failure(&mut self) {
        self.last_trip_time = Some(Instant::now());
        self.consecutive_failures += 1;
    }

    /// Records a success: clears the counter and the failure timestamp.
    pub fn record_success(&mut self) {
        *self = Self::default();
    }
}
546
547// ---------------------------------------------------------------------------
548// DAG Executor (testable without LLM)
549// ---------------------------------------------------------------------------
550
551/// A step executor trait that allows testing the DAG engine without real LLM calls.
552#[async_trait::async_trait]
553pub trait StepExecutor: Send + Sync {
554    /// Execute a single step and return its result.
555    async fn execute(
556        &self,
557        step: &DagWorkflowStep,
558        input: &str,
559        step_results: &HashMap<String, StepResult>,
560        loop_state: Option<&LoopState>,
561    ) -> Result<StepResult, String>;
562}
563
564/// Execute a DAG workflow using the provided step executor.
565///
566/// This is the core DAG execution engine. Steps with no dependencies (roots) run
567/// first. When a step completes, any step whose dependencies are now all satisfied
568/// is scheduled. Steps with no mutual dependencies run concurrently using
569/// `tokio::task::JoinSet` for true multi-threaded parallelism.
570pub async fn execute_dag(
571    workflow_name: &str,
572    steps: &[DagWorkflowStep],
573    input: &str,
574    executor: Arc<dyn StepExecutor>,
575) -> DagExecutionResult {
576    // Validate first
577    let validation_errors = validate_workflow(steps);
578    if !validation_errors.is_empty() {
579        return DagExecutionResult {
580            status: WorkflowRunStatus::Failed,
581            step_results: HashMap::new(),
582            dead_letters: Vec::new(),
583            execution_trace: Vec::new(),
584            validation_errors,
585        };
586    }
587
588    // Get topological order
589    let topo_order = match topological_sort(steps) {
590        Ok(order) => order,
591        Err(_) => {
592            return DagExecutionResult {
593                status: WorkflowRunStatus::Failed,
594                step_results: HashMap::new(),
595                dead_letters: Vec::new(),
596                execution_trace: Vec::new(),
597                validation_errors: vec![ValidationError::CycleDetected {
598                    steps: steps.iter().map(|s| s.name.clone()).collect(),
599                }],
600            };
601        }
602    };
603
604    let step_map: HashMap<&str, &DagWorkflowStep> =
605        steps.iter().map(|s| (s.name.as_str(), s)).collect();
606
607    let mut completed: HashMap<String, StepResult> = HashMap::new();
608    let mut dead_letters: Vec<DeadLetterEntry> = Vec::new();
609    let mut execution_trace: Vec<ExecutionTraceEntry> = Vec::new();
610    let mut circuit_breakers: HashMap<String, CircuitBreakerState> = HashMap::new();
611    let mut skipped_steps: std::collections::HashSet<String> = std::collections::HashSet::new();
612    let mut failed_steps: std::collections::HashSet<String> = std::collections::HashSet::new();
613
614    // Process in waves: each wave contains steps whose dependencies are all satisfied
615    let mut remaining: Vec<String> = topo_order;
616
617    while !remaining.is_empty() {
618        // Find all steps that can run now (all deps satisfied)
619        let (ready, not_ready): (Vec<String>, Vec<String>) =
620            remaining.into_iter().partition(|name| {
621                let step = match step_map.get(name.as_str()) {
622                    Some(s) => s,
623                    None => return false,
624                };
625                step.depends_on.iter().all(|dep| {
626                    // A dependency is satisfied if it completed (not in failed_steps)
627                    // or was explicitly skipped/handled
628                    let is_done = completed.contains_key(dep) || skipped_steps.contains(dep);
629                    let is_blocking_failure = failed_steps.contains(dep);
630                    is_done && !is_blocking_failure
631                })
632            });
633
634        if ready.is_empty() {
635            // No progress possible — remaining steps have unmet deps (likely due to failures)
636            for name in &not_ready {
637                skipped_steps.insert(name.clone());
638                completed.insert(
639                    name.clone(),
640                    StepResult {
641                        step_name: name.clone(),
642                        response: String::new(),
643                        tokens_used: 0,
644                        duration_ms: 0,
645                        error: Some("cancelled: unmet dependencies".to_string()),
646                        status: StepStatus::Cancelled,
647                        started_at: None,
648                        completed_at: None,
649                    },
650                );
651            }
652            break;
653        }
654
655        remaining = not_ready;
656
657        let wave_start = Utc::now();
658        let wave_step_names: Vec<String> = ready.to_vec();
659
660        // Execute all ready steps concurrently using tokio::task::JoinSet
661        // for true multi-threaded parallelism.
662        let mut wave_results: Vec<(String, Result<StepResult, String>, Option<String>)> =
663            Vec::new();
664        let mut join_set: tokio::task::JoinSet<(
665            String,
666            Result<StepResult, String>,
667            Option<String>,
668        )> = tokio::task::JoinSet::new();
669
670        for step_name in &wave_step_names {
671            let step = match step_map.get(step_name.as_str()) {
672                Some(s) => (*s).clone(),
673                None => continue,
674            };
675
676            // Check condition
677            let should_run = match &step.condition {
678                Some(cond) => evaluate_condition(cond, &completed),
679                None => true,
680            };
681
682            if !should_run {
683                let else_step_name = step.else_step.clone();
684                wave_results.push((
685                    step_name.clone(),
686                    Ok(StepResult {
687                        step_name: step_name.clone(),
688                        response: String::new(),
689                        tokens_used: 0,
690                        duration_ms: 0,
691                        error: None,
692                        status: StepStatus::Skipped,
693                        started_at: Some(Utc::now()),
694                        completed_at: Some(Utc::now()),
695                    }),
696                    else_step_name,
697                ));
698                continue;
699            }
700
701            // Check circuit breaker
702            let cb_state = circuit_breakers
703                .entry(step_name.clone())
704                .or_default()
705                .clone();
706            if let OnError::CircuitBreaker {
707                max_failures,
708                cooldown_secs,
709            } = &step.on_error
710                && cb_state.is_open(*max_failures, *cooldown_secs)
711            {
712                wave_results.push((
713                    step_name.clone(),
714                    Ok(StepResult {
715                        step_name: step_name.clone(),
716                        response: String::new(),
717                        tokens_used: 0,
718                        duration_ms: 0,
719                        error: Some("circuit breaker open".to_string()),
720                        status: StepStatus::Failed,
721                        started_at: Some(Utc::now()),
722                        completed_at: Some(Utc::now()),
723                    }),
724                    None,
725                ));
726                continue;
727            }
728
729            let sn = step_name.clone();
730            let completed_snapshot = completed.clone();
731            let input_clone = input.to_string();
732            let executor_clone = Arc::clone(&executor);
733
734            join_set.spawn(async move {
735                let result = execute_step_with_loops(
736                    &step,
737                    &input_clone,
738                    &completed_snapshot,
739                    executor_clone.as_ref(),
740                )
741                .await;
742                (sn, result, None::<String>)
743            });
744        }
745
746        // Wait for all spawned tasks to complete
747        while let Some(join_result) = join_set.join_next().await {
748            match join_result {
749                Ok(task_result) => wave_results.push(task_result),
750                Err(join_err) => {
751                    // A JoinError means the task panicked or was cancelled
752                    error!(error = %join_err, "spawned step task failed unexpectedly");
753                }
754            }
755        }
756
757        // Process results
758        for (step_name, result, _else_step) in wave_results {
759            match result {
760                Ok(mut step_result) => {
761                    if step_result.status == StepStatus::Skipped {
762                        skipped_steps.insert(step_name.clone());
763                        debug!(step = %step_name, workflow = %workflow_name, "step skipped (condition false)");
764                    } else if step_result.error.is_some() {
765                        failed_steps.insert(step_name.clone());
766                        // Update circuit breaker
767                        circuit_breakers
768                            .entry(step_name.clone())
769                            .or_default()
770                            .record_failure();
771
772                        let step = step_map.get(step_name.as_str());
773                        if let Some(step) = step {
774                            match &step.on_error {
775                                OnError::Fallback { step: fb_step } => {
776                                    // Try to execute fallback
777                                    if let Some(fb) = step_map.get(fb_step.as_str()) {
778                                        let fb_result =
779                                            executor.execute(fb, input, &completed, None).await;
780                                        match fb_result {
781                                            Ok(fb_res) => {
782                                                step_result = fb_res;
783                                                step_result.step_name = step_name.clone();
784                                                failed_steps.remove(&step_name);
785                                            }
786                                            Err(fb_err) => {
787                                                dead_letters.push(DeadLetterEntry {
788                                                    step_name: step_name.clone(),
789                                                    error: fb_err,
790                                                    input: input.to_string(),
791                                                    failed_at: Utc::now(),
792                                                });
793                                            }
794                                        }
795                                    }
796                                }
797                                OnError::CatchAndContinue { error_handler } => {
798                                    // Run the error handler
799                                    if let Some(handler) = step_map.get(error_handler.as_str()) {
800                                        let _ = executor
801                                            .execute(handler, input, &completed, None)
802                                            .await;
803                                    }
804                                    // Continue anyway — mark as handled
805                                    failed_steps.remove(&step_name);
806                                }
807                                OnError::SkipStep => {
808                                    skipped_steps.insert(step_name.clone());
809                                    failed_steps.remove(&step_name);
810                                }
811                                OnError::FailWorkflow => {
812                                    dead_letters.push(DeadLetterEntry {
813                                        step_name: step_name.clone(),
814                                        error: step_result.error.clone().unwrap_or_default(),
815                                        input: input.to_string(),
816                                        failed_at: Utc::now(),
817                                    });
818                                }
819                                _ => {}
820                            }
821                        }
822                    } else {
823                        // Success
824                        circuit_breakers
825                            .entry(step_name.clone())
826                            .or_default()
827                            .record_success();
828                        info!(step = %step_name, workflow = %workflow_name, "DAG step completed");
829                    }
830                    completed.insert(step_name, step_result);
831                }
832                Err(e) => {
833                    failed_steps.insert(step_name.clone());
834                    circuit_breakers
835                        .entry(step_name.clone())
836                        .or_default()
837                        .record_failure();
838
839                    let mut step_result = StepResult {
840                        step_name: step_name.clone(),
841                        response: String::new(),
842                        tokens_used: 0,
843                        duration_ms: 0,
844                        error: Some(e.clone()),
845                        status: StepStatus::Failed,
846                        started_at: Some(Utc::now()),
847                        completed_at: Some(Utc::now()),
848                    };
849
850                    // Try error recovery strategies
851                    let step = step_map.get(step_name.as_str());
852                    if let Some(step) = step {
853                        match &step.on_error {
854                            OnError::Fallback { step: fb_step } => {
855                                if let Some(fb) = step_map.get(fb_step.as_str())
856                                    && let Ok(fb_res) =
857                                        executor.execute(fb, input, &completed, None).await
858                                {
859                                    step_result = fb_res;
860                                    step_result.step_name = step_name.clone();
861                                    step_result.error = None;
862                                    step_result.status = StepStatus::Completed;
863                                    failed_steps.remove(&step_name);
864                                }
865                            }
866                            OnError::CatchAndContinue { error_handler } => {
867                                if let Some(handler) = step_map.get(error_handler.as_str()) {
868                                    let _ =
869                                        executor.execute(handler, input, &completed, None).await;
870                                }
871                                failed_steps.remove(&step_name);
872                            }
873                            OnError::SkipStep => {
874                                step_result.status = StepStatus::Skipped;
875                                skipped_steps.insert(step_name.clone());
876                                failed_steps.remove(&step_name);
877                            }
878                            OnError::FailWorkflow => {
879                                dead_letters.push(DeadLetterEntry {
880                                    step_name: step_name.clone(),
881                                    error: e,
882                                    input: input.to_string(),
883                                    failed_at: Utc::now(),
884                                });
885                            }
886                            _ => {
887                                dead_letters.push(DeadLetterEntry {
888                                    step_name: step_name.clone(),
889                                    error: e,
890                                    input: input.to_string(),
891                                    failed_at: Utc::now(),
892                                });
893                            }
894                        }
895                    } else {
896                        dead_letters.push(DeadLetterEntry {
897                            step_name: step_name.clone(),
898                            error: e,
899                            input: input.to_string(),
900                            failed_at: Utc::now(),
901                        });
902                    }
903
904                    completed.insert(step_name, step_result);
905                }
906            }
907        }
908
909        execution_trace.push(ExecutionTraceEntry {
910            steps: wave_step_names,
911            started_at: wave_start,
912            completed_at: Some(Utc::now()),
913        });
914    }
915
916    // Determine final status
917    let has_failures = completed.values().any(|r| r.status == StepStatus::Failed);
918    let has_successes = completed
919        .values()
920        .any(|r| r.status == StepStatus::Completed);
921
922    let status = if has_failures && has_successes {
923        WorkflowRunStatus::PartiallyCompleted
924    } else if has_failures {
925        WorkflowRunStatus::Failed
926    } else {
927        WorkflowRunStatus::Completed
928    };
929
930    DagExecutionResult {
931        status,
932        step_results: completed,
933        dead_letters,
934        execution_trace,
935        validation_errors: Vec::new(),
936    }
937}
938
939/// Execute a step, handling loop configurations.
940async fn execute_step_with_loops(
941    step: &DagWorkflowStep,
942    input: &str,
943    completed: &HashMap<String, StepResult>,
944    executor: &dyn StepExecutor,
945) -> Result<StepResult, String> {
946    match &step.loop_config {
947        None => executor.execute(step, input, completed, None).await,
948        Some(LoopConfig::ForEach {
949            source_step,
950            max_iterations,
951        }) => {
952            let source_output = completed
953                .get(source_step)
954                .map(|r| r.response.as_str())
955                .unwrap_or("[]");
956            let items = parse_foreach_items(source_output)?;
957            let max = (*max_iterations).min(items.len());
958
959            let mut loop_state = LoopState::new();
960            let start = Utc::now();
961            let instant = Instant::now();
962
963            for (i, item) in items.into_iter().take(max).enumerate() {
964                loop_state.index = i;
965                loop_state.item = Some(item);
966
967                let result = executor
968                    .execute(step, input, completed, Some(&loop_state))
969                    .await;
970
971                match result {
972                    Ok(r) => {
973                        // Check for break/continue signals in output
974                        if r.response.contains("__BREAK__") {
975                            loop_state.push_result(r.response.replace("__BREAK__", ""));
976                            break;
977                        }
978                        if r.response.contains("__CONTINUE__") {
979                            continue;
980                        }
981                        loop_state.push_result(r.response);
982                    }
983                    Err(e) => return Err(e),
984                }
985            }
986
987            let combined = loop_state.accumulated_results.join("\n");
988            Ok(StepResult {
989                step_name: step.name.clone(),
990                response: combined,
991                tokens_used: 0,
992                duration_ms: instant.elapsed().as_millis() as u64,
993                error: None,
994                status: StepStatus::Completed,
995                started_at: Some(start),
996                completed_at: Some(Utc::now()),
997            })
998        }
999        Some(LoopConfig::While {
1000            condition,
1001            max_iterations,
1002        }) => {
1003            let mut loop_state = LoopState::new();
1004            let start = Utc::now();
1005            let instant = Instant::now();
1006
1007            for i in 0..*max_iterations {
1008                // Evaluate the condition with current completed results
1009                // For while loops, we add the accumulated results as a synthetic step
1010                let mut extended = completed.clone();
1011                if !loop_state.accumulated_results.is_empty() {
1012                    extended.insert(
1013                        step.name.clone(),
1014                        StepResult {
1015                            step_name: step.name.clone(),
1016                            response: loop_state
1017                                .accumulated_results
1018                                .last()
1019                                .cloned()
1020                                .unwrap_or_default(),
1021                            tokens_used: 0,
1022                            duration_ms: 0,
1023                            error: None,
1024                            status: StepStatus::Completed,
1025                            started_at: None,
1026                            completed_at: None,
1027                        },
1028                    );
1029                }
1030
1031                if !evaluate_condition(condition, &extended) {
1032                    break;
1033                }
1034
1035                loop_state.index = i;
1036                let result = executor
1037                    .execute(step, input, &extended, Some(&loop_state))
1038                    .await;
1039
1040                match result {
1041                    Ok(r) => {
1042                        if r.response.contains("__BREAK__") {
1043                            loop_state.push_result(r.response.replace("__BREAK__", ""));
1044                            break;
1045                        }
1046                        loop_state.push_result(r.response);
1047                    }
1048                    Err(e) => return Err(e),
1049                }
1050            }
1051
1052            let combined = loop_state.accumulated_results.join("\n");
1053            Ok(StepResult {
1054                step_name: step.name.clone(),
1055                response: combined,
1056                tokens_used: 0,
1057                duration_ms: instant.elapsed().as_millis() as u64,
1058                error: None,
1059                status: StepStatus::Completed,
1060                started_at: Some(start),
1061                completed_at: Some(Utc::now()),
1062            })
1063        }
1064        Some(LoopConfig::Retry {
1065            max_retries,
1066            backoff_ms,
1067            backoff_multiplier,
1068        }) => {
1069            let start = Utc::now();
1070            let instant = Instant::now();
1071            let mut last_error = String::new();
1072
1073            for attempt in 0..=*max_retries {
1074                if attempt > 0 {
1075                    let wait = calculate_backoff(attempt - 1, *backoff_ms, *backoff_multiplier);
1076                    tokio::time::sleep(std::time::Duration::from_millis(wait)).await;
1077                }
1078
1079                match executor.execute(step, input, completed, None).await {
1080                    Ok(r) => return Ok(r),
1081                    Err(e) => {
1082                        last_error = e;
1083                        warn!(step = %step.name, attempt = attempt + 1, "retry attempt failed");
1084                    }
1085                }
1086            }
1087
1088            Ok(StepResult {
1089                step_name: step.name.clone(),
1090                response: String::new(),
1091                tokens_used: 0,
1092                duration_ms: instant.elapsed().as_millis() as u64,
1093                error: Some(last_error),
1094                status: StepStatus::Failed,
1095                started_at: Some(start),
1096                completed_at: Some(Utc::now()),
1097            })
1098        }
1099    }
1100}
1101
/// Result of executing a DAG workflow.
///
/// Bundles the overall outcome with the per-step results, the failures that
/// were routed to the dead-letter list, and a trace of what was executed.
#[derive(Debug, Clone)]
pub struct DagExecutionResult {
    /// Overall workflow status.
    pub status: WorkflowRunStatus,
    /// Per-step results keyed by step name.
    pub step_results: HashMap<String, StepResult>,
    /// Dead letter entries for failed steps (step name, error, input and
    /// failure time of each recorded failure).
    pub dead_letters: Vec<DeadLetterEntry>,
    /// Execution trace: one entry per executed group of steps, with the
    /// group's start and completion timestamps.
    pub execution_trace: Vec<ExecutionTraceEntry>,
    /// Validation errors (if any — non-empty means workflow didn't execute).
    pub validation_errors: Vec<ValidationError>,
}
1116
1117// ---------------------------------------------------------------------------
1118// WorkflowEngine
1119// ---------------------------------------------------------------------------
1120
/// Engine for registering and executing multi-step agent workflows.
///
/// All state lives in [`DashMap`]s, and every method on the engine takes
/// `&self`, so registration, execution and lookups need no `&mut` access.
pub struct WorkflowEngine {
    /// Registered workflow definitions (sequential), keyed by workflow ID.
    workflows: DashMap<WorkflowId, Workflow>,
    /// Registered DAG workflow definitions, keyed by workflow ID. Entries are
    /// only inserted after the definition passed validation.
    dag_workflows: DashMap<WorkflowId, DagWorkflow>,
    /// Workflow execution runs, keyed by run ID.
    runs: DashMap<WorkflowRunId, WorkflowRun>,
}
1130
1131impl WorkflowEngine {
1132    /// Create a new workflow engine.
1133    pub fn new() -> Self {
1134        Self {
1135            workflows: DashMap::new(),
1136            dag_workflows: DashMap::new(),
1137            runs: DashMap::new(),
1138        }
1139    }
1140
1141    /// Register a sequential workflow definition and return its ID.
1142    pub fn register_workflow(&self, workflow: Workflow) -> WorkflowId {
1143        let id = workflow.id;
1144        info!(workflow_id = %id, name = %workflow.name, "workflow registered");
1145        self.workflows.insert(id, workflow);
1146        id
1147    }
1148
1149    /// Register a DAG workflow definition and return its ID.
1150    ///
1151    /// Validates the workflow before registering. Returns an error with
1152    /// validation details if the workflow is invalid.
1153    pub fn register_dag_workflow(
1154        &self,
1155        workflow: DagWorkflow,
1156    ) -> Result<WorkflowId, Vec<ValidationError>> {
1157        let errors = validate_workflow(&workflow.steps);
1158        if !errors.is_empty() {
1159            return Err(errors);
1160        }
1161        let id = workflow.id;
1162        info!(workflow_id = %id, name = %workflow.name, "DAG workflow registered");
1163        self.dag_workflows.insert(id, workflow);
1164        Ok(id)
1165    }
1166
1167    /// Execute a sequential workflow with the given input string.
1168    #[instrument(skip(self, input, memory, driver, model_config), fields(%workflow_id))]
1169    pub async fn execute_workflow(
1170        &self,
1171        workflow_id: &WorkflowId,
1172        input: String,
1173        memory: Arc<MemorySubstrate>,
1174        driver: Arc<dyn LlmDriver>,
1175        model_config: &ModelConfig,
1176    ) -> PunchResult<WorkflowRunId> {
1177        let workflow = self
1178            .workflows
1179            .get(workflow_id)
1180            .ok_or_else(|| PunchError::Internal(format!("workflow {} not found", workflow_id)))?
1181            .clone();
1182
1183        let run_id = WorkflowRunId::new();
1184        let run = WorkflowRun {
1185            id: run_id,
1186            workflow_id: *workflow_id,
1187            status: WorkflowRunStatus::Running,
1188            step_results: Vec::new(),
1189            started_at: Utc::now(),
1190            completed_at: None,
1191            dead_letters: Vec::new(),
1192            execution_trace: Vec::new(),
1193        };
1194        self.runs.insert(run_id, run);
1195
1196        let mut current_input = input.clone();
1197        let mut step_results: Vec<StepResult> = Vec::new();
1198        let mut failed = false;
1199
1200        for step in &workflow.steps {
1201            let result = self
1202                .execute_single_step(
1203                    step,
1204                    &workflow.name,
1205                    &current_input,
1206                    &step_results,
1207                    &memory,
1208                    &driver,
1209                    model_config,
1210                )
1211                .await;
1212
1213            match result {
1214                Ok(step_result) => {
1215                    current_input = step_result.response.clone();
1216                    step_results.push(step_result);
1217                }
1218                Err(e) => {
1219                    let error_msg = format!("{e}");
1220                    match step.on_error {
1221                        OnError::SkipStep => {
1222                            warn!(step = %step.name, error = %error_msg, "step failed, skipping");
1223                            let skip_result = StepResult {
1224                                step_name: step.name.clone(),
1225                                response: String::new(),
1226                                tokens_used: 0,
1227                                duration_ms: 0,
1228                                error: Some(error_msg),
1229                                status: StepStatus::Skipped,
1230                                started_at: None,
1231                                completed_at: None,
1232                            };
1233                            step_results.push(skip_result);
1234                            continue;
1235                        }
1236                        OnError::RetryOnce => {
1237                            warn!(step = %step.name, error = %error_msg, "step failed, retrying once");
1238                            let retry_result = self
1239                                .execute_single_step(
1240                                    step,
1241                                    &workflow.name,
1242                                    &current_input,
1243                                    &step_results,
1244                                    &memory,
1245                                    &driver,
1246                                    model_config,
1247                                )
1248                                .await;
1249
1250                            match retry_result {
1251                                Ok(step_result) => {
1252                                    current_input = step_result.response.clone();
1253                                    step_results.push(step_result);
1254                                }
1255                                Err(retry_err) => {
1256                                    error!(step = %step.name, error = %retry_err, "step failed on retry");
1257                                    let fail_result = StepResult {
1258                                        step_name: step.name.clone(),
1259                                        response: String::new(),
1260                                        tokens_used: 0,
1261                                        duration_ms: 0,
1262                                        error: Some(format!("{retry_err}")),
1263                                        status: StepStatus::Failed,
1264                                        started_at: None,
1265                                        completed_at: None,
1266                                    };
1267                                    step_results.push(fail_result);
1268                                    failed = true;
1269                                    break;
1270                                }
1271                            }
1272                        }
1273                        OnError::FailWorkflow => {
1274                            error!(step = %step.name, error = %error_msg, "step failed, aborting workflow");
1275                            let fail_result = StepResult {
1276                                step_name: step.name.clone(),
1277                                response: String::new(),
1278                                tokens_used: 0,
1279                                duration_ms: 0,
1280                                error: Some(error_msg),
1281                                status: StepStatus::Failed,
1282                                started_at: None,
1283                                completed_at: None,
1284                            };
1285                            step_results.push(fail_result);
1286                            failed = true;
1287                            break;
1288                        }
1289                        _ => {
1290                            // Fallback/CatchAndContinue/CircuitBreaker in sequential mode
1291                            // just fail the workflow for now
1292                            let fail_result = StepResult {
1293                                step_name: step.name.clone(),
1294                                response: String::new(),
1295                                tokens_used: 0,
1296                                duration_ms: 0,
1297                                error: Some(error_msg),
1298                                status: StepStatus::Failed,
1299                                started_at: None,
1300                                completed_at: None,
1301                            };
1302                            step_results.push(fail_result);
1303                            failed = true;
1304                            break;
1305                        }
1306                    }
1307                }
1308            }
1309        }
1310
1311        // Update the run with results.
1312        if let Some(mut run) = self.runs.get_mut(&run_id) {
1313            run.step_results = step_results;
1314            run.status = if failed {
1315                WorkflowRunStatus::Failed
1316            } else {
1317                WorkflowRunStatus::Completed
1318            };
1319            run.completed_at = Some(Utc::now());
1320        }
1321
1322        Ok(run_id)
1323    }
1324
1325    /// Execute a single workflow step, creating a temporary fighter and running
1326    /// it through the fighter loop.
1327    #[allow(clippy::too_many_arguments)]
1328    async fn execute_single_step(
1329        &self,
1330        step: &WorkflowStep,
1331        workflow_name: &str,
1332        current_input: &str,
1333        step_results: &[StepResult],
1334        memory: &Arc<MemorySubstrate>,
1335        driver: &Arc<dyn LlmDriver>,
1336        model_config: &ModelConfig,
1337    ) -> PunchResult<StepResult> {
1338        let step_start = Instant::now();
1339        let started_at = Utc::now();
1340
1341        // Substitute variables in the prompt template.
1342        let prompt = expand_variables(
1343            &step.prompt_template,
1344            current_input,
1345            &step.name,
1346            step_results,
1347        );
1348
1349        // Create a temporary fighter for this step.
1350        let fighter_id = FighterId::new();
1351        let fighter_manifest = FighterManifest {
1352            name: step.fighter_name.clone(),
1353            description: format!("Workflow step: {}", step.name),
1354            model: model_config.clone(),
1355            system_prompt: format!(
1356                "You are executing step '{}' of workflow '{}'.",
1357                step.name, workflow_name
1358            ),
1359            capabilities: Vec::new(),
1360            weight_class: WeightClass::Middleweight,
1361            tenant_id: None,
1362        };
1363
1364        // Save the fighter and create a bout.
1365        if let Err(e) = memory
1366            .save_fighter(
1367                &fighter_id,
1368                &fighter_manifest,
1369                punch_types::FighterStatus::Idle,
1370            )
1371            .await
1372        {
1373            error!(error = %e, "failed to persist workflow fighter");
1374        }
1375
1376        let bout_id = memory.create_bout(&fighter_id).await.map_err(|e| {
1377            PunchError::Internal(format!(
1378                "failed to create bout for step '{}': {e}",
1379                step.name
1380            ))
1381        })?;
1382
1383        let available_tools = tools_for_capabilities(&fighter_manifest.capabilities);
1384        let timeout_secs = step.timeout_secs.unwrap_or(120);
1385
1386        let params = FighterLoopParams {
1387            manifest: fighter_manifest,
1388            user_message: prompt,
1389            bout_id,
1390            fighter_id,
1391            memory: Arc::clone(memory),
1392            driver: Arc::clone(driver),
1393            available_tools,
1394            max_iterations: Some(20),
1395            context_window: None,
1396            tool_timeout_secs: Some(timeout_secs),
1397            coordinator: None,
1398            approval_engine: None,
1399            sandbox: None,
1400        };
1401
1402        let loop_result = tokio::time::timeout(
1403            std::time::Duration::from_secs(timeout_secs),
1404            run_fighter_loop(params),
1405        )
1406        .await;
1407
1408        match loop_result {
1409            Ok(Ok(result)) => {
1410                let step_result = StepResult {
1411                    step_name: step.name.clone(),
1412                    response: result.response,
1413                    tokens_used: result.usage.total(),
1414                    duration_ms: step_start.elapsed().as_millis() as u64,
1415                    error: None,
1416                    status: StepStatus::Completed,
1417                    started_at: Some(started_at),
1418                    completed_at: Some(Utc::now()),
1419                };
1420                info!(step = %step.name, tokens = step_result.tokens_used, "workflow step completed");
1421                Ok(step_result)
1422            }
1423            Ok(Err(e)) => Err(e),
1424            Err(_) => Err(PunchError::Internal(format!(
1425                "step '{}' timed out after {}s",
1426                step.name, timeout_secs
1427            ))),
1428        }
1429    }
1430
1431    /// Get a workflow run by its ID.
1432    pub fn get_run(&self, run_id: &WorkflowRunId) -> Option<WorkflowRun> {
1433        self.runs.get(run_id).map(|r| r.clone())
1434    }
1435
1436    /// List all registered sequential workflows.
1437    pub fn list_workflows(&self) -> Vec<Workflow> {
1438        self.workflows.iter().map(|w| w.value().clone()).collect()
1439    }
1440
1441    /// List all registered DAG workflows.
1442    pub fn list_dag_workflows(&self) -> Vec<DagWorkflow> {
1443        self.dag_workflows
1444            .iter()
1445            .map(|w| w.value().clone())
1446            .collect()
1447    }
1448
1449    /// List all workflow runs.
1450    pub fn list_runs(&self) -> Vec<WorkflowRun> {
1451        self.runs.iter().map(|r| r.value().clone()).collect()
1452    }
1453
1454    /// List workflow runs filtered by workflow ID.
1455    pub fn list_runs_for_workflow(&self, workflow_id: &WorkflowId) -> Vec<WorkflowRun> {
1456        self.runs
1457            .iter()
1458            .filter(|r| r.value().workflow_id == *workflow_id)
1459            .map(|r| r.value().clone())
1460            .collect()
1461    }
1462
1463    /// Get a sequential workflow by its ID.
1464    pub fn get_workflow(&self, id: &WorkflowId) -> Option<Workflow> {
1465        self.workflows.get(id).map(|w| w.clone())
1466    }
1467
1468    /// Get a DAG workflow by its ID.
1469    pub fn get_dag_workflow(&self, id: &WorkflowId) -> Option<DagWorkflow> {
1470        self.dag_workflows.get(id).map(|w| w.clone())
1471    }
1472}
1473
1474impl Default for WorkflowEngine {
1475    fn default() -> Self {
1476        Self::new()
1477    }
1478}
1479
1480// ---------------------------------------------------------------------------
1481// Tests
1482// ---------------------------------------------------------------------------
1483
1484#[cfg(test)]
1485mod tests {
1486    use super::*;
1487    use std::sync::atomic::{AtomicUsize, Ordering};
1488    use std::time::Duration;
1489
1490    // A mock step executor for testing
1491    struct MockExecutor {
1492        /// Map of step name -> response
1493        responses: HashMap<String, String>,
1494        /// Steps that should fail
1495        failing_steps: HashMap<String, String>,
1496        /// Track execution count per step
1497        execution_counts: DashMap<String, AtomicUsize>,
1498    }
1499
1500    impl MockExecutor {
1501        fn new() -> Self {
1502            Self {
1503                responses: HashMap::new(),
1504                failing_steps: HashMap::new(),
1505                execution_counts: DashMap::new(),
1506            }
1507        }
1508
1509        fn with_response(mut self, step: &str, response: &str) -> Self {
1510            self.responses
1511                .insert(step.to_string(), response.to_string());
1512            self
1513        }
1514
1515        fn with_failure(mut self, step: &str, error: &str) -> Self {
1516            self.failing_steps
1517                .insert(step.to_string(), error.to_string());
1518            self
1519        }
1520
1521        #[allow(dead_code)]
1522        fn execution_count(&self, step: &str) -> usize {
1523            self.execution_counts
1524                .get(step)
1525                .map(|c| c.load(Ordering::Relaxed))
1526                .unwrap_or(0)
1527        }
1528    }
1529
1530    #[async_trait::async_trait]
1531    impl StepExecutor for MockExecutor {
1532        async fn execute(
1533            &self,
1534            step: &DagWorkflowStep,
1535            input: &str,
1536            step_results: &HashMap<String, StepResult>,
1537            loop_state: Option<&LoopState>,
1538        ) -> Result<StepResult, String> {
1539            // Track execution
1540            self.execution_counts
1541                .entry(step.name.clone())
1542                .or_insert_with(|| AtomicUsize::new(0))
1543                .fetch_add(1, Ordering::Relaxed);
1544
1545            // Check if step should fail
1546            if let Some(err) = self.failing_steps.get(&step.name) {
1547                return Err(err.clone());
1548            }
1549
1550            let prompt = expand_dag_variables(
1551                &step.prompt_template,
1552                input,
1553                &step.name,
1554                step_results,
1555                loop_state,
1556            );
1557
1558            let response = self.responses.get(&step.name).cloned().unwrap_or(prompt);
1559
1560            Ok(StepResult {
1561                step_name: step.name.clone(),
1562                response,
1563                tokens_used: 10,
1564                duration_ms: 5,
1565                error: None,
1566                status: StepStatus::Completed,
1567                started_at: Some(Utc::now()),
1568                completed_at: Some(Utc::now()),
1569            })
1570        }
1571    }
1572
1573    /// A mock executor that adds a delay to simulate real execution time.
1574    struct TimedMockExecutor {
1575        delay_ms: u64,
1576    }
1577
1578    #[async_trait::async_trait]
1579    impl StepExecutor for TimedMockExecutor {
1580        async fn execute(
1581            &self,
1582            step: &DagWorkflowStep,
1583            _input: &str,
1584            _step_results: &HashMap<String, StepResult>,
1585            _loop_state: Option<&LoopState>,
1586        ) -> Result<StepResult, String> {
1587            tokio::time::sleep(Duration::from_millis(self.delay_ms)).await;
1588            Ok(StepResult {
1589                step_name: step.name.clone(),
1590                response: format!("done-{}", step.name),
1591                tokens_used: 10,
1592                duration_ms: self.delay_ms,
1593                error: None,
1594                status: StepStatus::Completed,
1595                started_at: Some(Utc::now()),
1596                completed_at: Some(Utc::now()),
1597            })
1598        }
1599    }
1600
1601    /// A mock executor that fails the first N attempts for a step.
1602    struct FailNTimesMockExecutor {
1603        fail_count: usize,
1604        attempts: DashMap<String, AtomicUsize>,
1605    }
1606
1607    impl FailNTimesMockExecutor {
1608        fn new(fail_count: usize) -> Self {
1609            Self {
1610                fail_count,
1611                attempts: DashMap::new(),
1612            }
1613        }
1614    }
1615
1616    #[async_trait::async_trait]
1617    impl StepExecutor for FailNTimesMockExecutor {
1618        async fn execute(
1619            &self,
1620            step: &DagWorkflowStep,
1621            _input: &str,
1622            _step_results: &HashMap<String, StepResult>,
1623            _loop_state: Option<&LoopState>,
1624        ) -> Result<StepResult, String> {
1625            let attempt = self
1626                .attempts
1627                .entry(step.name.clone())
1628                .or_insert_with(|| AtomicUsize::new(0))
1629                .fetch_add(1, Ordering::Relaxed);
1630
1631            if attempt < self.fail_count {
1632                return Err(format!("failure attempt {}", attempt + 1));
1633            }
1634
1635            Ok(StepResult {
1636                step_name: step.name.clone(),
1637                response: format!("success on attempt {}", attempt + 1),
1638                tokens_used: 10,
1639                duration_ms: 5,
1640                error: None,
1641                status: StepStatus::Completed,
1642                started_at: Some(Utc::now()),
1643                completed_at: Some(Utc::now()),
1644            })
1645        }
1646    }
1647
1648    fn dag_step(name: &str, deps: &[&str]) -> DagWorkflowStep {
1649        DagWorkflowStep {
1650            name: name.to_string(),
1651            fighter_name: "test".to_string(),
1652            prompt_template: "{{input}}".to_string(),
1653            timeout_secs: None,
1654            on_error: OnError::FailWorkflow,
1655            depends_on: deps.iter().map(|d| d.to_string()).collect(),
1656            condition: None,
1657            else_step: None,
1658            loop_config: None,
1659        }
1660    }
1661
1662    // ---- Existing sequential tests (preserved) ----
1663
1664    #[test]
1665    fn register_and_list_workflows() {
1666        let engine = WorkflowEngine::new();
1667
1668        let workflow = Workflow {
1669            id: WorkflowId::new(),
1670            name: "test-workflow".to_string(),
1671            steps: vec![
1672                WorkflowStep {
1673                    name: "step1".to_string(),
1674                    fighter_name: "analyzer".to_string(),
1675                    prompt_template: "Analyze: {{input}}".to_string(),
1676                    timeout_secs: None,
1677                    on_error: OnError::FailWorkflow,
1678                },
1679                WorkflowStep {
1680                    name: "step2".to_string(),
1681                    fighter_name: "summarizer".to_string(),
1682                    prompt_template: "Summarize the analysis: {{step1}}".to_string(),
1683                    timeout_secs: Some(60),
1684                    on_error: OnError::SkipStep,
1685                },
1686            ],
1687        };
1688
1689        let id = engine.register_workflow(workflow);
1690        let workflows = engine.list_workflows();
1691        assert_eq!(workflows.len(), 1);
1692        assert_eq!(workflows[0].name, "test-workflow");
1693        assert_eq!(workflows[0].steps.len(), 2);
1694
1695        let fetched = engine.get_workflow(&id).expect("workflow should exist");
1696        assert_eq!(fetched.name, "test-workflow");
1697    }
1698
1699    #[test]
1700    fn variable_substitution_basic() {
1701        let result = expand_variables(
1702            "Analyze {{input}} for step {{step_name}}",
1703            "hello world",
1704            "analysis",
1705            &[],
1706        );
1707        assert_eq!(result, "Analyze hello world for step analysis");
1708    }
1709
1710    #[test]
1711    fn variable_substitution_previous_output() {
1712        let result = expand_variables(
1713            "Continue from: {{previous_output}}",
1714            "step 1 output",
1715            "step2",
1716            &[],
1717        );
1718        assert_eq!(result, "Continue from: step 1 output");
1719    }
1720
1721    #[test]
1722    fn variable_substitution_step_refs() {
1723        let step_results = vec![
1724            StepResult {
1725                step_name: "analyze".to_string(),
1726                response: "analysis result".to_string(),
1727                tokens_used: 100,
1728                duration_ms: 500,
1729                error: None,
1730                status: StepStatus::Completed,
1731                started_at: None,
1732                completed_at: None,
1733            },
1734            StepResult {
1735                step_name: "review".to_string(),
1736                response: "review result".to_string(),
1737                tokens_used: 80,
1738                duration_ms: 400,
1739                error: None,
1740                status: StepStatus::Completed,
1741                started_at: None,
1742                completed_at: None,
1743            },
1744        ];
1745
1746        let result = expand_variables(
1747            "Step 1 said: {{step_1}}, Step 2 said: {{step_2}}",
1748            "current",
1749            "step3",
1750            &step_results,
1751        );
1752        assert_eq!(
1753            result,
1754            "Step 1 said: analysis result, Step 2 said: review result"
1755        );
1756
1757        let result = expand_variables(
1758            "Analysis: {{analyze}}, Review: {{review}}",
1759            "current",
1760            "step3",
1761            &step_results,
1762        );
1763        assert_eq!(result, "Analysis: analysis result, Review: review result");
1764    }
1765
1766    #[test]
1767    fn workflow_run_status_display() {
1768        assert_eq!(WorkflowRunStatus::Pending.to_string(), "pending");
1769        assert_eq!(WorkflowRunStatus::Running.to_string(), "running");
1770        assert_eq!(WorkflowRunStatus::Completed.to_string(), "completed");
1771        assert_eq!(WorkflowRunStatus::Failed.to_string(), "failed");
1772        assert_eq!(
1773            WorkflowRunStatus::PartiallyCompleted.to_string(),
1774            "partially_completed"
1775        );
1776    }
1777
1778    #[test]
1779    fn get_nonexistent_run_returns_none() {
1780        let engine = WorkflowEngine::new();
1781        let run_id = WorkflowRunId::new();
1782        assert!(engine.get_run(&run_id).is_none());
1783    }
1784
1785    #[test]
1786    fn get_nonexistent_workflow_returns_none() {
1787        let engine = WorkflowEngine::new();
1788        let id = WorkflowId::new();
1789        assert!(engine.get_workflow(&id).is_none());
1790    }
1791
1792    #[test]
1793    fn workflow_engine_default() {
1794        let engine = WorkflowEngine::default();
1795        assert!(engine.list_workflows().is_empty());
1796        assert!(engine.list_runs().is_empty());
1797    }
1798
1799    #[test]
1800    fn register_multiple_workflows() {
1801        let engine = WorkflowEngine::new();
1802
1803        for i in 0..5 {
1804            let workflow = Workflow {
1805                id: WorkflowId::new(),
1806                name: format!("workflow-{}", i),
1807                steps: vec![],
1808            };
1809            engine.register_workflow(workflow);
1810        }
1811
1812        assert_eq!(engine.list_workflows().len(), 5);
1813    }
1814
1815    #[test]
1816    fn register_workflow_returns_correct_id() {
1817        let engine = WorkflowEngine::new();
1818        let wf_id = WorkflowId::new();
1819        let workflow = Workflow {
1820            id: wf_id,
1821            name: "id-test".to_string(),
1822            steps: vec![],
1823        };
1824        let returned_id = engine.register_workflow(workflow);
1825        assert_eq!(returned_id, wf_id);
1826    }
1827
1828    #[test]
1829    fn workflow_id_display() {
1830        let id = WorkflowId::new();
1831        let s = format!("{}", id);
1832        assert!(!s.is_empty());
1833    }
1834
1835    #[test]
1836    fn workflow_run_id_display() {
1837        let id = WorkflowRunId::new();
1838        let s = format!("{}", id);
1839        assert!(!s.is_empty());
1840    }
1841
1842    #[test]
1843    fn workflow_id_default() {
1844        let id = WorkflowId::default();
1845        assert!(!id.0.is_nil());
1846    }
1847
1848    #[test]
1849    fn workflow_run_id_default() {
1850        let id = WorkflowRunId::default();
1851        assert!(!id.0.is_nil());
1852    }
1853
1854    #[test]
1855    fn variable_substitution_no_variables() {
1856        let result = expand_variables("plain text with no vars", "input", "step", &[]);
1857        assert_eq!(result, "plain text with no vars");
1858    }
1859
1860    #[test]
1861    fn variable_substitution_all_variables_at_once() {
1862        let step_results = vec![StepResult {
1863            step_name: "analysis".to_string(),
1864            response: "analyzed data".to_string(),
1865            tokens_used: 50,
1866            duration_ms: 100,
1867            error: None,
1868            status: StepStatus::Completed,
1869            started_at: None,
1870            completed_at: None,
1871        }];
1872
1873        let result = expand_variables(
1874            "Input: {{input}}, Prev: {{previous_output}}, Step: {{step_name}}, S1: {{step_1}}, Named: {{analysis}}",
1875            "my input",
1876            "current_step",
1877            &step_results,
1878        );
1879        assert_eq!(
1880            result,
1881            "Input: my input, Prev: my input, Step: current_step, S1: analyzed data, Named: analyzed data"
1882        );
1883    }
1884
1885    #[test]
1886    fn variable_substitution_empty_input() {
1887        let result = expand_variables("{{input}} is here", "", "step", &[]);
1888        assert_eq!(result, " is here");
1889    }
1890
1891    #[test]
1892    fn variable_substitution_multiple_same_var() {
1893        let result = expand_variables("{{input}} and {{input}} again", "hello", "step", &[]);
1894        assert_eq!(result, "hello and hello again");
1895    }
1896
1897    #[test]
1898    fn on_error_default_is_fail_workflow() {
1899        let on_error = OnError::default();
1900        assert!(matches!(on_error, OnError::FailWorkflow));
1901    }
1902
1903    #[test]
1904    fn list_runs_for_workflow_filters_correctly() {
1905        let engine = WorkflowEngine::new();
1906        let wf_id_1 = WorkflowId::new();
1907        let wf_id_2 = WorkflowId::new();
1908
1909        assert!(engine.list_runs_for_workflow(&wf_id_1).is_empty());
1910        assert!(engine.list_runs_for_workflow(&wf_id_2).is_empty());
1911    }
1912
1913    #[test]
1914    fn workflow_step_serialization() {
1915        let step = WorkflowStep {
1916            name: "test".to_string(),
1917            fighter_name: "fighter".to_string(),
1918            prompt_template: "Do {{input}}".to_string(),
1919            timeout_secs: Some(30),
1920            on_error: OnError::SkipStep,
1921        };
1922        let json = serde_json::to_string(&step).expect("serialize");
1923        let deserialized: WorkflowStep = serde_json::from_str(&json).expect("deserialize");
1924        assert_eq!(deserialized.name, "test");
1925        assert_eq!(deserialized.timeout_secs, Some(30));
1926    }
1927
1928    #[test]
1929    fn workflow_serialization_roundtrip() {
1930        let workflow = Workflow {
1931            id: WorkflowId::new(),
1932            name: "roundtrip".to_string(),
1933            steps: vec![WorkflowStep {
1934                name: "s1".to_string(),
1935                fighter_name: "f1".to_string(),
1936                prompt_template: "{{input}}".to_string(),
1937                timeout_secs: None,
1938                on_error: OnError::RetryOnce,
1939            }],
1940        };
1941        let json = serde_json::to_string(&workflow).expect("serialize");
1942        let deserialized: Workflow = serde_json::from_str(&json).expect("deserialize");
1943        assert_eq!(deserialized.name, "roundtrip");
1944        assert_eq!(deserialized.steps.len(), 1);
1945    }
1946
1947    #[test]
1948    fn step_result_with_error() {
1949        let sr = StepResult {
1950            step_name: "failing".to_string(),
1951            response: String::new(),
1952            tokens_used: 0,
1953            duration_ms: 0,
1954            error: Some("timeout".to_string()),
1955            status: StepStatus::Failed,
1956            started_at: None,
1957            completed_at: None,
1958        };
1959        assert!(sr.error.is_some());
1960        assert_eq!(sr.error.expect("error"), "timeout");
1961    }
1962
1963    #[test]
1964    fn variable_substitution_step_ref_by_number_out_of_range() {
1965        let step_results = vec![
1966            StepResult {
1967                step_name: "a".to_string(),
1968                response: "r1".to_string(),
1969                tokens_used: 0,
1970                duration_ms: 0,
1971                error: None,
1972                status: StepStatus::Completed,
1973                started_at: None,
1974                completed_at: None,
1975            },
1976            StepResult {
1977                step_name: "b".to_string(),
1978                response: "r2".to_string(),
1979                tokens_used: 0,
1980                duration_ms: 0,
1981                error: None,
1982                status: StepStatus::Completed,
1983                started_at: None,
1984                completed_at: None,
1985            },
1986        ];
1987        let result = expand_variables("{{step_5}}", "input", "step", &step_results);
1988        assert_eq!(result, "{{step_5}}");
1989    }
1990
1991    // ---- New DAG tests ----
1992
1993    #[tokio::test]
1994    async fn dag_linear_execution() {
1995        let steps = vec![
1996            dag_step("a", &[]),
1997            dag_step("b", &["a"]),
1998            dag_step("c", &["b"]),
1999        ];
2000        let executor = MockExecutor::new()
2001            .with_response("a", "result_a")
2002            .with_response("b", "result_b")
2003            .with_response("c", "result_c");
2004
2005        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2006        assert_eq!(result.status, WorkflowRunStatus::Completed);
2007        assert_eq!(result.step_results.len(), 3);
2008        assert_eq!(result.step_results["a"].response, "result_a");
2009        assert_eq!(result.step_results["b"].response, "result_b");
2010        assert_eq!(result.step_results["c"].response, "result_c");
2011    }
2012
2013    #[tokio::test]
2014    async fn dag_fan_out_execution() {
2015        let steps = vec![
2016            dag_step("root", &[]),
2017            dag_step("branch1", &["root"]),
2018            dag_step("branch2", &["root"]),
2019            dag_step("branch3", &["root"]),
2020        ];
2021        let executor = MockExecutor::new()
2022            .with_response("root", "root_out")
2023            .with_response("branch1", "b1_out")
2024            .with_response("branch2", "b2_out")
2025            .with_response("branch3", "b3_out");
2026
2027        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2028        assert_eq!(result.status, WorkflowRunStatus::Completed);
2029        assert_eq!(result.step_results.len(), 4);
2030        // All branches should have completed
2031        assert_eq!(result.step_results["branch1"].response, "b1_out");
2032        assert_eq!(result.step_results["branch2"].response, "b2_out");
2033        assert_eq!(result.step_results["branch3"].response, "b3_out");
2034    }
2035
2036    #[tokio::test]
2037    async fn dag_fan_in_execution() {
2038        let steps = vec![
2039            dag_step("a", &[]),
2040            dag_step("b", &[]),
2041            dag_step("c", &[]),
2042            dag_step("join", &["a", "b", "c"]),
2043        ];
2044        let executor = MockExecutor::new()
2045            .with_response("a", "ra")
2046            .with_response("b", "rb")
2047            .with_response("c", "rc")
2048            .with_response("join", "joined");
2049
2050        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2051        assert_eq!(result.status, WorkflowRunStatus::Completed);
2052        assert_eq!(result.step_results["join"].response, "joined");
2053        // a, b, c should have run in the same wave (first trace entry)
2054        assert_eq!(result.execution_trace.len(), 2);
2055        let first_wave = &result.execution_trace[0].steps;
2056        assert!(first_wave.contains(&"a".to_string()));
2057        assert!(first_wave.contains(&"b".to_string()));
2058        assert!(first_wave.contains(&"c".to_string()));
2059    }
2060
2061    #[tokio::test]
2062    async fn dag_diamond_execution() {
2063        let steps = vec![
2064            dag_step("root", &[]),
2065            dag_step("left", &["root"]),
2066            dag_step("right", &["root"]),
2067            dag_step("join", &["left", "right"]),
2068        ];
2069        let executor = MockExecutor::new()
2070            .with_response("root", "root_out")
2071            .with_response("left", "left_out")
2072            .with_response("right", "right_out")
2073            .with_response("join", "joined");
2074
2075        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2076        assert_eq!(result.status, WorkflowRunStatus::Completed);
2077        assert_eq!(result.step_results.len(), 4);
2078        // left and right should be in same wave
2079        let wave2 = &result.execution_trace[1].steps;
2080        assert!(wave2.contains(&"left".to_string()));
2081        assert!(wave2.contains(&"right".to_string()));
2082    }
2083
2084    #[tokio::test]
2085    async fn dag_parallel_actually_concurrent() {
2086        // Steps a, b, c have no deps, each takes 50ms.
2087        // If run sequentially: ~150ms. If parallel: ~50ms.
2088        let steps = vec![dag_step("a", &[]), dag_step("b", &[]), dag_step("c", &[])];
2089        let executor = TimedMockExecutor { delay_ms: 50 };
2090
2091        let start = Instant::now();
2092        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2093        let elapsed = start.elapsed();
2094
2095        assert_eq!(result.status, WorkflowRunStatus::Completed);
2096        assert_eq!(result.step_results.len(), 3);
2097        // Should complete in roughly 50ms (parallel), not 150ms (sequential)
2098        // Use generous bound to avoid flakiness
2099        assert!(
2100            elapsed.as_millis() < 120,
2101            "parallel execution took {}ms, expected ~50ms",
2102            elapsed.as_millis()
2103        );
2104    }
2105
2106    #[tokio::test]
2107    async fn dag_condition_if_success() {
2108        let mut steps = vec![dag_step("a", &[]), dag_step("b", &["a"])];
2109        steps[1].condition = Some(Condition::IfSuccess {
2110            step: "a".to_string(),
2111        });
2112        let executor = MockExecutor::new()
2113            .with_response("a", "ok")
2114            .with_response("b", "b_ran");
2115
2116        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2117        assert_eq!(result.step_results["b"].status, StepStatus::Completed);
2118        assert_eq!(result.step_results["b"].response, "b_ran");
2119    }
2120
2121    #[tokio::test]
2122    async fn dag_condition_skips_step() {
2123        let mut steps = vec![dag_step("a", &[]), dag_step("b", &["a"])];
2124        steps[1].condition = Some(Condition::IfFailure {
2125            step: "a".to_string(),
2126        });
2127        let executor = MockExecutor::new()
2128            .with_response("a", "ok")
2129            .with_response("b", "should_not_run");
2130
2131        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2132        assert_eq!(result.step_results["b"].status, StepStatus::Skipped);
2133    }
2134
2135    #[tokio::test]
2136    async fn dag_condition_if_output() {
2137        let mut steps = vec![dag_step("a", &[]), dag_step("b", &["a"])];
2138        steps[1].condition = Some(Condition::IfOutput {
2139            step: "a".to_string(),
2140            contains: "magic".to_string(),
2141        });
2142        let executor = MockExecutor::new()
2143            .with_response("a", "this has magic inside")
2144            .with_response("b", "b_ran");
2145
2146        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2147        assert_eq!(result.step_results["b"].status, StepStatus::Completed);
2148    }
2149
2150    #[tokio::test]
2151    async fn dag_condition_if_output_no_match() {
2152        let mut steps = vec![dag_step("a", &[]), dag_step("b", &["a"])];
2153        steps[1].condition = Some(Condition::IfOutput {
2154            step: "a".to_string(),
2155            contains: "magic".to_string(),
2156        });
2157        let executor = MockExecutor::new()
2158            .with_response("a", "no special word here")
2159            .with_response("b", "should_not_run");
2160
2161        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2162        assert_eq!(result.step_results["b"].status, StepStatus::Skipped);
2163    }
2164
2165    #[tokio::test]
2166    async fn dag_foreach_loop() {
2167        let mut steps = vec![dag_step("source", &[]), dag_step("process", &["source"])];
2168        steps[0].prompt_template = "{{input}}".to_string();
2169        steps[1].loop_config = Some(LoopConfig::ForEach {
2170            source_step: "source".to_string(),
2171            max_iterations: 100,
2172        });
2173        steps[1].prompt_template = "process item: {{loop.item}}".to_string();
2174
2175        let executor =
2176            MockExecutor::new().with_response("source", r#"["apple", "banana", "cherry"]"#);
2177
2178        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2179        assert_eq!(result.status, WorkflowRunStatus::Completed);
2180        let process_result = &result.step_results["process"];
2181        // Should have processed all 3 items
2182        assert!(
2183            process_result.response.contains("process item: apple"),
2184            "response: {}",
2185            process_result.response
2186        );
2187    }
2188
2189    #[tokio::test]
2190    async fn dag_while_loop() {
2191        let mut steps = vec![dag_step("counter", &[])];
2192        steps[0].loop_config = Some(LoopConfig::While {
2193            condition: Condition::Expression("true".to_string()),
2194            max_iterations: 5,
2195        });
2196
2197        let executor = MockExecutor::new().with_response("counter", "tick");
2198
2199        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2200        assert_eq!(result.status, WorkflowRunStatus::Completed);
2201        let counter_result = &result.step_results["counter"];
2202        // Should have 5 "tick" entries
2203        let ticks: Vec<&str> = counter_result.response.split('\n').collect();
2204        assert_eq!(ticks.len(), 5);
2205    }
2206
2207    #[tokio::test]
2208    async fn dag_retry_loop_succeeds_eventually() {
2209        let mut steps = vec![dag_step("flaky", &[])];
2210        steps[0].loop_config = Some(LoopConfig::Retry {
2211            max_retries: 3,
2212            backoff_ms: 1, // minimal backoff for tests
2213            backoff_multiplier: 1.0,
2214        });
2215
2216        // Fails first 2 times, succeeds on 3rd
2217        let executor = FailNTimesMockExecutor::new(2);
2218
2219        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2220        assert_eq!(result.status, WorkflowRunStatus::Completed);
2221        assert!(result.step_results["flaky"].error.is_none());
2222        assert!(
2223            result.step_results["flaky"]
2224                .response
2225                .contains("success on attempt 3")
2226        );
2227    }
2228
2229    #[tokio::test]
2230    async fn dag_retry_loop_exhausts_retries() {
2231        let mut steps = vec![dag_step("flaky", &[])];
2232        steps[0].loop_config = Some(LoopConfig::Retry {
2233            max_retries: 2,
2234            backoff_ms: 1,
2235            backoff_multiplier: 1.0,
2236        });
2237
2238        // Fails all attempts (need 4 failures to exhaust 1 attempt + 2 retries + 1 more)
2239        let executor = FailNTimesMockExecutor::new(10);
2240
2241        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2242        assert!(result.step_results["flaky"].error.is_some());
2243    }
2244
2245    #[tokio::test]
2246    async fn dag_step_failure_with_skip() {
2247        let mut steps = vec![
2248            dag_step("a", &[]),
2249            dag_step("b", &["a"]),
2250            dag_step("c", &["b"]),
2251        ];
2252        steps[1].on_error = OnError::SkipStep;
2253
2254        let executor = MockExecutor::new()
2255            .with_response("a", "ok")
2256            .with_failure("b", "b failed")
2257            .with_response("c", "c_ran");
2258
2259        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2260        // b failed but was skipped, c should still run
2261        // since b is in step_results (as skipped/failed), c's deps are met
2262        assert!(result.step_results.contains_key("c"));
2263    }
2264
2265    #[tokio::test]
2266    async fn dag_step_failure_cascades() {
2267        let steps = vec![
2268            dag_step("a", &[]),
2269            dag_step("b", &["a"]),
2270            dag_step("c", &["b"]),
2271        ];
2272
2273        let executor = MockExecutor::new()
2274            .with_response("a", "ok")
2275            .with_failure("b", "b failed")
2276            .with_response("c", "should_not_run");
2277
2278        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2279        assert!(result.step_results["b"].error.is_some());
2280        // c should be cancelled since b failed (FailWorkflow is default)
2281        assert_eq!(result.step_results["c"].status, StepStatus::Cancelled);
2282    }
2283
2284    #[tokio::test]
2285    async fn dag_empty_workflow() {
2286        let executor = MockExecutor::new();
2287        let result = execute_dag("test", &[], "input", Arc::new(executor)).await;
2288        assert_eq!(result.status, WorkflowRunStatus::Failed);
2289        assert!(!result.validation_errors.is_empty());
2290    }
2291
2292    #[tokio::test]
2293    async fn dag_single_step() {
2294        let steps = vec![dag_step("only", &[])];
2295        let executor = MockExecutor::new().with_response("only", "done");
2296
2297        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2298        assert_eq!(result.status, WorkflowRunStatus::Completed);
2299        assert_eq!(result.step_results.len(), 1);
2300        assert_eq!(result.step_results["only"].response, "done");
2301    }
2302
2303    #[tokio::test]
2304    async fn dag_all_steps_fail() {
2305        let steps = vec![dag_step("a", &[]), dag_step("b", &[])];
2306
2307        let executor = MockExecutor::new()
2308            .with_failure("a", "a failed")
2309            .with_failure("b", "b failed");
2310
2311        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2312        assert_eq!(result.status, WorkflowRunStatus::Failed);
2313        assert!(!result.dead_letters.is_empty());
2314    }
2315
2316    #[tokio::test]
2317    async fn dag_partial_completion() {
2318        let steps = vec![dag_step("good", &[]), dag_step("bad", &[])];
2319
2320        let executor = MockExecutor::new()
2321            .with_response("good", "ok")
2322            .with_failure("bad", "nope");
2323
2324        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2325        assert_eq!(result.status, WorkflowRunStatus::PartiallyCompleted);
2326    }
2327
2328    #[tokio::test]
2329    async fn dag_validation_rejects_cycle() {
2330        let steps = vec![dag_step("a", &["b"]), dag_step("b", &["a"])];
2331        let executor = MockExecutor::new();
2332        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2333        assert_eq!(result.status, WorkflowRunStatus::Failed);
2334        assert!(!result.validation_errors.is_empty());
2335    }
2336
2337    #[tokio::test]
2338    async fn dag_all_steps_skipped() {
2339        let mut steps = vec![dag_step("a", &[]), dag_step("b", &[])];
2340        steps[0].condition = Some(Condition::Expression("false".to_string()));
2341        steps[1].condition = Some(Condition::Expression("false".to_string()));
2342
2343        let executor = MockExecutor::new();
2344        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2345        // All skipped = no failures, no successes -> Completed
2346        assert_eq!(result.status, WorkflowRunStatus::Completed);
2347        assert_eq!(result.step_results["a"].status, StepStatus::Skipped);
2348        assert_eq!(result.step_results["b"].status, StepStatus::Skipped);
2349    }
2350
2351    // ---- DAG variable substitution tests ----
2352
2353    #[test]
2354    fn dag_variables_step_output() {
2355        let mut results = HashMap::new();
2356        results.insert(
2357            "analyze".to_string(),
2358            StepResult {
2359                step_name: "analyze".to_string(),
2360                response: "found 3 bugs".to_string(),
2361                tokens_used: 100,
2362                duration_ms: 500,
2363                error: None,
2364                status: StepStatus::Completed,
2365                started_at: None,
2366                completed_at: None,
2367            },
2368        );
2369
2370        let expanded = expand_dag_variables(
2371            "Result: {{analyze.output}}",
2372            "input",
2373            "next",
2374            &results,
2375            None,
2376        );
2377        assert_eq!(expanded, "Result: found 3 bugs");
2378    }
2379
2380    #[test]
2381    fn dag_variables_step_status() {
2382        let mut results = HashMap::new();
2383        results.insert(
2384            "build".to_string(),
2385            StepResult {
2386                step_name: "build".to_string(),
2387                response: "ok".to_string(),
2388                tokens_used: 50,
2389                duration_ms: 300,
2390                error: None,
2391                status: StepStatus::Completed,
2392                started_at: None,
2393                completed_at: None,
2394            },
2395        );
2396
2397        let expanded = expand_dag_variables(
2398            "Build status: {{build.status}}",
2399            "input",
2400            "deploy",
2401            &results,
2402            None,
2403        );
2404        assert_eq!(expanded, "Build status: completed");
2405    }
2406
/// `{{<step>.duration_ms}}` is replaced by the step's recorded duration.
#[test]
fn dag_variables_step_duration() {
    let fetch = StepResult {
        step_name: String::from("fetch"),
        response: String::from("data"),
        tokens_used: 10,
        duration_ms: 1234,
        error: None,
        status: StepStatus::Completed,
        started_at: None,
        completed_at: None,
    };
    let results = HashMap::from([(String::from("fetch"), fetch)]);

    let rendered = expand_dag_variables(
        "Fetch took {{fetch.duration_ms}}ms",
        "input",
        "next",
        &results,
        None,
    );

    assert_eq!(rendered, "Fetch took 1234ms");
}
2433
/// `{{loop.index}}` and `{{loop.item}}` are filled in from the supplied loop state.
#[test]
fn dag_variables_loop_state() {
    let mut state = LoopState::new();
    state.item = Some(String::from("banana"));
    state.index = 2;
    // No prior step results are needed for loop variables.
    let no_results = HashMap::new();

    let rendered = expand_dag_variables(
        "Item {{loop.index}}: {{loop.item}}",
        "input",
        "process",
        &no_results,
        Some(&state),
    );

    assert_eq!(rendered, "Item 2: banana");
}
2450
/// A dotted path after `.output` selects a nested field of a JSON response.
#[test]
fn dag_variables_json_path() {
    let api = StepResult {
        step_name: String::from("api"),
        response: String::from(r#"{"user": {"name": "Alice", "age": 30}}"#),
        tokens_used: 10,
        duration_ms: 100,
        error: None,
        status: StepStatus::Completed,
        started_at: None,
        completed_at: None,
    };
    let results = HashMap::from([(String::from("api"), api)]);

    let rendered = expand_dag_variables(
        "Name: {{api.output.user.name}}",
        "input",
        "next",
        &results,
        None,
    );

    assert_eq!(rendered, "Name: Alice");
}
2477
/// The `| uppercase` transform upper-cases the referenced step output.
#[test]
fn dag_variables_transform_uppercase() {
    let greet = StepResult {
        step_name: String::from("greet"),
        response: String::from("hello world"),
        tokens_used: 10,
        duration_ms: 50,
        error: None,
        status: StepStatus::Completed,
        started_at: None,
        completed_at: None,
    };
    let results = HashMap::from([(String::from("greet"), greet)]);

    let rendered = expand_dag_variables(
        "{{greet.output | uppercase}}",
        "input",
        "next",
        &results,
        None,
    );

    assert_eq!(rendered, "HELLO WORLD");
}
2504
/// The `| lowercase` transform lower-cases the referenced step output.
#[test]
fn dag_variables_transform_lowercase() {
    let shout = StepResult {
        step_name: String::from("shout"),
        response: String::from("LOUD NOISE"),
        tokens_used: 10,
        duration_ms: 50,
        error: None,
        status: StepStatus::Completed,
        started_at: None,
        completed_at: None,
    };
    let results = HashMap::from([(String::from("shout"), shout)]);

    let rendered = expand_dag_variables(
        "{{shout.output | lowercase}}",
        "input",
        "next",
        &results,
        None,
    );

    assert_eq!(rendered, "loud noise");
}
2531
/// The `| json_extract "<path>"` transform pulls one field out of a JSON response.
#[test]
fn dag_variables_transform_json_extract() {
    let data = StepResult {
        step_name: String::from("data"),
        response: String::from(r#"{"key": "value123"}"#),
        tokens_used: 10,
        duration_ms: 50,
        error: None,
        status: StepStatus::Completed,
        started_at: None,
        completed_at: None,
    };
    let results = HashMap::from([(String::from("data"), data)]);

    // Raw string so the quoted JSON path needs no escaping.
    let template = r#"{{data.output | json_extract "$.key"}}"#;
    let rendered = expand_dag_variables(template, "input", "next", &results, None);

    assert_eq!(rendered, "value123");
}
2558
/// A single top-level key yields its string value without quotes.
#[test]
fn json_path_extract_simple() {
    let document = r#"{"name": "Bob"}"#;
    assert_eq!(json_path_extract(document, "name"), "Bob");
}
2564
/// A dotted path walks nested objects; a numeric leaf is rendered as text.
#[test]
fn json_path_extract_nested() {
    let document = r#"{"a": {"b": {"c": 42}}}"#;
    assert_eq!(json_path_extract(document, "a.b.c"), "42");
}
2570
/// A leading `$.` on the path is accepted and resolves the same key.
#[test]
fn json_path_extract_dollar_prefix() {
    let document = r#"{"key": "val"}"#;
    assert_eq!(json_path_extract(document, "$.key"), "val");
}
2576
/// A path that does not exist in the document yields an empty string.
#[test]
fn json_path_extract_missing_key() {
    let document = r#"{"key": "val"}"#;
    assert_eq!(json_path_extract(document, "missing"), "");
}
2582
/// Input that is not valid JSON is returned unchanged.
#[test]
fn json_path_extract_invalid_json() {
    let not_json = "not json";
    assert_eq!(json_path_extract(not_json, "key"), "not json");
}
2588
2589    // ---- Step status tests ----
2590
/// Every `StepStatus` variant renders as its lowercase name.
#[test]
fn step_status_display() {
    let expected = [
        (StepStatus::Pending, "pending"),
        (StepStatus::Running, "running"),
        (StepStatus::Completed, "completed"),
        (StepStatus::Failed, "failed"),
        (StepStatus::Skipped, "skipped"),
        (StepStatus::Cancelled, "cancelled"),
    ];

    for (status, label) in expected.iter() {
        assert_eq!(status.to_string(), *label);
    }
}
2600
2601    // ---- On error variant tests ----
2602
/// `OnError::Fallback` survives a JSON round-trip with its step name intact.
#[test]
fn on_error_fallback_serialization() {
    let original = OnError::Fallback {
        step: String::from("backup"),
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: OnError = serde_json::from_str(&encoded).expect("deserialize");

    match decoded {
        OnError::Fallback { step } => assert!(step == "backup"),
        _ => panic!("expected OnError::Fallback after round-trip"),
    }
}
2612
/// `OnError::CatchAndContinue` survives a JSON round-trip with its handler name intact.
#[test]
fn on_error_catch_and_continue_serialization() {
    let original = OnError::CatchAndContinue {
        error_handler: String::from("handler"),
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: OnError = serde_json::from_str(&encoded).expect("deserialize");

    match decoded {
        OnError::CatchAndContinue { error_handler } => assert!(error_handler == "handler"),
        _ => panic!("expected OnError::CatchAndContinue after round-trip"),
    }
}
2624
/// `OnError::CircuitBreaker` survives a JSON round-trip with both thresholds intact.
#[test]
fn on_error_circuit_breaker_serialization() {
    let original = OnError::CircuitBreaker {
        max_failures: 5,
        cooldown_secs: 60,
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: OnError = serde_json::from_str(&encoded).expect("deserialize");

    if let OnError::CircuitBreaker {
        max_failures,
        cooldown_secs,
    } = decoded
    {
        assert!(max_failures == 5);
        assert!(cooldown_secs == 60);
    } else {
        panic!("expected OnError::CircuitBreaker after round-trip");
    }
}
2641
2642    // ---- Circuit breaker tests ----
2643
/// A freshly defaulted breaker reports itself as closed.
#[test]
fn circuit_breaker_default_closed() {
    let breaker = CircuitBreakerState::default();
    let open = breaker.is_open(3, 60);
    assert!(!open);
}
2649
/// Three recorded failures open a breaker whose failure limit is three.
#[test]
fn circuit_breaker_opens_after_max_failures() {
    let mut breaker = CircuitBreakerState::default();
    for _ in 0..3 {
        breaker.record_failure();
    }
    assert!(breaker.is_open(3, 60));
}
2658
/// A success after two failures zeroes the failure count and keeps the breaker closed.
#[test]
fn circuit_breaker_resets_on_success() {
    let mut breaker = CircuitBreakerState::default();
    for _ in 0..2 {
        breaker.record_failure();
    }
    breaker.record_success();

    assert!(!breaker.is_open(3, 60));
    assert_eq!(breaker.consecutive_failures, 0);
}
2668
2669    // ---- DAG workflow registration tests ----
2670
/// Registering an acyclic two-step DAG succeeds.
#[test]
fn register_dag_workflow_valid() {
    let workflow = DagWorkflow {
        id: WorkflowId::new(),
        name: String::from("test-dag"),
        steps: vec![dag_step("a", &[]), dag_step("b", &["a"])],
    };
    let engine = WorkflowEngine::new();

    assert!(engine.register_dag_workflow(workflow).is_ok());
}
2682
/// Registering a DAG whose two steps depend on each other is rejected.
#[test]
fn register_dag_workflow_with_cycle_fails() {
    let cyclic = DagWorkflow {
        id: WorkflowId::new(),
        name: String::from("bad-dag"),
        steps: vec![dag_step("a", &["b"]), dag_step("b", &["a"])],
    };
    let engine = WorkflowEngine::new();

    assert!(engine.register_dag_workflow(cyclic).is_err());
}
2694
/// After one registration the engine lists exactly one DAG workflow.
#[test]
fn list_dag_workflows() {
    let engine = WorkflowEngine::new();
    engine
        .register_dag_workflow(DagWorkflow {
            id: WorkflowId::new(),
            name: String::from("dag1"),
            steps: vec![dag_step("a", &[])],
        })
        .expect("should register");

    let listed = engine.list_dag_workflows();
    assert_eq!(listed.len(), 1);
}
2706
/// A registered DAG workflow can be fetched back by its id.
#[test]
fn get_dag_workflow() {
    let id = WorkflowId::new();
    let engine = WorkflowEngine::new();
    engine
        .register_dag_workflow(DagWorkflow {
            id,
            name: String::from("dag1"),
            steps: vec![dag_step("a", &[])],
        })
        .expect("should register");

    let fetched = engine.get_dag_workflow(&id).expect("should exist");
    assert_eq!(fetched.name, "dag1");
}
2720
/// Looking up an id that was never registered yields `None`.
#[test]
fn get_nonexistent_dag_workflow() {
    let unknown_id = WorkflowId::new();
    let lookup = WorkflowEngine::new().get_dag_workflow(&unknown_id);
    assert!(lookup.is_none());
}
2726
2727    // ---- Dead letter queue tests ----
2728
2729    #[tokio::test]
2730    async fn dag_dead_letters_populated_on_failure() {
2731        let steps = vec![dag_step("a", &[])];
2732        let executor = MockExecutor::new().with_failure("a", "catastrophic failure");
2733
2734        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2735        assert!(!result.dead_letters.is_empty());
2736        assert_eq!(result.dead_letters[0].step_name, "a");
2737        assert_eq!(result.dead_letters[0].error, "catastrophic failure");
2738    }
2739
2740    // ---- Execution trace tests ----
2741
2742    #[tokio::test]
2743    async fn dag_execution_trace_records_waves() {
2744        let steps = vec![
2745            dag_step("a", &[]),
2746            dag_step("b", &["a"]),
2747            dag_step("c", &["b"]),
2748        ];
2749        let executor = MockExecutor::new()
2750            .with_response("a", "ok")
2751            .with_response("b", "ok")
2752            .with_response("c", "ok");
2753
2754        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2755        // 3 waves for a linear chain
2756        assert_eq!(result.execution_trace.len(), 3);
2757        assert_eq!(result.execution_trace[0].steps, vec!["a"]);
2758        assert_eq!(result.execution_trace[1].steps, vec!["b"]);
2759        assert_eq!(result.execution_trace[2].steps, vec!["c"]);
2760    }
2761
2762    // ---- DagWorkflowStep helper tests ----
2763
/// `fallback_step` is `None` for a plain step and names the target step for
/// both `Fallback` and `CatchAndContinue`.
#[test]
fn dag_step_fallback_step_extraction() {
    let mut step = dag_step("test", &[]);
    assert!(step.fallback_step().is_none());

    step.on_error = OnError::Fallback {
        step: String::from("backup"),
    };
    assert_eq!(step.fallback_step().as_deref(), Some("backup"));

    step.on_error = OnError::CatchAndContinue {
        error_handler: String::from("handler"),
    };
    assert_eq!(step.fallback_step().as_deref(), Some("handler"));
}
2779
2780    // ---- Serialization tests for new types ----
2781
/// A `DagWorkflow` keeps its name and step count across a JSON round-trip.
#[test]
fn dag_workflow_serialization_roundtrip() {
    let original = DagWorkflow {
        id: WorkflowId::new(),
        name: String::from("test-dag"),
        steps: vec![dag_step("a", &[]), dag_step("b", &["a"])],
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: DagWorkflow = serde_json::from_str(&encoded).expect("deserialize");

    assert_eq!(decoded.name, "test-dag");
    assert_eq!(decoded.steps.len(), 2);
}
2794
/// A step's condition and else-step are preserved across a JSON round-trip.
#[test]
fn dag_workflow_step_with_condition_serialization() {
    let mut original = dag_step("test", &["dep1"]);
    original.else_step = Some(String::from("fallback"));
    original.condition = Some(Condition::IfSuccess {
        step: String::from("dep1"),
    });

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: DagWorkflowStep = serde_json::from_str(&encoded).expect("deserialize");

    assert!(decoded.condition.is_some());
    assert_eq!(decoded.else_step.as_deref(), Some("fallback"));
}
2807
/// A `DeadLetterEntry` keeps its step name and error across a JSON round-trip.
#[test]
fn dead_letter_entry_serialization() {
    let original = DeadLetterEntry {
        step_name: String::from("failed_step"),
        error: String::from("boom"),
        input: String::from("test input"),
        failed_at: Utc::now(),
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: DeadLetterEntry = serde_json::from_str(&encoded).expect("deserialize");

    assert_eq!(decoded.step_name, "failed_step");
    assert_eq!(decoded.error, "boom");
}
2821
/// An `ExecutionTraceEntry` keeps its step list across a JSON round-trip.
#[test]
fn execution_trace_entry_serialization() {
    let wave_steps: Vec<String> = ["a", "b"].iter().map(|name| name.to_string()).collect();
    let original = ExecutionTraceEntry {
        steps: wave_steps,
        started_at: Utc::now(),
        completed_at: Some(Utc::now()),
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: ExecutionTraceEntry = serde_json::from_str(&encoded).expect("deserialize");

    assert_eq!(decoded.steps.len(), 2);
}
2833
/// A `WorkflowRun` keeps its `PartiallyCompleted` status and its dead letters
/// across a JSON round-trip.
#[test]
fn workflow_run_with_new_fields_serialization() {
    let dead_letter = DeadLetterEntry {
        step_name: String::from("x"),
        error: String::from("err"),
        input: String::from("in"),
        failed_at: Utc::now(),
    };
    let original = WorkflowRun {
        id: WorkflowRunId::new(),
        workflow_id: WorkflowId::new(),
        status: WorkflowRunStatus::PartiallyCompleted,
        step_results: Vec::new(),
        started_at: Utc::now(),
        completed_at: None,
        dead_letters: vec![dead_letter],
        execution_trace: Vec::new(),
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: WorkflowRun = serde_json::from_str(&encoded).expect("deserialize");

    assert_eq!(decoded.status, WorkflowRunStatus::PartiallyCompleted);
    assert_eq!(decoded.dead_letters.len(), 1);
}
2856
/// A `StepResult` keeps its status and start timestamp across a JSON round-trip.
#[test]
fn step_result_with_new_fields() {
    let original = StepResult {
        step_name: String::from("test"),
        response: String::from("ok"),
        tokens_used: 10,
        duration_ms: 100,
        error: None,
        status: StepStatus::Completed,
        started_at: Some(Utc::now()),
        completed_at: Some(Utc::now()),
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: StepResult = serde_json::from_str(&encoded).expect("deserialize");

    assert_eq!(decoded.status, StepStatus::Completed);
    assert!(decoded.started_at.is_some());
}
2874
2875    // ---- Fallback error handling test ----
2876
2877    #[tokio::test]
2878    async fn dag_fallback_on_error() {
2879        let mut steps = vec![dag_step("main", &[]), dag_step("backup", &[])];
2880        steps[0].on_error = OnError::Fallback {
2881            step: "backup".to_string(),
2882        };
2883
2884        let executor = MockExecutor::new()
2885            .with_failure("main", "main failed")
2886            .with_response("backup", "backup result");
2887
2888        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2889        // The main step should have used backup's result
2890        // In our implementation, the step result gets the backup response
2891        let main_result = &result.step_results["main"];
2892        assert_eq!(main_result.response, "backup result");
2893    }
2894
2895    #[tokio::test]
2896    async fn dag_catch_and_continue() {
2897        let mut steps = vec![
2898            dag_step("risky", &[]),
2899            dag_step("handler", &[]),
2900            dag_step("next", &["risky"]),
2901        ];
2902        steps[0].on_error = OnError::CatchAndContinue {
2903            error_handler: "handler".to_string(),
2904        };
2905
2906        let executor = MockExecutor::new()
2907            .with_failure("risky", "oops")
2908            .with_response("handler", "handled")
2909            .with_response("next", "continued");
2910
2911        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2912        // "next" should have run because CatchAndContinue removes the failure
2913        assert!(result.step_results.contains_key("next"));
2914    }
2915
2916    // ---- Parallel execution proof tests ----
2917
2918    /// A timed executor that records start/end times to prove concurrency.
2919    struct ConcurrencyProofExecutor {
2920        delay_ms: u64,
2921        /// Track (step_name, start_instant, end_instant) for each execution.
2922        timings: Arc<tokio::sync::Mutex<Vec<(String, Instant, Instant)>>>,
2923    }
2924
2925    impl ConcurrencyProofExecutor {
2926        fn new(delay_ms: u64) -> Self {
2927            Self {
2928                delay_ms,
2929                timings: Arc::new(tokio::sync::Mutex::new(Vec::new())),
2930            }
2931        }
2932    }
2933
2934    #[async_trait::async_trait]
2935    impl StepExecutor for ConcurrencyProofExecutor {
2936        async fn execute(
2937            &self,
2938            step: &DagWorkflowStep,
2939            _input: &str,
2940            _step_results: &HashMap<String, StepResult>,
2941            _loop_state: Option<&LoopState>,
2942        ) -> Result<StepResult, String> {
2943            let start = Instant::now();
2944            tokio::time::sleep(Duration::from_millis(self.delay_ms)).await;
2945            let end = Instant::now();
2946
2947            self.timings
2948                .lock()
2949                .await
2950                .push((step.name.clone(), start, end));
2951
2952            Ok(StepResult {
2953                step_name: step.name.clone(),
2954                response: format!("done-{}", step.name),
2955                tokens_used: 10,
2956                duration_ms: self.delay_ms,
2957                error: None,
2958                status: StepStatus::Completed,
2959                started_at: Some(Utc::now()),
2960                completed_at: Some(Utc::now()),
2961            })
2962        }
2963    }
2964
2965    /// Prove 3 independent steps with 50ms sleep each complete in ~50-70ms (not 150ms).
2966    #[tokio::test]
2967    async fn dag_three_independent_steps_parallel_timing() {
2968        let steps = vec![dag_step("x", &[]), dag_step("y", &[]), dag_step("z", &[])];
2969        let executor = ConcurrencyProofExecutor::new(50);
2970        let timings = Arc::clone(&executor.timings);
2971
2972        let start = Instant::now();
2973        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
2974        let elapsed = start.elapsed();
2975
2976        assert_eq!(result.status, WorkflowRunStatus::Completed);
2977        assert_eq!(result.step_results.len(), 3);
2978        // Parallel: should finish in ~50ms, not 150ms
2979        assert!(
2980            elapsed.as_millis() < 100,
2981            "3 independent 50ms steps took {}ms, should be ~50ms for parallel execution",
2982            elapsed.as_millis()
2983        );
2984
2985        // Verify that the steps overlapped in time
2986        let recorded = timings.lock().await;
2987        assert_eq!(recorded.len(), 3);
2988        // All should have started within a few ms of each other
2989        let starts: Vec<_> = recorded.iter().map(|(_, s, _)| *s).collect();
2990        let earliest = starts.iter().min().copied().expect("should have starts");
2991        for s in &starts {
2992            let diff = s.duration_since(earliest).as_millis();
2993            assert!(
2994                diff < 20,
2995                "start time spread {}ms too large for parallel execution",
2996                diff
2997            );
2998        }
2999    }
3000
3001    /// Fan-out: step A -> steps B,C,D in parallel -> step E waits for all.
3002    #[tokio::test]
3003    async fn dag_fan_out_fan_in_timing() {
3004        let steps = vec![
3005            dag_step("a", &[]),
3006            dag_step("b", &["a"]),
3007            dag_step("c", &["a"]),
3008            dag_step("d", &["a"]),
3009            dag_step("e", &["b", "c", "d"]),
3010        ];
3011        let executor = TimedMockExecutor { delay_ms: 30 };
3012
3013        let start = Instant::now();
3014        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3015        let elapsed = start.elapsed();
3016
3017        assert_eq!(result.status, WorkflowRunStatus::Completed);
3018        assert_eq!(result.step_results.len(), 5);
3019
3020        // 3 waves: A (30ms) + B,C,D parallel (30ms) + E (30ms) = ~90ms
3021        // Sequential would be 5*30 = 150ms
3022        assert!(
3023            elapsed.as_millis() < 130,
3024            "fan-out/fan-in took {}ms, expected ~90ms",
3025            elapsed.as_millis()
3026        );
3027
3028        // Verify execution trace shows 3 waves
3029        assert_eq!(result.execution_trace.len(), 3);
3030        // Wave 2 should have B, C, D
3031        let wave2 = &result.execution_trace[1].steps;
3032        assert_eq!(wave2.len(), 3);
3033    }
3034
3035    /// Fan-in: multiple parallel roots feed into one join step.
3036    #[tokio::test]
3037    async fn dag_fan_in_parallel_roots() {
3038        let steps = vec![
3039            dag_step("r1", &[]),
3040            dag_step("r2", &[]),
3041            dag_step("r3", &[]),
3042            dag_step("join", &["r1", "r2", "r3"]),
3043        ];
3044        let executor = MockExecutor::new()
3045            .with_response("r1", "out1")
3046            .with_response("r2", "out2")
3047            .with_response("r3", "out3")
3048            .with_response("join", "merged");
3049
3050        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3051        assert_eq!(result.status, WorkflowRunStatus::Completed);
3052        assert_eq!(result.step_results["join"].response, "merged");
3053        // r1, r2, r3 in wave 1, join in wave 2
3054        assert_eq!(result.execution_trace.len(), 2);
3055        assert_eq!(result.execution_trace[0].steps.len(), 3);
3056    }
3057
3058    /// Diamond dependency: A -> B,C -> D (D depends on both B and C).
3059    #[tokio::test]
3060    async fn dag_diamond_dependency_parallel() {
3061        let steps = vec![
3062            dag_step("a", &[]),
3063            dag_step("b", &["a"]),
3064            dag_step("c", &["a"]),
3065            dag_step("d", &["b", "c"]),
3066        ];
3067        let executor = TimedMockExecutor { delay_ms: 30 };
3068
3069        let start = Instant::now();
3070        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3071        let elapsed = start.elapsed();
3072
3073        assert_eq!(result.status, WorkflowRunStatus::Completed);
3074        // 3 waves: A, B+C parallel, D
3075        assert_eq!(result.execution_trace.len(), 3);
3076        // B and C should be in the same wave
3077        let wave2 = &result.execution_trace[1].steps;
3078        assert!(wave2.contains(&"b".to_string()));
3079        assert!(wave2.contains(&"c".to_string()));
3080        // Total should be ~90ms (3 waves * 30ms), not 120ms (4 sequential)
3081        assert!(
3082            elapsed.as_millis() < 120,
3083            "diamond took {}ms, expected ~90ms",
3084            elapsed.as_millis()
3085        );
3086    }
3087
3088    /// Conditional skipping in a DAG.
3089    #[tokio::test]
3090    async fn dag_conditional_skip_in_dag() {
3091        let mut steps = vec![
3092            dag_step("check", &[]),
3093            dag_step("true_branch", &["check"]),
3094            dag_step("false_branch", &["check"]),
3095        ];
3096        // true_branch runs only if check succeeds (it will)
3097        steps[1].condition = Some(Condition::IfSuccess {
3098            step: "check".to_string(),
3099        });
3100        // false_branch runs only if check fails (it won't)
3101        steps[2].condition = Some(Condition::IfFailure {
3102            step: "check".to_string(),
3103        });
3104
3105        let executor = MockExecutor::new()
3106            .with_response("check", "all good")
3107            .with_response("true_branch", "ran")
3108            .with_response("false_branch", "should_not_run");
3109
3110        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3111        assert_eq!(
3112            result.step_results["true_branch"].status,
3113            StepStatus::Completed
3114        );
3115        assert_eq!(
3116            result.step_results["false_branch"].status,
3117            StepStatus::Skipped
3118        );
3119    }
3120
3121    /// Loop execution within a DAG step (ForEach).
3122    #[tokio::test]
3123    async fn dag_loop_foreach_within_dag() {
3124        let mut steps = vec![
3125            dag_step("data", &[]),
3126            dag_step("process", &["data"]),
3127            dag_step("summary", &["process"]),
3128        ];
3129        steps[1].loop_config = Some(LoopConfig::ForEach {
3130            source_step: "data".to_string(),
3131            max_iterations: 10,
3132        });
3133        steps[1].prompt_template = "process: {{loop.item}}".to_string();
3134
3135        let executor = MockExecutor::new()
3136            .with_response("data", r#"["red", "green", "blue"]"#)
3137            .with_response("summary", "done");
3138
3139        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3140        assert_eq!(result.status, WorkflowRunStatus::Completed);
3141        let process_out = &result.step_results["process"].response;
3142        // Should contain output from all 3 loop iterations
3143        assert!(process_out.contains("process: red"));
3144        assert!(process_out.contains("process: green"));
3145        assert!(process_out.contains("process: blue"));
3146    }
3147
3148    /// Partial failure: one parallel branch fails, others succeed.
3149    #[tokio::test]
3150    async fn dag_partial_failure_parallel_branches() {
3151        let steps = vec![
3152            dag_step("root", &[]),
3153            dag_step("ok_branch", &["root"]),
3154            dag_step("fail_branch", &["root"]),
3155            dag_step("ok_branch2", &["root"]),
3156        ];
3157
3158        let executor = MockExecutor::new()
3159            .with_response("root", "start")
3160            .with_response("ok_branch", "success1")
3161            .with_failure("fail_branch", "branch failed")
3162            .with_response("ok_branch2", "success2");
3163
3164        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3165        assert_eq!(result.status, WorkflowRunStatus::PartiallyCompleted);
3166        assert_eq!(
3167            result.step_results["ok_branch"].status,
3168            StepStatus::Completed
3169        );
3170        assert_eq!(
3171            result.step_results["ok_branch2"].status,
3172            StepStatus::Completed
3173        );
3174        assert!(result.step_results["fail_branch"].error.is_some());
3175    }
3176
3177    /// Fallback step execution on failure.
3178    #[tokio::test]
3179    async fn dag_fallback_step_runs_on_failure() {
3180        let mut steps = vec![
3181            dag_step("primary", &[]),
3182            dag_step("fallback_handler", &[]),
3183            dag_step("downstream", &["primary"]),
3184        ];
3185        steps[0].on_error = OnError::Fallback {
3186            step: "fallback_handler".to_string(),
3187        };
3188
3189        let executor = MockExecutor::new()
3190            .with_failure("primary", "primary broke")
3191            .with_response("fallback_handler", "recovered via fallback")
3192            .with_response("downstream", "downstream ran");
3193
3194        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3195        // primary should have the fallback result
3196        let primary_result = &result.step_results["primary"];
3197        assert_eq!(primary_result.response, "recovered via fallback");
3198        // downstream should have run since fallback recovered
3199        assert!(result.step_results.contains_key("downstream"));
3200    }
3201
3202    /// Circuit breaker triggering after N failures.
3203    #[tokio::test]
3204    async fn dag_circuit_breaker_triggers() {
3205        let mut steps = vec![dag_step("cb_step", &[])];
3206        steps[0].on_error = OnError::CircuitBreaker {
3207            max_failures: 2,
3208            cooldown_secs: 300,
3209        };
3210
3211        // First run: fail twice to trip the breaker
3212        let executor1 = MockExecutor::new().with_failure("cb_step", "fail1");
3213        let result1 = execute_dag("test", &steps, "input", Arc::new(executor1)).await;
3214        assert!(result1.step_results["cb_step"].error.is_some());
3215
3216        // The circuit breaker state is per-run, so we test within a single run
3217        // with a step that has CircuitBreaker and fails. The breaker opens internally
3218        // after max_failures. Let's verify the circuit breaker state logic directly.
3219        let mut cb = CircuitBreakerState::default();
3220        cb.record_failure();
3221        assert!(!cb.is_open(2, 300), "should not be open after 1 failure");
3222        cb.record_failure();
3223        assert!(cb.is_open(2, 300), "should be open after 2 failures");
3224        // After cooldown, it should close — but since cooldown is 300s, it's still open
3225        assert!(cb.is_open(2, 300));
3226    }
3227
3228    /// Variable substitution works across parallel branches.
3229    #[tokio::test]
3230    async fn dag_variable_substitution_across_parallel_branches() {
3231        let mut steps = vec![
3232            dag_step("source_a", &[]),
3233            dag_step("source_b", &[]),
3234            dag_step("consumer", &["source_a", "source_b"]),
3235        ];
3236        steps[2].prompt_template = "A={{source_a.output}}, B={{source_b.output}}".to_string();
3237
3238        let executor = MockExecutor::new()
3239            .with_response("source_a", "value_from_a")
3240            .with_response("source_b", "value_from_b");
3241        // consumer doesn't have a fixed response, so it will use the expanded prompt
3242
3243        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3244        assert_eq!(result.status, WorkflowRunStatus::Completed);
3245        let consumer_out = &result.step_results["consumer"].response;
3246        assert!(
3247            consumer_out.contains("value_from_a"),
3248            "consumer should see source_a output, got: {consumer_out}"
3249        );
3250        assert!(
3251            consumer_out.contains("value_from_b"),
3252            "consumer should see source_b output, got: {consumer_out}"
3253        );
3254    }
3255
3256    /// Wide parallel fan-out with timing proof.
3257    #[tokio::test]
3258    async fn dag_wide_parallel_fan_out_timing() {
3259        // 10 independent steps each taking 30ms
3260        let steps: Vec<DagWorkflowStep> =
3261            (0..10).map(|i| dag_step(&format!("s{i}"), &[])).collect();
3262        let executor = TimedMockExecutor { delay_ms: 30 };
3263
3264        let start = Instant::now();
3265        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3266        let elapsed = start.elapsed();
3267
3268        assert_eq!(result.status, WorkflowRunStatus::Completed);
3269        assert_eq!(result.step_results.len(), 10);
3270        // All 10 should run in one wave (~30ms), not sequentially (~300ms)
3271        assert!(
3272            elapsed.as_millis() < 80,
3273            "10 parallel 30ms steps took {}ms, expected ~30ms",
3274            elapsed.as_millis()
3275        );
3276        assert_eq!(result.execution_trace.len(), 1);
3277        assert_eq!(result.execution_trace[0].steps.len(), 10);
3278    }
3279
3280    /// While loop with condition that eventually terminates.
3281    #[tokio::test]
3282    async fn dag_while_loop_with_condition() {
3283        let mut steps = vec![dag_step("looper", &[])];
3284        steps[0].loop_config = Some(LoopConfig::While {
3285            condition: Condition::Expression("true".to_string()),
3286            max_iterations: 3,
3287        });
3288
3289        let executor = MockExecutor::new().with_response("looper", "iteration");
3290
3291        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3292        assert_eq!(result.status, WorkflowRunStatus::Completed);
3293        let output = &result.step_results["looper"].response;
3294        // Should have 3 iterations
3295        let lines: Vec<&str> = output.split('\n').collect();
3296        assert_eq!(lines.len(), 3);
3297    }
3298
3299    /// Retry loop succeeds on second attempt.
3300    #[tokio::test]
3301    async fn dag_retry_succeeds_on_retry() {
3302        let mut steps = vec![dag_step("retry_step", &[])];
3303        steps[0].loop_config = Some(LoopConfig::Retry {
3304            max_retries: 2,
3305            backoff_ms: 1,
3306            backoff_multiplier: 1.0,
3307        });
3308
3309        let executor = FailNTimesMockExecutor::new(1);
3310
3311        let result = execute_dag("test", &steps, "input", Arc::new(executor)).await;
3312        assert_eq!(result.status, WorkflowRunStatus::Completed);
3313        assert!(result.step_results["retry_step"].error.is_none());
3314        assert!(
3315            result.step_results["retry_step"]
3316                .response
3317                .contains("success on attempt 2")
3318        );
3319    }
3320}
punch_kernel/workflow.rs

punch_kernel/
workflow.rs