reasonkit/thinktool/
executor.rs

1//! Protocol Executor
2//!
3//! Executes ThinkTool protocols by orchestrating LLM calls
4//! and managing step execution flow.
5
6use std::cell::RefCell;
7use std::collections::HashMap;
8use std::path::PathBuf;
9use std::process::Command;
10use std::time::Instant;
11
12use futures::stream::{FuturesUnordered, StreamExt};
13use once_cell::sync::Lazy;
14use regex::Regex;
15use serde::{Deserialize, Serialize};
16use std::collections::HashSet;
17use std::sync::Arc;
18use tokio::sync::RwLock as TokioRwLock;
19
20// ═══════════════════════════════════════════════════════════════════════════
21// STATIC REGEX PATTERNS (PERFORMANCE OPTIMIZATION)
22// ═══════════════════════════════════════════════════════════════════════════
23
24/// Static regex for conditional blocks: {{#if ...}}...{{/if}}
25static CONDITIONAL_BLOCK_RE: Lazy<Regex> = Lazy::new(|| {
26    Regex::new(r"\{\{#if \w+\}\}.*?\{\{/if\}\}").expect("Invalid static regex pattern")
27});
28
29/// Static regex for unfilled placeholders: {{...}}
30static UNFILLED_PLACEHOLDER_RE: Lazy<Regex> =
31    Lazy::new(|| Regex::new(r"\{\{[^}]+\}\}").expect("Invalid static regex pattern"));
32
33// Thread-local cache for dynamic nested regex patterns ({{step_id.field}})
34// This avoids recompiling the same patterns within a single execution
35thread_local! {
36    static NESTED_REGEX_CACHE: RefCell<HashMap<String, Regex>> = RefCell::new(HashMap::new());
37}
38
39/// Get or create a cached nested field regex pattern
40fn get_nested_regex(key: &str) -> Regex {
41    NESTED_REGEX_CACHE.with(|cache| {
42        let mut cache = cache.borrow_mut();
43        if let Some(re) = cache.get(key) {
44            return re.clone();
45        }
46        let pattern = format!(r"\{{\{{{}\\.(\w+)\}}\}}", regex::escape(key));
47        let re = Regex::new(&pattern).expect("Failed to compile nested regex pattern");
48        cache.insert(key.to_string(), re.clone());
49        re
50    })
51}
52
53use super::budget::{BudgetConfig, BudgetSummary, BudgetTracker};
54use super::consistency::{ConsistencyResult, SelfConsistencyConfig, SelfConsistencyEngine};
55use super::llm::{LlmClient, LlmConfig, LlmRequest, UnifiedLlmClient};
56use super::profiles::{ChainCondition, ProfileRegistry, ReasoningProfile};
57use super::protocol::{BranchCondition, Protocol, ProtocolStep, StepAction};
58use super::registry::ProtocolRegistry;
59use super::step::{ListItem, StepOutput, StepResult, TokenUsage};
60use super::trace::{ExecutionTrace, StepStatus, StepTrace, TraceMetadata};
61use crate::error::{Error, Result};
62
/// CLI tool configuration for shell-out execution.
///
/// Describes an external command together with the arguments placed before and
/// after the prompt. Presets for several tools are available as associated
/// constructors on this type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CliToolConfig {
    /// CLI command to use (e.g., "claude", "codex", "gemini")
    pub command: String,
    /// Arguments to pass before the prompt (e.g., ["-p"] for claude/gemini)
    pub pre_args: Vec<String>,
    /// Arguments to pass after the prompt
    pub post_args: Vec<String>,
    /// Whether this tool requires interactive input handling
    pub interactive: bool,
}
75
76impl CliToolConfig {
77    /// Create config for Claude CLI (claude -p "...")
78    pub fn claude() -> Self {
79        Self {
80            command: "claude".to_string(),
81            pre_args: vec!["-p".to_string()],
82            post_args: vec!["--output-format".to_string(), "text".to_string()],
83            interactive: false,
84        }
85    }
86
87    /// Create config for Codex CLI (codex "...")
88    pub fn codex() -> Self {
89        Self {
90            command: "codex".to_string(),
91            pre_args: vec!["-q".to_string()],
92            post_args: vec![],
93            interactive: false,
94        }
95    }
96
97    /// Create config for Gemini CLI (gemini -p "...")
98    pub fn gemini() -> Self {
99        Self {
100            command: "gemini".to_string(),
101            pre_args: vec!["-p".to_string()],
102            post_args: vec![],
103            interactive: false,
104        }
105    }
106
107    /// Create config for OpenCode CLI (opencode "...")
108    pub fn opencode() -> Self {
109        let command = std::env::var("RK_OPENCODE_CMD").unwrap_or_else(|_| "opencode".to_string());
110        Self {
111            command,
112            pre_args: vec!["--no-rk".to_string(), "run".to_string()],
113            post_args: vec![],
114            interactive: false,
115        }
116    }
117
118    /// Create config for GitHub Copilot CLI (gh copilot suggest)
119    pub fn copilot() -> Self {
120        Self {
121            command: "gh".to_string(),
122            pre_args: vec!["copilot".to_string(), "suggest".to_string()],
123            post_args: vec![],
124            interactive: true,
125        }
126    }
127}
128
/// Configuration for protocol execution.
///
/// Every field except `llm` and `trace_dir` carries a `#[serde(default ...)]`
/// attribute and therefore has an explicit fallback when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutorConfig {
    /// LLM configuration
    pub llm: LlmConfig,

    /// Global timeout in seconds (defaults to 120)
    #[serde(default = "default_timeout")]
    pub timeout_secs: u64,

    /// Whether to save traces
    #[serde(default)]
    pub save_traces: bool,

    /// Trace output directory
    pub trace_dir: Option<PathBuf>,

    /// Verbose output
    #[serde(default)]
    pub verbose: bool,

    /// Use mock LLM (for testing)
    #[serde(default)]
    pub use_mock: bool,

    /// Budget constraints for adaptive execution
    #[serde(default)]
    pub budget: BudgetConfig,

    /// CLI tool configuration (for shell-out execution to claude/codex/gemini/etc.)
    #[serde(default)]
    pub cli_tool: Option<CliToolConfig>,

    /// Self-Consistency configuration (Wang et al. 2023)
    /// Enables voting across multiple reasoning paths for improved accuracy
    /// Reference: arXiv:2203.11171 (+17.9% GSM8K improvement)
    #[serde(default)]
    pub self_consistency: Option<SelfConsistencyConfig>,

    /// Show progress indicators during execution (defaults to `true`)
    /// Outputs step progress to stderr so it doesn't interfere with JSON output
    #[serde(default = "default_show_progress")]
    pub show_progress: bool,

    /// Enable parallel execution of independent steps
    /// When enabled, steps without dependencies or with satisfied dependencies
    /// will be executed concurrently, significantly reducing total latency.
    /// PERFORMANCE: Can reduce (N-1)/N latency for N independent steps.
    #[serde(default)]
    pub enable_parallel: bool,

    /// Maximum concurrent steps when parallel execution is enabled
    /// Set to 0 for unlimited concurrency (default: 4)
    #[serde(default = "default_max_concurrent")]
    pub max_concurrent_steps: usize,
}
185
/// Serde fallback for `ExecutorConfig::max_concurrent_steps`.
fn default_max_concurrent() -> usize {
    const DEFAULT_MAX_CONCURRENT_STEPS: usize = 4;
    DEFAULT_MAX_CONCURRENT_STEPS
}
189
/// Serde fallback for `ExecutorConfig::show_progress`.
fn default_show_progress() -> bool {
    const SHOW_PROGRESS_BY_DEFAULT: bool = true;
    SHOW_PROGRESS_BY_DEFAULT
}
193
/// Serde fallback for `ExecutorConfig::timeout_secs`, expressed in seconds.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT_SECS: u64 = 120;
    DEFAULT_TIMEOUT_SECS
}
197
198impl Default for ExecutorConfig {
199    fn default() -> Self {
200        Self {
201            llm: LlmConfig::default(),
202            timeout_secs: default_timeout(),
203            save_traces: false,
204            trace_dir: None,
205            verbose: false,
206            use_mock: false,
207            budget: BudgetConfig::default(),
208            cli_tool: None,
209            self_consistency: None,
210            show_progress: default_show_progress(),
211            enable_parallel: false,
212            max_concurrent_steps: default_max_concurrent(),
213        }
214    }
215}
216
217impl ExecutorConfig {
218    /// Create config for testing with mock LLM
219    pub fn mock() -> Self {
220        Self {
221            use_mock: true,
222            ..Default::default()
223        }
224    }
225
226    /// Create config for CLI tool execution
227    pub fn with_cli_tool(tool: CliToolConfig) -> Self {
228        Self {
229            cli_tool: Some(tool),
230            ..Default::default()
231        }
232    }
233
234    /// Create config for Claude CLI
235    pub fn claude_cli() -> Self {
236        Self::with_cli_tool(CliToolConfig::claude())
237    }
238
239    /// Create config for Codex CLI
240    pub fn codex_cli() -> Self {
241        Self::with_cli_tool(CliToolConfig::codex())
242    }
243
244    /// Create config for Gemini CLI
245    pub fn gemini_cli() -> Self {
246        Self::with_cli_tool(CliToolConfig::gemini())
247    }
248
249    /// Create config for OpenCode CLI
250    pub fn opencode_cli() -> Self {
251        Self::with_cli_tool(CliToolConfig::opencode())
252    }
253
254    /// Create config for Copilot CLI
255    pub fn copilot_cli() -> Self {
256        Self::with_cli_tool(CliToolConfig::copilot())
257    }
258
259    /// Enable Self-Consistency with default config (5 samples, majority vote)
260    /// Research: Wang et al. 2023 (arXiv:2203.11171) +17.9% GSM8K
261    pub fn with_self_consistency(mut self) -> Self {
262        self.self_consistency = Some(SelfConsistencyConfig::default());
263        self
264    }
265
266    /// Enable Self-Consistency with custom config
267    pub fn with_self_consistency_config(mut self, config: SelfConsistencyConfig) -> Self {
268        self.self_consistency = Some(config);
269        self
270    }
271
272    /// Enable fast Self-Consistency (3 samples, 70% threshold)
273    pub fn with_self_consistency_fast(mut self) -> Self {
274        self.self_consistency = Some(SelfConsistencyConfig::fast());
275        self
276    }
277
278    /// Enable thorough Self-Consistency (10 samples, no early stopping)
279    pub fn with_self_consistency_thorough(mut self) -> Self {
280        self.self_consistency = Some(SelfConsistencyConfig::thorough());
281        self
282    }
283
284    /// Enable paranoid Self-Consistency (15 samples, max accuracy)
285    pub fn with_self_consistency_paranoid(mut self) -> Self {
286        self.self_consistency = Some(SelfConsistencyConfig::paranoid());
287        self
288    }
289
290    /// Enable parallel execution of independent steps
291    /// PERFORMANCE: Can reduce (N-1)/N latency for N independent steps
292    pub fn with_parallel(mut self) -> Self {
293        self.enable_parallel = true;
294        self
295    }
296
297    /// Enable parallel execution with custom concurrency limit
298    pub fn with_parallel_limit(mut self, max_concurrent: usize) -> Self {
299        self.enable_parallel = true;
300        self.max_concurrent_steps = max_concurrent;
301        self
302    }
303}
304
/// Input for protocol execution.
///
/// A thin wrapper around a map from field name to JSON value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProtocolInput {
    /// Input fields, keyed by field name (e.g. "query", "claim")
    pub fields: HashMap<String, serde_json::Value>,
}
311
312impl ProtocolInput {
313    /// Create input with a query
314    pub fn query(query: impl Into<String>) -> Self {
315        let mut fields = HashMap::new();
316        fields.insert("query".to_string(), serde_json::Value::String(query.into()));
317        Self { fields }
318    }
319
320    /// Create input with an argument (for LaserLogic)
321    pub fn argument(argument: impl Into<String>) -> Self {
322        let mut fields = HashMap::new();
323        fields.insert(
324            "argument".to_string(),
325            serde_json::Value::String(argument.into()),
326        );
327        Self { fields }
328    }
329
330    /// Create input with a statement (for BedRock)
331    pub fn statement(statement: impl Into<String>) -> Self {
332        let mut fields = HashMap::new();
333        fields.insert(
334            "statement".to_string(),
335            serde_json::Value::String(statement.into()),
336        );
337        Self { fields }
338    }
339
340    /// Create input with a claim (for ProofGuard)
341    pub fn claim(claim: impl Into<String>) -> Self {
342        let mut fields = HashMap::new();
343        fields.insert("claim".to_string(), serde_json::Value::String(claim.into()));
344        Self { fields }
345    }
346
347    /// Create input with work to critique (for BrutalHonesty)
348    pub fn work(work: impl Into<String>) -> Self {
349        let mut fields = HashMap::new();
350        fields.insert("work".to_string(), serde_json::Value::String(work.into()));
351        Self { fields }
352    }
353
354    /// Add additional field
355    pub fn with_field(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
356        self.fields
357            .insert(key.into(), serde_json::Value::String(value.into()));
358        self
359    }
360
361    /// Get a field as string
362    pub fn get_str(&self, key: &str) -> Option<&str> {
363        self.fields.get(key).and_then(|v| v.as_str())
364    }
365}
366
/// Output from protocol execution.
///
/// The field notes below describe how `ProtocolExecutor::execute` fills the
/// struct in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProtocolOutput {
    /// Protocol that was executed
    pub protocol_id: String,

    /// Whether execution succeeded (true when every recorded step succeeded)
    pub success: bool,

    /// Output data: one entry per step id holding that step's serialized output,
    /// plus a "confidence" entry with the overall confidence
    pub data: HashMap<String, serde_json::Value>,

    /// Overall confidence score (mean of the step confidences, 0.0 with no steps)
    pub confidence: f64,

    /// Step results
    pub steps: Vec<StepResult>,

    /// Total token usage
    pub tokens: TokenUsage,

    /// Execution time in milliseconds
    pub duration_ms: u64,

    /// Error message if failed (taken from the first step that did not succeed)
    pub error: Option<String>,

    /// Trace ID (if saved)
    pub trace_id: Option<String>,

    /// Budget usage summary (if budget was set)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub budget_summary: Option<BudgetSummary>,
}
401
402impl ProtocolOutput {
403    /// Get a field from output data
404    pub fn get(&self, key: &str) -> Option<&serde_json::Value> {
405        self.data.get(key)
406    }
407
408    /// Get perspectives (for GigaThink output)
409    pub fn perspectives(&self) -> Vec<&str> {
410        self.data
411            .get("perspectives")
412            .and_then(|v| v.as_array())
413            .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
414            .unwrap_or_default()
415    }
416
417    /// Get verdict (for various outputs)
418    pub fn verdict(&self) -> Option<&str> {
419        self.data.get("verdict").and_then(|v| v.as_str())
420    }
421}
422
/// Protocol executor.
///
/// Owns the protocol and profile registries, the execution configuration and,
/// unless mock mode is configured, an LLM client.
pub struct ProtocolExecutor {
    /// Protocol registry
    registry: ProtocolRegistry,

    /// Profile registry
    profiles: ProfileRegistry,

    /// Configuration
    config: ExecutorConfig,

    /// LLM client (`None` when the executor was built with `use_mock` set)
    llm_client: Option<UnifiedLlmClient>,
}
437
438impl ProtocolExecutor {
439    /// Create a new executor with default configuration
440    pub fn new() -> Result<Self> {
441        Self::with_config(ExecutorConfig::default())
442    }
443
444    /// Create executor for testing (mock LLM)
445    pub fn mock() -> Result<Self> {
446        Self::with_config(ExecutorConfig::mock())
447    }
448
449    /// Create executor with custom configuration
450    pub fn with_config(config: ExecutorConfig) -> Result<Self> {
451        let mut registry = ProtocolRegistry::new();
452        registry.register_builtins()?;
453
454        let profiles = ProfileRegistry::with_builtins();
455
456        // Only create LLM client if not using mock
457        let llm_client = if config.use_mock {
458            None
459        } else {
460            Some(UnifiedLlmClient::new(config.llm.clone())?)
461        };
462
463        Ok(Self {
464            registry,
465            profiles,
466            config,
467            llm_client,
468        })
469    }
470
471    /// Get the protocol registry
472    pub fn registry(&self) -> &ProtocolRegistry {
473        &self.registry
474    }
475
476    /// Get mutable registry
477    pub fn registry_mut(&mut self) -> &mut ProtocolRegistry {
478        &mut self.registry
479    }
480
481    /// Get profile registry
482    pub fn profiles(&self) -> &ProfileRegistry {
483        &self.profiles
484    }
485
486    /// Execute a protocol
487    pub async fn execute(&self, protocol_id: &str, input: ProtocolInput) -> Result<ProtocolOutput> {
488        let protocol = self
489            .registry
490            .get(protocol_id)
491            .ok_or_else(|| Error::NotFound {
492                resource: format!("protocol:{}", protocol_id),
493            })?
494            .clone();
495
496        // Validate input
497        self.validate_input(&protocol, &input)?;
498
499        let start = Instant::now();
500        let mut trace = ExecutionTrace::new(&protocol.id, &protocol.version)
501            .with_input(serde_json::to_value(&input.fields).unwrap_or_default());
502
503        trace.timing.start();
504        trace.metadata = TraceMetadata {
505            model: Some(self.config.llm.model.clone()),
506            provider: Some(format!("{:?}", self.config.llm.provider)),
507            temperature: Some(self.config.llm.temperature),
508            ..Default::default()
509        };
510
511        // Execute steps - choose sequential or parallel based on config
512        let (step_results, step_outputs, total_tokens, step_traces) = if self.config.enable_parallel
513        {
514            self.execute_steps_parallel(&protocol.steps, &input, &start)
515                .await?
516        } else {
517            self.execute_steps_sequential(&protocol.steps, &input, &start)
518                .await?
519        };
520
521        // Add traces
522        for step_trace in step_traces {
523            trace.add_step(step_trace);
524        }
525
526        let duration_ms = start.elapsed().as_millis() as u64;
527
528        // Calculate overall confidence
529        let confidence = if step_results.is_empty() {
530            0.0
531        } else {
532            step_results.iter().map(|r| r.confidence).sum::<f64>() / step_results.len() as f64
533        };
534
535        // Build output data
536        let mut data = HashMap::new();
537        for (key, output) in &step_outputs {
538            data.insert(
539                key.clone(),
540                serde_json::to_value(output).unwrap_or_default(),
541            );
542        }
543        data.insert("confidence".to_string(), serde_json::json!(confidence));
544
545        // Check if all steps succeeded
546        let success = step_results.iter().all(|r| r.success);
547        let error = if success {
548            None
549        } else {
550            step_results
551                .iter()
552                .find(|r| !r.success)
553                .and_then(|r| r.error.clone())
554        };
555
556        // Complete trace
557        if success {
558            trace.complete(serde_json::to_value(&data).unwrap_or_default(), confidence);
559        } else {
560            trace.fail(&error.clone().unwrap_or_else(|| "Unknown error".to_string()));
561        }
562
563        // Save trace if configured
564        let trace_id = if self.config.save_traces {
565            self.save_trace(&trace)?;
566            Some(trace.id.to_string())
567        } else {
568            None
569        };
570
571        // Build budget summary if budget is constrained
572        let budget_summary = if self.config.budget.is_constrained() {
573            let mut tracker = BudgetTracker::new(self.config.budget.clone());
574            // Record cumulative usage from all steps
575            for result in &step_results {
576                tracker.record_usage(result.tokens.total_tokens, result.tokens.cost_usd);
577            }
578            Some(tracker.summary())
579        } else {
580            None
581        };
582
583        Ok(ProtocolOutput {
584            protocol_id: protocol_id.to_string(),
585            success,
586            data,
587            confidence,
588            steps: step_results,
589            tokens: total_tokens,
590            duration_ms,
591            error,
592            trace_id,
593            budget_summary,
594        })
595    }
596
597    /// Execute steps sequentially (original behavior)
598    async fn execute_steps_sequential(
599        &self,
600        steps: &[ProtocolStep],
601        input: &ProtocolInput,
602        start: &Instant,
603    ) -> Result<(
604        Vec<StepResult>,
605        HashMap<String, StepOutput>,
606        TokenUsage,
607        Vec<StepTrace>,
608    )> {
609        let mut step_results: Vec<StepResult> = Vec::with_capacity(steps.len());
610        let mut step_outputs: HashMap<String, StepOutput> = HashMap::with_capacity(steps.len());
611        let mut total_tokens = TokenUsage::default();
612        let mut traces: Vec<StepTrace> = Vec::with_capacity(steps.len());
613
614        let total_steps = steps.len();
615        for (index, step) in steps.iter().enumerate() {
616            // Check dependencies
617            if !self.dependencies_met(&step.depends_on, &step_results) {
618                continue;
619            }
620
621            // Check branch condition
622            if let Some(condition) = &step.branch {
623                if !self.evaluate_branch_condition(condition, &step_results) {
624                    let mut skipped = StepTrace::new(&step.id, index);
625                    skipped.status = StepStatus::Skipped;
626                    traces.push(skipped);
627                    continue;
628                }
629            }
630
631            // Progress indicator
632            if self.config.show_progress {
633                let elapsed = start.elapsed().as_secs();
634                eprintln!(
635                    "\x1b[2m[{}/{}]\x1b[0m \x1b[36m⏳\x1b[0m Executing step: \x1b[1m{}\x1b[0m ({}s elapsed)...",
636                    index + 1,
637                    total_steps,
638                    step.id,
639                    elapsed
640                );
641            }
642
643            // Execute step
644            let step_result = self.execute_step(step, input, &step_outputs, index).await?;
645
646            // Progress update
647            if self.config.show_progress {
648                let status_icon = if step_result.success { "✓" } else { "✗" };
649                let status_color = if step_result.success {
650                    "\x1b[32m"
651                } else {
652                    "\x1b[31m"
653                };
654                eprintln!(
655                    "\x1b[2m[{}/{}]\x1b[0m {}{}\x1b[0m {} completed ({:.1}% confidence, {}ms)",
656                    index + 1,
657                    total_steps,
658                    status_color,
659                    status_icon,
660                    step.id,
661                    step_result.confidence * 100.0,
662                    step_result.duration_ms
663                );
664            }
665
666            // Record in trace
667            let mut step_trace = StepTrace::new(&step.id, index);
668            step_trace.confidence = step_result.confidence;
669            step_trace.tokens = step_result.tokens.clone();
670            step_trace.duration_ms = step_result.duration_ms;
671
672            if step_result.success {
673                step_trace.complete(step_result.output.clone(), step_result.confidence);
674            } else {
675                step_trace.fail(step_result.error.clone().unwrap_or_default());
676            }
677
678            traces.push(step_trace);
679            total_tokens.add(&step_result.tokens);
680            step_outputs.insert(step.id.clone(), step_result.output.clone());
681            step_results.push(step_result);
682        }
683
684        Ok((step_results, step_outputs, total_tokens, traces))
685    }
686
687    /// Execute steps in parallel when dependencies allow
688    /// PERFORMANCE: Reduces latency by (N-1)/N for N independent steps
689    async fn execute_steps_parallel(
690        &self,
691        steps: &[ProtocolStep],
692        input: &ProtocolInput,
693        start: &Instant,
694    ) -> Result<(
695        Vec<StepResult>,
696        HashMap<String, StepOutput>,
697        TokenUsage,
698        Vec<StepTrace>,
699    )> {
700        let total_steps = steps.len();
701
702        // Shared state protected by async RwLock
703        let completed_ids: Arc<TokioRwLock<HashSet<String>>> =
704            Arc::new(TokioRwLock::new(HashSet::with_capacity(total_steps)));
705        let step_outputs: Arc<TokioRwLock<HashMap<String, StepOutput>>> =
706            Arc::new(TokioRwLock::new(HashMap::with_capacity(total_steps)));
707        let step_results: Arc<TokioRwLock<Vec<(usize, StepResult)>>> =
708            Arc::new(TokioRwLock::new(Vec::with_capacity(total_steps)));
709        let traces: Arc<TokioRwLock<Vec<StepTrace>>> =
710            Arc::new(TokioRwLock::new(Vec::with_capacity(total_steps)));
711
712        // Track pending steps
713        let mut pending: HashSet<usize> = (0..total_steps).collect();
714        let mut completed_count = 0;
715
716        while completed_count < total_steps && !pending.is_empty() {
717            // Find steps that are ready to execute (dependencies satisfied)
718            let completed_ids_guard = completed_ids.read().await;
719            let ready_indices: Vec<usize> = pending
720                .iter()
721                .filter(|&&idx| {
722                    let step = &steps[idx];
723                    step.depends_on
724                        .iter()
725                        .all(|dep| completed_ids_guard.contains(dep))
726                })
727                .copied()
728                .collect();
729            drop(completed_ids_guard);
730
731            if ready_indices.is_empty() && completed_count < total_steps {
732                // No ready steps but not all complete - this shouldn't happen with valid deps
733                break;
734            }
735
736            // Limit concurrency if configured
737            let max_concurrent = if self.config.max_concurrent_steps > 0 {
738                self.config.max_concurrent_steps.min(ready_indices.len())
739            } else {
740                ready_indices.len()
741            };
742
743            // Execute ready steps concurrently
744            let mut futures = FuturesUnordered::new();
745
746            for idx in ready_indices.into_iter().take(max_concurrent) {
747                pending.remove(&idx);
748                let step = steps[idx].clone();
749                let input = input.clone();
750                let step_outputs_clone = Arc::clone(&step_outputs);
751                let completed_ids_clone = Arc::clone(&completed_ids);
752                let step_results_clone = Arc::clone(&step_results);
753                let traces_clone = Arc::clone(&traces);
754                let show_progress = self.config.show_progress;
755                let start_clone = *start;
756
757                // Clone self fields needed for execution
758                let config = self.config.clone();
759                let llm_client = self.llm_client.as_ref().map(|_| {
760                    // For parallel execution, create new clients that share the HTTP pool
761                    UnifiedLlmClient::new(config.llm.clone()).ok()
762                });
763
764                futures.push(async move {
765                    // Progress indicator
766                    if show_progress {
767                        let elapsed = start_clone.elapsed().as_secs();
768                        eprintln!(
769                            "\x1b[2m[{}/{}]\x1b[0m \x1b[36m⏳\x1b[0m Executing step: \x1b[1m{}\x1b[0m ({}s elapsed, parallel)...",
770                            idx + 1,
771                            total_steps,
772                            step.id,
773                            elapsed
774                        );
775                    }
776
777                    // Get current outputs for template rendering
778                    let outputs = step_outputs_clone.read().await.clone();
779
780                    // Execute the step
781                    let step_start = Instant::now();
782                    let (response, tokens) = if config.use_mock {
783                        // Mock execution
784                        let mock_response = format!("Mock response for step: {}", step.id);
785                        (mock_response, TokenUsage::default())
786                    } else if let Some(Some(client)) = llm_client {
787                        // Real LLM call
788                        let prompt = Self::render_template_static(&step.prompt_template, &input, &outputs);
789                        let system = Self::build_system_prompt_static(&step);
790                        let request = super::llm::LlmRequest::new(&prompt)
791                            .with_system(&system)
792                            .with_temperature(config.llm.temperature)
793                            .with_max_tokens(config.llm.max_tokens);
794
795                        match client.complete(request).await {
796                            Ok(resp) => {
797                                let tokens = TokenUsage {
798                                    input_tokens: resp.usage.input_tokens,
799                                    output_tokens: resp.usage.output_tokens,
800                                    total_tokens: resp.usage.total_tokens,
801                                    cost_usd: 0.0,
802                                };
803                                (resp.content, tokens)
804                            }
805                            Err(e) => {
806                                return (idx, step.id.clone(), Err(e));
807                            }
808                        }
809                    } else {
810                        // Fallback mock
811                        let mock_response = format!("Mock response for step: {}", step.id);
812                        (mock_response, TokenUsage::default())
813                    };
814
815                    let duration_ms = step_start.elapsed().as_millis() as u64;
816                    let confidence = Self::extract_confidence_static(&response).unwrap_or(0.7);
817                    let output = StepOutput::Text {
818                        content: response.clone(),
819                    };
820
821                    let result = StepResult {
822                        step_id: step.id.clone(),
823                        success: true,
824                        output: output.clone(),
825                        confidence,
826                        tokens: tokens.clone(),
827                        duration_ms,
828                        error: None,
829                    };
830
831                    // Update shared state
832                    {
833                        let mut outputs = step_outputs_clone.write().await;
834                        outputs.insert(step.id.clone(), output.clone());
835                    }
836                    {
837                        let mut completed = completed_ids_clone.write().await;
838                        completed.insert(step.id.clone());
839                    }
840                    {
841                        let mut results = step_results_clone.write().await;
842                        results.push((idx, result.clone()));
843                    }
844
845                    // Create trace
846                    let mut step_trace = StepTrace::new(&step.id, idx);
847                    step_trace.confidence = confidence;
848                    step_trace.tokens = tokens;
849                    step_trace.duration_ms = duration_ms;
850                    step_trace.complete(output, confidence);
851
852                    {
853                        let mut traces = traces_clone.write().await;
854                        traces.push(step_trace);
855                    }
856
857                    // Progress update
858                    if show_progress {
859                        eprintln!(
860                            "\x1b[2m[{}/{}]\x1b[0m \x1b[32m✓\x1b[0m {} completed ({:.1}% confidence, {}ms, parallel)",
861                            idx + 1,
862                            total_steps,
863                            step.id,
864                            confidence * 100.0,
865                            duration_ms
866                        );
867                    }
868
869                    (idx, step.id.clone(), Ok(result))
870                });
871            }
872
873            // Wait for this batch to complete
874            while let Some((_idx, step_id, result)) = futures.next().await {
875                match result {
876                    Ok(_) => {
877                        completed_count += 1;
878                    }
879                    Err(e) => {
880                        return Err(Error::Validation(format!(
881                            "Step '{}': Parallel step execution failed: {}",
882                            step_id, e
883                        )));
884                    }
885                }
886            }
887        }
888
889        // Collect results
890        let step_outputs = Arc::try_unwrap(step_outputs)
891            .map_err(|_| {
892                Error::Config(
893                    "Failed to unwrap step_outputs: Arc still has multiple references".to_string(),
894                )
895            })?
896            .into_inner();
897        let mut step_results_vec = Arc::try_unwrap(step_results)
898            .map_err(|_| {
899                Error::Config(
900                    "Failed to unwrap step_results: Arc still has multiple references".to_string(),
901                )
902            })?
903            .into_inner();
904        let traces = Arc::try_unwrap(traces)
905            .map_err(|_| {
906                Error::Config(
907                    "Failed to unwrap traces: Arc still has multiple references".to_string(),
908                )
909            })?
910            .into_inner();
911
912        // Sort results by index to maintain order
913        step_results_vec.sort_by_key(|(idx, _)| *idx);
914        let step_results: Vec<StepResult> = step_results_vec.into_iter().map(|(_, r)| r).collect();
915
916        // Calculate total tokens
917        let mut total_tokens = TokenUsage::default();
918        for result in &step_results {
919            total_tokens.add(&result.tokens);
920        }
921
922        Ok((step_results, step_outputs, total_tokens, traces))
923    }
924
925    /// Static version of render_template for use in parallel execution
926    fn render_template_static(
927        template: &str,
928        input: &ProtocolInput,
929        previous_outputs: &HashMap<String, StepOutput>,
930    ) -> String {
931        let mut result = template.to_string();
932
933        // Replace input placeholders
934        for (key, value) in &input.fields {
935            let placeholder = format!("{{{{{}}}}}", key);
936            let value_str = match value {
937                serde_json::Value::String(s) => s.clone(),
938                other => other.to_string(),
939            };
940            result = result.replace(&placeholder, &value_str);
941        }
942
943        // Replace previous output placeholders
944        for (key, output) in previous_outputs {
945            let placeholder = format!("{{{{{}}}}}", key);
946            let value_str = match output {
947                StepOutput::Text { content } => content.clone(),
948                StepOutput::List { items } => items
949                    .iter()
950                    .map(|i| i.content.clone())
951                    .collect::<Vec<_>>()
952                    .join("\n"),
953                other => serde_json::to_string(other).unwrap_or_default(),
954            };
955            result = result.replace(&placeholder, &value_str);
956        }
957
958        // Clean up using static regexes
959        result = CONDITIONAL_BLOCK_RE.replace_all(&result, "").to_string();
960        result = UNFILLED_PLACEHOLDER_RE.replace_all(&result, "").to_string();
961
962        result
963    }
964
965    /// Static version of build_system_prompt for use in parallel execution
966    fn build_system_prompt_static(step: &ProtocolStep) -> String {
967        let base = "You are a structured reasoning assistant following the ReasonKit protocol.";
968        let action_guidance = match &step.action {
969            StepAction::Analyze { .. } => {
970                "Analyze the given input thoroughly. Break down components and relationships."
971            }
972            StepAction::Synthesize { .. } => {
973                "Synthesize information from previous steps into a coherent whole."
974            }
975            StepAction::Validate { .. } => "Validate claims and check for logical consistency.",
976            StepAction::Generate { .. } => "Generate new ideas or content based on the context.",
977            StepAction::Critique { .. } => {
978                "Critically evaluate the reasoning and identify weaknesses."
979            }
980            StepAction::Decide { .. } => {
981                "Make a decision based on the available evidence and reasoning."
982            }
983            StepAction::CrossReference { .. } => {
984                "Cross-reference information from multiple sources to verify claims."
985            }
986        };
987
988        format!(
989            "{}\n\n{}\n\nProvide a confidence score (0.0-1.0) for your response.",
990            base, action_guidance
991        )
992    }
993
994    /// Static version of extract_confidence for use in parallel execution
995    fn extract_confidence_static(content: &str) -> Option<f64> {
996        static CONFIDENCE_RE: Lazy<Regex> = Lazy::new(|| {
997            Regex::new(r"(?i)confidence[:\s]+(\d+\.?\d*)").expect("Invalid regex pattern")
998        });
999
1000        if let Some(caps) = CONFIDENCE_RE.captures(content) {
1001            if let Some(m) = caps.get(1) {
1002                return m.as_str().parse::<f64>().ok().map(|v| v.min(1.0));
1003            }
1004        }
1005        None
1006    }
1007
1008    /// Execute a reasoning profile (chain of protocols)
1009    pub async fn execute_profile(
1010        &self,
1011        profile_id: &str,
1012        input: ProtocolInput,
1013    ) -> Result<ProtocolOutput> {
1014        let profile = self
1015            .profiles
1016            .get(profile_id)
1017            .ok_or_else(|| Error::NotFound {
1018                resource: format!("profile:{}", profile_id),
1019            })?
1020            .clone();
1021
1022        let start = Instant::now();
1023        let chain_len = profile.chain.len();
1024        // PERFORMANCE: Pre-allocate based on expected chain size
1025        let mut all_step_results: Vec<StepResult> = Vec::with_capacity(chain_len * 3); // ~3 steps per protocol
1026        let mut all_outputs: HashMap<String, serde_json::Value> = HashMap::with_capacity(chain_len);
1027        let mut total_tokens = TokenUsage::default();
1028        let current_input = input.clone();
1029
1030        // Track outputs by step ID for input mapping
1031        let mut step_outputs: HashMap<String, serde_json::Value> =
1032            HashMap::with_capacity(chain_len + 1);
1033        step_outputs.insert(
1034            "input".to_string(),
1035            serde_json::to_value(&input.fields).unwrap_or_default(),
1036        );
1037
1038        for chain_step in &profile.chain {
1039            // Check condition
1040            if let Some(condition) = &chain_step.condition {
1041                if !self.evaluate_chain_condition(condition, &all_step_results) {
1042                    continue;
1043                }
1044            }
1045
1046            // Build input from mapping
1047            let mut mapped_input = ProtocolInput {
1048                fields: HashMap::with_capacity(chain_step.input_mapping.len()),
1049            };
1050            for (target_field, source_expr) in &chain_step.input_mapping {
1051                if let Some(value) = self.resolve_mapping(source_expr, &step_outputs, &input) {
1052                    mapped_input.fields.insert(target_field.clone(), value);
1053                }
1054            }
1055
1056            // Fall back to original input if no mapping
1057            if mapped_input.fields.is_empty() {
1058                mapped_input = current_input.clone();
1059            }
1060
1061            // Execute protocol
1062            let result = self.execute(&chain_step.protocol_id, mapped_input).await?;
1063
1064            // Store outputs for next step
1065            step_outputs.insert(
1066                format!("steps.{}", chain_step.protocol_id),
1067                serde_json::to_value(&result.data).unwrap_or_default(),
1068            );
1069
1070            total_tokens.add(&result.tokens);
1071            all_step_results.extend(result.steps);
1072            all_outputs.extend(result.data);
1073        }
1074
1075        let duration_ms = start.elapsed().as_millis() as u64;
1076
1077        // Calculate overall confidence
1078        let confidence = if all_step_results.is_empty() {
1079            0.0
1080        } else {
1081            all_step_results.iter().map(|r| r.confidence).sum::<f64>()
1082                / all_step_results.len() as f64
1083        };
1084
1085        let success =
1086            all_step_results.iter().all(|r| r.success) && confidence >= profile.min_confidence;
1087
1088        // Build budget summary if budget is constrained
1089        let budget_summary = if self.config.budget.is_constrained() {
1090            let mut tracker = BudgetTracker::new(self.config.budget.clone());
1091            for result in &all_step_results {
1092                tracker.record_usage(result.tokens.total_tokens, result.tokens.cost_usd);
1093            }
1094            Some(tracker.summary())
1095        } else {
1096            None
1097        };
1098
1099        Ok(ProtocolOutput {
1100            protocol_id: profile_id.to_string(),
1101            success,
1102            data: all_outputs,
1103            confidence,
1104            steps: all_step_results,
1105            tokens: total_tokens,
1106            duration_ms,
1107            error: None,
1108            trace_id: None,
1109            budget_summary,
1110        })
1111    }
1112
1113    /// Execute a profile with Self-Consistency voting
1114    ///
1115    /// This runs multiple independent reasoning paths and aggregates them via voting.
1116    /// Based on Wang et al. 2023 (arXiv:2203.11171) which showed +17.9% on GSM8K.
1117    ///
1118    /// # Arguments
1119    /// * `profile_id` - Profile to execute (e.g., "balanced", "deep")
1120    /// * `input` - Input data for the protocol
1121    /// * `sc_config` - Self-Consistency configuration
1122    ///
1123    /// # Returns
1124    /// ProtocolOutput with voted answer and ConsistencyResult details
1125    pub async fn execute_with_self_consistency(
1126        &self,
1127        profile_id: &str,
1128        input: ProtocolInput,
1129        sc_config: &SelfConsistencyConfig,
1130    ) -> Result<(ProtocolOutput, ConsistencyResult)> {
1131        let engine = SelfConsistencyEngine::new(sc_config.clone());
1132        let start = Instant::now();
1133        // PERFORMANCE: Pre-allocate based on configured sample count
1134        let mut all_results: Vec<StepResult> = Vec::with_capacity(sc_config.num_samples);
1135        let mut all_outputs: Vec<ProtocolOutput> = Vec::with_capacity(sc_config.num_samples);
1136        let mut total_tokens = TokenUsage::default();
1137
1138        // Run multiple samples
1139        for sample_idx in 0..sc_config.num_samples {
1140            // Execute profile for this sample
1141            let output = self.execute_profile(profile_id, input.clone()).await?;
1142
1143            // Create step result for voting
1144            let step_result = StepResult::success(
1145                format!("sample_{}", sample_idx),
1146                StepOutput::Text {
1147                    content: self.extract_voting_text(&output),
1148                },
1149                output.confidence,
1150            );
1151
1152            all_results.push(step_result);
1153            total_tokens.add(&output.tokens);
1154            all_outputs.push(output);
1155
1156            // Check for early stopping
1157            if sc_config.early_stopping && engine.should_early_stop(&all_results) {
1158                break;
1159            }
1160        }
1161
1162        // Vote using Self-Consistency engine
1163        let consistency_result = engine.vote(all_results.clone());
1164
1165        // Build the final output using the voted answer
1166        let duration_ms = start.elapsed().as_millis() as u64;
1167
1168        // Use the output from the sample that matches the voted answer, or first sample
1169        let best_output = all_outputs
1170            .iter()
1171            .find(|o| {
1172                self.extract_voting_text(o)
1173                    .contains(&consistency_result.answer)
1174            })
1175            .cloned()
1176            .or_else(|| all_outputs.first().cloned())
1177            .ok_or_else(|| Error::Config("No outputs generated during self-consistency".into()))?;
1178
1179        let mut final_output = best_output;
1180        final_output.confidence = consistency_result.confidence;
1181        final_output.tokens = total_tokens;
1182        final_output.duration_ms = duration_ms;
1183
1184        // Add self-consistency metadata to output data
1185        final_output.data.insert(
1186            "self_consistency".to_string(),
1187            serde_json::json!({
1188                "voted_answer": consistency_result.answer,
1189                "agreement_ratio": consistency_result.agreement_ratio,
1190                "vote_count": consistency_result.vote_count,
1191                "total_samples": consistency_result.total_samples,
1192                "early_stopped": consistency_result.early_stopped,
1193            }),
1194        );
1195
1196        Ok((final_output, consistency_result))
1197    }
1198
1199    /// Extract text suitable for voting from protocol output
1200    fn extract_voting_text(&self, output: &ProtocolOutput) -> String {
1201        // Try to get conclusion from data
1202        if let Some(conclusion) = output.data.get("conclusion") {
1203            if let Some(s) = conclusion.as_str() {
1204                return s.to_string();
1205            }
1206        }
1207
1208        // Try to get from last step
1209        if let Some(last_step) = output.steps.last() {
1210            if let Some(text) = last_step.as_text() {
1211                return text.to_string();
1212            }
1213        }
1214
1215        // Concatenate all step outputs
1216        output
1217            .steps
1218            .iter()
1219            .filter_map(|s| s.as_text())
1220            .collect::<Vec<_>>()
1221            .join("\n\n")
1222    }
1223
1224    /// Validate input against protocol requirements
1225    fn validate_input(&self, protocol: &Protocol, input: &ProtocolInput) -> Result<()> {
1226        for required in &protocol.input.required {
1227            if !input.fields.contains_key(required) {
1228                return Err(Error::Validation(format!(
1229                    "Missing required input field: {}",
1230                    required
1231                )));
1232            }
1233        }
1234        Ok(())
1235    }
1236
1237    /// Check if step dependencies are met
1238    fn dependencies_met(&self, deps: &[String], results: &[StepResult]) -> bool {
1239        deps.iter()
1240            .all(|dep| results.iter().any(|r| r.step_id == *dep && r.success))
1241    }
1242
1243    /// Evaluate a branch condition
1244    fn evaluate_branch_condition(
1245        &self,
1246        condition: &BranchCondition,
1247        results: &[StepResult],
1248    ) -> bool {
1249        match condition {
1250            BranchCondition::Always => true,
1251            BranchCondition::ConfidenceBelow { threshold } => results
1252                .last()
1253                .map(|r| r.confidence < *threshold)
1254                .unwrap_or(true),
1255            BranchCondition::ConfidenceAbove { threshold } => results
1256                .last()
1257                .map(|r| r.confidence >= *threshold)
1258                .unwrap_or(false),
1259            BranchCondition::OutputEquals { field: _, value } => results
1260                .last()
1261                .map(|r| {
1262                    if let Some(text) = r.as_text() {
1263                        text.contains(value)
1264                    } else {
1265                        false
1266                    }
1267                })
1268                .unwrap_or(false),
1269        }
1270    }
1271
1272    /// Evaluate a chain condition
1273    fn evaluate_chain_condition(&self, condition: &ChainCondition, results: &[StepResult]) -> bool {
1274        match condition {
1275            ChainCondition::Always => true,
1276            ChainCondition::ConfidenceBelow { threshold } => results
1277                .last()
1278                .map(|r| r.confidence < *threshold)
1279                .unwrap_or(true),
1280            ChainCondition::ConfidenceAbove { threshold } => results
1281                .last()
1282                .map(|r| r.confidence >= *threshold)
1283                .unwrap_or(false),
1284            ChainCondition::OutputExists { step_id, field: _ } => results
1285                .iter()
1286                .any(|r| r.step_id == *step_id && r.as_text().is_some()),
1287        }
1288    }
1289
1290    /// Resolve a mapping expression to a value
1291    fn resolve_mapping(
1292        &self,
1293        expr: &str,
1294        step_outputs: &HashMap<String, serde_json::Value>,
1295        input: &ProtocolInput,
1296    ) -> Option<serde_json::Value> {
1297        // Handle input.* expressions
1298        if let Some(field) = expr.strip_prefix("input.") {
1299            return input.fields.get(field).cloned();
1300        }
1301
1302        // Handle steps.*.* expressions
1303        if let Some(rest) = expr.strip_prefix("steps.") {
1304            let key = format!("steps.{}", rest.split('.').next().unwrap_or(""));
1305            if let Some(step_data) = step_outputs.get(&key) {
1306                // Try to get nested field
1307                let field = rest.split('.').skip(1).collect::<Vec<_>>().join(".");
1308                if !field.is_empty() {
1309                    return step_data.get(&field).cloned();
1310                }
1311                return Some(step_data.clone());
1312            }
1313        }
1314
1315        None
1316    }
1317
1318    /// Execute a single step
1319    async fn execute_step(
1320        &self,
1321        step: &ProtocolStep,
1322        input: &ProtocolInput,
1323        previous_outputs: &HashMap<String, StepOutput>,
1324        _index: usize,
1325    ) -> Result<StepResult> {
1326        let start = Instant::now();
1327
1328        // Build prompt from template
1329        let prompt = self.render_template(&step.prompt_template, input, previous_outputs);
1330
1331        // Build system prompt based on step action
1332        let system = self.build_system_prompt(step);
1333
1334        // Call LLM, CLI tool, or mock
1335        let (content, tokens) = if self.config.use_mock {
1336            self.mock_llm_call(&prompt, step).await
1337        } else if self.config.cli_tool.is_some() {
1338            self.cli_tool_call(&prompt, &system).await?
1339        } else {
1340            self.real_llm_call(&prompt, &system).await?
1341        };
1342
1343        let duration_ms = start.elapsed().as_millis() as u64;
1344
1345        // Parse response into structured output
1346        let (output, confidence) = self.parse_step_output(&content, step);
1347
1348        Ok(StepResult::success(&step.id, output, confidence)
1349            .with_duration(duration_ms)
1350            .with_tokens(tokens))
1351    }
1352
1353    /// Build system prompt for a step
1354    fn build_system_prompt(&self, step: &ProtocolStep) -> String {
1355        let base = "You are a structured reasoning assistant. Follow the instructions precisely and provide clear, well-organized responses.";
1356
1357        let action_guidance = match &step.action {
1358            StepAction::Generate {
1359                min_count,
1360                max_count,
1361            } => {
1362                format!(
1363                    "Generate {}-{} distinct items. Number each item clearly.",
1364                    min_count, max_count
1365                )
1366            }
1367            StepAction::Analyze { criteria } => {
1368                format!(
1369                    "Analyze based on these criteria: {}. Be thorough and specific.",
1370                    criteria.join(", ")
1371                )
1372            }
1373            StepAction::Synthesize { .. } => {
1374                "Synthesize the information into a coherent summary. Identify patterns and themes."
1375                    .to_string()
1376            }
1377            StepAction::Validate { rules } => {
1378                format!(
1379                    "Validate against these rules: {}. Be explicit about pass/fail for each.",
1380                    rules.join(", ")
1381                )
1382            }
1383            StepAction::Critique { severity } => {
1384                format!(
1385                    "Provide {:?}-level critique. Be honest and specific about weaknesses.",
1386                    severity
1387                )
1388            }
1389            StepAction::Decide { method } => {
1390                format!(
1391                    "Make a decision using {:?} method. Explain your reasoning clearly.",
1392                    method
1393                )
1394            }
1395            StepAction::CrossReference { min_sources } => {
1396                format!(
1397                    "Cross-reference with at least {} sources. Cite each source.",
1398                    min_sources
1399                )
1400            }
1401        };
1402
1403        format!(
1404            "{}\n\n{}\n\nProvide a confidence score (0.0-1.0) for your response.",
1405            base, action_guidance
1406        )
1407    }
1408
1409    /// Render template with input values
1410    ///
1411    /// PERFORMANCE: Uses static and cached regex patterns to avoid
1412    /// recompilation overhead (20-40% faster template rendering)
1413    fn render_template(
1414        &self,
1415        template: &str,
1416        input: &ProtocolInput,
1417        previous_outputs: &HashMap<String, StepOutput>,
1418    ) -> String {
1419        let mut result = template.to_string();
1420
1421        // Replace input placeholders
1422        for (key, value) in &input.fields {
1423            let placeholder = format!("{{{{{}}}}}", key);
1424            let value_str = match value {
1425                serde_json::Value::String(s) => s.clone(),
1426                other => other.to_string(),
1427            };
1428            result = result.replace(&placeholder, &value_str);
1429        }
1430
1431        // Replace previous output placeholders - handle nested field access
1432        for (key, output) in previous_outputs {
1433            // First, handle nested field access like {{step_id.field}}
1434            // Convert output to JSON for field extraction
1435            if let Ok(json_value) = serde_json::to_value(output) {
1436                // Use cached regex pattern instead of compiling each time
1437                let nested_re = get_nested_regex(key);
1438                result = nested_re
1439                    .replace_all(&result, |caps: &regex::Captures| {
1440                        let field = &caps[1];
1441                        // Try to extract the field from the JSON
1442                        if let Some(field_value) = json_value.get(field) {
1443                            match field_value {
1444                                serde_json::Value::String(s) => s.clone(),
1445                                serde_json::Value::Array(arr) => arr
1446                                    .iter()
1447                                    .filter_map(|v| v.as_str())
1448                                    .collect::<Vec<_>>()
1449                                    .join("\n"),
1450                                other => other.to_string().trim_matches('"').to_string(),
1451                            }
1452                        } else {
1453                            // Field not found - return a helpful placeholder
1454                            format!("[{}.{}: not found]", key, field)
1455                        }
1456                    })
1457                    .to_string();
1458            }
1459
1460            // Then handle full step output replacement ({{step_id}})
1461            let placeholder = format!("{{{{{}}}}}", key);
1462            let value_str = match output {
1463                StepOutput::Text { content } => content.clone(),
1464                StepOutput::List { items } => items
1465                    .iter()
1466                    .map(|i| i.content.clone())
1467                    .collect::<Vec<_>>()
1468                    .join("\n"),
1469                other => serde_json::to_string(other).unwrap_or_default(),
1470            };
1471            result = result.replace(&placeholder, &value_str);
1472        }
1473
1474        // Clean up conditional blocks {{#if ...}}...{{/if}}
1475        // PERFORMANCE: Use static compiled regex instead of compiling each time
1476        result = CONDITIONAL_BLOCK_RE.replace_all(&result, "").to_string();
1477
1478        // Clean up any remaining unfilled placeholders {{...}}
1479        // PERFORMANCE: Use static compiled regex instead of compiling each time
1480        if UNFILLED_PLACEHOLDER_RE.is_match(&result) {
1481            tracing::warn!(
1482                "Template has unfilled placeholders: {:?}",
1483                UNFILLED_PLACEHOLDER_RE
1484                    .find_iter(&result)
1485                    .map(|m| m.as_str())
1486                    .collect::<Vec<_>>()
1487            );
1488        }
1489        result = UNFILLED_PLACEHOLDER_RE.replace_all(&result, "").to_string();
1490
1491        result
1492    }
1493
1494    /// Real LLM call
1495    async fn real_llm_call(&self, prompt: &str, system: &str) -> Result<(String, TokenUsage)> {
1496        let client = self
1497            .llm_client
1498            .as_ref()
1499            .ok_or_else(|| Error::Config("LLM client not initialized".to_string()))?;
1500
1501        let request = LlmRequest::new(prompt)
1502            .with_system(system)
1503            .with_temperature(self.config.llm.temperature)
1504            .with_max_tokens(self.config.llm.max_tokens);
1505
1506        let response = client.complete(request).await?;
1507
1508        let tokens = TokenUsage::new(
1509            response.usage.input_tokens,
1510            response.usage.output_tokens,
1511            response.usage.cost_usd(&self.config.llm.model),
1512        );
1513
1514        Ok((response.content, tokens))
1515    }
1516
1517    /// Mock LLM call (for testing)
1518    async fn mock_llm_call(&self, _prompt: &str, step: &ProtocolStep) -> (String, TokenUsage) {
1519        let content = match &step.action {
1520            StepAction::Generate { min_count, .. } => {
1521                let items: Vec<String> = (1..=*min_count)
1522                    .map(|i| format!("{}. Generated perspective {}", i, i))
1523                    .collect();
1524                format!("{}\n\nConfidence: 0.85", items.join("\n"))
1525            }
1526            StepAction::Analyze { .. } => {
1527                "Analysis:\n- Key finding 1\n- Key finding 2\n- Key finding 3\n\nConfidence: 0.82".to_string()
1528            }
1529            StepAction::Synthesize { .. } => {
1530                "Synthesis: The main themes identified are X, Y, and Z. Key insight: ...\n\nConfidence: 0.88".to_string()
1531            }
1532            StepAction::Validate { .. } => {
1533                "Validation result: PASS\n- Rule 1: Pass\n- Rule 2: Pass\n\nConfidence: 0.90".to_string()
1534            }
1535            StepAction::Critique { .. } => {
1536                "Critique:\n1. Strength: Good structure\n2. Weakness: Needs more evidence\n3. Suggestion: Add sources\n\nConfidence: 0.78".to_string()
1537            }
1538            StepAction::Decide { .. } => {
1539                "Decision: Option A recommended\nRationale: Best balance of factors\n\nConfidence: 0.85".to_string()
1540            }
1541            StepAction::CrossReference { .. } => {
1542                "Sources verified:\n1. Source A: Confirms\n2. Source B: Confirms\n3. Source C: Partially confirms\n\nConfidence: 0.87".to_string()
1543            }
1544        };
1545
1546        let tokens = TokenUsage::new(100, 150, 0.001);
1547        (content, tokens)
1548    }
1549
1550    /// CLI tool call (shells out to claude/codex/gemini/etc.)
1551    async fn cli_tool_call(&self, prompt: &str, system: &str) -> Result<(String, TokenUsage)> {
1552        let cli_config = self
1553            .config
1554            .cli_tool
1555            .as_ref()
1556            .ok_or_else(|| Error::Config("CLI tool not configured".to_string()))?;
1557
1558        // Build the full prompt with system message
1559        let full_prompt = if system.is_empty() {
1560            prompt.to_string()
1561        } else {
1562            format!("{}\n\n{}", system, prompt)
1563        };
1564
1565        // Build command with arguments
1566        let mut cmd = Command::new(&cli_config.command);
1567
1568        // Add pre-args
1569        for arg in &cli_config.pre_args {
1570            cmd.arg(arg);
1571        }
1572
1573        // Add the prompt
1574        cmd.arg(&full_prompt);
1575
1576        // Add post-args
1577        for arg in &cli_config.post_args {
1578            cmd.arg(arg);
1579        }
1580
1581        if self.config.verbose {
1582            tracing::info!(
1583                "CLI tool call: {} {} \"{}\"",
1584                cli_config.command,
1585                cli_config.pre_args.join(" "),
1586                if full_prompt.len() > 50 {
1587                    format!("{}...", &full_prompt[..50])
1588                } else {
1589                    full_prompt.clone()
1590                }
1591            );
1592        }
1593
1594        // Execute the command
1595        let output = cmd.output().map_err(|e| {
1596            Error::Network(format!(
1597                "Failed to execute CLI tool '{}': {}",
1598                cli_config.command, e
1599            ))
1600        })?;
1601
1602        if !output.status.success() {
1603            let stderr = String::from_utf8_lossy(&output.stderr);
1604            return Err(Error::Network(format!(
1605                "CLI tool '{}' failed with status {}: {}",
1606                cli_config.command, output.status, stderr
1607            )));
1608        }
1609
1610        let content = String::from_utf8_lossy(&output.stdout).to_string();
1611
1612        // Estimate tokens (rough approximation: ~4 chars per token for English)
1613        let input_tokens = (full_prompt.len() / 4) as u32;
1614        let output_tokens = (content.len() / 4) as u32;
1615
1616        // Estimate cost based on CLI tool type
1617        // Prices as of Dec 2025 (per 1M tokens):
1618        // - Claude Sonnet 4: $3/1M input, $15/1M output
1619        // - Gemini 2.0 Flash: $0.10/1M input, $0.40/1M output
1620        // - GPT-4: $30/1M input, $60/1M output
1621        let (input_price, output_price) = match cli_config.command.as_str() {
1622            "claude" => (3.0, 15.0),              // Claude Sonnet 4 default
1623            "gemini" => (0.10, 0.40),             // Gemini 2.0 Flash
1624            "codex" | "opencode" => (30.0, 60.0), // GPT-4 level
1625            _ => (5.0, 15.0),                     // Conservative default
1626        };
1627
1628        // Calculate cost in USD (prices are per 1M tokens)
1629        let cost_usd = (input_tokens as f64 * input_price / 1_000_000.0)
1630            + (output_tokens as f64 * output_price / 1_000_000.0);
1631
1632        let tokens = TokenUsage::new(input_tokens, output_tokens, cost_usd);
1633
1634        // Log estimated cost for transparency
1635        if self.config.verbose {
1636            tracing::info!(
1637                "CLI tool estimated: {} input + {} output tokens ≈ ${:.6}",
1638                input_tokens,
1639                output_tokens,
1640                cost_usd
1641            );
1642        }
1643
1644        Ok((content, tokens))
1645    }
1646
1647    /// Parse step output from LLM response
1648    fn parse_step_output(&self, content: &str, step: &ProtocolStep) -> (StepOutput, f64) {
1649        // Extract confidence from response
1650        let confidence = self.extract_confidence(content).unwrap_or(0.75);
1651
1652        let output = match &step.action {
1653            StepAction::Generate { .. } => {
1654                let items = self.extract_list_items(content);
1655                StepOutput::List { items }
1656            }
1657            StepAction::Analyze { .. } | StepAction::Synthesize { .. } => StepOutput::Text {
1658                content: content.to_string(),
1659            },
1660            StepAction::Validate { .. } => {
1661                let passed = content.to_lowercase().contains("pass");
1662                StepOutput::Boolean {
1663                    value: passed,
1664                    reason: Some(content.to_string()),
1665                }
1666            }
1667            StepAction::Critique { .. } => {
1668                let items = self.extract_list_items(content);
1669                StepOutput::List { items }
1670            }
1671            StepAction::Decide { .. } => {
1672                let mut data = HashMap::new();
1673                data.insert("decision".to_string(), serde_json::json!(content));
1674                StepOutput::Structured { data }
1675            }
1676            StepAction::CrossReference { .. } => {
1677                let items = self.extract_list_items(content);
1678                StepOutput::List { items }
1679            }
1680        };
1681
1682        (output, confidence)
1683    }
1684
1685    /// Extract confidence score from response text
1686    fn extract_confidence(&self, content: &str) -> Option<f64> {
1687        // Look for "Confidence: X.XX" pattern
1688        let re = regex::Regex::new(r"[Cc]onfidence:?\s*(\d+\.?\d*)").ok()?;
1689        if let Some(caps) = re.captures(content) {
1690            if let Some(m) = caps.get(1) {
1691                return m.as_str().parse::<f64>().ok().map(|v| v.min(1.0));
1692            }
1693        }
1694        None
1695    }
1696
1697    /// Extract list items from response text
1698    /// Handles multiple formats: numbered (1. 2. 10.), bullets (- *), bold headers (**item**)
1699    fn extract_list_items(&self, content: &str) -> Vec<ListItem> {
1700        // Compile regexes once (static lifetime)
1701        static NUMBERED_RE: Lazy<regex::Regex> =
1702            Lazy::new(|| regex::Regex::new(r"^\d+[\.\)]\s*(.+)$").expect("Invalid regex pattern"));
1703        static BOLD_RE: Lazy<regex::Regex> = Lazy::new(|| {
1704            regex::Regex::new(r"^\*\*([^*]+)\*\*[:\s-]*(.*)$").expect("Invalid regex pattern")
1705        });
1706
1707        let mut items = Vec::new();
1708        let mut current_item: Option<String> = None;
1709
1710        for line in content.lines() {
1711            let trimmed = line.trim();
1712
1713            // Skip empty lines and confidence markers
1714            if trimmed.is_empty() || trimmed.to_lowercase().starts_with("confidence") {
1715                // If we have a current item being built, save it
1716                if let Some(item) = current_item.take() {
1717                    if !item.is_empty() {
1718                        items.push(ListItem::new(item));
1719                    }
1720                }
1721                continue;
1722            }
1723
1724            // Try to match numbered items (1. 2. ... 10. 11. etc)
1725            if let Some(caps) = NUMBERED_RE.captures(trimmed) {
1726                // Save previous item if exists
1727                if let Some(item) = current_item.take() {
1728                    if !item.is_empty() {
1729                        items.push(ListItem::new(item));
1730                    }
1731                }
1732                current_item = Some(caps[1].to_string());
1733                continue;
1734            }
1735
1736            // Match bullet points (- * •)
1737            if let Some(text) = trimmed
1738                .strip_prefix('-')
1739                .or(trimmed.strip_prefix('*'))
1740                .or(trimmed.strip_prefix('•'))
1741            {
1742                let text = text.trim();
1743                if !text.is_empty() {
1744                    // Save previous item if exists
1745                    if let Some(item) = current_item.take() {
1746                        if !item.is_empty() {
1747                            items.push(ListItem::new(item));
1748                        }
1749                    }
1750                    current_item = Some(text.to_string());
1751                    continue;
1752                }
1753            }
1754
1755            // Match bold headers: **Title**: Description or **Title** - Description
1756            if let Some(caps) = BOLD_RE.captures(trimmed) {
1757                // Save previous item if exists
1758                if let Some(item) = current_item.take() {
1759                    if !item.is_empty() {
1760                        items.push(ListItem::new(item));
1761                    }
1762                }
1763                let title = caps[1].trim();
1764                let desc = caps[2].trim();
1765                if desc.is_empty() {
1766                    current_item = Some(title.to_string());
1767                } else {
1768                    current_item = Some(format!("{}: {}", title, desc));
1769                }
1770                continue;
1771            }
1772
1773            // If line starts with indentation and we have a current item, append to it
1774            if line.starts_with("  ") || line.starts_with("\t") {
1775                if let Some(ref mut item) = current_item {
1776                    item.push(' ');
1777                    item.push_str(trimmed);
1778                    continue;
1779                }
1780            }
1781
1782            // Otherwise, if it's a continuation of the previous item or standalone text
1783            if let Some(ref mut item) = current_item {
1784                // Continuation of multi-line item
1785                item.push(' ');
1786                item.push_str(trimmed);
1787            }
1788            // Skip standalone lines that aren't list items
1789        }
1790
1791        // Don't forget the last item
1792        if let Some(item) = current_item {
1793            if !item.is_empty() {
1794                items.push(ListItem::new(item));
1795            }
1796        }
1797
1798        items
1799    }
1800
1801    /// Save trace to file
1802    fn save_trace(&self, trace: &ExecutionTrace) -> Result<()> {
1803        let dir = self
1804            .config
1805            .trace_dir
1806            .as_ref()
1807            .ok_or_else(|| Error::Config("Trace directory not configured".to_string()))?;
1808
1809        std::fs::create_dir_all(dir).map_err(|e| Error::IoMessage {
1810            message: format!("Failed to create trace directory: {}", e),
1811        })?;
1812
1813        let filename = format!("{}_{}.json", trace.protocol_id, trace.id);
1814        let path = dir.join(filename);
1815
1816        let json = trace.to_json().map_err(|e| Error::Parse {
1817            message: format!("Failed to serialize trace: {}", e),
1818        })?;
1819
1820        std::fs::write(&path, json).map_err(|e| Error::IoMessage {
1821            message: format!("Failed to write trace: {}", e),
1822        })?;
1823
1824        Ok(())
1825    }
1826
1827    /// List available protocols
1828    pub fn list_protocols(&self) -> Vec<&str> {
1829        self.registry.list_ids()
1830    }
1831
1832    /// List available profiles
1833    pub fn list_profiles(&self) -> Vec<&str> {
1834        self.profiles.list_ids()
1835    }
1836
1837    /// Get protocol info
1838    pub fn get_protocol(&self, id: &str) -> Option<&Protocol> {
1839        self.registry.get(id)
1840    }
1841
1842    /// Get profile info
1843    pub fn get_profile(&self, id: &str) -> Option<&ReasoningProfile> {
1844        self.profiles.get(id)
1845    }
1846}
1847
1848impl Default for ProtocolExecutor {
1849    fn default() -> Self {
1850        Self::new().expect("Failed to create default executor")
1851    }
1852}
1853
1854// ═══════════════════════════════════════════════════════════════════════════════
1855// COMPREHENSIVE TEST SUITE
1856// ═══════════════════════════════════════════════════════════════════════════════
1857//
1858// Test Coverage:
1859// 1. Executor Creation and Configuration
1860// 2. Single Module Execution (GigaThink, LaserLogic, BedRock, ProofGuard, BrutalHonesty)
1861// 3. PowerCombo Chain Execution (all 5 ThinkTools)
1862// 4. Timeout Handling
1863// 5. Error Recovery
1864// 6. Trace Generation
1865// 7. Template Rendering
1866// 8. Confidence Extraction
1867// 9. List Item Parsing
1868// 10. Branch Condition Evaluation
1869// 11. Dependency Management
1870// 12. Budget Tracking
1871// 13. Parallel Execution
1872// ═══════════════════════════════════════════════════════════════════════════════
1873
1874#[cfg(test)]
1875mod tests {
1876    use super::*;
1877    use std::time::Duration;
1878
1879    // ═══════════════════════════════════════════════════════════════════════════
1880    // 1. EXECUTOR CREATION AND CONFIGURATION TESTS
1881    // ═══════════════════════════════════════════════════════════════════════════
1882
1883    #[test]
1884    fn test_executor_creation() {
1885        let executor = ProtocolExecutor::mock().unwrap();
1886        assert!(!executor.registry().is_empty());
1887        assert!(!executor.profiles().is_empty());
1888    }
1889
1890    #[test]
1891    fn test_executor_config_default() {
1892        let config = ExecutorConfig::default();
1893        assert_eq!(config.timeout_secs, 120);
1894        assert!(!config.use_mock);
1895        assert!(!config.save_traces);
1896        assert!(config.show_progress);
1897        assert!(!config.enable_parallel);
1898        assert_eq!(config.max_concurrent_steps, 4);
1899    }
1900
1901    #[test]
1902    fn test_executor_config_mock() {
1903        let config = ExecutorConfig::mock();
1904        assert!(config.use_mock);
1905    }
1906
1907    #[test]
1908    fn test_executor_config_parallel() {
1909        let config = ExecutorConfig::default().with_parallel();
1910        assert!(config.enable_parallel);
1911        assert_eq!(config.max_concurrent_steps, 4);
1912
1913        let config_limited = ExecutorConfig::default().with_parallel_limit(2);
1914        assert!(config_limited.enable_parallel);
1915        assert_eq!(config_limited.max_concurrent_steps, 2);
1916    }
1917
1918    #[test]
1919    fn test_executor_config_self_consistency() {
1920        let config = ExecutorConfig::default().with_self_consistency();
1921        assert!(config.self_consistency.is_some());
1922
1923        let config_fast = ExecutorConfig::default().with_self_consistency_fast();
1924        assert!(config_fast.self_consistency.is_some());
1925        assert_eq!(config_fast.self_consistency.unwrap().num_samples, 3);
1926
1927        let config_thorough = ExecutorConfig::default().with_self_consistency_thorough();
1928        assert!(config_thorough.self_consistency.is_some());
1929        assert_eq!(config_thorough.self_consistency.unwrap().num_samples, 10);
1930    }
1931
1932    #[test]
1933    fn test_list_protocols() {
1934        let executor = ProtocolExecutor::mock().unwrap();
1935        let protocols = executor.list_protocols();
1936        assert!(protocols.contains(&"gigathink"));
1937        assert!(protocols.contains(&"laserlogic"));
1938        assert!(protocols.contains(&"bedrock"));
1939        assert!(protocols.contains(&"proofguard"));
1940        assert!(protocols.contains(&"brutalhonesty"));
1941    }
1942
1943    #[test]
1944    fn test_list_profiles() {
1945        let executor = ProtocolExecutor::mock().unwrap();
1946        let profiles = executor.list_profiles();
1947        assert!(profiles.contains(&"quick"));
1948        assert!(profiles.contains(&"balanced"));
1949        assert!(profiles.contains(&"deep"));
1950        assert!(profiles.contains(&"paranoid"));
1951        assert!(profiles.contains(&"powercombo"));
1952    }
1953
1954    #[test]
1955    fn test_get_protocol() {
1956        let executor = ProtocolExecutor::mock().unwrap();
1957        let gigathink = executor.get_protocol("gigathink");
1958        assert!(gigathink.is_some());
1959        assert_eq!(gigathink.unwrap().id, "gigathink");
1960
1961        let nonexistent = executor.get_protocol("nonexistent_protocol");
1962        assert!(nonexistent.is_none());
1963    }
1964
1965    #[test]
1966    fn test_get_profile() {
1967        let executor = ProtocolExecutor::mock().unwrap();
1968        let quick = executor.get_profile("quick");
1969        assert!(quick.is_some());
1970        assert_eq!(quick.unwrap().id, "quick");
1971
1972        let nonexistent = executor.get_profile("nonexistent_profile");
1973        assert!(nonexistent.is_none());
1974    }
1975
1976    // ═══════════════════════════════════════════════════════════════════════════
1977    // 2. PROTOCOL INPUT TESTS
1978    // ═══════════════════════════════════════════════════════════════════════════
1979
1980    #[test]
1981    fn test_protocol_input_query() {
1982        let input = ProtocolInput::query("Test query");
1983        assert_eq!(input.get_str("query"), Some("Test query"));
1984    }
1985
1986    #[test]
1987    fn test_protocol_input_argument() {
1988        let input = ProtocolInput::argument("Test argument");
1989        assert_eq!(input.get_str("argument"), Some("Test argument"));
1990    }
1991
1992    #[test]
1993    fn test_protocol_input_statement() {
1994        let input = ProtocolInput::statement("Test statement");
1995        assert_eq!(input.get_str("statement"), Some("Test statement"));
1996    }
1997
1998    #[test]
1999    fn test_protocol_input_claim() {
2000        let input = ProtocolInput::claim("Test claim");
2001        assert_eq!(input.get_str("claim"), Some("Test claim"));
2002    }
2003
2004    #[test]
2005    fn test_protocol_input_work() {
2006        let input = ProtocolInput::work("Test work");
2007        assert_eq!(input.get_str("work"), Some("Test work"));
2008    }
2009
2010    #[test]
2011    fn test_protocol_input_with_field() {
2012        let input = ProtocolInput::query("Test query")
2013            .with_field("context", "Some context")
2014            .with_field("domain", "AI");
2015
2016        assert_eq!(input.get_str("query"), Some("Test query"));
2017        assert_eq!(input.get_str("context"), Some("Some context"));
2018        assert_eq!(input.get_str("domain"), Some("AI"));
2019    }
2020
2021    #[test]
2022    fn test_protocol_input_missing_field() {
2023        let input = ProtocolInput::query("Test query");
2024        assert_eq!(input.get_str("nonexistent"), None);
2025    }
2026
2027    // ═══════════════════════════════════════════════════════════════════════════
2028    // 3. TEMPLATE RENDERING TESTS
2029    // ═══════════════════════════════════════════════════════════════════════════
2030
2031    #[test]
2032    fn test_template_rendering_simple() {
2033        let executor = ProtocolExecutor::mock().unwrap();
2034        let input = ProtocolInput::query("What is AI?");
2035
2036        let template = "Question: {{query}}";
2037        let rendered = executor.render_template(template, &input, &HashMap::new());
2038
2039        assert_eq!(rendered, "Question: What is AI?");
2040    }
2041
2042    #[test]
2043    fn test_template_rendering_multiple_fields() {
2044        let executor = ProtocolExecutor::mock().unwrap();
2045        let input = ProtocolInput::query("What is AI?").with_field("context", "machine learning");
2046
2047        let template = "Question: {{query}}\nContext: {{context}}";
2048        let rendered = executor.render_template(template, &input, &HashMap::new());
2049
2050        assert_eq!(rendered, "Question: What is AI?\nContext: machine learning");
2051    }
2052
2053    #[test]
2054    fn test_template_rendering_with_previous_outputs() {
2055        let executor = ProtocolExecutor::mock().unwrap();
2056        let input = ProtocolInput::query("Test");
2057
2058        let mut previous_outputs = HashMap::new();
2059        previous_outputs.insert(
2060            "step1".to_string(),
2061            StepOutput::Text {
2062                content: "Previous output".to_string(),
2063            },
2064        );
2065
2066        let template = "Input: {{query}}\nPrevious: {{step1}}";
2067        let rendered = executor.render_template(template, &input, &previous_outputs);
2068
2069        assert_eq!(rendered, "Input: Test\nPrevious: Previous output");
2070    }
2071
2072    #[test]
2073    fn test_template_rendering_list_output() {
2074        let executor = ProtocolExecutor::mock().unwrap();
2075        let input = ProtocolInput::query("Test");
2076
2077        let mut previous_outputs = HashMap::new();
2078        previous_outputs.insert(
2079            "ideas".to_string(),
2080            StepOutput::List {
2081                items: vec![
2082                    ListItem::new("Idea 1"),
2083                    ListItem::new("Idea 2"),
2084                    ListItem::new("Idea 3"),
2085                ],
2086            },
2087        );
2088
2089        let template = "Ideas:\n{{ideas}}";
2090        let rendered = executor.render_template(template, &input, &previous_outputs);
2091
2092        assert!(rendered.contains("Idea 1"));
2093        assert!(rendered.contains("Idea 2"));
2094        assert!(rendered.contains("Idea 3"));
2095    }
2096
2097    #[test]
2098    fn test_template_rendering_unfilled_placeholders_removed() {
2099        let executor = ProtocolExecutor::mock().unwrap();
2100        let input = ProtocolInput::query("Test");
2101
2102        let template = "Question: {{query}}\nOptional: {{optional_field}}";
2103        let rendered = executor.render_template(template, &input, &HashMap::new());
2104
2105        assert_eq!(rendered, "Question: Test\nOptional: ");
2106    }
2107
2108    #[test]
2109    fn test_template_static_rendering() {
2110        let input = ProtocolInput::query("Test query");
2111        let previous_outputs = HashMap::new();
2112
2113        let template = "Question: {{query}}";
2114        let rendered =
2115            ProtocolExecutor::render_template_static(template, &input, &previous_outputs);
2116
2117        assert_eq!(rendered, "Question: Test query");
2118    }
2119
2120    // ═══════════════════════════════════════════════════════════════════════════
2121    // 4. CONFIDENCE EXTRACTION TESTS
2122    // ═══════════════════════════════════════════════════════════════════════════
2123
2124    #[test]
2125    fn test_extract_confidence_standard_format() {
2126        let executor = ProtocolExecutor::mock().unwrap();
2127
2128        assert_eq!(executor.extract_confidence("Confidence: 0.85"), Some(0.85));
2129        assert_eq!(executor.extract_confidence("confidence: 0.9"), Some(0.9));
2130        assert_eq!(executor.extract_confidence("Confidence 0.75"), Some(0.75));
2131    }
2132
2133    #[test]
2134    fn test_extract_confidence_multiline() {
2135        let executor = ProtocolExecutor::mock().unwrap();
2136
2137        let content = "Some analysis text\nMore text\nConfidence: 0.75";
2138        assert_eq!(executor.extract_confidence(content), Some(0.75));
2139    }
2140
2141    #[test]
2142    fn test_extract_confidence_integer() {
2143        let executor = ProtocolExecutor::mock().unwrap();
2144
2145        // Integer values should be capped at 1.0
2146        assert_eq!(executor.extract_confidence("Confidence: 95"), Some(1.0));
2147    }
2148
2149    #[test]
2150    fn test_extract_confidence_missing() {
2151        let executor = ProtocolExecutor::mock().unwrap();
2152
2153        assert_eq!(executor.extract_confidence("No confidence here"), None);
2154        assert_eq!(executor.extract_confidence(""), None);
2155    }
2156
2157    #[test]
2158    fn test_extract_confidence_static() {
2159        assert_eq!(
2160            ProtocolExecutor::extract_confidence_static("Confidence: 0.88"),
2161            Some(0.88)
2162        );
2163        assert_eq!(
2164            ProtocolExecutor::extract_confidence_static("confidence 0.72"),
2165            Some(0.72)
2166        );
2167    }
2168
2169    // ═══════════════════════════════════════════════════════════════════════════
2170    // 5. LIST ITEM EXTRACTION TESTS
2171    // ═══════════════════════════════════════════════════════════════════════════
2172
2173    #[test]
2174    fn test_extract_list_items_numbered() {
2175        let executor = ProtocolExecutor::mock().unwrap();
2176
2177        let content = "1. First item\n2. Second item\n3. Third item\nConfidence: 0.8";
2178        let items = executor.extract_list_items(content);
2179
2180        assert_eq!(items.len(), 3);
2181        assert_eq!(items[0].content, "First item");
2182        assert_eq!(items[1].content, "Second item");
2183        assert_eq!(items[2].content, "Third item");
2184    }
2185
2186    #[test]
2187    fn test_extract_list_items_bulleted() {
2188        let executor = ProtocolExecutor::mock().unwrap();
2189
2190        let content = "- First item\n- Second item\n- Third item";
2191        let items = executor.extract_list_items(content);
2192
2193        assert_eq!(items.len(), 3);
2194        assert_eq!(items[0].content, "First item");
2195    }
2196
2197    #[test]
2198    fn test_extract_list_items_mixed() {
2199        let executor = ProtocolExecutor::mock().unwrap();
2200
2201        let content = "1. First item\n2. Second item\n- Third item\nConfidence: 0.8";
2202        let items = executor.extract_list_items(content);
2203
2204        assert_eq!(items.len(), 3);
2205    }
2206
2207    #[test]
2208    fn test_extract_list_items_with_bold() {
2209        let executor = ProtocolExecutor::mock().unwrap();
2210
2211        let content = "**Title**: Description here\n**Another**: More text";
2212        let items = executor.extract_list_items(content);
2213
2214        assert_eq!(items.len(), 2);
2215        assert!(items[0].content.contains("Title"));
2216    }
2217
2218    #[test]
2219    fn test_extract_list_items_multiline() {
2220        let executor = ProtocolExecutor::mock().unwrap();
2221
2222        let content = "1. First item with\n   continuation on next line\n2. Second item";
2223        let items = executor.extract_list_items(content);
2224
2225        assert_eq!(items.len(), 2);
2226        assert!(items[0].content.contains("continuation"));
2227    }
2228
2229    #[test]
2230    fn test_extract_list_items_empty() {
2231        let executor = ProtocolExecutor::mock().unwrap();
2232
2233        let content = "No list items here\nJust plain text";
2234        let items = executor.extract_list_items(content);
2235
2236        assert!(items.is_empty());
2237    }
2238
2239    // ═══════════════════════════════════════════════════════════════════════════
2240    // 6. SINGLE MODULE EXECUTION TESTS
2241    // ═══════════════════════════════════════════════════════════════════════════
2242
2243    #[tokio::test]
2244    async fn test_execute_gigathink_mock() {
2245        let executor = ProtocolExecutor::mock().unwrap();
2246        let input = ProtocolInput::query("What are the key factors for startup success?");
2247
2248        let result = executor.execute("gigathink", input).await.unwrap();
2249
2250        assert!(result.success);
2251        assert!(result.confidence > 0.0);
2252        assert!(!result.steps.is_empty());
2253        assert_eq!(result.protocol_id, "gigathink");
2254        assert!(result.duration_ms > 0);
2255    }
2256
2257    #[tokio::test]
2258    async fn test_execute_laserlogic_mock() {
2259        let executor = ProtocolExecutor::mock().unwrap();
2260        let input = ProtocolInput::argument(
2261            "All humans are mortal. Socrates is human. Therefore, Socrates is mortal.",
2262        );
2263
2264        let result = executor.execute("laserlogic", input).await.unwrap();
2265
2266        assert!(result.success);
2267        assert!(result.confidence > 0.0);
2268        assert!(!result.steps.is_empty());
2269    }
2270
2271    #[tokio::test]
2272    async fn test_execute_bedrock_mock() {
2273        let executor = ProtocolExecutor::mock().unwrap();
2274        let input = ProtocolInput::statement("The Earth revolves around the Sun.");
2275
2276        let result = executor.execute("bedrock", input).await.unwrap();
2277
2278        assert!(result.success);
2279        assert!(!result.steps.is_empty());
2280    }
2281
2282    #[tokio::test]
2283    async fn test_execute_proofguard_mock() {
2284        let executor = ProtocolExecutor::mock().unwrap();
2285        let input = ProtocolInput::claim("Climate change is caused by human activities.");
2286
2287        let result = executor.execute("proofguard", input).await.unwrap();
2288
2289        assert!(result.success);
2290        assert!(!result.steps.is_empty());
2291    }
2292
2293    #[tokio::test]
2294    async fn test_execute_brutalhonesty_mock() {
2295        let executor = ProtocolExecutor::mock().unwrap();
2296        let input = ProtocolInput::work("My analysis concludes that AI will solve all problems.");
2297
2298        let result = executor.execute("brutalhonesty", input).await.unwrap();
2299
2300        assert!(result.success);
2301        assert!(!result.steps.is_empty());
2302    }
2303
2304    // ═══════════════════════════════════════════════════════════════════════════
2305    // 7. POWERCOMBO CHAIN EXECUTION TESTS
2306    // ═══════════════════════════════════════════════════════════════════════════
2307
2308    #[tokio::test]
2309    async fn test_execute_profile_quick_mock() {
2310        let executor = ProtocolExecutor::mock().unwrap();
2311        let input = ProtocolInput::query("Should we adopt microservices?");
2312
2313        let result = executor.execute_profile("quick", input).await.unwrap();
2314
2315        assert!(result.success);
2316        assert!(result.confidence > 0.0);
2317        assert!(!result.steps.is_empty());
2318    }
2319
2320    #[tokio::test]
2321    async fn test_execute_profile_balanced_mock() {
2322        let executor = ProtocolExecutor::mock().unwrap();
2323        let input = ProtocolInput::query("What is the future of AI in healthcare?");
2324
2325        let result = executor.execute_profile("balanced", input).await.unwrap();
2326
2327        assert!(result.success);
2328        assert!(result.confidence > 0.0);
2329    }
2330
2331    #[tokio::test]
2332    async fn test_execute_profile_powercombo_mock() {
2333        let executor = ProtocolExecutor::mock().unwrap();
2334        let input =
2335            ProtocolInput::query("Analyze the impact of quantum computing on cryptography.");
2336
2337        let result = executor.execute_profile("powercombo", input).await.unwrap();
2338
2339        // In mock mode, check that execution completed and produced valid results
2340        // Note: powercombo has min_confidence=0.95, which mock mode may not reach
2341        // The important thing is that execution completes without error
2342        assert!(result.confidence > 0.0);
2343        // PowerCombo should produce many steps from all 5 ThinkTools
2344        assert!(result.steps.len() >= 5);
2345        // All individual steps should succeed even if overall confidence is below threshold
2346        assert!(result.steps.iter().all(|s| s.success));
2347    }
2348
2349    // ═══════════════════════════════════════════════════════════════════════════
2350    // 8. ERROR HANDLING AND RECOVERY TESTS
2351    // ═══════════════════════════════════════════════════════════════════════════
2352
2353    #[tokio::test]
2354    async fn test_execute_nonexistent_protocol() {
2355        let executor = ProtocolExecutor::mock().unwrap();
2356        let input = ProtocolInput::query("Test");
2357
2358        let result = executor.execute("nonexistent_protocol", input).await;
2359
2360        assert!(result.is_err());
2361        let err = result.unwrap_err();
2362        assert!(matches!(err, Error::NotFound { .. }));
2363    }
2364
2365    #[tokio::test]
2366    async fn test_execute_nonexistent_profile() {
2367        let executor = ProtocolExecutor::mock().unwrap();
2368        let input = ProtocolInput::query("Test");
2369
2370        let result = executor.execute_profile("nonexistent_profile", input).await;
2371
2372        assert!(result.is_err());
2373    }
2374
2375    #[tokio::test]
2376    async fn test_execute_missing_required_input() {
2377        let executor = ProtocolExecutor::mock().unwrap();
2378        // GigaThink requires "query" field, but we provide "argument"
2379        let input = ProtocolInput::argument("Wrong field type");
2380
2381        let result = executor.execute("gigathink", input).await;
2382
2383        assert!(result.is_err());
2384        let err = result.unwrap_err();
2385        assert!(matches!(err, Error::Validation(_)));
2386    }
2387
2388    // ═══════════════════════════════════════════════════════════════════════════
2389    // 9. TRACE GENERATION TESTS
2390    // ═══════════════════════════════════════════════════════════════════════════
2391
2392    #[tokio::test]
2393    async fn test_trace_generation_basic() {
2394        // Create executor with trace saving enabled (but to temp dir)
2395        let config = ExecutorConfig {
2396            use_mock: true,
2397            save_traces: true,
2398            trace_dir: Some(std::env::temp_dir().join("reasonkit_test_traces")),
2399            show_progress: false,
2400            ..Default::default()
2401        };
2402        let executor = ProtocolExecutor::with_config(config).unwrap();
2403        let input = ProtocolInput::query("Test trace generation");
2404
2405        let result = executor.execute("gigathink", input).await.unwrap();
2406
2407        assert!(result.trace_id.is_some());
2408    }
2409
2410    #[test]
2411    fn test_execution_trace_creation() {
2412        let trace = ExecutionTrace::new("test_protocol", "1.0.0");
2413
2414        assert_eq!(trace.protocol_id, "test_protocol");
2415        assert_eq!(trace.protocol_version, "1.0.0");
2416        assert!(trace.steps.is_empty());
2417        assert_eq!(
2418            trace.status,
2419            crate::thinktool::trace::ExecutionStatus::Running
2420        );
2421    }
2422
2423    #[test]
2424    fn test_step_trace_creation() {
2425        let mut step_trace = StepTrace::new("step1", 0);
2426
2427        assert_eq!(step_trace.step_id, "step1");
2428        assert_eq!(step_trace.index, 0);
2429        assert_eq!(step_trace.status, StepStatus::Pending);
2430
2431        step_trace.complete(
2432            StepOutput::Text {
2433                content: "Output".to_string(),
2434            },
2435            0.85,
2436        );
2437
2438        assert_eq!(step_trace.status, StepStatus::Completed);
2439        assert_eq!(step_trace.confidence, 0.85);
2440    }
2441
2442    // ═══════════════════════════════════════════════════════════════════════════
2443    // 10. BRANCH CONDITION EVALUATION TESTS
2444    // ═══════════════════════════════════════════════════════════════════════════
2445
2446    #[test]
2447    fn test_branch_condition_always() {
2448        let executor = ProtocolExecutor::mock().unwrap();
2449        let condition = BranchCondition::Always;
2450        let results: Vec<StepResult> = vec![];
2451
2452        assert!(executor.evaluate_branch_condition(&condition, &results));
2453    }
2454
2455    #[test]
2456    fn test_branch_condition_confidence_below() {
2457        let executor = ProtocolExecutor::mock().unwrap();
2458        let condition = BranchCondition::ConfidenceBelow { threshold: 0.8 };
2459
2460        // With empty results, should return true (no confidence to check)
2461        let empty_results: Vec<StepResult> = vec![];
2462        assert!(executor.evaluate_branch_condition(&condition, &empty_results));
2463
2464        // With low confidence result, should return true
2465        let low_conf_results = vec![StepResult::success(
2466            "step1",
2467            StepOutput::Text {
2468                content: "test".to_string(),
2469            },
2470            0.5,
2471        )];
2472        assert!(executor.evaluate_branch_condition(&condition, &low_conf_results));
2473
2474        // With high confidence result, should return false
2475        let high_conf_results = vec![StepResult::success(
2476            "step1",
2477            StepOutput::Text {
2478                content: "test".to_string(),
2479            },
2480            0.9,
2481        )];
2482        assert!(!executor.evaluate_branch_condition(&condition, &high_conf_results));
2483    }
2484
2485    #[test]
2486    fn test_branch_condition_confidence_above() {
2487        let executor = ProtocolExecutor::mock().unwrap();
2488        let condition = BranchCondition::ConfidenceAbove { threshold: 0.8 };
2489
2490        // With high confidence result, should return true
2491        let high_conf_results = vec![StepResult::success(
2492            "step1",
2493            StepOutput::Text {
2494                content: "test".to_string(),
2495            },
2496            0.9,
2497        )];
2498        assert!(executor.evaluate_branch_condition(&condition, &high_conf_results));
2499
2500        // With low confidence result, should return false
2501        let low_conf_results = vec![StepResult::success(
2502            "step1",
2503            StepOutput::Text {
2504                content: "test".to_string(),
2505            },
2506            0.5,
2507        )];
2508        assert!(!executor.evaluate_branch_condition(&condition, &low_conf_results));
2509    }
2510
2511    #[test]
2512    fn test_branch_condition_output_equals() {
2513        let executor = ProtocolExecutor::mock().unwrap();
2514        let condition = BranchCondition::OutputEquals {
2515            field: "result".to_string(),
2516            value: "PASS".to_string(),
2517        };
2518
2519        // With matching output
2520        let matching_results = vec![StepResult::success(
2521            "step1",
2522            StepOutput::Text {
2523                content: "Result: PASS".to_string(),
2524            },
2525            0.9,
2526        )];
2527        assert!(executor.evaluate_branch_condition(&condition, &matching_results));
2528
2529        // With non-matching output
2530        let non_matching_results = vec![StepResult::success(
2531            "step1",
2532            StepOutput::Text {
2533                content: "Result: FAIL".to_string(),
2534            },
2535            0.9,
2536        )];
2537        assert!(!executor.evaluate_branch_condition(&condition, &non_matching_results));
2538    }
2539
2540    // ═══════════════════════════════════════════════════════════════════════════
2541    // 11. DEPENDENCY MANAGEMENT TESTS
2542    // ═══════════════════════════════════════════════════════════════════════════
2543
2544    #[test]
2545    fn test_dependencies_met_empty() {
2546        let executor = ProtocolExecutor::mock().unwrap();
2547        let deps: Vec<String> = vec![];
2548        let results: Vec<StepResult> = vec![];
2549
2550        assert!(executor.dependencies_met(&deps, &results));
2551    }
2552
2553    #[test]
2554    fn test_dependencies_met_satisfied() {
2555        let executor = ProtocolExecutor::mock().unwrap();
2556        let deps = vec!["step1".to_string(), "step2".to_string()];
2557        let results = vec![
2558            StepResult::success(
2559                "step1",
2560                StepOutput::Text {
2561                    content: "".to_string(),
2562                },
2563                0.9,
2564            ),
2565            StepResult::success(
2566                "step2",
2567                StepOutput::Text {
2568                    content: "".to_string(),
2569                },
2570                0.8,
2571            ),
2572        ];
2573
2574        assert!(executor.dependencies_met(&deps, &results));
2575    }
2576
2577    #[test]
2578    fn test_dependencies_met_unsatisfied() {
2579        let executor = ProtocolExecutor::mock().unwrap();
2580        let deps = vec!["step1".to_string(), "step2".to_string()];
2581        let results = vec![StepResult::success(
2582            "step1",
2583            StepOutput::Text {
2584                content: "".to_string(),
2585            },
2586            0.9,
2587        )];
2588
2589        assert!(!executor.dependencies_met(&deps, &results));
2590    }
2591
2592    #[test]
2593    fn test_dependencies_met_failed_step() {
2594        let executor = ProtocolExecutor::mock().unwrap();
2595        let deps = vec!["step1".to_string()];
2596        let results = vec![StepResult::failure("step1", "Some error")];
2597
2598        assert!(!executor.dependencies_met(&deps, &results));
2599    }
2600
2601    // ═══════════════════════════════════════════════════════════════════════════
2602    // 12. BUDGET TRACKING TESTS
2603    // ═══════════════════════════════════════════════════════════════════════════
2604
2605    #[tokio::test]
2606    async fn test_execution_with_token_budget() {
2607        let config = ExecutorConfig {
2608            use_mock: true,
2609            budget: BudgetConfig::with_tokens(10000),
2610            show_progress: false,
2611            ..Default::default()
2612        };
2613        let executor = ProtocolExecutor::with_config(config).unwrap();
2614        let input = ProtocolInput::query("Test budget tracking");
2615
2616        let result = executor.execute("gigathink", input).await.unwrap();
2617
2618        assert!(result.success);
2619        assert!(result.budget_summary.is_some());
2620        let summary = result.budget_summary.unwrap();
2621        assert!(summary.tokens_used > 0);
2622    }
2623
2624    #[tokio::test]
2625    async fn test_execution_with_cost_budget() {
2626        let config = ExecutorConfig {
2627            use_mock: true,
2628            budget: BudgetConfig::with_cost(1.0),
2629            show_progress: false,
2630            ..Default::default()
2631        };
2632        let executor = ProtocolExecutor::with_config(config).unwrap();
2633        let input = ProtocolInput::query("Test cost budget");
2634
2635        let result = executor.execute("gigathink", input).await.unwrap();
2636
2637        assert!(result.success);
2638        assert!(result.budget_summary.is_some());
2639    }
2640
2641    #[test]
2642    fn test_budget_config_parsing() {
2643        // Time budget
2644        let time_budget = BudgetConfig::parse("30s").unwrap();
2645        assert_eq!(time_budget.time_limit, Some(Duration::from_secs(30)));
2646
2647        let min_budget = BudgetConfig::parse("5m").unwrap();
2648        assert_eq!(min_budget.time_limit, Some(Duration::from_secs(300)));
2649
2650        // Token budget
2651        let token_budget = BudgetConfig::parse("1000t").unwrap();
2652        assert_eq!(token_budget.token_limit, Some(1000));
2653
2654        // Cost budget
2655        let cost_budget = BudgetConfig::parse("$0.50").unwrap();
2656        assert_eq!(cost_budget.cost_limit, Some(0.50));
2657    }
2658
2659    // ═══════════════════════════════════════════════════════════════════════════
2660    // 13. PARALLEL EXECUTION TESTS
2661    // ═══════════════════════════════════════════════════════════════════════════
2662
2663    #[tokio::test]
2664    async fn test_parallel_execution_mock() {
2665        let config = ExecutorConfig {
2666            use_mock: true,
2667            enable_parallel: true,
2668            max_concurrent_steps: 4,
2669            show_progress: false,
2670            ..Default::default()
2671        };
2672        let executor = ProtocolExecutor::with_config(config).unwrap();
2673        let input = ProtocolInput::query("Test parallel execution");
2674
2675        let result = executor.execute("gigathink", input).await.unwrap();
2676
2677        assert!(result.success);
2678        assert!(!result.steps.is_empty());
2679    }
2680
2681    #[tokio::test]
2682    async fn test_parallel_execution_with_limit() {
2683        let config = ExecutorConfig {
2684            use_mock: true,
2685            enable_parallel: true,
2686            max_concurrent_steps: 2,
2687            show_progress: false,
2688            ..Default::default()
2689        };
2690        let executor = ProtocolExecutor::with_config(config).unwrap();
2691        let input = ProtocolInput::query("Test parallel with limit");
2692
2693        let result = executor.execute("gigathink", input).await.unwrap();
2694
2695        assert!(result.success);
2696    }
2697
2698    // ═══════════════════════════════════════════════════════════════════════════
2699    // 14. CLI TOOL CONFIG TESTS
2700    // ═══════════════════════════════════════════════════════════════════════════
2701
2702    #[test]
2703    fn test_cli_tool_config_claude() {
2704        let config = CliToolConfig::claude();
2705        assert_eq!(config.command, "claude");
2706        assert!(config.pre_args.contains(&"-p".to_string()));
2707        assert!(!config.interactive);
2708    }
2709
2710    #[test]
2711    fn test_cli_tool_config_codex() {
2712        let config = CliToolConfig::codex();
2713        assert_eq!(config.command, "codex");
2714        assert!(config.pre_args.contains(&"-q".to_string()));
2715    }
2716
2717    #[test]
2718    fn test_cli_tool_config_gemini() {
2719        let config = CliToolConfig::gemini();
2720        assert_eq!(config.command, "gemini");
2721        assert!(config.pre_args.contains(&"-p".to_string()));
2722    }
2723
2724    #[test]
2725    fn test_cli_tool_config_copilot() {
2726        let config = CliToolConfig::copilot();
2727        assert_eq!(config.command, "gh");
2728        assert!(config.pre_args.contains(&"copilot".to_string()));
2729        assert!(config.interactive);
2730    }
2731
2732    #[test]
2733    fn test_executor_config_cli() {
2734        let config = ExecutorConfig::claude_cli();
2735        assert!(config.cli_tool.is_some());
2736        assert_eq!(config.cli_tool.unwrap().command, "claude");
2737
2738        let config = ExecutorConfig::gemini_cli();
2739        assert!(config.cli_tool.is_some());
2740    }
2741
2742    // ═══════════════════════════════════════════════════════════════════════════
2743    // 15. PROTOCOL OUTPUT TESTS
2744    // ═══════════════════════════════════════════════════════════════════════════
2745
2746    #[test]
2747    fn test_protocol_output_get() {
2748        let mut data = HashMap::new();
2749        data.insert("key1".to_string(), serde_json::json!("value1"));
2750        data.insert("key2".to_string(), serde_json::json!(42));
2751
2752        let output = ProtocolOutput {
2753            protocol_id: "test".to_string(),
2754            success: true,
2755            data,
2756            confidence: 0.85,
2757            steps: vec![],
2758            tokens: TokenUsage::default(),
2759            duration_ms: 100,
2760            error: None,
2761            trace_id: None,
2762            budget_summary: None,
2763        };
2764
2765        assert_eq!(output.get("key1"), Some(&serde_json::json!("value1")));
2766        assert_eq!(output.get("key2"), Some(&serde_json::json!(42)));
2767        assert_eq!(output.get("nonexistent"), None);
2768    }
2769
2770    #[test]
2771    fn test_protocol_output_verdict() {
2772        let mut data = HashMap::new();
2773        data.insert("verdict".to_string(), serde_json::json!("VALID"));
2774
2775        let output = ProtocolOutput {
2776            protocol_id: "test".to_string(),
2777            success: true,
2778            data,
2779            confidence: 0.85,
2780            steps: vec![],
2781            tokens: TokenUsage::default(),
2782            duration_ms: 100,
2783            error: None,
2784            trace_id: None,
2785            budget_summary: None,
2786        };
2787
2788        assert_eq!(output.verdict(), Some("VALID"));
2789    }
2790
2791    // ═══════════════════════════════════════════════════════════════════════════
2792    // 16. CHAIN CONDITION EVALUATION TESTS
2793    // ═══════════════════════════════════════════════════════════════════════════
2794
2795    #[test]
2796    fn test_chain_condition_always() {
2797        let executor = ProtocolExecutor::mock().unwrap();
2798        let condition = ChainCondition::Always;
2799        let results: Vec<StepResult> = vec![];
2800
2801        assert!(executor.evaluate_chain_condition(&condition, &results));
2802    }
2803
2804    #[test]
2805    fn test_chain_condition_confidence_below() {
2806        let executor = ProtocolExecutor::mock().unwrap();
2807        let condition = ChainCondition::ConfidenceBelow { threshold: 0.8 };
2808
2809        let low_conf_results = vec![StepResult::success(
2810            "step1",
2811            StepOutput::Text {
2812                content: "test".to_string(),
2813            },
2814            0.5,
2815        )];
2816        assert!(executor.evaluate_chain_condition(&condition, &low_conf_results));
2817    }
2818
2819    #[test]
2820    fn test_chain_condition_output_exists() {
2821        let executor = ProtocolExecutor::mock().unwrap();
2822        let condition = ChainCondition::OutputExists {
2823            step_id: "step1".to_string(),
2824            field: "result".to_string(),
2825        };
2826
2827        let results = vec![StepResult::success(
2828            "step1",
2829            StepOutput::Text {
2830                content: "output".to_string(),
2831            },
2832            0.9,
2833        )];
2834        assert!(executor.evaluate_chain_condition(&condition, &results));
2835
2836        let empty_results: Vec<StepResult> = vec![];
2837        assert!(!executor.evaluate_chain_condition(&condition, &empty_results));
2838    }
2839
2840    // ═══════════════════════════════════════════════════════════════════════════
2841    // 17. MAPPING RESOLUTION TESTS
2842    // ═══════════════════════════════════════════════════════════════════════════
2843
2844    #[test]
2845    fn test_resolve_mapping_input() {
2846        let executor = ProtocolExecutor::mock().unwrap();
2847        let input = ProtocolInput::query("Test query");
2848        let step_outputs: HashMap<String, serde_json::Value> = HashMap::new();
2849
2850        let result = executor.resolve_mapping("input.query", &step_outputs, &input);
2851        assert!(result.is_some());
2852        assert_eq!(result.unwrap(), serde_json::json!("Test query"));
2853    }
2854
2855    #[test]
2856    fn test_resolve_mapping_missing_input() {
2857        let executor = ProtocolExecutor::mock().unwrap();
2858        let input = ProtocolInput::query("Test query");
2859        let step_outputs: HashMap<String, serde_json::Value> = HashMap::new();
2860
2861        let result = executor.resolve_mapping("input.nonexistent", &step_outputs, &input);
2862        assert!(result.is_none());
2863    }
2864
2865    #[test]
2866    fn test_resolve_mapping_step_output() {
2867        let executor = ProtocolExecutor::mock().unwrap();
2868        let input = ProtocolInput::query("Test");
2869        let mut step_outputs: HashMap<String, serde_json::Value> = HashMap::new();
2870        step_outputs.insert(
2871            "steps.gigathink".to_string(),
2872            serde_json::json!({
2873                "result": "some output",
2874                "confidence": 0.85
2875            }),
2876        );
2877
2878        let result = executor.resolve_mapping("steps.gigathink", &step_outputs, &input);
2879        assert!(result.is_some());
2880    }
2881
2882    // ═══════════════════════════════════════════════════════════════════════════
2883    // 18. TOKEN USAGE TESTS
2884    // ═══════════════════════════════════════════════════════════════════════════
2885
2886    #[test]
2887    fn test_token_usage_creation() {
2888        let usage = TokenUsage::new(100, 50, 0.001);
2889
2890        assert_eq!(usage.input_tokens, 100);
2891        assert_eq!(usage.output_tokens, 50);
2892        assert_eq!(usage.total_tokens, 150);
2893        assert_eq!(usage.cost_usd, 0.001);
2894    }
2895
2896    #[test]
2897    fn test_token_usage_add() {
2898        let mut usage1 = TokenUsage::new(100, 50, 0.001);
2899        let usage2 = TokenUsage::new(200, 100, 0.002);
2900
2901        usage1.add(&usage2);
2902
2903        assert_eq!(usage1.input_tokens, 300);
2904        assert_eq!(usage1.output_tokens, 150);
2905        assert_eq!(usage1.total_tokens, 450);
2906        assert_eq!(usage1.cost_usd, 0.003);
2907    }
2908
2909    #[tokio::test]
2910    async fn test_execution_accumulates_tokens() {
2911        let executor = ProtocolExecutor::mock().unwrap();
2912        let input = ProtocolInput::query("Test token accumulation");
2913
2914        let result = executor.execute("gigathink", input).await.unwrap();
2915
2916        assert!(result.tokens.total_tokens > 0);
2917        assert!(result.tokens.input_tokens > 0);
2918        assert!(result.tokens.output_tokens > 0);
2919    }
2920
2921    // ═══════════════════════════════════════════════════════════════════════════
2922    // 19. SYSTEM PROMPT GENERATION TESTS
2923    // ═══════════════════════════════════════════════════════════════════════════
2924
2925    #[test]
2926    fn test_build_system_prompt_generate() {
2927        let step = ProtocolStep {
2928            id: "test".to_string(),
2929            action: StepAction::Generate {
2930                min_count: 5,
2931                max_count: 10,
2932            },
2933            prompt_template: "".to_string(),
2934            output_format: crate::thinktool::protocol::StepOutputFormat::List,
2935            min_confidence: 0.7,
2936            depends_on: vec![],
2937            branch: None,
2938        };
2939
2940        let prompt = ProtocolExecutor::build_system_prompt_static(&step);
2941        assert!(prompt.contains("Generate"));
2942        assert!(prompt.contains("confidence"));
2943    }
2944
2945    #[test]
2946    fn test_build_system_prompt_analyze() {
2947        let step = ProtocolStep {
2948            id: "test".to_string(),
2949            action: StepAction::Analyze {
2950                criteria: vec!["accuracy".to_string()],
2951            },
2952            prompt_template: "".to_string(),
2953            output_format: crate::thinktool::protocol::StepOutputFormat::Text,
2954            min_confidence: 0.7,
2955            depends_on: vec![],
2956            branch: None,
2957        };
2958
2959        let prompt = ProtocolExecutor::build_system_prompt_static(&step);
2960        assert!(prompt.contains("Analyze"));
2961    }
2962
2963    #[test]
2964    fn test_build_system_prompt_validate() {
2965        let step = ProtocolStep {
2966            id: "test".to_string(),
2967            action: StepAction::Validate {
2968                rules: vec!["rule1".to_string()],
2969            },
2970            prompt_template: "".to_string(),
2971            output_format: crate::thinktool::protocol::StepOutputFormat::Boolean,
2972            min_confidence: 0.7,
2973            depends_on: vec![],
2974            branch: None,
2975        };
2976
2977        let prompt = ProtocolExecutor::build_system_prompt_static(&step);
2978        assert!(prompt.contains("Validate"));
2979    }
2980
2981    // ═══════════════════════════════════════════════════════════════════════════
2982    // 20. EDGE CASES AND STRESS TESTS
2983    // ═══════════════════════════════════════════════════════════════════════════
2984
2985    #[test]
2986    fn test_empty_template() {
2987        let executor = ProtocolExecutor::mock().unwrap();
2988        let input = ProtocolInput::query("Test");
2989
2990        let rendered = executor.render_template("", &input, &HashMap::new());
2991        assert_eq!(rendered, "");
2992    }
2993
2994    #[test]
2995    fn test_template_with_special_characters() {
2996        let executor = ProtocolExecutor::mock().unwrap();
2997        let input = ProtocolInput::query("Test with \"quotes\" and 'apostrophes'");
2998
2999        let template = "Query: {{query}}";
3000        let rendered = executor.render_template(template, &input, &HashMap::new());
3001        assert!(rendered.contains("\"quotes\""));
3002        assert!(rendered.contains("'apostrophes'"));
3003    }
3004
3005    #[test]
3006    fn test_confidence_extraction_edge_cases() {
3007        let executor = ProtocolExecutor::mock().unwrap();
3008
3009        // Very small confidence
3010        assert_eq!(
3011            executor.extract_confidence("Confidence: 0.001"),
3012            Some(0.001)
3013        );
3014
3015        // Confidence at boundary
3016        assert_eq!(executor.extract_confidence("Confidence: 1.0"), Some(1.0));
3017        assert_eq!(executor.extract_confidence("Confidence: 0.0"), Some(0.0));
3018    }
3019
3020    #[tokio::test]
3021    async fn test_execution_with_very_long_input() {
3022        let executor = ProtocolExecutor::mock().unwrap();
3023        let long_query = "A".repeat(10000);
3024        let input = ProtocolInput::query(long_query);
3025
3026        let result = executor.execute("gigathink", input).await.unwrap();
3027        assert!(result.success);
3028    }
3029
3030    #[test]
3031    fn test_list_extraction_with_many_items() {
3032        let executor = ProtocolExecutor::mock().unwrap();
3033
3034        let mut content = String::new();
3035        for i in 1..=50 {
3036            content.push_str(&format!("{}. Item number {}\n", i, i));
3037        }
3038        content.push_str("Confidence: 0.85");
3039
3040        let items = executor.extract_list_items(&content);
3041        assert_eq!(items.len(), 50);
3042    }
3043}
reasonkit/thinktool/executor.rs

reasonkit/thinktool/
executor.rs