ravenclaws/
agent.rs

1//! RavenClaws
2//!
3//! Supports single-provider and multi-model (multi-provider) modes.
4//! Security-integrated: PolicyEngine, Sandbox, and AuditLog wired to agent loop.
5
6use crate::audit::{AuditEventType, AuditLog};
7use crate::config::Config;
8use crate::error::Result;
9use crate::llm::{
10    ChatMessage, Choice, LLMProviderTrait, MultiModelManager, ProviderFallbackChain, TokenBudget,
11};
12use crate::mcp::McpClient;
13use crate::policy::{Decision, PolicyEngine};
14use crate::ravenfabric::RavenFabricClient;
15use crate::sandbox::Sandbox;
16use crate::tools::{ToolCall, ToolRegistry, ToolResult};
17use std::sync::Arc;
18use tokio::sync::RwLock;
19use tracing::{debug, info, instrument, warn};
20
21/// In-memory conversation memory — stores message history for the session.
22/// Messages are lost when the process exits.
23#[derive(Debug, Clone)]
24pub struct ConversationMemory {
25    /// Maximum number of messages to retain (0 = unlimited)
26    max_messages: usize,
27    /// Stored message history
28    messages: Vec<ChatMessage>,
29}
30
31impl ConversationMemory {
32    /// Create a new conversation memory with the given system prompt.
33    /// `max_messages` caps history length (oldest user/assistant pairs are dropped first).
34    pub fn new(system_prompt: &str, max_messages: usize) -> Self {
35        Self {
36            max_messages,
37            messages: vec![ChatMessage {
38                role: "system".to_string(),
39                content: system_prompt.to_string(),
40            }],
41        }
42    }
43
44    /// Add a user message and return the full message history for an LLM call.
45    pub fn add_user_message(&mut self, content: &str) -> &[ChatMessage] {
46        self.messages.push(ChatMessage {
47            role: "user".to_string(),
48            content: content.to_string(),
49        });
50        self.trim_to_max();
51        &self.messages
52    }
53
54    /// Add an assistant message to history.
55    pub fn add_assistant_message(&mut self, content: &str) {
56        self.messages.push(ChatMessage {
57            role: "assistant".to_string(),
58            content: content.to_string(),
59        });
60        self.trim_to_max();
61    }
62
63    /// Get the current message history.
64    pub fn history(&self) -> &[ChatMessage] {
65        &self.messages
66    }
67
68    /// Trim oldest non-system messages when over the limit.
69    fn trim_to_max(&mut self) {
70        if self.max_messages == 0 {
71            return;
72        }
73        while self.messages.len() > self.max_messages {
74            // Remove the oldest non-system message (index 1, since index 0 is system)
75            if self.messages.len() > 1 {
76                self.messages.remove(1);
77            } else {
78                break;
79            }
80        }
81    }
82}
83
84/// Agent loop configuration
85#[derive(Debug, Clone)]
86pub struct AgentLoopConfig {
87    /// Maximum iterations before forcing completion
88    pub max_iterations: usize,
89    /// Whether to enable tool calling
90    pub enable_tools: bool,
91    /// Require human approval for tool calls
92    pub require_approval: bool,
93    /// Enable prompt-injection defense on LLM responses
94    pub prompt_injection_protection: bool,
95    /// Maximum session lifetime in seconds (0 = unlimited)
96    /// When non-zero, the agent loop will stop after this duration
97    /// to enforce credential/session expiry.
98    pub token_lifetime_secs: u64,
99    /// When true, treat any non-tool-call response as completion (no FINAL: required)
100    pub no_final_required: bool,
101    /// Optional provider fallback chain — tries providers in order on failure
102    pub fallback_chain: Option<Arc<std::sync::Mutex<ProviderFallbackChain>>>,
103    /// Optional token budget — limits total tokens used per session
104    pub token_budget: Option<Arc<std::sync::Mutex<TokenBudget>>>,
105    /// Optional RavenFabric client — reports agent status and results to mesh
106    pub ravenfabric: Option<RavenFabricClient>,
107}
108
109impl Default for AgentLoopConfig {
110    fn default() -> Self {
111        Self {
112            max_iterations: 10,
113            enable_tools: false,
114            require_approval: false,
115            prompt_injection_protection: true,
116            token_lifetime_secs: 0,
117            no_final_required: false,
118            fallback_chain: None,
119            token_budget: None,
120            ravenfabric: None,
121        }
122    }
123}
124
125/// Run the agent loop with security integration (PolicyEngine + Sandbox + AuditLog)
126///
127/// This is the security-integrated version that:
128/// 1. Checks all tool calls against PolicyEngine before execution
129/// 2. Executes shell commands in the Sandbox
130/// 3. Logs all tool calls, policy decisions, and results to AuditLog
131#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
132pub async fn run_agent_loop(
133    llm: Arc<dyn LLMProviderTrait>,
134    initial_prompt: &str,
135    system_prompt: &str,
136    config: AgentLoopConfig,
137) -> Result<String> {
138    run_agent_loop_with_registry(llm, initial_prompt, system_prompt, config, None).await
139}
140
141/// Run the agent loop with an optional pre-configured ToolRegistry
142///
143/// This allows callers to pass a registry with custom tool configurations
144/// (e.g., configured web search endpoint). If `None` is passed, default tools are used.
145#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
146pub async fn run_agent_loop_with_registry(
147    llm: Arc<dyn LLMProviderTrait>,
148    initial_prompt: &str,
149    system_prompt: &str,
150    config: AgentLoopConfig,
151    tool_registry: Option<ToolRegistry>,
152) -> Result<String> {
153    // Initialize security components
154    let policy_engine = PolicyEngine::default_secure();
155    let mut sandbox = Sandbox::default();
156    sandbox.init().await.map_err(|e| {
157        crate::error::RavenClawsError::CommandExecution(format!("Sandbox init failed: {}", e))
158    })?;
159    let audit_log = AuditLog::new(format!("agent-{}", std::process::id()));
160
161    // Initialize injection detector
162    let injection_detector = if config.prompt_injection_protection {
163        Some(crate::policy::InjectionDetector::new())
164    } else {
165        None
166    };
167
168    // Initialize tool registry (use provided one or default)
169    let registry = tool_registry.unwrap_or_else(ToolRegistry::with_default_tools);
170
171    // Track session start time for token lifetime enforcement
172    let session_start = std::time::Instant::now();
173
174    info!(
175        provider = llm.provider_name(),
176        model = llm.model(),
177        max_iterations = config.max_iterations,
178        enable_tools = config.enable_tools,
179        require_approval = config.require_approval,
180        prompt_injection_protection = config.prompt_injection_protection,
181        token_lifetime_secs = config.token_lifetime_secs,
182        "Agent loop starting with security integration"
183    );
184
185    // Audit: agent start
186    let _ = audit_log.append(
187        AuditEventType::AgentStart,
188        "agent",
189        &format!(
190            "Agent loop started with {} (model: {})",
191            llm.provider_name(),
192            llm.model()
193        ),
194        Some(serde_json::json!({
195            "provider": llm.provider_name(),
196            "model": llm.model(),
197            "max_iterations": config.max_iterations,
198            "enable_tools": config.enable_tools,
199            "require_approval": config.require_approval,
200            "prompt_injection_protection": config.prompt_injection_protection,
201            "token_lifetime_secs": config.token_lifetime_secs,
202        })),
203    );
204
205    let mut memory = ConversationMemory::new(system_prompt, 0);
206    memory.add_user_message(initial_prompt);
207
208    for iteration in 0..config.max_iterations {
209        // Check token lifetime: enforce session expiry
210        if config.token_lifetime_secs > 0 {
211            let elapsed = session_start.elapsed().as_secs();
212            if elapsed >= config.token_lifetime_secs {
213                warn!(
214                    iteration = iteration,
215                    elapsed_secs = elapsed,
216                    token_lifetime_secs = config.token_lifetime_secs,
217                    "Agent loop reached token lifetime limit"
218                );
219                let _ = audit_log.append(
220                    AuditEventType::SecurityViolation,
221                    "token_lifetime",
222                    &format!(
223                        "Session expired after {} seconds (limit: {}s)",
224                        elapsed, config.token_lifetime_secs
225                    ),
226                    Some(serde_json::json!({
227                        "elapsed_secs": elapsed,
228                        "token_lifetime_secs": config.token_lifetime_secs,
229                        "iteration": iteration,
230                    })),
231                );
232                return Err(crate::error::RavenClawsError::SecurityViolation(format!(
233                    "Session token expired after {} seconds (limit: {}s)",
234                    elapsed, config.token_lifetime_secs
235                )));
236            }
237        }
238        let messages = memory.history().to_vec();
239
240        // Check token budget before making LLM call
241        if let Some(ref budget) = config.token_budget {
242            let budget = budget.lock().unwrap();
243            if budget.remaining() < 100 {
244                warn!(
245                    iteration = iteration,
246                    remaining = budget.remaining(),
247                    "Token budget exhausted"
248                );
249                let _ = audit_log.append(
250                    AuditEventType::SecurityViolation,
251                    "token_budget",
252                    &format!("Token budget exhausted (remaining: {})", budget.remaining()),
253                    Some(serde_json::json!({
254                        "remaining": budget.remaining(),
255                        "used": budget.used_tokens,
256                        "iteration": iteration,
257                    })),
258                );
259                return Err(crate::error::RavenClawsError::SecurityViolation(
260                    "Token budget exhausted".to_string(),
261                ));
262            }
263        }
264
265        let response = match llm.chat(messages.clone()).await {
266            Ok(r) => r,
267            Err(e) => {
268                // Try fallback chain if available
269                if let Some(ref chain) = config.fallback_chain {
270                    warn!(error = %e, "Primary LLM failed, trying fallback chain");
271                    let _ = audit_log.append(
272                        AuditEventType::Error,
273                        "llm",
274                        &format!("Primary LLM failed, trying fallback: {}", e),
275                        None,
276                    );
277                    // Clone configs out of mutex to avoid holding MutexGuard across .await
278                    let configs = {
279                        let c = chain.lock().unwrap();
280                        c.configs.clone()
281                    };
282                    let mut temp_chain = ProviderFallbackChain::new(configs);
283                    match temp_chain.chat_with_fallback(messages).await {
284                        Ok(r) => {
285                            info!("Fallback chain succeeded");
286                            // Record token usage from fallback response
287                            if let Some(ref budget) = config.token_budget {
288                                if let Some(usage) = &r.usage {
289                                    let mut b = budget.lock().unwrap();
290                                    b.record_usage(usage.total_tokens);
291                                }
292                            }
293                            r
294                        }
295                        Err(fallback_e) => {
296                            warn!(error = %fallback_e, "Fallback chain also failed");
297                            let _ = audit_log.append(
298                                AuditEventType::Error,
299                                "llm",
300                                &format!("All providers failed: {}", fallback_e),
301                                None,
302                            );
303                            return Err(crate::error::RavenClawsError::Llm(fallback_e));
304                        }
305                    }
306                } else {
307                    warn!(error = %e, "LLM request failed");
308                    let _ = audit_log.append(
309                        AuditEventType::Error,
310                        "llm",
311                        &format!("LLM request failed: {}", e),
312                        None,
313                    );
314                    return Err(crate::error::RavenClawsError::Llm(e));
315                }
316            }
317        };
318
319        // Record token usage from response
320        if let Some(ref budget) = config.token_budget {
321            if let Some(usage) = &response.usage {
322                let mut b = budget.lock().unwrap();
323                b.record_usage(usage.total_tokens);
324                debug!(
325                    iteration = iteration,
326                    tokens_used = usage.total_tokens,
327                    total_used = b.used_tokens,
328                    remaining = b.remaining(),
329                    "Token usage recorded"
330                );
331            }
332        }
333
334        // Report progress to RavenFabric if configured
335        if let Some(ref rf) = config.ravenfabric {
336            if rf.is_enabled() {
337                let _ = rf.health().await;
338                info!(
339                    iteration = iteration,
340                    ravenfabric = true,
341                    "RavenFabric health check completed"
342                );
343            }
344        }
345
346        let first_choice = response.choices.first();
347        let content = first_choice
348            .map(|c| c.message.content.clone())
349            .unwrap_or_default();
350
351        debug!(
352            iteration = iteration,
353            response_length = content.len(),
354            response_preview = %content[..content.len().min(500)],
355            "LLM response received"
356        );
357
358        // Prompt-injection defense: check LLM response before processing
359        if let Some(ref detector) = injection_detector {
360            match detector.check(&content) {
361                crate::policy::InjectionVerdict::Suspicious(reason) => {
362                    warn!(
363                        iteration = iteration,
364                        reason = %reason,
365                        "Prompt-injection detected in LLM response"
366                    );
367                    let _ = audit_log.append(
368                        AuditEventType::SecurityViolation,
369                        "injection_detector",
370                        &format!("Prompt-injection detected: {}", reason),
371                        Some(serde_json::json!({
372                            "reason": reason,
373                            "iteration": iteration,
374                            "content_preview": &content[..content.len().min(200)],
375                        })),
376                    );
377                    return Err(crate::error::RavenClawsError::SecurityViolation(format!(
378                        "LLM response blocked: potential prompt injection ({})",
379                        reason
380                    )));
381                }
382                crate::policy::InjectionVerdict::Clean => {}
383            }
384        }
385
386        // Check for structured tool calls first (OpenAI Tools format)
387        if config.enable_tools {
388            if let Some((tool_name, args)) = first_choice.and_then(parse_structured_tool_call) {
389                info!(tool = %tool_name, "Structured tool call detected");
390
391                // Execute tool with security
392                if let Some(tool_result) = execute_parsed_tool_call(
393                    tool_name,
394                    args,
395                    &registry,
396                    &policy_engine,
397                    &sandbox,
398                    &audit_log,
399                    config.require_approval,
400                )
401                .await
402                {
403                    let observation = if tool_result.success {
404                        format!("OBSERVATION: {}", tool_result.output)
405                    } else {
406                        format!(
407                            "OBSERVATION: Tool failed with error: {}",
408                            tool_result.error.as_deref().unwrap_or("unknown error")
409                        )
410                    };
411
412                    memory.add_user_message(&observation);
413
414                    info!(
415                        iteration = iteration,
416                        tool = %tool_result.tool_name,
417                        success = tool_result.success,
418                        "Structured tool executed"
419                    );
420                    continue;
421                }
422            }
423        }
424
425        // Check for completion signal
426        if content.contains("FINAL:") {
427            let final_response = content
428                .split("FINAL:")
429                .nth(1)
430                .unwrap_or("")
431                .trim()
432                .to_string();
433
434            memory.add_assistant_message(&content);
435
436            // Audit: agent finish
437            let _ = audit_log.append(
438                AuditEventType::AgentFinish,
439                "agent",
440                "Agent loop completed successfully",
441                Some(serde_json::json!({
442                    "iterations": iteration + 1,
443                    "final_response_length": final_response.len(),
444                })),
445            );
446
447            return Ok(final_response);
448        }
449
450        // Execute tool calls if enabled (legacy pattern-matching fallback)
451        if config.enable_tools {
452            if let Some(tool_result) = execute_tool_call_with_security(
453                &content,
454                &registry,
455                &policy_engine,
456                &sandbox,
457                &audit_log,
458            )
459            .await
460            {
461                let observation = if tool_result.success {
462                    format!("OBSERVATION: {}", tool_result.output)
463                } else {
464                    format!(
465                        "OBSERVATION: Tool failed with error: {}",
466                        tool_result.error.as_deref().unwrap_or("unknown error")
467                    )
468                };
469
470                memory.add_assistant_message(&content);
471                memory.add_user_message(&observation);
472
473                info!(
474                    iteration = iteration,
475                    tool = %tool_result.tool_name,
476                    success = tool_result.success,
477                    "Tool executed"
478                );
479                continue;
480            }
481        }
482
483        // No tool call found and no FINAL: — treat as regular response
484        memory.add_assistant_message(&content);
485
486        // If no_final_required is set, treat any non-tool-call response as completion
487        if config.no_final_required {
488            info!(
489                iteration = iteration,
490                response_length = content.len(),
491                "no_final_required: treating response as completion"
492            );
493            let _ = audit_log.append(
494                AuditEventType::AgentFinish,
495                "agent",
496                "Agent loop completed (no_final_required)",
497                Some(serde_json::json!({
498                    "iterations": iteration + 1,
499                    "final_response_length": content.len(),
500                })),
501            );
502            return Ok(content);
503        }
504
505        info!(
506            iteration = iteration,
507            thought = %content.lines().find(|l| l.starts_with("THOUGHT:")).unwrap_or("<no thought>"),
508            "Agent loop progress"
509        );
510    }
511
512    // Max iterations reached
513    warn!(
514        max_iterations = config.max_iterations,
515        "Agent loop reached max iterations"
516    );
517
518    let _ = audit_log.append(
519        AuditEventType::Error,
520        "agent",
521        "Agent loop reached max iterations without completing",
522        Some(serde_json::json!({
523            "max_iterations": config.max_iterations,
524        })),
525    );
526
527    let history = memory.history();
528    if history.len() > 1 {
529        if let Some(last) = history.last() {
530            return Ok(last.content.clone());
531        }
532    }
533
534    Err(crate::error::RavenClawsError::CommandExecution(
535        "Agent loop reached max iterations without completing the task".to_string(),
536    ))
537}
538
539/// Run the agent loop with MCP tool integration (v0.5.2)
540///
541/// This version extends run_agent_loop with MCP tool support:
542/// 1. Registers MCP tools into the ToolRegistry
543/// 2. MCP tools are executed alongside built-in tools
544#[allow(dead_code)]
545#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
546pub async fn run_agent_loop_with_mcp(
547    llm: Arc<dyn LLMProviderTrait>,
548    initial_prompt: &str,
549    system_prompt: &str,
550    config: AgentLoopConfig,
551    mcp_client: Option<Arc<RwLock<McpClient>>>,
552) -> Result<String> {
553    run_agent_loop_with_mcp_and_registry(
554        llm,
555        initial_prompt,
556        system_prompt,
557        config,
558        mcp_client,
559        None,
560    )
561    .await
562}
563
564/// Run the agent loop with MCP tools and an optional pre-configured ToolRegistry
565#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
566pub async fn run_agent_loop_with_mcp_and_registry(
567    llm: Arc<dyn LLMProviderTrait>,
568    initial_prompt: &str,
569    system_prompt: &str,
570    config: AgentLoopConfig,
571    mcp_client: Option<Arc<RwLock<McpClient>>>,
572    tool_registry: Option<ToolRegistry>,
573) -> Result<String> {
574    // Initialize security components
575    let policy_engine = PolicyEngine::default_secure();
576    let mut sandbox = Sandbox::default();
577    sandbox.init().await.map_err(|e| {
578        crate::error::RavenClawsError::CommandExecution(format!("Sandbox init failed: {}", e))
579    })?;
580    let audit_log = AuditLog::new(format!("agent-{}", std::process::id()));
581
582    // Initialize injection detector
583    let injection_detector = if config.prompt_injection_protection {
584        Some(crate::policy::InjectionDetector::new())
585    } else {
586        None
587    };
588
589    // Initialize tool registry (use provided one or default)
590    let mut registry = tool_registry.unwrap_or_else(ToolRegistry::with_default_tools);
591
592    // Register MCP tools if client is provided
593    if let Some(client) = &mcp_client {
594        match crate::mcp::register_mcp_tools(&mut registry, client.clone()).await {
595            Ok(count) => {
596                info!(count, "MCP tools registered");
597            }
598            Err(e) => {
599                warn!(error = %e, "Failed to register MCP tools");
600            }
601        }
602    }
603
604    // Track session start time for token lifetime enforcement
605    let session_start = std::time::Instant::now();
606
607    info!(
608        provider = llm.provider_name(),
609        model = llm.model(),
610        max_iterations = config.max_iterations,
611        enable_tools = config.enable_tools,
612        tool_count = registry.len(),
613        require_approval = config.require_approval,
614        prompt_injection_protection = config.prompt_injection_protection,
615        token_lifetime_secs = config.token_lifetime_secs,
616        "Agent loop starting with MCP integration"
617    );
618
619    // Audit: agent start
620    let _ = audit_log.append(
621        AuditEventType::AgentStart,
622        "agent",
623        &format!(
624            "Agent loop started with {} (model: {})",
625            llm.provider_name(),
626            llm.model()
627        ),
628        Some(serde_json::json!({
629            "provider": llm.provider_name(),
630            "model": llm.model(),
631            "max_iterations": config.max_iterations,
632            "enable_tools": config.enable_tools,
633            "mcp_enabled": mcp_client.is_some(),
634            "tool_count": registry.len(),
635            "require_approval": config.require_approval,
636            "prompt_injection_protection": config.prompt_injection_protection,
637            "token_lifetime_secs": config.token_lifetime_secs,
638        })),
639    );
640
641    let mut memory = ConversationMemory::new(system_prompt, 0);
642    memory.add_user_message(initial_prompt);
643
644    for iteration in 0..config.max_iterations {
645        // Check token lifetime: enforce session expiry
646        if config.token_lifetime_secs > 0 {
647            let elapsed = session_start.elapsed().as_secs();
648            if elapsed >= config.token_lifetime_secs {
649                warn!(
650                    iteration = iteration,
651                    elapsed_secs = elapsed,
652                    token_lifetime_secs = config.token_lifetime_secs,
653                    "Agent loop reached token lifetime limit"
654                );
655                let _ = audit_log.append(
656                    AuditEventType::SecurityViolation,
657                    "token_lifetime",
658                    &format!(
659                        "Session expired after {} seconds (limit: {}s)",
660                        elapsed, config.token_lifetime_secs
661                    ),
662                    Some(serde_json::json!({
663                        "elapsed_secs": elapsed,
664                        "token_lifetime_secs": config.token_lifetime_secs,
665                        "iteration": iteration,
666                    })),
667                );
668                return Err(crate::error::RavenClawsError::SecurityViolation(format!(
669                    "Session token expired after {} seconds (limit: {}s)",
670                    elapsed, config.token_lifetime_secs
671                )));
672            }
673        }
674        let messages = memory.history().to_vec();
675
676        // Check token budget before making LLM call
677        if let Some(ref budget) = config.token_budget {
678            let budget = budget.lock().unwrap();
679            if budget.remaining() < 100 {
680                warn!(
681                    iteration = iteration,
682                    remaining = budget.remaining(),
683                    "Token budget exhausted"
684                );
685                let _ = audit_log.append(
686                    AuditEventType::SecurityViolation,
687                    "token_budget",
688                    &format!("Token budget exhausted (remaining: {})", budget.remaining()),
689                    Some(serde_json::json!({
690                        "remaining": budget.remaining(),
691                        "used": budget.used_tokens,
692                        "iteration": iteration,
693                    })),
694                );
695                return Err(crate::error::RavenClawsError::SecurityViolation(
696                    "Token budget exhausted".to_string(),
697                ));
698            }
699        }
700
701        let response = match llm.chat(messages.clone()).await {
702            Ok(r) => r,
703            Err(e) => {
704                // Try fallback chain if available
705                if let Some(ref chain) = config.fallback_chain {
706                    warn!(error = %e, "Primary LLM failed, trying fallback chain");
707                    let _ = audit_log.append(
708                        AuditEventType::Error,
709                        "llm",
710                        &format!("Primary LLM failed, trying fallback: {}", e),
711                        None,
712                    );
713                    // Clone configs out of mutex to avoid holding MutexGuard across .await
714                    let configs = {
715                        let c = chain.lock().unwrap();
716                        c.configs.clone()
717                    };
718                    let mut temp_chain = ProviderFallbackChain::new(configs);
719                    match temp_chain.chat_with_fallback(messages).await {
720                        Ok(r) => {
721                            info!("Fallback chain succeeded");
722                            // Record token usage from fallback response
723                            if let Some(ref budget) = config.token_budget {
724                                if let Some(usage) = &r.usage {
725                                    let mut b = budget.lock().unwrap();
726                                    b.record_usage(usage.total_tokens);
727                                }
728                            }
729                            r
730                        }
731                        Err(fallback_e) => {
732                            warn!(error = %fallback_e, "Fallback chain also failed");
733                            let _ = audit_log.append(
734                                AuditEventType::Error,
735                                "llm",
736                                &format!("All providers failed: {}", fallback_e),
737                                None,
738                            );
739                            return Err(crate::error::RavenClawsError::Llm(fallback_e));
740                        }
741                    }
742                } else {
743                    warn!(error = %e, "LLM request failed");
744                    let _ = audit_log.append(
745                        AuditEventType::Error,
746                        "llm",
747                        &format!("LLM request failed: {}", e),
748                        None,
749                    );
750                    return Err(crate::error::RavenClawsError::Llm(e));
751                }
752            }
753        };
754
755        // Record token usage from response
756        if let Some(ref budget) = config.token_budget {
757            if let Some(usage) = &response.usage {
758                let mut b = budget.lock().unwrap();
759                b.record_usage(usage.total_tokens);
760                debug!(
761                    iteration = iteration,
762                    tokens_used = usage.total_tokens,
763                    total_used = b.used_tokens,
764                    remaining = b.remaining(),
765                    "Token usage recorded"
766                );
767            }
768        }
769
770        // Report progress to RavenFabric if configured
771        if let Some(ref rf) = config.ravenfabric {
772            if rf.is_enabled() {
773                let _ = rf.health().await;
774                info!(
775                    iteration = iteration,
776                    ravenfabric = true,
777                    "RavenFabric health check completed"
778                );
779            }
780        }
781
782        let first_choice = response.choices.first();
783        let content = first_choice
784            .map(|c| c.message.content.clone())
785            .unwrap_or_default();
786
787        debug!(
788            iteration = iteration,
789            response_length = content.len(),
790            response_preview = %content[..content.len().min(500)],
791            "LLM response received (MCP loop)"
792        );
793
794        // Prompt-injection defense: check LLM response before processing
795        if let Some(ref detector) = injection_detector {
796            match detector.check(&content) {
797                crate::policy::InjectionVerdict::Suspicious(reason) => {
798                    warn!(
799                        iteration = iteration,
800                        reason = %reason,
801                        "Prompt-injection detected in LLM response"
802                    );
803                    let _ = audit_log.append(
804                        AuditEventType::SecurityViolation,
805                        "injection_detector",
806                        &format!("Prompt-injection detected: {}", reason),
807                        Some(serde_json::json!({
808                            "reason": reason,
809                            "iteration": iteration,
810                            "content_preview": &content[..content.len().min(200)],
811                        })),
812                    );
813                    return Err(crate::error::RavenClawsError::SecurityViolation(format!(
814                        "LLM response blocked: potential prompt injection ({})",
815                        reason
816                    )));
817                }
818                crate::policy::InjectionVerdict::Clean => {}
819            }
820        }
821
822        // Check for structured tool calls first (OpenAI Tools format)
823        if config.enable_tools {
824            if let Some((tool_name, args)) = first_choice.and_then(parse_structured_tool_call) {
825                info!(tool = %tool_name, "Structured tool call detected");
826
827                // Execute tool with security
828                if let Some(tool_result) = execute_parsed_tool_call(
829                    tool_name,
830                    args,
831                    &registry,
832                    &policy_engine,
833                    &sandbox,
834                    &audit_log,
835                    config.require_approval,
836                )
837                .await
838                {
839                    let observation = if tool_result.success {
840                        format!("OBSERVATION: {}", tool_result.output)
841                    } else {
842                        format!(
843                            "OBSERVATION: Tool failed with error: {}",
844                            tool_result.error.as_deref().unwrap_or("unknown error")
845                        )
846                    };
847
848                    memory.add_user_message(&observation);
849
850                    info!(
851                        iteration = iteration,
852                        tool = %tool_result.tool_name,
853                        success = tool_result.success,
854                        "Structured tool executed"
855                    );
856                    continue;
857                }
858            }
859        }
860
861        // Check for completion signal
862        if content.contains("FINAL:") {
863            let final_response = content
864                .split("FINAL:")
865                .nth(1)
866                .unwrap_or("")
867                .trim()
868                .to_string();
869
870            memory.add_assistant_message(&content);
871
872            // Audit: agent finish
873            let _ = audit_log.append(
874                AuditEventType::AgentFinish,
875                "agent",
876                "Agent loop completed successfully",
877                Some(serde_json::json!({
878                    "iterations": iteration + 1,
879                    "final_response_length": final_response.len(),
880                })),
881            );
882
883            return Ok(final_response);
884        }
885
886        // Execute tool calls if enabled (legacy pattern-matching fallback)
887        if config.enable_tools {
888            if let Some(tool_result) = execute_tool_call_with_security(
889                &content,
890                &registry,
891                &policy_engine,
892                &sandbox,
893                &audit_log,
894            )
895            .await
896            {
897                let observation = if tool_result.success {
898                    format!("OBSERVATION: {}", tool_result.output)
899                } else {
900                    format!(
901                        "OBSERVATION: Tool failed with error: {}",
902                        tool_result.error.as_deref().unwrap_or("unknown error")
903                    )
904                };
905
906                memory.add_assistant_message(&content);
907                memory.add_user_message(&observation);
908
909                info!(
910                    iteration = iteration,
911                    tool = %tool_result.tool_name,
912                    success = tool_result.success,
913                    "Tool executed"
914                );
915                continue;
916            }
917        }
918
919        // No tool call found and no FINAL: — treat as regular response
920        memory.add_assistant_message(&content);
921
922        // If no_final_required is set, treat any non-tool-call response as completion
923        if config.no_final_required {
924            info!(
925                iteration = iteration,
926                response_length = content.len(),
927                "no_final_required: treating response as completion"
928            );
929            let _ = audit_log.append(
930                AuditEventType::AgentFinish,
931                "agent",
932                "Agent loop completed (no_final_required)",
933                Some(serde_json::json!({
934                    "iterations": iteration + 1,
935                    "final_response_length": content.len(),
936                })),
937            );
938            return Ok(content);
939        }
940
941        info!(
942            iteration = iteration,
943            thought = %content.lines().find(|l| l.starts_with("THOUGHT:")).unwrap_or("<no thought>"),
944            "Agent loop progress"
945        );
946    }
947
948    // Max iterations reached
949    warn!(
950        max_iterations = config.max_iterations,
951        "Agent loop reached max iterations"
952    );
953
954    let _ = audit_log.append(
955        AuditEventType::Error,
956        "agent",
957        "Agent loop reached max iterations without completing",
958        Some(serde_json::json!({
959            "max_iterations": config.max_iterations,
960        })),
961    );
962
963    let history = memory.history();
964    if history.len() > 1 {
965        if let Some(last) = history.last() {
966            return Ok(last.content.clone());
967        }
968    }
969
970    Err(crate::error::RavenClawsError::CommandExecution(
971        "Agent loop reached max iterations without completing the task".to_string(),
972    ))
973}
974
975/// Prompt the user for approval of a tool call via stdin.
976///
977/// Returns `true` if the user approved, `false` if denied.
978/// If stdin is not a terminal (piped), auto-approves with a warning.
979async fn prompt_for_approval(tool_name: &str, args: &serde_json::Value) -> bool {
980    use std::io::{IsTerminal, Write};
981
982    let args_str = serde_json::to_string_pretty(args).unwrap_or_default();
983
984    // Check if stdin is a terminal
985    if !std::io::stdin().is_terminal() {
986        warn!(
987            tool = %tool_name,
988            "stdin is not a TTY — auto-approving tool call (use --require-approval only in interactive mode)"
989        );
990        return true;
991    }
992
993    // Print the approval prompt to stderr so it doesn't interfere with stdout output
994    eprintln!("\n⚠️  Tool requires approval:");
995    eprintln!("   Tool: {}", tool_name);
996    for line in args_str.lines() {
997        eprintln!("   {}", line);
998    }
999    eprint!("   Approve? [y/N] ");
1000    std::io::stderr().flush().ok();
1001
1002    let mut input = String::new();
1003    match std::io::stdin().read_line(&mut input) {
1004        Ok(_) => {
1005            let trimmed = input.trim().to_lowercase();
1006            trimmed == "y" || trimmed == "yes"
1007        }
1008        Err(e) => {
1009            warn!(error = %e, "Failed to read approval input — denying by default");
1010            false
1011        }
1012    }
1013}
1014
/// Test-only twin of `prompt_for_approval` that takes the operator's answer
/// as a string instead of reading stdin, so unit tests never block.
///
/// Prints the same prompt to stderr and returns `true` only when `input`,
/// trimmed and lower-cased, is `y` or `yes`.
#[cfg(test)]
async fn prompt_for_approval_with_input(
    tool_name: &str,
    args: &serde_json::Value,
    input: &str,
) -> bool {
    use std::io::Write;

    let pretty_args = serde_json::to_string_pretty(args).unwrap_or_default();

    eprintln!("\n⚠️  Tool requires approval:");
    eprintln!("   Tool: {}", tool_name);
    pretty_args
        .lines()
        .for_each(|line| eprintln!("   {}", line));
    eprint!("   Approve? [y/N] ");
    let _ = std::io::stderr().flush();

    matches!(input.trim().to_lowercase().as_str(), "y" | "yes")
}
1038
1039/// Execute a parsed tool call with security integration
1040///
1041/// This function:
1042/// 1. Checks the tool call against PolicyEngine
1043/// 2. Logs the policy decision to AuditLog
1044/// 3. Prompts for human approval if required (HITL)
1045/// 4. Executes the tool (sandbox is applied at the tool implementation level for shell_exec)
1046/// 5. Logs the result to AuditLog
1047async fn execute_parsed_tool_call(
1048    tool_name: String,
1049    args: serde_json::Value,
1050    registry: &ToolRegistry,
1051    policy_engine: &PolicyEngine,
1052    _sandbox: &Sandbox,
1053    audit_log: &AuditLog,
1054    require_approval: bool,
1055) -> Option<ToolResult> {
1056    info!(tool = %tool_name, "Executing parsed tool call");
1057
1058    // Audit: tool call requested
1059    let _ = audit_log.tool_call(&tool_name, &args);
1060
1061    // Check if tool requires approval
1062    if require_approval && policy_engine.requires_approval(&tool_name) {
1063        let _ = audit_log.append(
1064            AuditEventType::ApprovalRequested,
1065            "approval",
1066            &format!("Approval required for tool: {}", tool_name),
1067            Some(serde_json::json!({"tool": tool_name, "args": args})),
1068        );
1069
1070        // Prompt user for approval via stdin
1071        let granted = prompt_for_approval(&tool_name, &args).await;
1072
1073        if !granted {
1074            let _ = audit_log.approval(&tool_name, false, Some("Denied by user"));
1075            warn!(tool = %tool_name, "Tool call denied by user");
1076            return Some(ToolResult {
1077                tool_name: tool_name.clone(),
1078                success: false,
1079                output: String::new(),
1080                error: Some(format!("Approval denied by user for tool: {}", tool_name)),
1081                exit_code: Some(-1),
1082                duration_ms: None,
1083            });
1084        }
1085
1086        let _ = audit_log.approval(&tool_name, true, Some("Approved by user"));
1087        info!(tool = %tool_name, "Tool call approved by user");
1088    }
1089
1090    // Check policy BEFORE execution
1091    let policy_decision = policy_engine.check_tool_call(&tool_name, &args);
1092
1093    // Audit: policy decision
1094    match &policy_decision {
1095        Decision::Allow => {
1096            let _ = audit_log.policy_decision(&tool_name, true, None);
1097        }
1098        Decision::Deny(reason) => {
1099            let _ = audit_log.policy_decision(&tool_name, false, Some(reason));
1100            warn!(tool = %tool_name, reason = %reason, "Tool call denied by policy");
1101            return Some(ToolResult {
1102                tool_name: tool_name.clone(),
1103                success: false,
1104                output: String::new(),
1105                error: Some(format!("Policy denied: {}", reason)),
1106                exit_code: Some(-1),
1107                duration_ms: None,
1108            });
1109        }
1110    }
1111
1112    // Execute tool
1113    let tool_name_clone = tool_name.clone();
1114    let call = ToolCall {
1115        name: tool_name.clone(),
1116        arguments: args,
1117        id: None,
1118    };
1119
1120    let result = match registry.execute(call).await {
1121        Ok(result) => {
1122            // Audit: tool result
1123            let _ = audit_log.append(
1124                AuditEventType::ToolResult,
1125                &tool_name_clone,
1126                &format!(
1127                    "Tool executed: {} (success: {})",
1128                    tool_name_clone, result.success
1129                ),
1130                Some(serde_json::json!({
1131                    "success": result.success,
1132                    "exit_code": result.exit_code,
1133                    "duration_ms": result.duration_ms,
1134                })),
1135            );
1136            result
1137        }
1138        Err(e) => {
1139            // Audit: error
1140            let _ = audit_log.append(
1141                AuditEventType::Error,
1142                &tool_name_clone,
1143                &format!("Tool execution failed: {}", e),
1144                None,
1145            );
1146            ToolResult {
1147                tool_name: tool_name_clone,
1148                success: false,
1149                output: String::new(),
1150                error: Some(e.to_string()),
1151                exit_code: Some(-1),
1152                duration_ms: None,
1153            }
1154        }
1155    };
1156
1157    Some(result)
1158}
1159
1160/// Execute a tool call with security integration (legacy pattern-matching fallback)
1161///
1162/// This function:
1163/// 1. Parses the tool call from the LLM response (legacy TOOL_CALL:/ARGS: format)
1164/// 2. Checks the tool call against PolicyEngine
1165/// 3. Logs the policy decision to AuditLog
1166/// 4. Executes the tool (sandbox is applied at the tool implementation level for shell_exec)
1167/// 5. Logs the result to AuditLog
1168async fn execute_tool_call_with_security(
1169    content: &str,
1170    registry: &ToolRegistry,
1171    policy_engine: &PolicyEngine,
1172    _sandbox: &Sandbox,
1173    audit_log: &AuditLog,
1174) -> Option<ToolResult> {
1175    // Parse tool call from content (legacy format)
1176    let (tool_name, args) = parse_tool_call(content)?;
1177
1178    // Delegate to the common execution logic
1179    execute_parsed_tool_call(
1180        tool_name,
1181        args,
1182        registry,
1183        policy_engine,
1184        _sandbox,
1185        audit_log,
1186        false, // legacy path — no approval prompt
1187    )
1188    .await
1189}
1190
1191/// Parse a tool call from LLM response content
1192/// Returns (tool_name, args) if found, None otherwise
1193/// Parse tool call from structured LLM response (OpenAI Tools format)
1194fn parse_structured_tool_call(choice: &Choice) -> Option<(String, serde_json::Value)> {
1195    let tool_calls = choice.tool_calls.as_ref()?;
1196    let first_call = tool_calls.first()?;
1197
1198    let tool_name = first_call.function.name.clone();
1199    let args: serde_json::Value = serde_json::from_str(&first_call.function.arguments).ok()?;
1200
1201    Some((tool_name, args))
1202}
1203
1204/// Parse tool call from legacy pattern-matching format (TOOL_CALL: / ARGS:)
1205fn parse_tool_call(content: &str) -> Option<(String, serde_json::Value)> {
1206    let mut lines = content.lines();
1207    let tool_call_line = lines.find(|l| l.trim().starts_with("TOOL_CALL:"))?;
1208
1209    let tool_name = tool_call_line
1210        .trim()
1211        .strip_prefix("TOOL_CALL:")
1212        .map(|s| s.trim())
1213        .filter(|s| !s.is_empty())?
1214        .to_string();
1215
1216    // Find the ARGS line
1217    let args_line = lines.find(|l| l.trim().starts_with("ARGS:"))?;
1218    let args_str = args_line.trim().strip_prefix("ARGS:").map(|s| s.trim())?;
1219
1220    let args: serde_json::Value = serde_json::from_str(args_str).ok()?;
1221
1222    Some((tool_name, args))
1223}
1224
1225/// Run a single autonomous agent (single-provider mode)
1226pub async fn run_single(
1227    llm: Arc<dyn LLMProviderTrait>,
1228    config: Config,
1229    ravenfabric: Option<RavenFabricClient>,
1230) -> Result<()> {
1231    info!(
1232        "Starting single agent mode with provider: {}",
1233        llm.provider_name()
1234    );
1235
1236    // Perform RavenFabric health check if configured
1237    if let Some(ref rf) = ravenfabric {
1238        if rf.is_enabled() {
1239            info!("RavenFabric remote execution available");
1240            match rf.health().await {
1241                Ok(true) => info!("RavenFabric mesh is healthy"),
1242                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1243                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1244            }
1245        }
1246    }
1247
1248    let system_prompt = &config.llm.system_prompt;
1249
1250    let messages = vec![
1251        ChatMessage {
1252            role: "system".to_string(),
1253            content: system_prompt.to_string(),
1254        },
1255        ChatMessage {
1256            role: "user".to_string(),
1257            content: "Ready. Awaiting instructions.".to_string(),
1258        },
1259    ];
1260
1261    match llm.chat(messages).await {
1262        Ok(response) => {
1263            if let Some(choice) = response.choices.first() {
1264                info!(provider = llm.provider_name(), model = llm.model(), response = %choice.message.content, "Agent response received");
1265
1266                // Broadcast result to RavenFabric if configured
1267                if let Some(ref rf) = ravenfabric {
1268                    if rf.is_enabled() {
1269                        let preview = choice.message.content.chars().take(500).collect::<String>();
1270                        let _ = rf.broadcast(&preview, 30).await;
1271                        info!("Agent result broadcast to RavenFabric mesh");
1272                    }
1273                }
1274            }
1275        }
1276        Err(e) => {
1277            warn!(error = %e, provider = llm.provider_name(), "LLM request failed");
1278        }
1279    }
1280
1281    Ok(())
1282}
1283
1284/// Run multiple agents in swarm mode (single-provider) — v0.6
1285///
1286/// Swarm mode runs multiple agents in parallel, each working on the same task
1287/// with different approaches. Results are collected and compared.
1288pub async fn run_swarm(
1289    llm: Arc<dyn LLMProviderTrait>,
1290    config: Config,
1291    ravenfabric: Option<RavenFabricClient>,
1292) -> Result<()> {
1293    info!("Starting swarm mode (single-provider) — 3 parallel agents");
1294
1295    // Perform RavenFabric health check if configured
1296    if let Some(ref rf) = ravenfabric {
1297        if rf.is_enabled() {
1298            info!("RavenFabric remote execution available for swarm coordination");
1299            match rf.health().await {
1300                Ok(true) => info!("RavenFabric mesh is healthy"),
1301                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1302                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1303            }
1304        }
1305    }
1306
1307    let _system_prompt = &config.llm.system_prompt;
1308    let num_agents = 3;
1309    let mut handles = Vec::new();
1310
1311    // Spawn parallel agents with different personas
1312    let personas = [
1313        "You are an analytical agent. Focus on logic, structure, and precision.",
1314        "You are a creative agent. Focus on innovation, alternatives, and possibilities.",
1315        "You are a pragmatic agent. Focus on simplicity, efficiency, and practicality.",
1316    ];
1317
1318    for (i, persona) in personas.iter().enumerate().take(num_agents) {
1319        let llm_clone = llm.clone();
1320        let persona = persona.to_string();
1321        let task = "Analyze the given task and provide your solution.".to_string();
1322
1323        let handle = tokio::spawn(async move {
1324            let mut memory = ConversationMemory::new(&persona, 10);
1325            memory.add_user_message(&task);
1326
1327            let messages = memory.history().to_vec();
1328            match llm_clone.chat(messages).await {
1329                Ok(response) => {
1330                    let content = response
1331                        .choices
1332                        .first()
1333                        .map(|c| c.message.content.clone())
1334                        .unwrap_or_default();
1335                    Ok((i, content))
1336                }
1337                Err(e) => Err(format!("Agent {} failed: {}", i, e)),
1338            }
1339        });
1340
1341        handles.push(handle);
1342    }
1343
1344    // Collect results
1345    let mut results: Vec<(usize, String)> = Vec::new();
1346    for handle in handles {
1347        match handle.await {
1348            Ok(Ok((idx, result))) => {
1349                info!("Agent {} completed: {} chars", idx, result.len());
1350                results.push((idx, result));
1351            }
1352            Ok(Err(e)) => warn!("Agent failed: {}", e),
1353            Err(e) => warn!("Agent join failed: {}", e),
1354        }
1355    }
1356
1357    // Print swarm results
1358    println!("\n🐦‍⬛ Swarm Results ({} agents):", results.len());
1359    for (idx, result) in &results {
1360        println!(
1361            "\n── Agent {} ({}) ──",
1362            idx + 1,
1363            personas[*idx].split('.').next().unwrap_or("Unknown")
1364        );
1365        println!("{}", result);
1366    }
1367
1368    // Broadcast swarm results to RavenFabric if configured
1369    if let Some(ref rf) = ravenfabric {
1370        if rf.is_enabled() {
1371            let summary = format!(
1372                "Swarm completed: {} agents, results: {}",
1373                results.len(),
1374                results
1375                    .iter()
1376                    .map(|(i, r)| format!("Agent {}: {} chars", i, r.len()))
1377                    .collect::<Vec<_>>()
1378                    .join(", ")
1379            );
1380            let _ = rf.broadcast(&summary, 30).await;
1381            info!("Swarm results broadcast to RavenFabric mesh");
1382        }
1383    }
1384
1385    Ok(())
1386}
1387
1388/// Run supervisor agent coordinating sub-agents (single-provider) — v0.6
1389///
1390/// The supervisor decomposes a task into subtasks, spawns sub-agents for each,
1391/// and aggregates results. Uses the same LLM provider for all agents.
1392pub async fn run_supervisor(
1393    llm: Arc<dyn LLMProviderTrait>,
1394    config: Config,
1395    ravenfabric: Option<RavenFabricClient>,
1396) -> Result<()> {
1397    info!("Starting supervisor mode (single-provider)");
1398
1399    // Perform RavenFabric health check if configured
1400    if let Some(ref rf) = ravenfabric {
1401        if rf.is_enabled() {
1402            info!("RavenFabric remote execution available for supervisor coordination");
1403            match rf.health().await {
1404                Ok(true) => info!("RavenFabric mesh is healthy"),
1405                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1406                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1407            }
1408        }
1409    }
1410
1411    let system_prompt = &config.llm.system_prompt;
1412    let policy_engine = PolicyEngine::default_secure();
1413    let mut sandbox = Sandbox::default();
1414    sandbox.init().await.map_err(|e| {
1415        crate::error::RavenClawsError::CommandExecution(format!("Sandbox init failed: {}", e))
1416    })?;
1417    let audit_log = AuditLog::new(format!("supervisor-{}", std::process::id()));
1418    let registry = ToolRegistry::with_default_tools();
1419
1420    // Initial prompt to supervisor
1421    let supervisor_prompt = format!(
1422        "You are a supervisor agent. Your task is to decompose complex tasks into subtasks \
1423         and coordinate sub-agents to complete them. \
1424         \n\nFor each subtask, respond with:\n\
1425         SUBTASK: <description>\n\
1426         AGENT: <agent_number>\n\
1427         \nWhen all subtasks are complete, respond with:\n\
1428         FINAL: <aggregated result>\n\
1429         \nTask: {}",
1430        "Coordinate the completion of the assigned task."
1431    );
1432
1433    let mut memory = ConversationMemory::new(system_prompt, 20);
1434    memory.add_user_message(&supervisor_prompt);
1435
1436    let mut subtask_results: Vec<String> = Vec::new();
1437    let mut iteration = 0;
1438    let max_iterations = 15;
1439
1440    loop {
1441        iteration += 1;
1442        if iteration > max_iterations {
1443            warn!("Supervisor reached max iterations");
1444            break;
1445        }
1446
1447        let messages = memory.history().to_vec();
1448        let response = match llm.chat(messages).await {
1449            Ok(r) => r,
1450            Err(e) => {
1451                warn!(error = %e, "Supervisor LLM request failed");
1452                continue;
1453            }
1454        };
1455
1456        let content = response
1457            .choices
1458            .first()
1459            .map(|c| c.message.content.clone())
1460            .unwrap_or_default();
1461
1462        // Check for FINAL: completion
1463        if content.contains("FINAL:") {
1464            let final_response = content
1465                .split("FINAL:")
1466                .nth(1)
1467                .unwrap_or("")
1468                .trim()
1469                .to_string();
1470            info!("Supervisor completed task: {} chars", final_response.len());
1471
1472            let _ = audit_log.append(
1473                AuditEventType::AgentFinish,
1474                "supervisor",
1475                "Supervisor completed task coordination",
1476                Some(serde_json::json!({
1477                    "iterations": iteration,
1478                    "subtasks_completed": subtask_results.len(),
1479                })),
1480            );
1481
1482            println!("\n🐦‍⬛ Supervisor Result:\n{}", final_response);
1483            return Ok(());
1484        }
1485
1486        // Check for SUBTASK: decomposition
1487        if content.contains("SUBTASK:") {
1488            let subtask_block = content.split("SUBTASK:").nth(1).unwrap_or("");
1489            let subtask_lines: Vec<&str> = subtask_block.lines().take(3).collect();
1490
1491            let subtask_desc = subtask_lines.first().unwrap_or(&"").trim();
1492            let agent_num = subtask_lines
1493                .iter()
1494                .find(|l| l.starts_with("AGENT:"))
1495                .and_then(|l| l.split(':').nth(1))
1496                .unwrap_or("1")
1497                .trim();
1498
1499            if !subtask_desc.is_empty() {
1500                info!("Subtask {}: {}", agent_num, subtask_desc);
1501
1502                // Execute subtask
1503                let subtask_result = run_subtask_agent(
1504                    llm.clone(),
1505                    subtask_desc,
1506                    system_prompt,
1507                    &policy_engine,
1508                    &sandbox,
1509                    &audit_log,
1510                    &registry,
1511                )
1512                .await;
1513
1514                match subtask_result {
1515                    Ok(result) => {
1516                        info!("Subtask {} completed: {} chars", agent_num, result.len());
1517                        subtask_results.push(format!("Agent {} result: {}", agent_num, result));
1518
1519                        memory.add_assistant_message(&format!(
1520                            "Decomposed subtask {}: {}",
1521                            agent_num, subtask_desc
1522                        ));
1523                        memory
1524                            .add_user_message(&format!("Subtask {} result: {}", agent_num, result));
1525                    }
1526                    Err(e) => {
1527                        warn!("Subtask {} failed: {}", agent_num, e);
1528                        memory
1529                            .add_assistant_message(&format!("Subtask {} failed: {}", agent_num, e));
1530                    }
1531                }
1532            }
1533        } else {
1534            memory.add_assistant_message(&content);
1535        }
1536    }
1537
1538    // Fallback: return aggregated results
1539    if !subtask_results.is_empty() {
1540        let aggregated = subtask_results.join("\n\n");
1541        info!(
1542            "Supervisor aggregated {} subtask results",
1543            subtask_results.len()
1544        );
1545
1546        // Broadcast supervisor result to RavenFabric if configured
1547        if let Some(ref rf) = ravenfabric {
1548            if rf.is_enabled() {
1549                let summary = format!(
1550                    "Supervisor completed: {} subtasks, result: {} chars",
1551                    subtask_results.len(),
1552                    aggregated.len()
1553                );
1554                let _ = rf.broadcast(&summary, 30).await;
1555                info!("Supervisor result broadcast to RavenFabric mesh");
1556            }
1557        }
1558
1559        println!("\n🐦‍⬛ Supervisor Aggregated Result:\n{}", aggregated);
1560        return Ok(());
1561    }
1562
1563    Err(crate::error::RavenClawsError::CommandExecution(
1564        "Supervisor mode completed without results".to_string(),
1565    ))
1566}
1567
1568/// Run a subtask agent — helper for supervisor mode
1569async fn run_subtask_agent(
1570    llm: Arc<dyn LLMProviderTrait>,
1571    subtask: &str,
1572    system_prompt: &str,
1573    policy_engine: &PolicyEngine,
1574    sandbox: &Sandbox,
1575    audit_log: &AuditLog,
1576    registry: &ToolRegistry,
1577) -> Result<String> {
1578    let mut memory = ConversationMemory::new(system_prompt, 10);
1579    memory.add_user_message(&format!("Execute this subtask: {}", subtask));
1580
1581    for i in 0..5 {
1582        let messages = memory.history().to_vec();
1583        let response = match llm.chat(messages).await {
1584            Ok(r) => r,
1585            Err(e) => {
1586                warn!(error = %e, iteration = i, "Subtask agent LLM failed");
1587                continue;
1588            }
1589        };
1590
1591        let content = response
1592            .choices
1593            .first()
1594            .map(|c| c.message.content.clone())
1595            .unwrap_or_default();
1596
1597        if content.contains("FINAL:") || content.contains("DONE:") {
1598            return Ok(content
1599                .replace("FINAL:", "")
1600                .replace("DONE:", "")
1601                .trim()
1602                .to_string());
1603        }
1604
1605        // Try tool execution
1606        if let Some(tool_result) =
1607            execute_tool_call_with_security(&content, registry, policy_engine, sandbox, audit_log)
1608                .await
1609        {
1610            memory.add_assistant_message(&content);
1611            memory.add_user_message(&format!("Tool result: {}", tool_result.output));
1612        } else {
1613            memory.add_assistant_message(&content);
1614            memory.add_user_message("Continue with next step.");
1615        }
1616    }
1617
1618    Ok("Subtask completed".to_string())
1619}
1620
1621/// Run a single autonomous agent (multi-model mode)
1622pub async fn run_single_multi(
1623    multi_llm: MultiModelManager,
1624    config: Config,
1625    ravenfabric: Option<RavenFabricClient>,
1626) -> Result<()> {
1627    info!(
1628        "Starting single agent mode (multi-model) with {} providers",
1629        multi_llm.client_count()
1630    );
1631
1632    // Perform RavenFabric health check if configured
1633    if let Some(ref rf) = ravenfabric {
1634        if rf.is_enabled() {
1635            info!("RavenFabric remote execution available");
1636            match rf.health().await {
1637                Ok(true) => info!("RavenFabric mesh is healthy"),
1638                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1639                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1640            }
1641        }
1642    }
1643
1644    let system_prompt = &config.llm.system_prompt;
1645
1646    let messages = vec![
1647        ChatMessage {
1648            role: "system".to_string(),
1649            content: system_prompt.to_string(),
1650        },
1651        ChatMessage {
1652            role: "user".to_string(),
1653            content: "Ready. Awaiting instructions.".to_string(),
1654        },
1655    ];
1656
1657    // Round-robin: start with first provider, then rotate
1658    let mut last_index = 0;
1659    for i in 0..multi_llm.client_count() {
1660        let client = if i == 0 {
1661            multi_llm.get_client(0)
1662        } else {
1663            multi_llm.next_client(last_index)
1664        };
1665
1666        if let Some(client) = client {
1667            match client.chat(messages.clone()).await {
1668                Ok(response) => {
1669                    if let Some(choice) = response.choices.first() {
1670                        info!(provider = client.provider_name(), model = client.model(), response = %choice.message.content, "Provider response received");
1671                    }
1672                }
1673                Err(e) => {
1674                    warn!(error = %e, provider = client.provider_name(), model = client.model(), "Provider request failed");
1675                }
1676            }
1677            last_index = i;
1678        }
1679    }
1680
1681    // Broadcast results to RavenFabric if configured
1682    if let Some(ref rf) = ravenfabric {
1683        if rf.is_enabled() {
1684            let _ = rf
1685                .broadcast("Single agent (multi-model) completed", 30)
1686                .await;
1687            info!("Multi-model result broadcast to RavenFabric mesh");
1688        }
1689    }
1690
1691    Ok(())
1692}
1693
1694/// Run multiple agents in swarm mode (multi-model) — v0.6
1695///
1696/// Swarm mode runs multiple agents in parallel, each using a different LLM provider
1697/// for the same task. Results are collected and compared for diversity.
1698pub async fn run_swarm_multi(
1699    multi_llm: MultiModelManager,
1700    config: Config,
1701    ravenfabric: Option<RavenFabricClient>,
1702) -> Result<()> {
1703    info!(
1704        "Starting swarm mode (multi-model) — {} parallel agents",
1705        multi_llm.client_count()
1706    );
1707
1708    // Perform RavenFabric health check if configured
1709    if let Some(ref rf) = ravenfabric {
1710        if rf.is_enabled() {
1711            info!("RavenFabric remote execution available for swarm coordination");
1712            match rf.health().await {
1713                Ok(true) => info!("RavenFabric mesh is healthy"),
1714                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1715                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1716            }
1717        }
1718    }
1719
1720    let _system_prompt = &config.llm.system_prompt;
1721    let num_agents = multi_llm.client_count().min(3); // Cap at 3 for cost control
1722    let mut handles = Vec::new();
1723
1724    // Different personas for each agent
1725    let personas = [
1726        "You are an analytical agent. Focus on logic, structure, and precision.",
1727        "You are a creative agent. Focus on innovation, alternatives, and possibilities.",
1728        "You are a pragmatic agent. Focus on simplicity, efficiency, and practicality.",
1729    ];
1730
1731    for i in 0..num_agents {
1732        let client = multi_llm.get_client(i).unwrap().clone();
1733        let persona = personas.get(i).unwrap_or(&personas[0]).to_string();
1734        let task = "Analyze the given task and provide your solution.".to_string();
1735
1736        let handle = tokio::spawn(async move {
1737            let mut memory = ConversationMemory::new(&persona, 10);
1738            memory.add_user_message(&task);
1739
1740            let messages = memory.history().to_vec();
1741            match client.chat(messages).await {
1742                Ok(response) => {
1743                    let content = response
1744                        .choices
1745                        .first()
1746                        .map(|c| c.message.content.clone())
1747                        .unwrap_or_default();
1748                    Ok((
1749                        i,
1750                        client.provider_name().to_string(),
1751                        client.model().to_string(),
1752                        content,
1753                    ))
1754                }
1755                Err(e) => Err(format!("Agent {} failed: {}", i, e)),
1756            }
1757        });
1758
1759        handles.push(handle);
1760    }
1761
1762    // Collect results
1763    let mut results: Vec<(usize, String, String, String)> = Vec::new();
1764    for handle in handles {
1765        match handle.await {
1766            Ok(Ok((idx, provider, model, result))) => {
1767                info!(
1768                    "Agent {} ({}:{}) completed: {} chars",
1769                    idx,
1770                    provider,
1771                    model,
1772                    result.len()
1773                );
1774                results.push((idx, provider, model, result));
1775            }
1776            Ok(Err(e)) => warn!("Agent failed: {}", e),
1777            Err(e) => warn!("Agent join failed: {}", e),
1778        }
1779    }
1780
1781    // Print swarm results
1782    println!(
1783        "\n🐦‍⬛ Swarm Results ({} agents, multi-model):",
1784        results.len()
1785    );
1786    for (idx, provider, model, result) in &results {
1787        println!("\n── Agent {} ({}:{}) ──", idx + 1, provider, model);
1788        println!("{}", result);
1789    }
1790
1791    // Broadcast swarm results to RavenFabric if configured
1792    if let Some(ref rf) = ravenfabric {
1793        if rf.is_enabled() {
1794            let summary = format!("Multi-model swarm completed: {} agents", results.len());
1795            let _ = rf.broadcast(&summary, 30).await;
1796            info!("Multi-model swarm results broadcast to RavenFabric mesh");
1797        }
1798    }
1799
1800    Ok(())
1801}
1802
1803/// Run supervisor agent coordinating sub-agents (multi-model) — v0.6
1804///
1805/// The supervisor decomposes a task and assigns subtasks to different providers
1806/// based on their strengths. Results are aggregated.
1807pub async fn run_supervisor_multi(
1808    multi_llm: MultiModelManager,
1809    config: Config,
1810    ravenfabric: Option<RavenFabricClient>,
1811) -> Result<()> {
1812    info!(
1813        "Starting supervisor mode (multi-model) with {} providers",
1814        multi_llm.client_count()
1815    );
1816
1817    // Perform RavenFabric health check if configured
1818    if let Some(ref rf) = ravenfabric {
1819        if rf.is_enabled() {
1820            info!("RavenFabric remote execution available for supervisor coordination");
1821            match rf.health().await {
1822                Ok(true) => info!("RavenFabric mesh is healthy"),
1823                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1824                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1825            }
1826        }
1827    }
1828
1829    let system_prompt = &config.llm.system_prompt;
1830    let policy_engine = PolicyEngine::default_secure();
1831    let mut sandbox = Sandbox::default();
1832    sandbox.init().await.map_err(|e| {
1833        crate::error::RavenClawsError::CommandExecution(format!("Sandbox init failed: {}", e))
1834    })?;
1835    let audit_log = AuditLog::new(format!("supervisor-multi-{}", std::process::id()));
1836    let registry = ToolRegistry::with_default_tools();
1837
1838    // Supervisor prompt with multi-model awareness
1839    let supervisor_prompt = format!(
1840        "You are a supervisor agent coordinating multiple LLM providers. \
1841         Decompose tasks and assign them to appropriate providers based on their strengths. \
1842         \n\nFor each subtask, respond with:\n\
1843         SUBTASK: <description>\n\
1844         PROVIDER: <provider_index 0-{}>\n\
1845         \nWhen complete, respond with:\n\
1846         FINAL: <aggregated result>\n\
1847         \nTask: {}",
1848        multi_llm.client_count() - 1,
1849        "Coordinate the completion of the assigned task using available providers."
1850    );
1851
1852    let mut memory = ConversationMemory::new(system_prompt, 20);
1853    memory.add_user_message(&supervisor_prompt);
1854
1855    let mut subtask_results: Vec<String> = Vec::new();
1856    let mut iteration = 0;
1857    let max_iterations = 15;
1858
1859    loop {
1860        iteration += 1;
1861        if iteration > max_iterations {
1862            warn!("Supervisor reached max iterations");
1863            break;
1864        }
1865
1866        // Use round-robin for supervisor itself
1867        let supervisor_client = multi_llm
1868            .get_client(iteration % multi_llm.client_count())
1869            .or_else(|| multi_llm.get_client(0))
1870            .cloned();
1871
1872        let messages = memory.history().to_vec();
1873        let response =
1874            match supervisor_client.map(|c| tokio::spawn(async move { c.chat(messages).await })) {
1875                Some(handle) => match handle.await {
1876                    Ok(Ok(r)) => r,
1877                    Ok(Err(e)) => {
1878                        warn!(error = %e, "Supervisor LLM request failed");
1879                        continue;
1880                    }
1881                    Err(e) => {
1882                        warn!(error = %e, "Supervisor task join failed");
1883                        continue;
1884                    }
1885                },
1886                None => {
1887                    warn!("No LLM clients available");
1888                    break;
1889                }
1890            };
1891
1892        let content = response
1893            .choices
1894            .first()
1895            .map(|c| c.message.content.clone())
1896            .unwrap_or_default();
1897
1898        // Check for FINAL: completion
1899        if content.contains("FINAL:") {
1900            let final_response = content
1901                .split("FINAL:")
1902                .nth(1)
1903                .unwrap_or("")
1904                .trim()
1905                .to_string();
1906            info!("Supervisor completed task: {} chars", final_response.len());
1907
1908            let _ = audit_log.append(
1909                AuditEventType::AgentFinish,
1910                "supervisor",
1911                "Supervisor completed task coordination",
1912                Some(serde_json::json!({
1913                    "iterations": iteration,
1914                    "subtasks_completed": subtask_results.len(),
1915                    "providers_used": multi_llm.client_count(),
1916                })),
1917            );
1918
1919            println!("\n🐦‍⬛ Supervisor Result (multi-model):\n{}", final_response);
1920            return Ok(());
1921        }
1922
1923        // Check for SUBTASK: decomposition
1924        if content.contains("SUBTASK:") && content.contains("PROVIDER:") {
1925            let subtask_block = content.split("SUBTASK:").nth(1).unwrap_or("");
1926            let subtask_lines: Vec<&str> = subtask_block.lines().take(4).collect();
1927
1928            let subtask_desc = subtask_lines.first().unwrap_or(&"").trim();
1929            let provider_idx = subtask_lines
1930                .iter()
1931                .find(|l| l.starts_with("PROVIDER:"))
1932                .and_then(|l| l.split(':').nth(1))
1933                .and_then(|s| s.trim().parse::<usize>().ok())
1934                .unwrap_or(0);
1935
1936            if !subtask_desc.is_empty() {
1937                info!("Subtask for provider {}: {}", provider_idx, subtask_desc);
1938
1939                let client = multi_llm
1940                    .get_client(provider_idx)
1941                    .or_else(|| multi_llm.get_client(0));
1942
1943                if let Some(client) = client {
1944                    let subtask_result = run_subtask_agent(
1945                        client.clone(),
1946                        subtask_desc,
1947                        system_prompt,
1948                        &policy_engine,
1949                        &sandbox,
1950                        &audit_log,
1951                        &registry,
1952                    )
1953                    .await;
1954
1955                    match subtask_result {
1956                        Ok(result) => {
1957                            info!("Subtask {} completed: {} chars", provider_idx, result.len());
1958                            subtask_results.push(format!(
1959                                "Provider {} ({}): {}",
1960                                provider_idx,
1961                                client.provider_name(),
1962                                result
1963                            ));
1964
1965                            memory.add_assistant_message(&format!(
1966                                "Assigned subtask to provider {}: {}",
1967                                provider_idx, subtask_desc
1968                            ));
1969                            memory.add_user_message(&format!(
1970                                "Provider {} result: {}",
1971                                provider_idx, result
1972                            ));
1973                        }
1974                        Err(e) => {
1975                            warn!("Subtask {} failed: {}", provider_idx, e);
1976                            memory.add_assistant_message(&format!(
1977                                "Provider {} subtask failed: {}",
1978                                provider_idx, e
1979                            ));
1980                        }
1981                    }
1982                }
1983            }
1984        } else {
1985            memory.add_assistant_message(&content);
1986        }
1987    }
1988
1989    // Fallback: return aggregated results
1990    if !subtask_results.is_empty() {
1991        let aggregated = subtask_results.join("\n\n");
1992        info!(
1993            "Supervisor aggregated {} subtask results",
1994            subtask_results.len()
1995        );
1996
1997        // Broadcast supervisor result to RavenFabric if configured
1998        if let Some(ref rf) = ravenfabric {
1999            if rf.is_enabled() {
2000                let summary = format!(
2001                    "Multi-model supervisor completed: {} subtasks, result: {} chars",
2002                    subtask_results.len(),
2003                    aggregated.len()
2004                );
2005                let _ = rf.broadcast(&summary, 30).await;
2006                info!("Multi-model supervisor result broadcast to RavenFabric mesh");
2007            }
2008        }
2009
2010        println!(
2011            "\n🐦‍⬛ Supervisor Aggregated Result (multi-model):\n{}",
2012            aggregated
2013        );
2014        return Ok(());
2015    }
2016
2017    Err(crate::error::RavenClawsError::CommandExecution(
2018        "Supervisor mode completed without results".to_string(),
2019    ))
2020}
2021
2022/// Run interactive REPL mode
2023pub async fn run_repl(llm: Arc<dyn LLMProviderTrait>, config: Config) -> Result<()> {
2024    use tokio::io::{AsyncBufReadExt, BufReader};
2025
2026    info!("Starting interactive REPL mode");
2027
2028    let system_prompt = &config.llm.system_prompt;
2029    let mut memory = ConversationMemory::new(system_prompt, 0);
2030
2031    let stdin = BufReader::new(tokio::io::stdin());
2032    let mut lines = stdin.lines();
2033
2034    println!("RavenClaws REPL — type /exit to quit, /reset to clear history");
2035
2036    loop {
2037        print!("\n> ");
2038        use tokio::io::AsyncWriteExt;
2039        tokio::io::stdout().flush().await?;
2040
2041        let line = match lines.next_line().await {
2042            Ok(Some(l)) => l,
2043            Ok(None) => break, // EOF
2044            Err(e) => {
2045                warn!(error = %e, "REPL read error");
2046                break;
2047            }
2048        };
2049
2050        let input = line.trim();
2051
2052        if input.is_empty() {
2053            continue;
2054        }
2055
2056        match input {
2057            "/exit" | "/quit" => {
2058                println!("Exiting REPL.");
2059                break;
2060            }
2061            "/reset" => {
2062                memory = ConversationMemory::new(system_prompt, 0);
2063                println!("Conversation history reset.");
2064                continue;
2065            }
2066            _ => {}
2067        }
2068
2069        memory.add_user_message(input);
2070        let messages = memory.history().to_vec();
2071
2072        match llm.chat(messages).await {
2073            Ok(response) => {
2074                if let Some(choice) = response.choices.first() {
2075                    let content = &choice.message.content;
2076                    println!("{}", content);
2077                    memory.add_assistant_message(content);
2078                }
2079            }
2080            Err(e) => {
2081                warn!(error = %e, "LLM request failed");
2082                println!("Error: {}", e);
2083            }
2084        }
2085    }
2086
2087    Ok(())
2088}
2089
#[cfg(test)]
mod tests {
    use super::*;

    /// Answers the approval prompt for a fixed `shell_exec` call with `reply`
    /// and returns the resulting decision.
    fn approval_decision(reply: &str) -> bool {
        let args = serde_json::json!({"command": "echo hello"});
        tokio_test::block_on(prompt_for_approval_with_input("shell_exec", &args, reply))
    }

    /// Default registry, secure policy, default sandbox, and a throwaway audit log.
    fn security_fixtures() -> (ToolRegistry, PolicyEngine, Sandbox, AuditLog) {
        (
            ToolRegistry::with_default_tools(),
            PolicyEngine::default_secure(),
            Sandbox::default(),
            AuditLog::new("test-session".to_string()),
        )
    }

    // ── Entry-point signatures ──────────────────────────────────────────────

    #[test]
    fn test_swarm_function_exists() {
        // Compile-time check only: `run_swarm` must coerce to this fn-pointer type.
        let _signature: fn(Arc<dyn LLMProviderTrait>, Config, Option<RavenFabricClient>) -> _ =
            run_swarm;
    }

    #[test]
    fn test_supervisor_function_exists() {
        // Compile-time check only: `run_supervisor` must coerce to this fn-pointer type.
        let _signature: fn(Arc<dyn LLMProviderTrait>, Config, Option<RavenFabricClient>) -> _ =
            run_supervisor;
    }

    // ── ConversationMemory ──────────────────────────────────────────────────

    #[test]
    fn test_conversation_memory_new() {
        let memory = ConversationMemory::new("system prompt", 10);
        assert_eq!(memory.messages.len(), 1);

        let seed = &memory.messages[0];
        assert_eq!(seed.role, "system");
        assert_eq!(seed.content, "system prompt");
    }

    #[test]
    fn test_conversation_memory_add_user() {
        let mut memory = ConversationMemory::new("system", 10);
        memory.add_user_message("hello");
        assert_eq!(memory.messages.len(), 2);

        let added = &memory.messages[1];
        assert_eq!(added.role, "user");
        assert_eq!(added.content, "hello");
    }

    #[test]
    fn test_conversation_memory_trim() {
        let mut memory = ConversationMemory::new("system", 3);
        for (question, answer) in [("msg1", "resp1"), ("msg2", "resp2")] {
            memory.add_user_message(question);
            memory.add_assistant_message(answer);
        }
        // With a cap of 3, history must never exceed three stored messages.
        assert!(memory.messages.len() <= 3);
    }

    // ── parse_tool_call ─────────────────────────────────────────────────────

    #[test]
    fn test_parse_tool_call_valid() {
        let reply = "THOUGHT: I need to run a command\nTOOL_CALL: shell_exec\nARGS: {\"command\": \"echo hello\"}";
        let (tool, arguments) = parse_tool_call(reply).unwrap();
        assert_eq!(tool, "shell_exec");
        assert_eq!(arguments["command"], "echo hello");
    }

    #[test]
    fn test_parse_tool_call_missing_tool() {
        assert!(parse_tool_call("THOUGHT: no tool here").is_none());
    }

    #[test]
    fn test_parse_tool_call_missing_args() {
        assert!(parse_tool_call("TOOL_CALL: shell_exec\nNo args line").is_none());
    }

    #[test]
    fn test_parse_tool_call_invalid_json() {
        assert!(parse_tool_call("TOOL_CALL: shell_exec\nARGS: not valid json").is_none());
    }

    // ── AgentLoopConfig ─────────────────────────────────────────────────────

    #[test]
    fn test_agent_loop_config_default() {
        let defaults = AgentLoopConfig::default();
        assert_eq!(defaults.max_iterations, 10);
        assert!(!defaults.enable_tools);
        assert!(!defaults.require_approval);
    }

    #[test]
    fn test_agent_loop_config_require_approval() {
        let cfg = AgentLoopConfig {
            fallback_chain: None,
            token_budget: None,
            ravenfabric: None,
            no_final_required: false,
            token_lifetime_secs: 0,
            prompt_injection_protection: true,
            require_approval: true,
            enable_tools: true,
            max_iterations: 5,
        };
        assert_eq!(cfg.max_iterations, 5);
        assert!(cfg.enable_tools);
        assert!(cfg.require_approval);
        assert!(cfg.prompt_injection_protection);
        assert_eq!(cfg.token_lifetime_secs, 0);
    }

    // ── Approval prompt ─────────────────────────────────────────────────────

    #[test]
    fn test_prompt_for_approval_yes() {
        assert!(approval_decision("y"), "Should approve for 'y'");
    }

    #[test]
    fn test_prompt_for_approval_yes_full() {
        assert!(approval_decision("yes"), "Should approve for 'yes'");
    }

    #[test]
    fn test_prompt_for_approval_no() {
        assert!(!approval_decision("n"), "Should deny for 'n'");
    }

    #[test]
    fn test_prompt_for_approval_no_full() {
        assert!(!approval_decision("no"), "Should deny for 'no'");
    }

    #[test]
    fn test_prompt_for_approval_empty() {
        assert!(
            !approval_decision(""),
            "Should deny for empty input (default N)"
        );
    }

    #[test]
    fn test_prompt_for_approval_uppercase() {
        assert!(approval_decision("Y"), "Should approve for uppercase 'Y'");
    }

    #[test]
    fn test_prompt_for_approval_auto_approves_non_tty() {
        // The real prompt would block on stdin in an interactive terminal, so this
        // test never drives it. It only builds (and immediately drops) the future
        // of the input-injected variant, which checks that the call compiles.
        #[allow(clippy::let_underscore_future)]
        let _ = prompt_for_approval_with_input("test", &serde_json::json!({}), "y");
    }

    // ── execute_parsed_tool_call ────────────────────────────────────────────

    #[test]
    fn test_execute_parsed_tool_call_skips_approval_when_not_required() {
        let (registry, policy_engine, sandbox, audit_log) = security_fixtures();
        let require_approval = false;

        let outcome = tokio_test::block_on(execute_parsed_tool_call(
            "shell_exec".to_string(),
            serde_json::json!({"command": "echo hello"}),
            &registry,
            &policy_engine,
            &sandbox,
            &audit_log,
            require_approval,
        ));

        assert!(outcome.is_some());
        assert_eq!(outcome.unwrap().tool_name, "shell_exec");
    }

    #[test]
    fn test_execute_parsed_tool_call_approval_not_needed_for_read_only_tools() {
        // Per policy, read_file needs no approval; even with approval switched on
        // globally the call is expected to go through without prompting.
        let (registry, policy_engine, sandbox, audit_log) = security_fixtures();
        let require_approval = true;

        let outcome = tokio_test::block_on(execute_parsed_tool_call(
            "read_file".to_string(),
            serde_json::json!({"path": "/tmp/test.txt"}),
            &registry,
            &policy_engine,
            &sandbox,
            &audit_log,
            require_approval,
        ));

        // A result must come back, attributed to read_file.
        assert!(outcome.is_some());
        assert_eq!(outcome.unwrap().tool_name, "read_file");
    }

    // ── Token lifetime ──────────────────────────────────────────────────────

    #[test]
    fn test_agent_loop_config_token_lifetime_zero_disabled() {
        // 0 means unlimited — no timeout enforced
        let cfg = AgentLoopConfig {
            fallback_chain: None,
            token_budget: None,
            ravenfabric: None,
            no_final_required: false,
            token_lifetime_secs: 0,
            prompt_injection_protection: false,
            require_approval: false,
            enable_tools: false,
            max_iterations: 10,
        };
        assert_eq!(cfg.token_lifetime_secs, 0);
    }

    #[test]
    fn test_agent_loop_config_token_lifetime_nonzero() {
        let cfg = AgentLoopConfig {
            fallback_chain: None,
            token_budget: None,
            ravenfabric: None,
            no_final_required: false,
            token_lifetime_secs: 3600,
            prompt_injection_protection: false,
            require_approval: false,
            enable_tools: false,
            max_iterations: 10,
        };
        assert_eq!(cfg.token_lifetime_secs, 3600);
    }

    #[test]
    fn test_agent_loop_config_default_includes_token_lifetime() {
        assert_eq!(AgentLoopConfig::default().token_lifetime_secs, 0);
    }
}
ravenclaws/agent.rs

ravenclaws/
agent.rs