ravenclaws/
agent.rs

1//! RavenClaws
2//!
3//! Supports single-provider and multi-model (multi-provider) modes.
4//! Security-integrated: PolicyEngine, Sandbox, and AuditLog wired to agent loop.
5
6use crate::audit::{AuditEventType, AuditLog};
7use crate::config::Config;
8use crate::error::Result;
9use crate::llm::{
10    ChatMessage, Choice, LLMProviderTrait, MultiModelManager, ProviderFallbackChain, RetryConfig,
11    TokenBudget,
12};
13use crate::mcp::McpClient;
14use crate::policy::{Decision, PolicyEngine};
15use crate::ravenfabric::RavenFabricClient;
16use crate::sandbox::Sandbox;
17use crate::tools::{ToolCall, ToolRegistry, ToolResult};
18use serde::{Deserialize, Serialize};
19use std::path::PathBuf;
20use std::sync::Arc;
21use tokio::sync::RwLock;
22use tracing::{debug, info, instrument, warn};
23
24/// In-memory conversation memory — stores message history for the session.
25///
26/// With durable execution, messages can be serialized to disk between iterations
27/// so the agent loop can survive process restarts.
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct ConversationMemory {
30    /// Maximum number of messages to retain (0 = unlimited)
31    max_messages: usize,
32    /// Stored message history
33    messages: Vec<ChatMessage>,
34}
35
36impl ConversationMemory {
37    /// Create a new conversation memory with the given system prompt.
38    /// `max_messages` caps history length (oldest user/assistant pairs are dropped first).
39    pub fn new(system_prompt: &str, max_messages: usize) -> Self {
40        Self {
41            max_messages,
42            messages: vec![ChatMessage::new("system", system_prompt.to_string())],
43        }
44    }
45
46    /// Add a user message and return the full message history for an LLM call.
47    pub fn add_user_message(&mut self, content: &str) -> &[ChatMessage] {
48        self.messages
49            .push(ChatMessage::new("user", content.to_string()));
50        self.trim_to_max();
51        &self.messages
52    }
53
54    /// Add a multi-modal user message with image attachments.
55    pub fn add_user_message_with_images(
56        &mut self,
57        text: &str,
58        image_data_uris: Vec<String>,
59    ) -> &[ChatMessage] {
60        self.messages.push(ChatMessage::with_images(
61            "user",
62            text.to_string(),
63            image_data_uris,
64        ));
65        self.trim_to_max();
66        &self.messages
67    }
68
69    /// Add an assistant message to history.
70    pub fn add_assistant_message(&mut self, content: &str) {
71        self.messages
72            .push(ChatMessage::new("assistant", content.to_string()));
73        self.trim_to_max();
74    }
75
76    /// Get the current message history.
77    pub fn history(&self) -> &[ChatMessage] {
78        &self.messages
79    }
80
81    /// Create a ConversationMemory from an existing message history.
82    /// Used when restoring from a checkpoint.
83    pub fn from_history(messages: Vec<ChatMessage>, max_messages: usize) -> Self {
84        Self {
85            max_messages,
86            messages,
87        }
88    }
89
90    /// Get the number of messages in history.
91    #[allow(dead_code)]
92    pub fn len(&self) -> usize {
93        self.messages.len()
94    }
95
96    /// Check if history is empty (only system prompt or nothing).
97    #[allow(dead_code)]
98    pub fn is_empty(&self) -> bool {
99        self.messages.is_empty()
100    }
101
102    /// Trim oldest non-system messages when over the limit.
103    fn trim_to_max(&mut self) {
104        if self.max_messages == 0 {
105            return;
106        }
107        while self.messages.len() > self.max_messages {
108            // Remove the oldest non-system message (index 1, since index 0 is system)
109            if self.messages.len() > 1 {
110                self.messages.remove(1);
111            } else {
112                break;
113            }
114        }
115    }
116}
117
118/// Checkpoint state for durable execution — captures agent loop state between iterations.
119///
120/// This struct is serialized to disk after each iteration so the agent loop can
121/// survive process restarts. On resume, the checkpoint is loaded and the loop
122/// continues from where it left off.
123#[derive(Debug, Clone, Serialize, Deserialize)]
124pub struct CheckpointState {
125    /// Unique session identifier
126    pub session_id: String,
127    /// Current iteration number
128    pub iteration: usize,
129    /// Maximum iterations configured for this loop
130    pub max_iterations: usize,
131    /// Serialized conversation memory (message history)
132    pub messages: Vec<ChatMessage>,
133    /// The initial prompt that started this session
134    pub initial_prompt: String,
135    /// The system prompt for this session
136    pub system_prompt: String,
137    /// Provider name used for this session
138    pub provider: String,
139    /// Model name used for this session
140    pub model: String,
141    /// Whether tools were enabled
142    pub enable_tools: bool,
143    /// Timestamp of last checkpoint (ISO 8601)
144    pub last_checkpoint: String,
145}
146
147impl CheckpointState {
148    /// Create a new checkpoint state from current agent loop state.
149    #[allow(clippy::too_many_arguments)]
150    pub fn new(
151        session_id: String,
152        iteration: usize,
153        max_iterations: usize,
154        messages: Vec<ChatMessage>,
155        initial_prompt: &str,
156        system_prompt: &str,
157        provider: &str,
158        model: &str,
159        enable_tools: bool,
160    ) -> Self {
161        Self {
162            session_id,
163            iteration,
164            max_iterations,
165            messages,
166            initial_prompt: initial_prompt.to_string(),
167            system_prompt: system_prompt.to_string(),
168            provider: provider.to_string(),
169            model: model.to_string(),
170            enable_tools,
171            last_checkpoint: chrono::Utc::now().to_rfc3339(),
172        }
173    }
174}
175
176/// Save a checkpoint to disk.
177///
178/// Writes the checkpoint state as a JSON file to `{checkpoint_dir}/{session_id}.json`.
179/// Returns the path that was written to, or `None` if checkpointing is not configured.
180pub fn save_checkpoint(
181    checkpoint_dir: &std::path::Path,
182    state: &CheckpointState,
183) -> std::result::Result<std::path::PathBuf, String> {
184    let path = checkpoint_dir.join(format!("{}.json", state.session_id));
185
186    // Ensure the checkpoint directory exists
187    std::fs::create_dir_all(checkpoint_dir)
188        .map_err(|e| format!("Failed to create checkpoint directory: {}", e))?;
189
190    let content = serde_json::to_string_pretty(state)
191        .map_err(|e| format!("Failed to serialize checkpoint: {}", e))?;
192
193    // Write atomically: write to temp file, then rename
194    let tmp_path = path.with_extension("json.tmp");
195    std::fs::write(&tmp_path, &content)
196        .map_err(|e| format!("Failed to write checkpoint: {}", e))?;
197    std::fs::rename(&tmp_path, &path)
198        .map_err(|e| format!("Failed to finalize checkpoint: {}", e))?;
199
200    Ok(path)
201}
202
203/// Load a checkpoint from disk.
204///
205/// Reads the checkpoint state from `{checkpoint_dir}/{session_id}.json`.
206/// Returns `None` if the checkpoint file doesn't exist or can't be read.
207pub fn load_checkpoint(
208    checkpoint_dir: &std::path::Path,
209    session_id: &str,
210) -> Option<CheckpointState> {
211    let path = checkpoint_dir.join(format!("{}.json", session_id));
212
213    match std::fs::read_to_string(&path) {
214        Ok(content) => match serde_json::from_str::<CheckpointState>(&content) {
215            Ok(state) => {
216                info!(
217                    session_id = %session_id,
218                    iteration = state.iteration,
219                    max_iterations = state.max_iterations,
220                    "Loaded checkpoint"
221                );
222                Some(state)
223            }
224            Err(e) => {
225                warn!(
226                    session_id = %session_id,
227                    error = %e,
228                    "Failed to deserialize checkpoint"
229                );
230                None
231            }
232        },
233        Err(e) => {
234            if e.kind() != std::io::ErrorKind::NotFound {
235                warn!(
236                    session_id = %session_id,
237                    error = %e,
238                    "Failed to read checkpoint"
239                );
240            }
241            None
242        }
243    }
244}
245
246/// Delete a checkpoint file from disk.
247///
248/// Called when the agent loop completes successfully or fails definitively.
249pub fn delete_checkpoint(checkpoint_dir: &std::path::Path, session_id: &str) {
250    let path = checkpoint_dir.join(format!("{}.json", session_id));
251    if path.exists() {
252        if let Err(e) = std::fs::remove_file(&path) {
253            warn!(
254                session_id = %session_id,
255                error = %e,
256                "Failed to delete checkpoint"
257            );
258        } else {
259            debug!(
260                session_id = %session_id,
261                "Deleted checkpoint"
262            );
263        }
264    }
265}
266
267/// Agent loop configuration
268///
269/// Note: `Debug` and `Clone` are implemented manually because `metrics_callback`
270/// is a boxed closure that doesn't implement either trait.
271pub struct AgentLoopConfig {
272    /// Maximum iterations before forcing completion
273    pub max_iterations: usize,
274    /// Whether to enable tool calling
275    pub enable_tools: bool,
276    /// Require human approval for tool calls
277    pub require_approval: bool,
278    /// Enable prompt-injection defense on LLM responses
279    pub prompt_injection_protection: bool,
280    /// Maximum session lifetime in seconds (0 = unlimited)
281    /// When non-zero, the agent loop will stop after this duration
282    /// to enforce credential/session expiry.
283    pub token_lifetime_secs: u64,
284    /// When true, treat any non-tool-call response as completion (no FINAL: required)
285    pub no_final_required: bool,
286    /// Optional provider fallback chain — tries providers in order on failure
287    pub fallback_chain: Option<Arc<std::sync::Mutex<ProviderFallbackChain>>>,
288    /// Optional token budget — limits total tokens used per session
289    pub token_budget: Option<Arc<std::sync::Mutex<TokenBudget>>>,
290    /// Optional RavenFabric client — reports agent status and results to mesh
291    pub ravenfabric: Option<RavenFabricClient>,
292    /// Optional checkpoint directory for durable execution.
293    /// When set, the agent loop saves state after each iteration and can resume
294    /// from the latest checkpoint if interrupted.
295    pub checkpoint_dir: Option<PathBuf>,
296    /// Unique session ID for checkpointing.
297    /// If not set but checkpoint_dir is set, a UUID is generated automatically.
298    pub session_id: Option<String>,
299    /// Optional callback for recording metrics (token usage, tool calls).
300    /// Called with (tokens_used, tool_calls_count) after each iteration.
301    /// This allows the HTTP server to wire ServerMetrics without coupling agent.rs to server.rs.
302    pub metrics_callback: Option<Box<dyn Fn(u64, u64) + Send + Sync>>,
303
304    /// Optional load manager for graceful degradation.
305    /// When set, the agent loop checks admission before LLM calls and records outcomes.
306    pub load_manager: Option<Arc<crate::load::LoadManager>>,
307
308    /// Optional retry configuration for LLM calls.
309    /// When set, transient LLM failures are retried with exponential backoff
310    /// before falling back to the provider fallback chain.
311    /// Default: None (no retry — uses fallback chain directly).
312    pub retry_config: Option<RetryConfig>,
313}
314
315impl Default for AgentLoopConfig {
316    fn default() -> Self {
317        Self {
318            max_iterations: 10,
319            enable_tools: false,
320            require_approval: false,
321            prompt_injection_protection: true,
322            token_lifetime_secs: 0,
323            no_final_required: true,
324            fallback_chain: None,
325            token_budget: None,
326            ravenfabric: None,
327            checkpoint_dir: None,
328            session_id: None,
329            metrics_callback: None,
330            load_manager: None,
331            retry_config: None,
332        }
333    }
334}
335
336// Manual Debug implementation — skips metrics_callback (boxed closure doesn't impl Debug)
337impl std::fmt::Debug for AgentLoopConfig {
338    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
339        f.debug_struct("AgentLoopConfig")
340            .field("max_iterations", &self.max_iterations)
341            .field("enable_tools", &self.enable_tools)
342            .field("require_approval", &self.require_approval)
343            .field(
344                "prompt_injection_protection",
345                &self.prompt_injection_protection,
346            )
347            .field("token_lifetime_secs", &self.token_lifetime_secs)
348            .field("no_final_required", &self.no_final_required)
349            .field("fallback_chain", &self.fallback_chain)
350            .field("token_budget", &self.token_budget)
351            .field("ravenfabric", &self.ravenfabric)
352            .field("checkpoint_dir", &self.checkpoint_dir)
353            .field("session_id", &self.session_id)
354            .field(
355                "metrics_callback",
356                &self.metrics_callback.as_ref().map(|_| "Box<Fn>"),
357            )
358            .field(
359                "load_manager",
360                &self.load_manager.as_ref().map(|_| "Arc<LoadManager>"),
361            )
362            .finish()
363    }
364}
365
366// Manual Clone implementation — metrics_callback is NOT cloned (intentionally dropped)
367// because the callback is only needed by the original caller (e.g., HTTP server).
368impl Clone for AgentLoopConfig {
369    fn clone(&self) -> Self {
370        Self {
371            max_iterations: self.max_iterations,
372            enable_tools: self.enable_tools,
373            require_approval: self.require_approval,
374            prompt_injection_protection: self.prompt_injection_protection,
375            token_lifetime_secs: self.token_lifetime_secs,
376            no_final_required: self.no_final_required,
377            fallback_chain: self.fallback_chain.clone(),
378            token_budget: self.token_budget.clone(),
379            ravenfabric: self.ravenfabric.clone(),
380            checkpoint_dir: self.checkpoint_dir.clone(),
381            session_id: self.session_id.clone(),
382            metrics_callback: None,
383            load_manager: self.load_manager.clone(),
384            retry_config: self.retry_config.clone(),
385        }
386    }
387}
388
389/// Run the agent loop with security integration (PolicyEngine + Sandbox + AuditLog)
390///
391/// This is the security-integrated version that:
392/// 1. Checks all tool calls against PolicyEngine before execution
393/// 2. Executes shell commands in the Sandbox
394/// 3. Logs all tool calls, policy decisions, and results to AuditLog
395#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
396pub async fn run_agent_loop(
397    llm: Arc<dyn LLMProviderTrait>,
398    initial_prompt: &str,
399    system_prompt: &str,
400    config: AgentLoopConfig,
401) -> Result<String> {
402    run_agent_loop_with_registry(llm, initial_prompt, system_prompt, config, None).await
403}
404
405/// Run the agent loop with an optional pre-configured ToolRegistry
406///
407/// This allows callers to pass a registry with custom tool configurations
408/// (e.g., configured web search endpoint). If `None` is passed, default tools are used.
409#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
410pub async fn run_agent_loop_with_registry(
411    llm: Arc<dyn LLMProviderTrait>,
412    initial_prompt: &str,
413    system_prompt: &str,
414    config: AgentLoopConfig,
415    tool_registry: Option<ToolRegistry>,
416) -> Result<String> {
417    let registry = tool_registry.unwrap_or_else(ToolRegistry::with_default_tools);
418    run_agent_loop_inner(
419        llm,
420        initial_prompt,
421        system_prompt,
422        config,
423        registry,
424        "security integration",
425        false,
426        Vec::new(),
427    )
428    .await
429}
430
431/// Run the agent loop with multi-modal image input.
432///
433/// Accepts a list of base64-encoded image data URIs that will be attached
434/// to the initial user message. Supported formats: PNG, JPEG, GIF, WebP.
435#[allow(dead_code)]
436#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model(), image_count = image_data_uris.len()))]
437pub async fn run_agent_loop_with_images(
438    llm: Arc<dyn LLMProviderTrait>,
439    initial_prompt: &str,
440    system_prompt: &str,
441    config: AgentLoopConfig,
442    tool_registry: Option<ToolRegistry>,
443    image_data_uris: Vec<String>,
444) -> Result<String> {
445    let registry = tool_registry.unwrap_or_else(ToolRegistry::with_default_tools);
446    run_agent_loop_inner(
447        llm,
448        initial_prompt,
449        system_prompt,
450        config,
451        registry,
452        "security integration",
453        false,
454        image_data_uris,
455    )
456    .await
457}
458
459/// Call the LLM with retry logic (exponential backoff).
460///
461/// Retries on transient errors (RequestFailed, RateLimited, CircuitBreakerOpen)
462/// but NOT on non-transient errors (AuthFailed, TokenBudgetExceeded, InvalidResponse).
463/// Checkpoints are preserved during retries — only deleted on permanent failure.
464///
465/// # Parameters
466///
467/// * `llm` — The LLM provider to call.
468/// * `messages` — The message history to send.
469/// * `retry_config` — Optional retry configuration. If `None`, no retry is attempted.
470/// * `audit_log` — Audit log for recording retry events.
471/// * `session_id` — Session ID for checkpoint management.
472/// * `checkpoint_dir` — Optional checkpoint directory (checkpoints preserved during retries).
473/// * `iteration` — Current agent loop iteration (for logging).
474async fn call_llm_with_retry(
475    llm: &Arc<dyn LLMProviderTrait>,
476    messages: Vec<ChatMessage>,
477    retry_config: Option<&RetryConfig>,
478    audit_log: &AuditLog,
479    session_id: &str,
480    checkpoint_dir: &Option<PathBuf>,
481    iteration: usize,
482) -> std::result::Result<crate::llm::ChatResponse, crate::llm::LLMError> {
483    let max_attempts = match retry_config {
484        Some(cfg) => cfg.max_retries + 1, // +1 for the initial attempt
485        None => 1,
486    };
487
488    let mut last_error = None;
489
490    for attempt in 0..max_attempts {
491        if attempt > 0 {
492            let delay = retry_config.unwrap().delay_for_attempt(attempt - 1);
493            info!(
494                attempt = attempt + 1,
495                max_attempts = max_attempts,
496                delay_ms = delay.as_millis(),
497                iteration = iteration,
498                "Retrying LLM call after transient error"
499            );
500            tokio::time::sleep(delay).await;
501        }
502
503        match llm.chat(messages.clone()).await {
504            Ok(response) => {
505                if attempt > 0 {
506                    info!(
507                        attempt = attempt + 1,
508                        iteration = iteration,
509                        "LLM call succeeded on retry"
510                    );
511                    let _ = audit_log.append(
512                        AuditEventType::Custom("Info".to_string()),
513                        "llm_retry",
514                        &format!("LLM call succeeded on retry attempt {}", attempt + 1),
515                        None,
516                    );
517                }
518                return Ok(response);
519            }
520            Err(e) => {
521                let is_transient = matches!(
522                    &e,
523                    crate::llm::LLMError::RequestFailed(_)
524                        | crate::llm::LLMError::RateLimited
525                        | crate::llm::LLMError::CircuitBreakerOpen(_)
526                );
527
528                if is_transient && attempt + 1 < max_attempts {
529                    warn!(
530                        error = %e,
531                        attempt = attempt + 1,
532                        max_attempts = max_attempts,
533                        iteration = iteration,
534                        "Transient LLM error, will retry"
535                    );
536                    let _ = audit_log.append(
537                        AuditEventType::Error,
538                        "llm_retry",
539                        &format!("Transient LLM error on attempt {}: {}", attempt + 1, e),
540                        None,
541                    );
542                    last_error = Some(e);
543                    // Checkpoint is NOT deleted here — preserved for retry
544                    continue;
545                }
546
547                // Non-transient error, or out of retries
548                if attempt + 1 >= max_attempts && is_transient {
549                    warn!(
550                        error = %e,
551                        attempts = max_attempts,
552                        iteration = iteration,
553                        "All retry attempts exhausted"
554                    );
555                    let _ = audit_log.append(
556                        AuditEventType::Error,
557                        "llm_retry",
558                        &format!("All {} retry attempts exhausted: {}", max_attempts, e),
559                        None,
560                    );
561                }
562
563                // Delete checkpoint on permanent LLM failure
564                if let Some(ref cp_dir) = checkpoint_dir {
565                    delete_checkpoint(cp_dir, session_id);
566                }
567                return Err(e);
568            }
569        }
570    }
571
572    // Should not reach here, but handle gracefully
573    Err(last_error.unwrap_or(crate::llm::LLMError::RequestFailed(
574        "All retry attempts exhausted".to_string(),
575    )))
576}
577
578/// Shared inner agent loop — contains all iteration logic.
579///
580/// Both `run_agent_loop_with_registry` and `run_agent_loop_with_mcp_and_registry`
581/// delegate to this function, avoiding ~400 lines of duplicated code.
582///
583/// # Parameters
584///
585/// * `registry` — A fully initialized `ToolRegistry` (caller resolves defaults/MCP tools).
586/// * `loop_label` — Label for log messages (e.g. "security integration" or "MCP integration").
587/// * `mcp_enabled` — Whether MCP is active, used in audit event metadata.
588/// * `image_data_uris` — Optional list of base64-encoded image data URIs for multi-modal input.
589#[allow(clippy::too_many_arguments)]
590#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
591async fn run_agent_loop_inner(
592    llm: Arc<dyn LLMProviderTrait>,
593    initial_prompt: &str,
594    system_prompt: &str,
595    config: AgentLoopConfig,
596    registry: ToolRegistry,
597    loop_label: &str,
598    mcp_enabled: bool,
599    image_data_uris: Vec<String>,
600) -> Result<String> {
601    // Initialize security components
602    let policy_engine = PolicyEngine::default_secure();
603    let mut sandbox = Sandbox::default();
604    sandbox.init().await.map_err(|e| {
605        crate::error::RavenClawsError::CommandExecution(format!("Sandbox init failed: {}", e))
606    })?;
607    let audit_log = AuditLog::new(format!("agent-{}", std::process::id()));
608
609    // Initialize injection detector
610    let injection_detector = if config.prompt_injection_protection {
611        Some(crate::policy::InjectionDetector::new())
612    } else {
613        None
614    };
615
616    // Track session start time for token lifetime enforcement
617    let session_start = std::time::Instant::now();
618
619    info!(
620        provider = llm.provider_name(),
621        model = llm.model(),
622        max_iterations = config.max_iterations,
623        enable_tools = config.enable_tools,
624        tool_count = registry.len(),
625        require_approval = config.require_approval,
626        prompt_injection_protection = config.prompt_injection_protection,
627        token_lifetime_secs = config.token_lifetime_secs,
628        "Agent loop starting with {}",
629        loop_label
630    );
631
632    // Audit: agent start
633    let _ = audit_log.append(
634        AuditEventType::AgentStart,
635        "agent",
636        &format!(
637            "Agent loop started with {} (model: {})",
638            llm.provider_name(),
639            llm.model()
640        ),
641        Some(serde_json::json!({
642            "provider": llm.provider_name(),
643            "model": llm.model(),
644            "max_iterations": config.max_iterations,
645            "enable_tools": config.enable_tools,
646            "mcp_enabled": mcp_enabled,
647            "tool_count": registry.len(),
648            "require_approval": config.require_approval,
649            "prompt_injection_protection": config.prompt_injection_protection,
650            "token_lifetime_secs": config.token_lifetime_secs,
651        })),
652    );
653
654    // ── Durable execution: checkpoint resume ──────────────────────────────
655    // If a checkpoint exists for this session, restore state from it instead
656    // of starting fresh. This allows the agent loop to survive process restarts.
657    let (mut memory, start_iteration) = if let Some(ref checkpoint_dir) = config.checkpoint_dir {
658        if let Some(ref session_id) = config.session_id {
659            if let Some(checkpoint) = load_checkpoint(checkpoint_dir, session_id) {
660                info!(
661                    session_id = %session_id,
662                    iteration = checkpoint.iteration,
663                    max_iterations = checkpoint.max_iterations,
664                    "Resuming agent loop from checkpoint"
665                );
666                (
667                    ConversationMemory::from_history(checkpoint.messages, 0),
668                    checkpoint.iteration + 1, // resume from next iteration
669                )
670            } else {
671                info!(
672                    session_id = %session_id,
673                    "No checkpoint found, starting fresh"
674                );
675                let mut m = ConversationMemory::new(system_prompt, 0);
676                if image_data_uris.is_empty() {
677                    m.add_user_message(initial_prompt);
678                } else {
679                    m.add_user_message_with_images(initial_prompt, image_data_uris.clone());
680                }
681                (m, 0)
682            }
683        } else {
684            let mut m = ConversationMemory::new(system_prompt, 0);
685            if image_data_uris.is_empty() {
686                m.add_user_message(initial_prompt);
687            } else {
688                m.add_user_message_with_images(initial_prompt, image_data_uris.clone());
689            }
690            (m, 0)
691        }
692    } else {
693        let mut m = ConversationMemory::new(system_prompt, 0);
694        if image_data_uris.is_empty() {
695            m.add_user_message(initial_prompt);
696        } else {
697            m.add_user_message_with_images(initial_prompt, image_data_uris.clone());
698        }
699        (m, 0)
700    };
701
702    // Generate a session ID if checkpointing is enabled but no ID was provided
703    let session_id = config
704        .session_id
705        .clone()
706        .unwrap_or_else(|| uuid::Uuid::new_v4().to_string());
707
708    for iteration in start_iteration..config.max_iterations {
709        // Check token lifetime: enforce session expiry
710        if config.token_lifetime_secs > 0 {
711            let elapsed = session_start.elapsed().as_secs();
712            if elapsed >= config.token_lifetime_secs {
713                warn!(
714                    iteration = iteration,
715                    elapsed_secs = elapsed,
716                    token_lifetime_secs = config.token_lifetime_secs,
717                    "Agent loop reached token lifetime limit"
718                );
719                let _ = audit_log.append(
720                    AuditEventType::SecurityViolation,
721                    "token_lifetime",
722                    &format!(
723                        "Session expired after {} seconds (limit: {}s)",
724                        elapsed, config.token_lifetime_secs
725                    ),
726                    Some(serde_json::json!({
727                        "elapsed_secs": elapsed,
728                        "token_lifetime_secs": config.token_lifetime_secs,
729                        "iteration": iteration,
730                    })),
731                );
732                // Delete checkpoint on security violation
733                if let Some(ref checkpoint_dir) = config.checkpoint_dir {
734                    delete_checkpoint(checkpoint_dir, &session_id);
735                }
736                return Err(crate::error::RavenClawsError::SecurityViolation(format!(
737                    "Session token expired after {} seconds (limit: {}s)",
738                    elapsed, config.token_lifetime_secs
739                )));
740            }
741        }
742        let messages = memory.history().to_vec();
743
744        // Check token budget before making LLM call
745        if let Some(ref budget) = config.token_budget {
746            let budget = budget.lock().unwrap();
747            if budget.remaining() < 100 {
748                warn!(
749                    iteration = iteration,
750                    remaining = budget.remaining(),
751                    "Token budget exhausted"
752                );
753                let _ = audit_log.append(
754                    AuditEventType::SecurityViolation,
755                    "token_budget",
756                    &format!("Token budget exhausted (remaining: {})", budget.remaining()),
757                    Some(serde_json::json!({
758                        "remaining": budget.remaining(),
759                        "used": budget.used_tokens,
760                        "iteration": iteration,
761                    })),
762                );
763                // Delete checkpoint on budget exhaustion
764                if let Some(ref checkpoint_dir) = config.checkpoint_dir {
765                    delete_checkpoint(checkpoint_dir, &session_id);
766                }
767                return Err(crate::error::RavenClawsError::SecurityViolation(
768                    "Token budget exhausted".to_string(),
769                ));
770            }
771        }
772
773        // Check admission control before LLM call
774        if let Some(ref load_manager) = config.load_manager {
775            let admission = load_manager.check_admission();
776            if !admission.is_allowed() {
777                warn!(
778                    ?admission,
779                    iteration = iteration,
780                    "Admission denied before LLM call"
781                );
782                let _ = audit_log.append(
783                    AuditEventType::Error,
784                    "load_manager",
785                    &format!("Admission denied: {:?}", admission),
786                    None,
787                );
788                load_manager.record_outcome(crate::load::RequestOutcome::Failure);
789                // Delete checkpoint on admission denial
790                if let Some(ref checkpoint_dir) = config.checkpoint_dir {
791                    delete_checkpoint(checkpoint_dir, &session_id);
792                }
793                return Err(crate::error::RavenClawsError::SecurityViolation(format!(
794                    "Admission denied: {:?}",
795                    admission
796                )));
797            }
798        }
799
800        // ── LLM call with retry (exponential backoff) ──────────────────────
801        // Retry handles transient errors (RequestFailed, RateLimited, CircuitBreakerOpen)
802        // before falling back to the provider fallback chain.
803        let response = match call_llm_with_retry(
804            &llm,
805            messages.clone(),
806            config.retry_config.as_ref(),
807            &audit_log,
808            &session_id,
809            &config.checkpoint_dir,
810            iteration,
811        )
812        .await
813        {
814            Ok(r) => {
815                // Record success
816                if let Some(ref load_manager) = config.load_manager {
817                    load_manager.record_outcome(crate::load::RequestOutcome::Success);
818                }
819                r
820            }
821            Err(e) => {
822                // Record failure
823                if let Some(ref load_manager) = config.load_manager {
824                    load_manager.record_outcome(crate::load::RequestOutcome::Failure);
825                }
826                // Try fallback chain if available
827                if let Some(ref chain) = config.fallback_chain {
828                    warn!(error = %e, "Primary LLM failed after retries, trying fallback chain");
829                    let _ = audit_log.append(
830                        AuditEventType::Error,
831                        "llm",
832                        &format!("Primary LLM failed after retries, trying fallback: {}", e),
833                        None,
834                    );
835                    // Clone configs out of mutex to avoid holding MutexGuard across .await
836                    let configs = {
837                        let c = chain.lock().unwrap();
838                        c.configs.clone()
839                    };
840                    let mut temp_chain = ProviderFallbackChain::new(configs);
841                    match temp_chain.chat_with_fallback(messages).await {
842                        Ok(r) => {
843                            info!("Fallback chain succeeded");
844                            // Record token usage from fallback response
845                            if let Some(ref budget) = config.token_budget {
846                                if let Some(usage) = &r.usage {
847                                    let mut b = budget.lock().unwrap();
848                                    b.record_usage(usage.total_tokens);
849                                }
850                            }
851                            r
852                        }
853                        Err(fallback_e) => {
854                            warn!(error = %fallback_e, "Fallback chain also failed");
855                            let _ = audit_log.append(
856                                AuditEventType::Error,
857                                "llm",
858                                &format!("All providers failed: {}", fallback_e),
859                                None,
860                            );
861                            // Checkpoint already deleted by call_llm_with_retry on permanent failure
862                            return Err(crate::error::RavenClawsError::Llm(fallback_e));
863                        }
864                    }
865                } else {
866                    warn!(error = %e, "LLM request failed after retries");
867                    let _ = audit_log.append(
868                        AuditEventType::Error,
869                        "llm",
870                        &format!("LLM request failed after retries: {}", e),
871                        None,
872                    );
873                    // Checkpoint already deleted by call_llm_with_retry on permanent failure
874                    return Err(crate::error::RavenClawsError::Llm(e));
875                }
876            }
877        };
878
879        // Record token usage from response
880        let mut iteration_tokens: u64 = 0;
881        if let Some(ref budget) = config.token_budget {
882            if let Some(usage) = &response.usage {
883                let mut b = budget.lock().unwrap();
884                b.record_usage(usage.total_tokens);
885                iteration_tokens = usage.total_tokens as u64;
886                debug!(
887                    iteration = iteration,
888                    tokens_used = usage.total_tokens,
889                    total_used = b.used_tokens,
890                    remaining = b.remaining(),
891                    "Token usage recorded"
892                );
893            }
894        } else if let Some(usage) = &response.usage {
895            iteration_tokens = usage.total_tokens as u64;
896        }
897
898        // Report metrics via callback if configured
899        if let Some(ref cb) = config.metrics_callback {
900            cb(iteration_tokens, 0);
901        }
902
903        // Report progress to RavenFabric if configured
904        if let Some(ref rf) = config.ravenfabric {
905            if rf.is_enabled() {
906                let _ = rf.health().await;
907                info!(
908                    iteration = iteration,
909                    ravenfabric = true,
910                    "RavenFabric health check completed"
911                );
912            }
913        }
914
915        let first_choice = response.choices.first();
916        let content = first_choice
917            .map(|c| c.message.content.clone())
918            .unwrap_or_default();
919
920        debug!(
921            iteration = iteration,
922            response_length = content.len(),
923            response_preview = %content[..content.len().min(500)],
924            "LLM response received"
925        );
926
927        // Prompt-injection defense: check LLM response before processing
928        if let Some(ref detector) = injection_detector {
929            match detector.check(&content) {
930                crate::policy::InjectionVerdict::Suspicious(reason) => {
931                    warn!(
932                        iteration = iteration,
933                        reason = %reason,
934                        "Prompt-injection detected in LLM response"
935                    );
936                    let _ = audit_log.append(
937                        AuditEventType::SecurityViolation,
938                        "injection_detector",
939                        &format!("Prompt-injection detected: {}", reason),
940                        Some(serde_json::json!({
941                            "reason": reason,
942                            "iteration": iteration,
943                            "content_preview": &content[..content.len().min(200)],
944                        })),
945                    );
946                    // Delete checkpoint on injection detection
947                    if let Some(ref checkpoint_dir) = config.checkpoint_dir {
948                        delete_checkpoint(checkpoint_dir, &session_id);
949                    }
950                    return Err(crate::error::RavenClawsError::SecurityViolation(format!(
951                        "LLM response blocked: potential prompt injection ({})",
952                        reason
953                    )));
954                }
955                crate::policy::InjectionVerdict::Clean => {}
956            }
957        }
958
959        // Check for structured tool calls first (OpenAI Tools format)
960        if config.enable_tools {
961            if let Some((tool_name, args)) = first_choice.and_then(parse_structured_tool_call) {
962                info!(tool = %tool_name, "Structured tool call detected");
963
964                // Execute tool with security
965                if let Some(tool_result) = execute_parsed_tool_call(
966                    tool_name,
967                    args,
968                    &registry,
969                    &policy_engine,
970                    &sandbox,
971                    &audit_log,
972                    config.require_approval,
973                )
974                .await
975                {
976                    let observation = if tool_result.success {
977                        format!("OBSERVATION: {}", tool_result.output)
978                    } else {
979                        format!(
980                            "OBSERVATION: Tool failed with error: {}",
981                            tool_result.error.as_deref().unwrap_or("unknown error")
982                        )
983                    };
984
985                    memory.add_user_message(&observation);
986
987                    // Report tool call via metrics callback
988                    if let Some(ref cb) = config.metrics_callback {
989                        cb(0, 1);
990                    }
991
992                    info!(
993                        iteration = iteration,
994                        tool = %tool_result.tool_name,
995                        success = tool_result.success,
996                        "Structured tool executed"
997                    );
998                    continue;
999                }
1000            }
1001        }
1002
1003        // Check for completion signal
1004        if content.contains("FINAL:") {
1005            let final_response = content
1006                .split("FINAL:")
1007                .nth(1)
1008                .unwrap_or("")
1009                .trim()
1010                .to_string();
1011
1012            memory.add_assistant_message(&content);
1013
1014            // Audit: agent finish
1015            let _ = audit_log.append(
1016                AuditEventType::AgentFinish,
1017                "agent",
1018                "Agent loop completed successfully",
1019                Some(serde_json::json!({
1020                    "iterations": iteration + 1,
1021                    "final_response_length": final_response.len(),
1022                })),
1023            );
1024
1025            // Delete checkpoint on successful completion
1026            if let Some(ref checkpoint_dir) = config.checkpoint_dir {
1027                delete_checkpoint(checkpoint_dir, &session_id);
1028            }
1029
1030            return Ok(final_response);
1031        }
1032
1033        // Execute tool calls if enabled (legacy pattern-matching fallback)
1034        if config.enable_tools {
1035            if let Some(tool_result) = execute_tool_call_with_security(
1036                &content,
1037                &registry,
1038                &policy_engine,
1039                &sandbox,
1040                &audit_log,
1041            )
1042            .await
1043            {
1044                let observation = if tool_result.success {
1045                    format!("OBSERVATION: {}", tool_result.output)
1046                } else {
1047                    format!(
1048                        "OBSERVATION: Tool failed with error: {}",
1049                        tool_result.error.as_deref().unwrap_or("unknown error")
1050                    )
1051                };
1052
1053                memory.add_assistant_message(&content);
1054                memory.add_user_message(&observation);
1055
1056                // Report tool call via metrics callback
1057                if let Some(ref cb) = config.metrics_callback {
1058                    cb(0, 1);
1059                }
1060
1061                info!(
1062                    iteration = iteration,
1063                    tool = %tool_result.tool_name,
1064                    success = tool_result.success,
1065                    "Tool executed"
1066                );
1067                continue;
1068            }
1069        }
1070
1071        // No tool call found and no FINAL: — treat as regular response
1072        memory.add_assistant_message(&content);
1073
1074        // ── Durable execution: save checkpoint after each iteration ────────
1075        if let Some(ref checkpoint_dir) = config.checkpoint_dir {
1076            let checkpoint = CheckpointState::new(
1077                session_id.clone(),
1078                iteration,
1079                config.max_iterations,
1080                memory.history().to_vec(),
1081                initial_prompt,
1082                system_prompt,
1083                llm.provider_name(),
1084                llm.model(),
1085                config.enable_tools,
1086            );
1087            if let Err(e) = save_checkpoint(checkpoint_dir, &checkpoint) {
1088                warn!(
1089                    session_id = %session_id,
1090                    iteration = iteration,
1091                    error = %e,
1092                    "Failed to save checkpoint"
1093                );
1094            } else {
1095                debug!(
1096                    session_id = %session_id,
1097                    iteration = iteration,
1098                    "Checkpoint saved"
1099                );
1100            }
1101        }
1102
1103        // If no_final_required is set, treat any non-tool-call response as completion
1104        if config.no_final_required {
1105            info!(
1106                iteration = iteration,
1107                response_length = content.len(),
1108                "no_final_required: treating response as completion"
1109            );
1110            let _ = audit_log.append(
1111                AuditEventType::AgentFinish,
1112                "agent",
1113                "Agent loop completed (no_final_required)",
1114                Some(serde_json::json!({
1115                    "iterations": iteration + 1,
1116                    "final_response_length": content.len(),
1117                })),
1118            );
1119            // Delete checkpoint on successful completion
1120            if let Some(ref checkpoint_dir) = config.checkpoint_dir {
1121                delete_checkpoint(checkpoint_dir, &session_id);
1122            }
1123            return Ok(content);
1124        }
1125
1126        info!(
1127            iteration = iteration,
1128            thought = %content.lines().find(|l| l.starts_with("THOUGHT:")).unwrap_or("<no thought>"),
1129            "Agent loop progress"
1130        );
1131    }
1132
1133    // Max iterations reached
1134    warn!(
1135        max_iterations = config.max_iterations,
1136        "Agent loop reached max iterations"
1137    );
1138
1139    let _ = audit_log.append(
1140        AuditEventType::Error,
1141        "agent",
1142        "Agent loop reached max iterations without completing",
1143        Some(serde_json::json!({
1144            "max_iterations": config.max_iterations,
1145        })),
1146    );
1147
1148    // Delete checkpoint on max iterations (task is done, even if incomplete)
1149    if let Some(ref checkpoint_dir) = config.checkpoint_dir {
1150        delete_checkpoint(checkpoint_dir, &session_id);
1151    }
1152
1153    let history = memory.history();
1154    if history.len() > 1 {
1155        if let Some(last) = history.last() {
1156            return Ok(last.content.clone());
1157        }
1158    }
1159
1160    Err(crate::error::RavenClawsError::CommandExecution(
1161        "Agent loop reached max iterations without completing the task".to_string(),
1162    ))
1163}
1164
1165/// Run the agent loop with MCP tool integration (v0.5.2)
1166///
1167/// This version extends run_agent_loop with MCP tool support:
1168/// 1. Registers MCP tools into the ToolRegistry
1169/// 2. MCP tools are executed alongside built-in tools
1170#[allow(dead_code)]
1171#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
1172pub async fn run_agent_loop_with_mcp(
1173    llm: Arc<dyn LLMProviderTrait>,
1174    initial_prompt: &str,
1175    system_prompt: &str,
1176    config: AgentLoopConfig,
1177    mcp_client: Option<Arc<RwLock<McpClient>>>,
1178) -> Result<String> {
1179    run_agent_loop_with_mcp_and_registry(
1180        llm,
1181        initial_prompt,
1182        system_prompt,
1183        config,
1184        mcp_client,
1185        None,
1186    )
1187    .await
1188}
1189
1190/// Run the agent loop with MCP tools and an optional pre-configured ToolRegistry
1191#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model()))]
1192pub async fn run_agent_loop_with_mcp_and_registry(
1193    llm: Arc<dyn LLMProviderTrait>,
1194    initial_prompt: &str,
1195    system_prompt: &str,
1196    config: AgentLoopConfig,
1197    mcp_client: Option<Arc<RwLock<McpClient>>>,
1198    tool_registry: Option<ToolRegistry>,
1199) -> Result<String> {
1200    // Initialize tool registry (use provided one or default)
1201    let mut registry = tool_registry.unwrap_or_else(ToolRegistry::with_default_tools);
1202
1203    // Register MCP tools if client is provided
1204    if let Some(client) = &mcp_client {
1205        match crate::mcp::register_mcp_tools(&mut registry, client.clone()).await {
1206            Ok(count) => {
1207                info!(count, "MCP tools registered");
1208            }
1209            Err(e) => {
1210                warn!(error = %e, "Failed to register MCP tools");
1211            }
1212        }
1213    }
1214
1215    let mcp_enabled = mcp_client.is_some();
1216    run_agent_loop_inner(
1217        llm,
1218        initial_prompt,
1219        system_prompt,
1220        config,
1221        registry,
1222        "MCP integration",
1223        mcp_enabled,
1224        Vec::new(),
1225    )
1226    .await
1227}
1228
1229/// Run the agent loop with MCP tools and multi-modal image input.
1230#[instrument(skip_all, fields(provider = %llm.provider_name(), model = %llm.model(), image_count = image_data_uris.len()))]
1231pub async fn run_agent_loop_with_mcp_and_images(
1232    llm: Arc<dyn LLMProviderTrait>,
1233    initial_prompt: &str,
1234    system_prompt: &str,
1235    config: AgentLoopConfig,
1236    mcp_client: Option<Arc<RwLock<McpClient>>>,
1237    tool_registry: Option<ToolRegistry>,
1238    image_data_uris: Vec<String>,
1239) -> Result<String> {
1240    let mut registry = tool_registry.unwrap_or_else(ToolRegistry::with_default_tools);
1241
1242    if let Some(client) = &mcp_client {
1243        match crate::mcp::register_mcp_tools(&mut registry, client.clone()).await {
1244            Ok(count) => {
1245                info!(count, "MCP tools registered");
1246            }
1247            Err(e) => {
1248                warn!(error = %e, "Failed to register MCP tools");
1249            }
1250        }
1251    }
1252
1253    let mcp_enabled = mcp_client.is_some();
1254    run_agent_loop_inner(
1255        llm,
1256        initial_prompt,
1257        system_prompt,
1258        config,
1259        registry,
1260        "MCP integration",
1261        mcp_enabled,
1262        image_data_uris,
1263    )
1264    .await
1265}
1266
1267/// Prompt the user for approval of a tool call via stdin.
1268///
1269/// Returns `true` if the user approved, `false` if denied.
1270/// If stdin is not a terminal (piped), auto-approves with a warning.
1271async fn prompt_for_approval(tool_name: &str, args: &serde_json::Value) -> bool {
1272    use std::io::{IsTerminal, Write};
1273
1274    let args_str = serde_json::to_string_pretty(args).unwrap_or_default();
1275
1276    // Check if stdin is a terminal
1277    if !std::io::stdin().is_terminal() {
1278        warn!(
1279            tool = %tool_name,
1280            "stdin is not a TTY — auto-approving tool call (use --require-approval only in interactive mode)"
1281        );
1282        return true;
1283    }
1284
1285    // Print the approval prompt to stderr so it doesn't interfere with stdout output
1286    eprintln!("\n⚠️  Tool requires approval:");
1287    eprintln!("   Tool: {}", tool_name);
1288    for line in args_str.lines() {
1289        eprintln!("   {}", line);
1290    }
1291    eprint!("   Approve? [y/N] ");
1292    std::io::stderr().flush().ok();
1293
1294    let mut input = String::new();
1295    match std::io::stdin().read_line(&mut input) {
1296        Ok(_) => {
1297            let trimmed = input.trim().to_lowercase();
1298            trimmed == "y" || trimmed == "yes"
1299        }
1300        Err(e) => {
1301            warn!(error = %e, "Failed to read approval input — denying by default");
1302            false
1303        }
1304    }
1305}
1306
/// Test-only twin of `prompt_for_approval` that takes the user's answer as a
/// string instead of reading stdin, so unit tests never block.
///
/// Prints the same approval banner to stderr, then returns `true` only when
/// `input`, trimmed and lower-cased, is exactly `y` or `yes`.
#[cfg(test)]
async fn prompt_for_approval_with_input(
    tool_name: &str,
    args: &serde_json::Value,
    input: &str,
) -> bool {
    use std::io::Write;

    let pretty_args = serde_json::to_string_pretty(args).unwrap_or_default();

    eprintln!("\n⚠️  Tool requires approval:");
    eprintln!("   Tool: {}", tool_name);
    pretty_args
        .lines()
        .for_each(|line| eprintln!("   {}", line));
    eprint!("   Approve? [y/N] ");
    std::io::stderr().flush().ok();

    let answer = input.trim().to_lowercase();
    matches!(answer.as_str(), "y" | "yes")
}
1330
1331/// Execute a parsed tool call with security integration
1332///
1333/// This function:
1334/// 1. Checks the tool call against PolicyEngine
1335/// 2. Logs the policy decision to AuditLog
1336/// 3. Prompts for human approval if required (HITL)
1337/// 4. Executes the tool (sandbox is applied at the tool implementation level for shell_exec)
1338/// 5. Logs the result to AuditLog
1339async fn execute_parsed_tool_call(
1340    tool_name: String,
1341    args: serde_json::Value,
1342    registry: &ToolRegistry,
1343    policy_engine: &PolicyEngine,
1344    _sandbox: &Sandbox,
1345    audit_log: &AuditLog,
1346    require_approval: bool,
1347) -> Option<ToolResult> {
1348    info!(tool = %tool_name, "Executing parsed tool call");
1349
1350    // Audit: tool call requested
1351    let _ = audit_log.tool_call(&tool_name, &args);
1352
1353    // Check if tool requires approval
1354    if require_approval && policy_engine.requires_approval(&tool_name) {
1355        let _ = audit_log.append(
1356            AuditEventType::ApprovalRequested,
1357            "approval",
1358            &format!("Approval required for tool: {}", tool_name),
1359            Some(serde_json::json!({"tool": tool_name, "args": args})),
1360        );
1361
1362        // Prompt user for approval via stdin
1363        let granted = prompt_for_approval(&tool_name, &args).await;
1364
1365        if !granted {
1366            let _ = audit_log.approval(&tool_name, false, Some("Denied by user"));
1367            warn!(tool = %tool_name, "Tool call denied by user");
1368            return Some(ToolResult {
1369                tool_name: tool_name.clone(),
1370                success: false,
1371                output: String::new(),
1372                error: Some(format!("Approval denied by user for tool: {}", tool_name)),
1373                exit_code: Some(-1),
1374                duration_ms: None,
1375            });
1376        }
1377
1378        let _ = audit_log.approval(&tool_name, true, Some("Approved by user"));
1379        info!(tool = %tool_name, "Tool call approved by user");
1380    }
1381
1382    // Check policy BEFORE execution
1383    let policy_decision = policy_engine.check_tool_call(&tool_name, &args);
1384
1385    // Audit: policy decision
1386    match &policy_decision {
1387        Decision::Allow => {
1388            let _ = audit_log.policy_decision(&tool_name, true, None);
1389        }
1390        Decision::Deny(reason) => {
1391            let _ = audit_log.policy_decision(&tool_name, false, Some(reason));
1392            warn!(tool = %tool_name, reason = %reason, "Tool call denied by policy");
1393            return Some(ToolResult {
1394                tool_name: tool_name.clone(),
1395                success: false,
1396                output: String::new(),
1397                error: Some(format!("Policy denied: {}", reason)),
1398                exit_code: Some(-1),
1399                duration_ms: None,
1400            });
1401        }
1402    }
1403
1404    // Execute tool
1405    let tool_name_clone = tool_name.clone();
1406    let call = ToolCall {
1407        name: tool_name.clone(),
1408        arguments: args,
1409        id: None,
1410    };
1411
1412    let result = match registry.execute(call).await {
1413        Ok(result) => {
1414            // Audit: tool result
1415            let _ = audit_log.append(
1416                AuditEventType::ToolResult,
1417                &tool_name_clone,
1418                &format!(
1419                    "Tool executed: {} (success: {})",
1420                    tool_name_clone, result.success
1421                ),
1422                Some(serde_json::json!({
1423                    "success": result.success,
1424                    "exit_code": result.exit_code,
1425                    "duration_ms": result.duration_ms,
1426                })),
1427            );
1428            result
1429        }
1430        Err(e) => {
1431            // Audit: error
1432            let _ = audit_log.append(
1433                AuditEventType::Error,
1434                &tool_name_clone,
1435                &format!("Tool execution failed: {}", e),
1436                None,
1437            );
1438            ToolResult {
1439                tool_name: tool_name_clone,
1440                success: false,
1441                output: String::new(),
1442                error: Some(e.to_string()),
1443                exit_code: Some(-1),
1444                duration_ms: None,
1445            }
1446        }
1447    };
1448
1449    Some(result)
1450}
1451
1452/// Execute a tool call with security integration (legacy pattern-matching fallback)
1453///
1454/// This function:
1455/// 1. Parses the tool call from the LLM response (legacy TOOL_CALL:/ARGS: format)
1456/// 2. Checks the tool call against PolicyEngine
1457/// 3. Logs the policy decision to AuditLog
1458/// 4. Executes the tool (sandbox is applied at the tool implementation level for shell_exec)
1459/// 5. Logs the result to AuditLog
1460async fn execute_tool_call_with_security(
1461    content: &str,
1462    registry: &ToolRegistry,
1463    policy_engine: &PolicyEngine,
1464    _sandbox: &Sandbox,
1465    audit_log: &AuditLog,
1466) -> Option<ToolResult> {
1467    // Parse tool call from content (legacy format)
1468    let (tool_name, args) = parse_tool_call(content)?;
1469
1470    // Delegate to the common execution logic
1471    execute_parsed_tool_call(
1472        tool_name,
1473        args,
1474        registry,
1475        policy_engine,
1476        _sandbox,
1477        audit_log,
1478        false, // legacy path — no approval prompt
1479    )
1480    .await
1481}
1482
1483/// Parse a tool call from LLM response content
1484/// Returns (tool_name, args) if found, None otherwise
1485/// Parse tool call from structured LLM response (OpenAI Tools format)
1486fn parse_structured_tool_call(choice: &Choice) -> Option<(String, serde_json::Value)> {
1487    let tool_calls = choice.tool_calls.as_ref()?;
1488    let first_call = tool_calls.first()?;
1489
1490    let tool_name = first_call.function.name.clone();
1491    let args: serde_json::Value = serde_json::from_str(&first_call.function.arguments).ok()?;
1492
1493    Some((tool_name, args))
1494}
1495
1496/// Parse tool call from legacy pattern-matching format (TOOL_CALL: / ARGS:)
1497fn parse_tool_call(content: &str) -> Option<(String, serde_json::Value)> {
1498    let mut lines = content.lines();
1499    let tool_call_line = lines.find(|l| l.trim().starts_with("TOOL_CALL:"))?;
1500
1501    let tool_name = tool_call_line
1502        .trim()
1503        .strip_prefix("TOOL_CALL:")
1504        .map(|s| s.trim())
1505        .filter(|s| !s.is_empty())?
1506        .to_string();
1507
1508    // Find the ARGS line
1509    let args_line = lines.find(|l| l.trim().starts_with("ARGS:"))?;
1510    let args_str = args_line.trim().strip_prefix("ARGS:").map(|s| s.trim())?;
1511
1512    let args: serde_json::Value = serde_json::from_str(args_str).ok()?;
1513
1514    Some((tool_name, args))
1515}
1516
1517/// Run a single autonomous agent (single-provider mode)
1518pub async fn run_single(
1519    llm: Arc<dyn LLMProviderTrait>,
1520    config: Config,
1521    ravenfabric: Option<RavenFabricClient>,
1522) -> Result<()> {
1523    info!(
1524        "Starting single agent mode with provider: {}",
1525        llm.provider_name()
1526    );
1527
1528    // Perform RavenFabric health check if configured
1529    if let Some(ref rf) = ravenfabric {
1530        if rf.is_enabled() {
1531            info!("RavenFabric remote execution available");
1532            match rf.health().await {
1533                Ok(true) => info!("RavenFabric mesh is healthy"),
1534                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1535                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1536            }
1537        }
1538    }
1539
1540    let system_prompt = &config.llm.system_prompt;
1541
1542    let messages = vec![
1543        ChatMessage::new("system", system_prompt.to_string()),
1544        ChatMessage::new("user", "Ready. Awaiting instructions."),
1545    ];
1546
1547    match llm.chat(messages).await {
1548        Ok(response) => {
1549            if let Some(choice) = response.choices.first() {
1550                info!(provider = llm.provider_name(), model = llm.model(), response = %choice.message.content, "Agent response received");
1551
1552                // Broadcast result to RavenFabric if configured
1553                if let Some(ref rf) = ravenfabric {
1554                    if rf.is_enabled() {
1555                        let preview = choice.message.content.chars().take(500).collect::<String>();
1556                        let _ = rf.broadcast(&preview, 30).await;
1557                        info!("Agent result broadcast to RavenFabric mesh");
1558                    }
1559                }
1560            }
1561        }
1562        Err(e) => {
1563            warn!(error = %e, provider = llm.provider_name(), "LLM request failed");
1564        }
1565    }
1566
1567    Ok(())
1568}
1569
1570/// Run multiple agents in swarm mode (single-provider) — v0.6
1571///
1572/// Swarm mode runs multiple agents in parallel, each working on the same task
1573/// with different approaches. Results are collected and compared.
1574pub async fn run_swarm(
1575    llm: Arc<dyn LLMProviderTrait>,
1576    config: Config,
1577    ravenfabric: Option<RavenFabricClient>,
1578) -> Result<()> {
1579    info!("Starting swarm mode (single-provider) — 3 parallel agents");
1580
1581    // Perform RavenFabric health check if configured
1582    if let Some(ref rf) = ravenfabric {
1583        if rf.is_enabled() {
1584            info!("RavenFabric remote execution available for swarm coordination");
1585            match rf.health().await {
1586                Ok(true) => info!("RavenFabric mesh is healthy"),
1587                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1588                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1589            }
1590        }
1591    }
1592
1593    let _system_prompt = &config.llm.system_prompt;
1594    let num_agents = 3;
1595    let mut handles = Vec::new();
1596
1597    // Spawn parallel agents with different personas
1598    let personas = [
1599        "You are an analytical agent. Focus on logic, structure, and precision.",
1600        "You are a creative agent. Focus on innovation, alternatives, and possibilities.",
1601        "You are a pragmatic agent. Focus on simplicity, efficiency, and practicality.",
1602    ];
1603
1604    for (i, persona) in personas.iter().enumerate().take(num_agents) {
1605        let llm_clone = llm.clone();
1606        let persona = persona.to_string();
1607        let task = "Analyze the given task and provide your solution.".to_string();
1608
1609        let handle = tokio::spawn(async move {
1610            let mut memory = ConversationMemory::new(&persona, 10);
1611            memory.add_user_message(&task);
1612
1613            let messages = memory.history().to_vec();
1614            match llm_clone.chat(messages).await {
1615                Ok(response) => {
1616                    let content = response
1617                        .choices
1618                        .first()
1619                        .map(|c| c.message.content.clone())
1620                        .unwrap_or_default();
1621                    Ok((i, content))
1622                }
1623                Err(e) => Err(format!("Agent {} failed: {}", i, e)),
1624            }
1625        });
1626
1627        handles.push(handle);
1628    }
1629
1630    // Collect results
1631    let mut results: Vec<(usize, String)> = Vec::new();
1632    for handle in handles {
1633        match handle.await {
1634            Ok(Ok((idx, result))) => {
1635                info!("Agent {} completed: {} chars", idx, result.len());
1636                results.push((idx, result));
1637            }
1638            Ok(Err(e)) => warn!("Agent failed: {}", e),
1639            Err(e) => warn!("Agent join failed: {}", e),
1640        }
1641    }
1642
1643    // Print swarm results
1644    println!("\n🐦‍⬛ Swarm Results ({} agents):", results.len());
1645    for (idx, result) in &results {
1646        println!(
1647            "\n── Agent {} ({}) ──",
1648            idx + 1,
1649            personas[*idx].split('.').next().unwrap_or("Unknown")
1650        );
1651        println!("{}", result);
1652    }
1653
1654    // Broadcast swarm results to RavenFabric if configured
1655    if let Some(ref rf) = ravenfabric {
1656        if rf.is_enabled() {
1657            let summary = format!(
1658                "Swarm completed: {} agents, results: {}",
1659                results.len(),
1660                results
1661                    .iter()
1662                    .map(|(i, r)| format!("Agent {}: {} chars", i, r.len()))
1663                    .collect::<Vec<_>>()
1664                    .join(", ")
1665            );
1666            let _ = rf.broadcast(&summary, 30).await;
1667            info!("Swarm results broadcast to RavenFabric mesh");
1668        }
1669    }
1670
1671    Ok(())
1672}
1673
1674/// Run supervisor agent coordinating sub-agents (single-provider) — v0.6
1675///
1676/// The supervisor decomposes a task into subtasks, spawns sub-agents for each,
1677/// and aggregates results. Uses the same LLM provider for all agents.
1678pub async fn run_supervisor(
1679    llm: Arc<dyn LLMProviderTrait>,
1680    config: Config,
1681    ravenfabric: Option<RavenFabricClient>,
1682) -> Result<()> {
1683    info!("Starting supervisor mode (single-provider)");
1684
1685    // Perform RavenFabric health check if configured
1686    if let Some(ref rf) = ravenfabric {
1687        if rf.is_enabled() {
1688            info!("RavenFabric remote execution available for supervisor coordination");
1689            match rf.health().await {
1690                Ok(true) => info!("RavenFabric mesh is healthy"),
1691                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1692                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1693            }
1694        }
1695    }
1696
1697    let system_prompt = &config.llm.system_prompt;
1698    let policy_engine = PolicyEngine::default_secure();
1699    let mut sandbox = Sandbox::default();
1700    sandbox.init().await.map_err(|e| {
1701        crate::error::RavenClawsError::CommandExecution(format!("Sandbox init failed: {}", e))
1702    })?;
1703    let audit_log = AuditLog::new(format!("supervisor-{}", std::process::id()));
1704    let registry = ToolRegistry::with_default_tools();
1705
1706    // Initial prompt to supervisor
1707    let supervisor_prompt = format!(
1708        "You are a supervisor agent. Your task is to decompose complex tasks into subtasks \
1709         and coordinate sub-agents to complete them. \
1710         \n\nFor each subtask, respond with:\n\
1711         SUBTASK: <description>\n\
1712         AGENT: <agent_number>\n\
1713         \nWhen all subtasks are complete, respond with:\n\
1714         FINAL: <aggregated result>\n\
1715         \nTask: {}",
1716        "Coordinate the completion of the assigned task."
1717    );
1718
1719    let mut memory = ConversationMemory::new(system_prompt, 20);
1720    memory.add_user_message(&supervisor_prompt);
1721
1722    let mut subtask_results: Vec<String> = Vec::new();
1723    let mut iteration = 0;
1724    let max_iterations = 15;
1725
1726    loop {
1727        iteration += 1;
1728        if iteration > max_iterations {
1729            warn!("Supervisor reached max iterations");
1730            break;
1731        }
1732
1733        let messages = memory.history().to_vec();
1734        let response = match llm.chat(messages).await {
1735            Ok(r) => r,
1736            Err(e) => {
1737                warn!(error = %e, "Supervisor LLM request failed");
1738                continue;
1739            }
1740        };
1741
1742        let content = response
1743            .choices
1744            .first()
1745            .map(|c| c.message.content.clone())
1746            .unwrap_or_default();
1747
1748        // Check for FINAL: completion
1749        if content.contains("FINAL:") {
1750            let final_response = content
1751                .split("FINAL:")
1752                .nth(1)
1753                .unwrap_or("")
1754                .trim()
1755                .to_string();
1756            info!("Supervisor completed task: {} chars", final_response.len());
1757
1758            let _ = audit_log.append(
1759                AuditEventType::AgentFinish,
1760                "supervisor",
1761                "Supervisor completed task coordination",
1762                Some(serde_json::json!({
1763                    "iterations": iteration,
1764                    "subtasks_completed": subtask_results.len(),
1765                })),
1766            );
1767
1768            println!("\n🐦‍⬛ Supervisor Result:\n{}", final_response);
1769            return Ok(());
1770        }
1771
1772        // Check for SUBTASK: decomposition
1773        if content.contains("SUBTASK:") {
1774            let subtask_block = content.split("SUBTASK:").nth(1).unwrap_or("");
1775            let subtask_lines: Vec<&str> = subtask_block.lines().take(3).collect();
1776
1777            let subtask_desc = subtask_lines.first().unwrap_or(&"").trim();
1778            let agent_num = subtask_lines
1779                .iter()
1780                .find(|l| l.starts_with("AGENT:"))
1781                .and_then(|l| l.split(':').nth(1))
1782                .unwrap_or("1")
1783                .trim();
1784
1785            if !subtask_desc.is_empty() {
1786                info!("Subtask {}: {}", agent_num, subtask_desc);
1787
1788                // Execute subtask
1789                let subtask_result = run_subtask_agent(
1790                    llm.clone(),
1791                    subtask_desc,
1792                    system_prompt,
1793                    &policy_engine,
1794                    &sandbox,
1795                    &audit_log,
1796                    &registry,
1797                )
1798                .await;
1799
1800                match subtask_result {
1801                    Ok(result) => {
1802                        info!("Subtask {} completed: {} chars", agent_num, result.len());
1803                        subtask_results.push(format!("Agent {} result: {}", agent_num, result));
1804
1805                        memory.add_assistant_message(&format!(
1806                            "Decomposed subtask {}: {}",
1807                            agent_num, subtask_desc
1808                        ));
1809                        memory
1810                            .add_user_message(&format!("Subtask {} result: {}", agent_num, result));
1811                    }
1812                    Err(e) => {
1813                        warn!("Subtask {} failed: {}", agent_num, e);
1814                        memory
1815                            .add_assistant_message(&format!("Subtask {} failed: {}", agent_num, e));
1816                    }
1817                }
1818            }
1819        } else {
1820            memory.add_assistant_message(&content);
1821        }
1822    }
1823
1824    // Fallback: return aggregated results
1825    if !subtask_results.is_empty() {
1826        let aggregated = subtask_results.join("\n\n");
1827        info!(
1828            "Supervisor aggregated {} subtask results",
1829            subtask_results.len()
1830        );
1831
1832        // Broadcast supervisor result to RavenFabric if configured
1833        if let Some(ref rf) = ravenfabric {
1834            if rf.is_enabled() {
1835                let summary = format!(
1836                    "Supervisor completed: {} subtasks, result: {} chars",
1837                    subtask_results.len(),
1838                    aggregated.len()
1839                );
1840                let _ = rf.broadcast(&summary, 30).await;
1841                info!("Supervisor result broadcast to RavenFabric mesh");
1842            }
1843        }
1844
1845        println!("\n🐦‍⬛ Supervisor Aggregated Result:\n{}", aggregated);
1846        return Ok(());
1847    }
1848
1849    Err(crate::error::RavenClawsError::CommandExecution(
1850        "Supervisor mode completed without results".to_string(),
1851    ))
1852}
1853
1854/// Run a subtask agent — helper for supervisor mode
1855async fn run_subtask_agent(
1856    llm: Arc<dyn LLMProviderTrait>,
1857    subtask: &str,
1858    system_prompt: &str,
1859    policy_engine: &PolicyEngine,
1860    sandbox: &Sandbox,
1861    audit_log: &AuditLog,
1862    registry: &ToolRegistry,
1863) -> Result<String> {
1864    let mut memory = ConversationMemory::new(system_prompt, 10);
1865    memory.add_user_message(&format!("Execute this subtask: {}", subtask));
1866
1867    for i in 0..5 {
1868        let messages = memory.history().to_vec();
1869        let response = match llm.chat(messages).await {
1870            Ok(r) => r,
1871            Err(e) => {
1872                warn!(error = %e, iteration = i, "Subtask agent LLM failed");
1873                continue;
1874            }
1875        };
1876
1877        let content = response
1878            .choices
1879            .first()
1880            .map(|c| c.message.content.clone())
1881            .unwrap_or_default();
1882
1883        if content.contains("FINAL:") || content.contains("DONE:") {
1884            return Ok(content
1885                .replace("FINAL:", "")
1886                .replace("DONE:", "")
1887                .trim()
1888                .to_string());
1889        }
1890
1891        // Try tool execution
1892        if let Some(tool_result) =
1893            execute_tool_call_with_security(&content, registry, policy_engine, sandbox, audit_log)
1894                .await
1895        {
1896            memory.add_assistant_message(&content);
1897            memory.add_user_message(&format!("Tool result: {}", tool_result.output));
1898        } else {
1899            memory.add_assistant_message(&content);
1900            memory.add_user_message("Continue with next step.");
1901        }
1902    }
1903
1904    Ok("Subtask completed".to_string())
1905}
1906
1907/// Run a single autonomous agent (multi-model mode)
1908pub async fn run_single_multi(
1909    multi_llm: MultiModelManager,
1910    config: Config,
1911    ravenfabric: Option<RavenFabricClient>,
1912) -> Result<()> {
1913    info!(
1914        "Starting single agent mode (multi-model) with {} providers",
1915        multi_llm.client_count()
1916    );
1917
1918    // Perform RavenFabric health check if configured
1919    if let Some(ref rf) = ravenfabric {
1920        if rf.is_enabled() {
1921            info!("RavenFabric remote execution available");
1922            match rf.health().await {
1923                Ok(true) => info!("RavenFabric mesh is healthy"),
1924                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1925                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1926            }
1927        }
1928    }
1929
1930    let system_prompt = &config.llm.system_prompt;
1931
1932    let messages = vec![
1933        ChatMessage::new("system", system_prompt.to_string()),
1934        ChatMessage::new("user", "Ready. Awaiting instructions."),
1935    ];
1936
1937    // Round-robin: start with first provider, then rotate
1938    let mut last_index = 0;
1939    for i in 0..multi_llm.client_count() {
1940        let client = if i == 0 {
1941            multi_llm.get_client(0)
1942        } else {
1943            multi_llm.next_client(last_index)
1944        };
1945
1946        if let Some(client) = client {
1947            match client.chat(messages.clone()).await {
1948                Ok(response) => {
1949                    if let Some(choice) = response.choices.first() {
1950                        info!(provider = client.provider_name(), model = client.model(), response = %choice.message.content, "Provider response received");
1951                    }
1952                }
1953                Err(e) => {
1954                    warn!(error = %e, provider = client.provider_name(), model = client.model(), "Provider request failed");
1955                }
1956            }
1957            last_index = i;
1958        }
1959    }
1960
1961    // Broadcast results to RavenFabric if configured
1962    if let Some(ref rf) = ravenfabric {
1963        if rf.is_enabled() {
1964            let _ = rf
1965                .broadcast("Single agent (multi-model) completed", 30)
1966                .await;
1967            info!("Multi-model result broadcast to RavenFabric mesh");
1968        }
1969    }
1970
1971    Ok(())
1972}
1973
1974/// Run multiple agents in swarm mode (multi-model) — v0.6
1975///
1976/// Swarm mode runs multiple agents in parallel, each using a different LLM provider
1977/// for the same task. Results are collected and compared for diversity.
1978pub async fn run_swarm_multi(
1979    multi_llm: MultiModelManager,
1980    config: Config,
1981    ravenfabric: Option<RavenFabricClient>,
1982) -> Result<()> {
1983    info!(
1984        "Starting swarm mode (multi-model) — {} parallel agents",
1985        multi_llm.client_count()
1986    );
1987
1988    // Perform RavenFabric health check if configured
1989    if let Some(ref rf) = ravenfabric {
1990        if rf.is_enabled() {
1991            info!("RavenFabric remote execution available for swarm coordination");
1992            match rf.health().await {
1993                Ok(true) => info!("RavenFabric mesh is healthy"),
1994                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
1995                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
1996            }
1997        }
1998    }
1999
2000    let _system_prompt = &config.llm.system_prompt;
2001    let num_agents = multi_llm.client_count().min(3); // Cap at 3 for cost control
2002    let mut handles = Vec::new();
2003
2004    // Different personas for each agent
2005    let personas = [
2006        "You are an analytical agent. Focus on logic, structure, and precision.",
2007        "You are a creative agent. Focus on innovation, alternatives, and possibilities.",
2008        "You are a pragmatic agent. Focus on simplicity, efficiency, and practicality.",
2009    ];
2010
2011    for i in 0..num_agents {
2012        let client = multi_llm.get_client(i).unwrap().clone();
2013        let persona = personas.get(i).unwrap_or(&personas[0]).to_string();
2014        let task = "Analyze the given task and provide your solution.".to_string();
2015
2016        let handle = tokio::spawn(async move {
2017            let mut memory = ConversationMemory::new(&persona, 10);
2018            memory.add_user_message(&task);
2019
2020            let messages = memory.history().to_vec();
2021            match client.chat(messages).await {
2022                Ok(response) => {
2023                    let content = response
2024                        .choices
2025                        .first()
2026                        .map(|c| c.message.content.clone())
2027                        .unwrap_or_default();
2028                    Ok((
2029                        i,
2030                        client.provider_name().to_string(),
2031                        client.model().to_string(),
2032                        content,
2033                    ))
2034                }
2035                Err(e) => Err(format!("Agent {} failed: {}", i, e)),
2036            }
2037        });
2038
2039        handles.push(handle);
2040    }
2041
2042    // Collect results
2043    let mut results: Vec<(usize, String, String, String)> = Vec::new();
2044    for handle in handles {
2045        match handle.await {
2046            Ok(Ok((idx, provider, model, result))) => {
2047                info!(
2048                    "Agent {} ({}:{}) completed: {} chars",
2049                    idx,
2050                    provider,
2051                    model,
2052                    result.len()
2053                );
2054                results.push((idx, provider, model, result));
2055            }
2056            Ok(Err(e)) => warn!("Agent failed: {}", e),
2057            Err(e) => warn!("Agent join failed: {}", e),
2058        }
2059    }
2060
2061    // Print swarm results
2062    println!(
2063        "\n🐦‍⬛ Swarm Results ({} agents, multi-model):",
2064        results.len()
2065    );
2066    for (idx, provider, model, result) in &results {
2067        println!("\n── Agent {} ({}:{}) ──", idx + 1, provider, model);
2068        println!("{}", result);
2069    }
2070
2071    // Broadcast swarm results to RavenFabric if configured
2072    if let Some(ref rf) = ravenfabric {
2073        if rf.is_enabled() {
2074            let summary = format!("Multi-model swarm completed: {} agents", results.len());
2075            let _ = rf.broadcast(&summary, 30).await;
2076            info!("Multi-model swarm results broadcast to RavenFabric mesh");
2077        }
2078    }
2079
2080    Ok(())
2081}
2082
2083/// Run supervisor agent coordinating sub-agents (multi-model) — v0.6
2084///
2085/// The supervisor decomposes a task and assigns subtasks to different providers
2086/// based on their strengths. Results are aggregated.
2087pub async fn run_supervisor_multi(
2088    multi_llm: MultiModelManager,
2089    config: Config,
2090    ravenfabric: Option<RavenFabricClient>,
2091) -> Result<()> {
2092    info!(
2093        "Starting supervisor mode (multi-model) with {} providers",
2094        multi_llm.client_count()
2095    );
2096
2097    // Perform RavenFabric health check if configured
2098    if let Some(ref rf) = ravenfabric {
2099        if rf.is_enabled() {
2100            info!("RavenFabric remote execution available for supervisor coordination");
2101            match rf.health().await {
2102                Ok(true) => info!("RavenFabric mesh is healthy"),
2103                Ok(false) => warn!("RavenFabric mesh returned unhealthy status"),
2104                Err(e) => warn!(error = %e, "RavenFabric health check failed"),
2105            }
2106        }
2107    }
2108
2109    let system_prompt = &config.llm.system_prompt;
2110    let policy_engine = PolicyEngine::default_secure();
2111    let mut sandbox = Sandbox::default();
2112    sandbox.init().await.map_err(|e| {
2113        crate::error::RavenClawsError::CommandExecution(format!("Sandbox init failed: {}", e))
2114    })?;
2115    let audit_log = AuditLog::new(format!("supervisor-multi-{}", std::process::id()));
2116    let registry = ToolRegistry::with_default_tools();
2117
2118    // Supervisor prompt with multi-model awareness
2119    let supervisor_prompt = format!(
2120        "You are a supervisor agent coordinating multiple LLM providers. \
2121         Decompose tasks and assign them to appropriate providers based on their strengths. \
2122         \n\nFor each subtask, respond with:\n\
2123         SUBTASK: <description>\n\
2124         PROVIDER: <provider_index 0-{}>\n\
2125         \nWhen complete, respond with:\n\
2126         FINAL: <aggregated result>\n\
2127         \nTask: {}",
2128        multi_llm.client_count() - 1,
2129        "Coordinate the completion of the assigned task using available providers."
2130    );
2131
2132    let mut memory = ConversationMemory::new(system_prompt, 20);
2133    memory.add_user_message(&supervisor_prompt);
2134
2135    let mut subtask_results: Vec<String> = Vec::new();
2136    let mut iteration = 0;
2137    let max_iterations = 15;
2138
2139    loop {
2140        iteration += 1;
2141        if iteration > max_iterations {
2142            warn!("Supervisor reached max iterations");
2143            break;
2144        }
2145
2146        // Use round-robin for supervisor itself
2147        let supervisor_client = multi_llm
2148            .get_client(iteration % multi_llm.client_count())
2149            .or_else(|| multi_llm.get_client(0))
2150            .cloned();
2151
2152        let messages = memory.history().to_vec();
2153        let response =
2154            match supervisor_client.map(|c| tokio::spawn(async move { c.chat(messages).await })) {
2155                Some(handle) => match handle.await {
2156                    Ok(Ok(r)) => r,
2157                    Ok(Err(e)) => {
2158                        warn!(error = %e, "Supervisor LLM request failed");
2159                        continue;
2160                    }
2161                    Err(e) => {
2162                        warn!(error = %e, "Supervisor task join failed");
2163                        continue;
2164                    }
2165                },
2166                None => {
2167                    warn!("No LLM clients available");
2168                    break;
2169                }
2170            };
2171
2172        let content = response
2173            .choices
2174            .first()
2175            .map(|c| c.message.content.clone())
2176            .unwrap_or_default();
2177
2178        // Check for FINAL: completion
2179        if content.contains("FINAL:") {
2180            let final_response = content
2181                .split("FINAL:")
2182                .nth(1)
2183                .unwrap_or("")
2184                .trim()
2185                .to_string();
2186            info!("Supervisor completed task: {} chars", final_response.len());
2187
2188            let _ = audit_log.append(
2189                AuditEventType::AgentFinish,
2190                "supervisor",
2191                "Supervisor completed task coordination",
2192                Some(serde_json::json!({
2193                    "iterations": iteration,
2194                    "subtasks_completed": subtask_results.len(),
2195                    "providers_used": multi_llm.client_count(),
2196                })),
2197            );
2198
2199            println!("\n🐦‍⬛ Supervisor Result (multi-model):\n{}", final_response);
2200            return Ok(());
2201        }
2202
2203        // Check for SUBTASK: decomposition
2204        if content.contains("SUBTASK:") && content.contains("PROVIDER:") {
2205            let subtask_block = content.split("SUBTASK:").nth(1).unwrap_or("");
2206            let subtask_lines: Vec<&str> = subtask_block.lines().take(4).collect();
2207
2208            let subtask_desc = subtask_lines.first().unwrap_or(&"").trim();
2209            let provider_idx = subtask_lines
2210                .iter()
2211                .find(|l| l.starts_with("PROVIDER:"))
2212                .and_then(|l| l.split(':').nth(1))
2213                .and_then(|s| s.trim().parse::<usize>().ok())
2214                .unwrap_or(0);
2215
2216            if !subtask_desc.is_empty() {
2217                info!("Subtask for provider {}: {}", provider_idx, subtask_desc);
2218
2219                let client = multi_llm
2220                    .get_client(provider_idx)
2221                    .or_else(|| multi_llm.get_client(0));
2222
2223                if let Some(client) = client {
2224                    let subtask_result = run_subtask_agent(
2225                        client.clone(),
2226                        subtask_desc,
2227                        system_prompt,
2228                        &policy_engine,
2229                        &sandbox,
2230                        &audit_log,
2231                        &registry,
2232                    )
2233                    .await;
2234
2235                    match subtask_result {
2236                        Ok(result) => {
2237                            info!("Subtask {} completed: {} chars", provider_idx, result.len());
2238                            subtask_results.push(format!(
2239                                "Provider {} ({}): {}",
2240                                provider_idx,
2241                                client.provider_name(),
2242                                result
2243                            ));
2244
2245                            memory.add_assistant_message(&format!(
2246                                "Assigned subtask to provider {}: {}",
2247                                provider_idx, subtask_desc
2248                            ));
2249                            memory.add_user_message(&format!(
2250                                "Provider {} result: {}",
2251                                provider_idx, result
2252                            ));
2253                        }
2254                        Err(e) => {
2255                            warn!("Subtask {} failed: {}", provider_idx, e);
2256                            memory.add_assistant_message(&format!(
2257                                "Provider {} subtask failed: {}",
2258                                provider_idx, e
2259                            ));
2260                        }
2261                    }
2262                }
2263            }
2264        } else {
2265            memory.add_assistant_message(&content);
2266        }
2267    }
2268
2269    // Fallback: return aggregated results
2270    if !subtask_results.is_empty() {
2271        let aggregated = subtask_results.join("\n\n");
2272        info!(
2273            "Supervisor aggregated {} subtask results",
2274            subtask_results.len()
2275        );
2276
2277        // Broadcast supervisor result to RavenFabric if configured
2278        if let Some(ref rf) = ravenfabric {
2279            if rf.is_enabled() {
2280                let summary = format!(
2281                    "Multi-model supervisor completed: {} subtasks, result: {} chars",
2282                    subtask_results.len(),
2283                    aggregated.len()
2284                );
2285                let _ = rf.broadcast(&summary, 30).await;
2286                info!("Multi-model supervisor result broadcast to RavenFabric mesh");
2287            }
2288        }
2289
2290        println!(
2291            "\n🐦‍⬛ Supervisor Aggregated Result (multi-model):\n{}",
2292            aggregated
2293        );
2294        return Ok(());
2295    }
2296
2297    Err(crate::error::RavenClawsError::CommandExecution(
2298        "Supervisor mode completed without results".to_string(),
2299    ))
2300}
2301
2302/// Run interactive REPL mode
2303pub async fn run_repl(llm: Arc<dyn LLMProviderTrait>, config: Config) -> Result<()> {
2304    use tokio::io::{AsyncBufReadExt, BufReader};
2305
2306    info!("Starting interactive REPL mode");
2307
2308    let system_prompt = &config.llm.system_prompt;
2309    let mut memory = ConversationMemory::new(system_prompt, 0);
2310
2311    let stdin = BufReader::new(tokio::io::stdin());
2312    let mut lines = stdin.lines();
2313
2314    println!("RavenClaws REPL — type /exit to quit, /reset to clear history");
2315
2316    loop {
2317        print!("\n> ");
2318        use tokio::io::AsyncWriteExt;
2319        tokio::io::stdout().flush().await?;
2320
2321        let line = match lines.next_line().await {
2322            Ok(Some(l)) => l,
2323            Ok(None) => break, // EOF
2324            Err(e) => {
2325                warn!(error = %e, "REPL read error");
2326                break;
2327            }
2328        };
2329
2330        let input = line.trim();
2331
2332        if input.is_empty() {
2333            continue;
2334        }
2335
2336        match input {
2337            "/exit" | "/quit" => {
2338                println!("Exiting REPL.");
2339                break;
2340            }
2341            "/reset" => {
2342                memory = ConversationMemory::new(system_prompt, 0);
2343                println!("Conversation history reset.");
2344                continue;
2345            }
2346            _ => {}
2347        }
2348
2349        memory.add_user_message(input);
2350        let messages = memory.history().to_vec();
2351
2352        match llm.chat(messages).await {
2353            Ok(response) => {
2354                if let Some(choice) = response.choices.first() {
2355                    let content = &choice.message.content;
2356                    println!("{}", content);
2357                    memory.add_assistant_message(content);
2358                }
2359            }
2360            Err(e) => {
2361                warn!(error = %e, "LLM request failed");
2362                println!("Error: {}", e);
2363            }
2364        }
2365    }
2366
2367    Ok(())
2368}
2369
2370#[cfg(test)]
2371mod tests {
2372    use super::*;
2373
2374    #[test]
2375    fn test_swarm_function_exists() {
2376        // Verify swarm function signature compiles
2377        let _fn_ptr: fn(Arc<dyn LLMProviderTrait>, Config, Option<RavenFabricClient>) -> _ =
2378            run_swarm;
2379    }
2380
2381    #[test]
2382    fn test_supervisor_function_exists() {
2383        // Verify supervisor function signature compiles
2384        let _fn_ptr: fn(Arc<dyn LLMProviderTrait>, Config, Option<RavenFabricClient>) -> _ =
2385            run_supervisor;
2386    }
2387
2388    #[test]
2389    fn test_conversation_memory_new() {
2390        let mem = ConversationMemory::new("system prompt", 10);
2391        assert_eq!(mem.messages.len(), 1);
2392        assert_eq!(mem.messages[0].role, "system");
2393        assert_eq!(mem.messages[0].content, "system prompt");
2394    }
2395
2396    #[test]
2397    fn test_conversation_memory_add_user() {
2398        let mut mem = ConversationMemory::new("system", 10);
2399        mem.add_user_message("hello");
2400        assert_eq!(mem.messages.len(), 2);
2401        assert_eq!(mem.messages[1].role, "user");
2402        assert_eq!(mem.messages[1].content, "hello");
2403    }
2404
2405    #[test]
2406    fn test_conversation_memory_trim() {
2407        let mut mem = ConversationMemory::new("system", 3);
2408        mem.add_user_message("msg1");
2409        mem.add_assistant_message("resp1");
2410        mem.add_user_message("msg2");
2411        mem.add_assistant_message("resp2");
2412        // Should trim to keep system + 2 messages
2413        assert!(mem.messages.len() <= 3);
2414    }
2415
2416    #[test]
2417    fn test_parse_tool_call_valid() {
2418        let content = "THOUGHT: I need to run a command\nTOOL_CALL: shell_exec\nARGS: {\"command\": \"echo hello\"}";
2419        let (name, args) = parse_tool_call(content).unwrap();
2420        assert_eq!(name, "shell_exec");
2421        assert_eq!(args["command"], "echo hello");
2422    }
2423
2424    #[test]
2425    fn test_parse_tool_call_missing_tool() {
2426        let content = "THOUGHT: no tool here";
2427        assert!(parse_tool_call(content).is_none());
2428    }
2429
2430    #[test]
2431    fn test_parse_tool_call_missing_args() {
2432        let content = "TOOL_CALL: shell_exec\nNo args line";
2433        assert!(parse_tool_call(content).is_none());
2434    }
2435
2436    #[test]
2437    fn test_parse_tool_call_invalid_json() {
2438        let content = "TOOL_CALL: shell_exec\nARGS: not valid json";
2439        assert!(parse_tool_call(content).is_none());
2440    }
2441
2442    #[test]
2443    fn test_agent_loop_config_default() {
2444        let config = AgentLoopConfig::default();
2445        assert_eq!(config.max_iterations, 10);
2446        assert!(!config.enable_tools);
2447        assert!(!config.require_approval);
2448    }
2449
2450    #[test]
2451    fn test_agent_loop_config_require_approval() {
2452        let config = AgentLoopConfig {
2453            max_iterations: 5,
2454            enable_tools: true,
2455            require_approval: true,
2456            prompt_injection_protection: true,
2457            token_lifetime_secs: 0,
2458            no_final_required: false,
2459            fallback_chain: None,
2460            token_budget: None,
2461            ravenfabric: None,
2462            checkpoint_dir: None,
2463            session_id: None,
2464            metrics_callback: None,
2465            load_manager: None,
2466            retry_config: None,
2467        };
2468        assert_eq!(config.max_iterations, 5);
2469        assert!(config.enable_tools);
2470        assert!(config.require_approval);
2471        assert!(config.prompt_injection_protection);
2472        assert_eq!(config.token_lifetime_secs, 0);
2473    }
2474
2475    #[test]
2476    fn test_prompt_for_approval_yes() {
2477        let args = serde_json::json!({"command": "echo hello"});
2478        let result = tokio_test::block_on(prompt_for_approval_with_input("shell_exec", &args, "y"));
2479        assert!(result, "Should approve for 'y'");
2480    }
2481
2482    #[test]
2483    fn test_prompt_for_approval_yes_full() {
2484        let args = serde_json::json!({"command": "echo hello"});
2485        let result =
2486            tokio_test::block_on(prompt_for_approval_with_input("shell_exec", &args, "yes"));
2487        assert!(result, "Should approve for 'yes'");
2488    }
2489
2490    #[test]
2491    fn test_prompt_for_approval_no() {
2492        let args = serde_json::json!({"command": "echo hello"});
2493        let result = tokio_test::block_on(prompt_for_approval_with_input("shell_exec", &args, "n"));
2494        assert!(!result, "Should deny for 'n'");
2495    }
2496
2497    #[test]
2498    fn test_prompt_for_approval_no_full() {
2499        let args = serde_json::json!({"command": "echo hello"});
2500        let result =
2501            tokio_test::block_on(prompt_for_approval_with_input("shell_exec", &args, "no"));
2502        assert!(!result, "Should deny for 'no'");
2503    }
2504
2505    #[test]
2506    fn test_prompt_for_approval_empty() {
2507        let args = serde_json::json!({"command": "echo hello"});
2508        let result = tokio_test::block_on(prompt_for_approval_with_input("shell_exec", &args, ""));
2509        assert!(!result, "Should deny for empty input (default N)");
2510    }
2511
2512    #[test]
2513    fn test_prompt_for_approval_uppercase() {
2514        let args = serde_json::json!({"command": "echo hello"});
2515        let result = tokio_test::block_on(prompt_for_approval_with_input("shell_exec", &args, "Y"));
2516        assert!(result, "Should approve for uppercase 'Y'");
2517    }
2518
2519    #[test]
2520    fn test_prompt_for_approval_auto_approves_non_tty() {
2521        // When stdin is not a TTY (e.g., piped), prompt_for_approval auto-approves.
2522        // This test is only meaningful in CI/non-TTY environments.
2523        // In a TTY (interactive terminal), this test is skipped because it would
2524        // block waiting for stdin input.
2525        // We verify the behavior by checking the function signature compiles.
2526        #[allow(clippy::let_underscore_future)]
2527        let _ = prompt_for_approval_with_input("test", &serde_json::json!({}), "y");
2528    }
2529
2530    #[test]
2531    fn test_execute_parsed_tool_call_skips_approval_when_not_required() {
2532        let registry = ToolRegistry::with_default_tools();
2533        let policy_engine = PolicyEngine::default_secure();
2534        let sandbox = Sandbox::default();
2535        let audit_log = AuditLog::new("test-session".to_string());
2536
2537        let args = serde_json::json!({"command": "echo hello"});
2538        let result = tokio_test::block_on(execute_parsed_tool_call(
2539            "shell_exec".to_string(),
2540            args,
2541            &registry,
2542            &policy_engine,
2543            &sandbox,
2544            &audit_log,
2545            false, // require_approval = false
2546        ));
2547
2548        assert!(result.is_some());
2549        let tool_result = result.unwrap();
2550        assert_eq!(tool_result.tool_name, "shell_exec");
2551    }
2552
2553    #[test]
2554    fn test_execute_parsed_tool_call_approval_not_needed_for_read_only_tools() {
2555        // read_file does not require approval per policy, so even with
2556        // require_approval=true, it should execute without prompting
2557        let registry = ToolRegistry::with_default_tools();
2558        let policy_engine = PolicyEngine::default_secure();
2559        let sandbox = Sandbox::default();
2560        let audit_log = AuditLog::new("test-session".to_string());
2561
2562        let args = serde_json::json!({"path": "/tmp/test.txt"});
2563        let result = tokio_test::block_on(execute_parsed_tool_call(
2564            "read_file".to_string(),
2565            args,
2566            &registry,
2567            &policy_engine,
2568            &sandbox,
2569            &audit_log,
2570            true, // require_approval = true
2571        ));
2572
2573        // read_file doesn't require approval, so it should proceed
2574        assert!(result.is_some());
2575        let tool_result = result.unwrap();
2576        assert_eq!(tool_result.tool_name, "read_file");
2577    }
2578
2579    #[test]
2580    fn test_agent_loop_config_token_lifetime_zero_disabled() {
2581        let config = AgentLoopConfig {
2582            max_iterations: 10,
2583            enable_tools: false,
2584            require_approval: false,
2585            prompt_injection_protection: false,
2586            token_lifetime_secs: 0,
2587            no_final_required: false,
2588            fallback_chain: None,
2589            token_budget: None,
2590            ravenfabric: None,
2591            checkpoint_dir: None,
2592            session_id: None,
2593            metrics_callback: None,
2594            load_manager: None,
2595            retry_config: None,
2596        };
2597        assert_eq!(config.token_lifetime_secs, 0);
2598        // 0 means unlimited — no timeout enforced
2599    }
2600
2601    #[test]
2602    fn test_agent_loop_config_token_lifetime_nonzero() {
2603        let config = AgentLoopConfig {
2604            max_iterations: 10,
2605            enable_tools: false,
2606            require_approval: false,
2607            prompt_injection_protection: false,
2608            token_lifetime_secs: 3600,
2609            no_final_required: false,
2610            fallback_chain: None,
2611            token_budget: None,
2612            ravenfabric: None,
2613            checkpoint_dir: None,
2614            session_id: None,
2615            metrics_callback: None,
2616            load_manager: None,
2617            retry_config: None,
2618        };
2619        assert_eq!(config.token_lifetime_secs, 3600);
2620    }
2621
2622    #[test]
2623    fn test_agent_loop_config_default_includes_token_lifetime() {
2624        let config = AgentLoopConfig::default();
2625        assert_eq!(config.token_lifetime_secs, 0);
2626    }
2627
2628    #[test]
2629    fn test_agent_loop_config_retry_config_default_none() {
2630        let config = AgentLoopConfig::default();
2631        assert!(config.retry_config.is_none());
2632    }
2633
2634    #[test]
2635    fn test_agent_loop_config_retry_config_custom() {
2636        let config = AgentLoopConfig {
2637            max_iterations: 10,
2638            enable_tools: false,
2639            require_approval: false,
2640            prompt_injection_protection: false,
2641            token_lifetime_secs: 0,
2642            no_final_required: false,
2643            fallback_chain: None,
2644            token_budget: None,
2645            ravenfabric: None,
2646            checkpoint_dir: None,
2647            session_id: None,
2648            metrics_callback: None,
2649            load_manager: None,
2650            retry_config: Some(RetryConfig {
2651                max_retries: 5,
2652                base_delay_ms: 50,
2653                max_delay_ms: 5000,
2654                jitter: 0.1,
2655            }),
2656        };
2657        assert!(config.retry_config.is_some());
2658        assert_eq!(config.retry_config.as_ref().unwrap().max_retries, 5);
2659        assert_eq!(config.retry_config.as_ref().unwrap().base_delay_ms, 50);
2660    }
2661
2662    #[test]
2663    fn test_retry_config_delay_calculation() {
2664        let config = RetryConfig {
2665            max_retries: 3,
2666            base_delay_ms: 100,
2667            max_delay_ms: 10000,
2668            jitter: 0.0, // No jitter for deterministic test
2669        };
2670
2671        // Attempt 0: 100ms * 2^0 = 100ms
2672        let d0 = config.delay_for_attempt(0);
2673        assert_eq!(d0.as_millis(), 100);
2674
2675        // Attempt 1: 100ms * 2^1 = 200ms
2676        let d1 = config.delay_for_attempt(1);
2677        assert_eq!(d1.as_millis(), 200);
2678
2679        // Attempt 2: 100ms * 2^2 = 400ms
2680        let d2 = config.delay_for_attempt(2);
2681        assert_eq!(d2.as_millis(), 400);
2682    }
2683
2684    #[test]
2685    fn test_retry_config_delay_capped() {
2686        let config = RetryConfig {
2687            max_retries: 10,
2688            base_delay_ms: 1000,
2689            max_delay_ms: 5000,
2690            jitter: 0.0, // No jitter for deterministic test
2691        };
2692
2693        // Attempt 5: 1000ms * 2^5 = 32000ms, capped at 5000ms
2694        let d5 = config.delay_for_attempt(5);
2695        assert_eq!(d5.as_millis(), 5000);
2696    }
2697
2698    #[test]
2699    fn test_retry_config_delay_with_jitter() {
2700        let config = RetryConfig {
2701            max_retries: 3,
2702            base_delay_ms: 100,
2703            max_delay_ms: 10000,
2704            jitter: 0.5,
2705        };
2706
2707        // With jitter, delay should be within [base, base*2 + jitter_range]
2708        let d = config.delay_for_attempt(0);
2709        assert!(d.as_millis() >= 100);
2710        // Max possible: 100 + 50 = 150
2711        assert!(d.as_millis() <= 150);
2712    }
2713}
ravenclaws/agent.rs

ravenclaws/
agent.rs