// collet 0.1.1 - Docs.rs

use std::collections::{HashMap, HashSet};

use futures::future::join_all;
use tokio::time::timeout;

use crate::agent::approval::ApprovalResponse;
use crate::agent::guard::{AgentGuard, StopReason};
use crate::agent::journal::Journal;
use crate::api::models::Message;
use crate::tools::registry;

use super::iter::{
    GuardOutcome, StreamIterationOutput, StreamIterationResult, apply_guard_verdict,
    build_chat_request, build_tool_defs, drain_worker_instructions, finish_with_done,
    inject_discovered_schemas, run_cli_fast_path, run_stream_iteration,
};
use super::perf::PerfTracker;
use super::{AgentEvent, AgentParams, SwarmParams};

/// Wrapper that calls run_loop_with_mcp without swarm context.
pub async fn run_loop(params: AgentParams) {
    run_loop_with_mcp(params, None).await;
}

/// Inner agent loop that optionally reuses swarm-shared state.
///
/// When `swarm` is `Some`, its MCP manager is reused instead of
/// spawning new MCP server processes. This is used by subagents to share
/// the parent's connections (serialized via `Arc<Mutex<McpClient>>`).
/// Shutdown is skipped when reusing shared connections. The swarm's shared
/// knowledge, tool index, skill registry, and instruction receiver are
/// picked up as well when present.
pub async fn run_loop_with_mcp(params: AgentParams, swarm: Option<SwarmParams>) {
    let AgentParams {
        client,
        config,
        mut context,
        user_msg,
        working_dir,
        event_tx,
        cancel,
        lsp_manager,
        trust_level,
        approval_gate,
        images,
    } = params;

    let (
        shared_mcp,
        shared_knowledge,
        shared_tool_index,
        shared_skill_registry,
        mut instruction_rx,
    ) = if let Some(s) = swarm {
        (
            Some(s.mcp_manager),
            s.shared_knowledge,
            s.shared_tool_index,
            s.shared_skill_registry,
            s.instruction_rx,
        )
    } else {
        (None, None, None, None, None)
    };
    let journal = Journal::new(&working_dir);

    let effective_msg = user_msg.clone();

    // Process images through the vision fallback chain when present.
    let user_content = if images.is_empty() {
        crate::api::Content::text(effective_msg)
    } else {
        let result = crate::api::vision::process_images(
            effective_msg,
            images,
            &client.model,
            shared_mcp.as_ref(),
            &working_dir,
        )
        .await;
        tracing::debug!(method = ?result.method, "Image processing method selected");
        let _ = event_tx.send(AgentEvent::ImageNotice {
            notice: result.notice,
            install_hint: result.install_hint,
        });
        result.content
    };

    tracing::debug!(
        has_images = user_content.has_images(),
        "User content prepared"
    );
    context.push(Message {
        role: "user".to_string(),
        content: Some(user_content),
        reasoning_content: None,
        tool_calls: None,
        tool_call_id: None,
    });

    journal
        .write_status(0, "starting", &user_msg, &[], "")
        .await;

    let mut guard = AgentGuard::new(
        config.max_iterations,
        config.circuit_breaker_threshold,
        config.tool_timeout_secs,
        config.task_timeout_secs,
    );
    if let Some(budget) = config.iteration_budget.clone() {
        guard = guard.with_budget(budget);
        if let Some(b) = guard.budget() {
            tracing::debug!(
                max_iterations = b.get(),
                "Agent guard: shared iteration budget attached"
            );
        }
    }

    // Reuse shared SkillRegistry if provided, otherwise discover from filesystem.
    let skill_registry = if let Some(existing) = shared_skill_registry {
        existing
    } else {
        std::sync::Arc::new(crate::skills::SkillRegistry::discover(
            std::path::Path::new(&working_dir),
        ))
    };
    let has_skills = skill_registry.count() > 0;

    // ── Plugin discovery (once per session) ────────────────────────────────
    let plugin_mgr = crate::plugin::PluginManager::discover();
    let plugin_hook_runtime = if plugin_mgr.count() > 0 {
        Some(plugin_mgr.all_hooks())
    } else {
        None
    };

    // ── Plugin hooks: SessionStart ──────────────────────────────────────────
    if let Some(ref hook_runtime) = plugin_hook_runtime {
        if hook_runtime.has_hooks(crate::plugin::hooks::HookEvent::SessionStart) {
            let wd = std::path::Path::new(&working_dir);
            let ctx = crate::plugin::hooks::HookContext::simple(wd);
            let results = hook_runtime
                .fire(crate::plugin::hooks::HookEvent::SessionStart, &ctx)
                .await;
            for action in &results {
                if let crate::plugin::hooks::HookAction::Error(e) = action {
                    tracing::warn!(error = %e, "Plugin SessionStart hook error");
                }
            }
        }

        // Inject plugin context (CLAUDE.md) into system prompt.
        // Cap at 8 KiB and strip control characters to limit prompt-injection surface.
        const MAX_PLUGIN_CONTEXT_BYTES: usize = 8 * 1024;
        for plugin in plugin_mgr.all() {
            if let Some(ref context_file) = plugin.context_file
                && let Ok(raw) = std::fs::read_to_string(context_file)
            {
                if raw.is_empty() {
                    continue;
                }
                // Strip control chars (keep newlines/tabs); truncate to byte cap.
                let filtered: String = raw
                    .chars()
                    .filter(|c| !c.is_control() || *c == '\n' || *c == '\t')
                    .collect();
                let sanitized: String = if filtered.len() <= MAX_PLUGIN_CONTEXT_BYTES {
                    filtered
                } else {
                    // Find the last valid UTF-8 char boundary at or before the byte limit.
                    let boundary = (0..=MAX_PLUGIN_CONTEXT_BYTES.min(filtered.len()))
                        .rev()
                        .find(|&i| filtered.is_char_boundary(i))
                        .unwrap_or(0);
                    filtered[..boundary].to_string()
                };
                let safe_name = if plugin
                    .name
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.')
                {
                    plugin.name.as_str()
                } else {
                    tracing::warn!(plugin_name = %plugin.name, "Plugin name contains unsafe characters; skipping context injection");
                    continue;
                };
                let marker = format!("<!-- plugin:{safe_name} -->");
                if !context.system_prompt().contains(&marker) {
                    let plugin_context = format!(
                        "\n{marker}\n## Plugin Context: {safe_name}\n{sanitized}\n<!-- /plugin:{safe_name} -->"
                    );
                    context.append_system_prompt(&plugin_context);
                }
            }
        }
    }

    // Inject skill metadata into system prompt (Level 1 — always loaded).
    // Guard against duplicate injection when context is reused across calls.
    if let Some(skill_meta) = skill_registry.system_prompt_metadata()
        && !context.system_prompt().contains("## Available Skills")
    {
        context.append_system_prompt(&skill_meta);
    }

    // Wrap shared immutable state in Arc so parallel tool futures get pointer
    // clones instead of deep copies of Config and SkillRegistry each iteration.
    let config = std::sync::Arc::new(config);

    // Performance tracker for debug monitor
    let mut perf = PerfTracker::new();

    // ── MCP server connections ──────────────────────────────────────────────
    // Subagents reuse the parent's MCP connections via `shared_mcp` to avoid
    // re-spawning server processes (~45s saved per subagent). The existing
    // `Arc<Mutex<McpClient>>` serializes concurrent requests safely.
    let owns_mcp = shared_mcp.is_none();
    let mcp_manager = if let Some(existing) = shared_mcp {
        // Reuse parent's connections — inject overview if not already present.
        let overview = existing.server_overview();
        if !overview.is_empty() && !context.system_prompt().contains("## MCP Servers") {
            context.append_system_prompt(&overview);
        }
        existing
    } else {
        let mgr = crate::mcp::manager::McpManager::connect_all(&working_dir).await;
        if mgr.server_count() > 0 {
            tracing::info!(
                servers = mgr.server_count(),
                tools = mgr.total_tool_count(),
                deferred = mgr.is_deferred_mode(),
                "MCP servers initialized"
            );
        }

        // Inject MCP overview into system prompt (server descriptions + instructions).
        let mcp_overview = mgr.server_overview();
        if !mcp_overview.is_empty() && !context.system_prompt().contains("## MCP Servers") {
            context.append_system_prompt(&mcp_overview);
        }
        // Report MCP child PIDs for debug memory monitoring.
        let pids = mgr.child_pids().await;
        if !pids.is_empty() {
            let _ = event_tx.send(AgentEvent::McpPids { pids });
        }
        std::sync::Arc::new(mgr)
    };

    // ── Build RAG manager (tool-based, no auto-injection) ──────────────────
    let rag_manager: Option<std::sync::Arc<crate::rag::RagManager>> = config
        .rag
        .as_ref()
        .and_then(crate::rag::RagManager::from_config);

    // ── Reuse shared BM25 tool index or build a new one ───────────────────
    let tool_index = if let Some(existing) = shared_tool_index {
        existing
    } else {
        let mut idx = crate::tools::tool_index::ToolIndex::new();
        // Use reindex_* variants so that the public maintenance API is exercised
        // and remains reachable for future hot-reload paths.
        idx.reindex_mcp_tools(&mcp_manager);
        idx.reindex_skills(&skill_registry);
        idx.reindex_agents(&config.agents);
        if idx.entry_count() > 0 {
            tracing::info!(entries = idx.entry_count(), "Tool index built");
        }
        std::sync::Arc::new(idx)
    };

    // Check tool support: ProviderEntry override → model_profile fallback.
    let model_supports_tools = config
        .supports_tools
        .unwrap_or_else(|| client.supports_tools());
    tracing::debug!(
        model = client.model_name(),
        context_window = client.context_window(),
        reasoning = client.supports_reasoning(),
        tools = model_supports_tools,
        edit_strategy = ?crate::api::model_profile::edit_strategy_for(client.model_name()),
        "Provider capabilities"
    );
    if !model_supports_tools {
        tracing::info!(model = %client.model, "Model does not support tools, skipping tool definitions");
    }

    let mut tool_defs = build_tool_defs(
        trust_level,
        has_skills,
        rag_manager.is_some(),
        model_supports_tools,
        &mcp_manager,
        &tool_index,
        &user_msg,
    );

    // ── CLI provider fast-path ─────────────────────────────────────────────
    // When the active provider is a CLI agent (e.g. `claude -p`), bypass the
    // HTTP streaming loop entirely. CLI agents are autonomous — they handle
    // tool calls, iterations, and reasoning internally. We spawn the process,
    // capture stdout, and return the result as a single Response event.
    if let Some(cli_binary) = config.cli.clone() {
        run_cli_fast_path(
            cli_binary,
            config.cli_args.clone(),
            config.cli_yolo_args.clone(),
            config.cli_model_env.clone(),
            config.cli_skip_model,
            config.cli_yolo_env.clone(),
            config.yolo,
            user_msg,
            working_dir,
            client,
            cancel,
            event_tx,
            context,
            journal,
            mcp_manager,
            owns_mcp,
            config.task_timeout_secs,
            config.max_iterations,
            config.cli_max_turns_flag.clone(),
        )
        .await;
        return;
    }

    // Track the reason the loop stopped (None = normal completion).
    #[allow(unused_assignments)]
    let mut final_stop_reason: Option<StopReason> = None;
    // Total tool calls executed across all iterations of this agent run.
    // Used to distinguish "early clarification (no tools yet)" from
    // "final summary after completing work" — see text-only response handling below.
    let mut total_tool_calls_made: u32 = 0;
    // Number of times we've injected the [SYSTEM] continue prompt without the
    // LLM responding with any tool calls. Capped at 1 to prevent infinite
    // repetition when the model keeps responding with completion text only.
    let mut continue_injections: u32 = 0;

    // Pause flag for instruction-based pause/resume.
    let mut paused = false;

    loop {
        // Check cancellation
        if cancel.is_cancelled() {
            let _ = event_tx.send(AgentEvent::GuardStop("Cancelled by user.".to_string()));
            final_stop_reason = Some(StopReason::Cancelled);
            break;
        }

        // Drain worker instructions (swarm mode only).
        if let Some(ref mut rx) = instruction_rx
            && let Some(stop) =
                drain_worker_instructions(rx, &mut paused, &mut context, &cancel, &event_tx).await
        {
            final_stop_reason = Some(stop);
            break;
        }

        // Log remaining token budget once per iteration (trace-level, cheap).
        tracing::trace!(
            budget_remaining = context.budget_remaining(),
            "Context token budget"
        );

        // Check guard rails
        if let GuardOutcome::Stop(reason) =
            apply_guard_verdict(guard.check(), &mut guard, &mut context, &event_tx)
        {
            final_stop_reason = Some(reason);
            break;
        }

        guard.tick();

        journal
            .write_status(guard.iteration(), "calling_llm", &user_msg, &[], "")
            .await;

        // Pre-submit compaction: threshold check before building the request so
        // we never send an over-budget context even if the previous push didn't
        // cross the trigger (e.g., heuristic underestimated the API prompt size).
        // Session-level tool approvals live in `approval_gate` outside this
        // context, so they are preserved across compaction automatically.
        if context.maybe_compact() {
            let _ = event_tx.send(AgentEvent::PhaseChange {
                label: "Context compacted".to_string(),
            });
            // Re-inject decision log into system prompt so implementation
            // continuity is preserved across the compaction boundary — mirrors
            // the post-tool-result compaction path below.
            let decisions = context.decision_log();
            if !decisions.is_empty() {
                // Single-allocation formatting instead of per-decision format! + join.
                let mut decisions_text = String::with_capacity(decisions.len() * 60);
                decisions_text
                    .push_str("## Implementation History (preserved across compaction)\n\n");
                for d in decisions {
                    decisions_text.push_str("- ");
                    decisions_text.push_str(d);
                    decisions_text.push('\n');
                }
                context.append_system_prompt(&decisions_text);
            }
        }

        // Build per-iteration request with model-profile gating applied.
        let request = build_chat_request(&client, &config, &mut context, tool_defs.clone());

        // Stream LLM response with retry. On exit signal, run cleanup once.
        let StreamIterationOutput {
            full_content,
            reasoning_content,
            tool_calls,
            turn_prompt_tokens,
            turn_completion_tokens,
            turn_cached_tokens,
            api_elapsed_ms,
        } = match run_stream_iteration(&client, &request, &config, &cancel, &event_tx).await {
            StreamIterationResult::Ok(out) => out,
            StreamIterationResult::Exit => {
                finish_with_done(context, &journal, &mcp_manager, owns_mcp, &event_tx, None).await;
                return;
            }
        };

        perf.record_api_latency(api_elapsed_ms);

        // Feed actual prompt token count back into context for accurate
        // compaction decisions and TUI budget bar display.
        if turn_prompt_tokens > 0 {
            context.update_actual_tokens(turn_prompt_tokens);
        }

        // Report status — context_tokens reflects the accurate post-update value.
        let _ = event_tx.send(AgentEvent::Status {
            iteration: guard.iteration(),
            elapsed_secs: guard.elapsed().as_secs(),
            prompt_tokens: turn_prompt_tokens,
            completion_tokens: turn_completion_tokens,
            cached_tokens: turn_cached_tokens,
            context_tokens: context.used_tokens(),
        });

        // Preserve reasoning content for thought continuity
        if !reasoning_content.is_empty() {
            context.set_last_reasoning(reasoning_content);
        }

        // Text response with no tool calls.
        //
        // If the context still has headroom (< 80% used) and iterations remain,
        // inject a [SYSTEM] continue prompt so the agent keeps working rather
        // than stopping prematurely.  This prevents the common pattern where the
        // LLM emits an intermediate summary text and halts before the task is
        // actually complete.
        //
        // When context is nearly full or no iterations remain, exit normally.
        if tool_calls.is_empty() {
            let has_text = !full_content.is_empty();
            if has_text {
                let _ = event_tx.send(AgentEvent::Response(full_content.clone()));
                context.push(Message {
                    role: "assistant".to_string(),
                    content: Some(crate::api::Content::text(full_content)),
                    reasoning_content: None,
                    tool_calls: None,
                    tool_call_id: None,
                });
            }

            let ctx_ratio = context.used_tokens() as f64 / config.context_max_tokens as f64;
            let iterations_left = guard.max_iterations().saturating_sub(guard.iteration());
            // Only inject "continue" when the agent hasn't called any tools yet
            // (i.e., the LLM gave an early clarifying text before starting work).
            // If tools have already been executed, a text-only response = the agent
            // is summarising / finishing — inject would cause infinite repetition.
            if has_text
                && total_tool_calls_made == 0
                && continue_injections == 0
                && ctx_ratio < 0.80
                && iterations_left > 1
            {
                // Prompt the LLM to continue if work remains.
                // Limited to one injection: if the model responds again with only
                // text (no tools), it is summarising/finishing — stop rather than
                // looping indefinitely.
                continue_injections += 1;
                context.push(Message {
                    role: "user".to_string(),
                    content: Some(crate::api::Content::text(
                        "[SYSTEM] If the task is not fully complete, continue working. \
                         Use tools to complete any remaining steps.",
                    )),
                    reasoning_content: None,
                    tool_calls: None,
                    tool_call_id: None,
                });
                continue; // re-enter the iteration loop
            }

            journal.write_completion().await;
            let _ = event_tx.send(AgentEvent::Done {
                context,
                stop_reason: None,
            });
            return;
        }

        // Record assistant message with tool calls
        let api_tool_calls: Vec<crate::api::models::ToolCall> = tool_calls
            .iter()
            .map(|(id, name, args)| crate::api::models::ToolCall {
                id: id.clone(),
                call_type: "function".to_string(),
                function: crate::api::models::FunctionCall {
                    name: name.clone(),
                    arguments: args.clone(),
                },
            })
            .collect();

        context.push(Message {
            role: "assistant".to_string(),
            content: if full_content.is_empty() {
                None
            } else {
                Some(crate::api::Content::text(full_content))
            },
            reasoning_content: None,
            tool_calls: Some(api_tool_calls),
            tool_call_id: None,
        });

        // ── Parallel tool execution ────────────────────────────────────────
        // 1. Send all ToolCall events first so the TUI shows all pending tools
        //    before any execution starts.
        for (id, name, args) in &tool_calls {
            let _ = event_tx.send(AgentEvent::ToolCall {
                name: name.clone(),
                args: args.clone(),
                call_id: Some(id.clone()),
            });
        }

        // 1b. Plugin hooks: PreToolUse — plugins can block dangerous commands.
        let mut plugin_blocked_tools: HashSet<String> = HashSet::new();
        if let Some(ref hook_runtime) = plugin_hook_runtime
            && hook_runtime.has_hooks(crate::plugin::hooks::HookEvent::PreToolUse)
        {
            for (_, name, args) in &tool_calls {
                let ctx = crate::plugin::hooks::HookContext::pre_tool_use(
                    name,
                    args,
                    std::path::Path::new(&working_dir),
                );
                let results = hook_runtime
                    .fire(crate::plugin::hooks::HookEvent::PreToolUse, &ctx)
                    .await;
                for action in results {
                    if let crate::plugin::hooks::HookAction::Block(reason) = action {
                        tracing::warn!(
                            tool = %name,
                            reason = %reason,
                            "Plugin blocked tool execution"
                        );
                        plugin_blocked_tools.insert(name.clone());
                    }
                }
            }
        }
        let plugin_blocked_tools = std::sync::Arc::new(plugin_blocked_tools);

        // 1c. Approval gate — check each tool before execution.
        //     In Yolo mode this is a no-op. In Auto/Manual mode,
        //     the gate sends ApprovalRequired events and waits for responses.
        let mut approved_tools: Vec<bool> = Vec::with_capacity(tool_calls.len());
        let mut approve_all = false;
        for (_, name, args) in &tool_calls {
            if approve_all {
                approved_tools.push(true);
                continue;
            }
            let _ = event_tx.send(AgentEvent::ApprovalRequired {
                tool_name: name.clone(),
                tool_args: args.clone(),
            });
            let response = approval_gate.check(name, args, &working_dir).await;
            match response {
                ApprovalResponse::Approve => {
                    approved_tools.push(true);
                }
                ApprovalResponse::ApproveAll => {
                    approved_tools.push(true);
                    approve_all = true;
                }
                ApprovalResponse::Deny => {
                    let _ = event_tx.send(AgentEvent::ApprovalDenied {
                        tool_name: name.clone(),
                    });
                    approved_tools.push(false);
                }
            }
        }

        // 2. Build one future per tool; join_all preserves input order so
        //    tool results are pushed to context in the same order as tool_calls
        //    (required by the OpenAI-compatible API).
        let tool_timeout = guard.tool_timeout();
        // Clone system prompt once for the entire tool batch.
        let sys_prompt = context.system_prompt().to_string();

        // Two-phase execution: when tool_search is in the same batch as MCP
        // tools, execute tool_search FIRST so its discovered schemas are
        // available for the MCP calls. This eliminates the previous 1-turn
        // waste where MCP calls were blocked and the LLM had to retry.
        let tool_search_indices: Vec<usize> = tool_calls
            .iter()
            .enumerate()
            .filter(|(_, (_, name, _))| name == "tool_search")
            .map(|(i, _)| i)
            .collect();
        let has_mcp_in_batch = tool_calls
            .iter()
            .any(|(_, name, _)| name.starts_with("mcp__") && name != "tool_search");
        let needs_two_phase = !tool_search_indices.is_empty() && has_mcp_in_batch;

        // Phase 1 results: index → (result_str, success, elapsed_ms).
        let mut phase1_results: HashMap<usize, (String, bool, u64)> = HashMap::new();
        if needs_two_phase {
            // Phase 1: execute tool_search calls to discover schemas.
            for &ts_idx in &tool_search_indices {
                let (ref _id, ref name, ref args) = tool_calls[ts_idx];
                let is_approved = approved_tools.get(ts_idx).copied().unwrap_or(false);
                if !is_approved {
                    phase1_results.insert(ts_idx, ("Tool denied by user".to_string(), false, 0));
                    continue;
                }
                let dispatch_ctx = registry::DispatchContext {
                    skill_registry: std::sync::Arc::clone(&skill_registry),
                    client: client.clone(),
                    config: std::sync::Arc::clone(&config),
                    system_prompt: sys_prompt.clone(),
                    lsp_manager: lsp_manager.clone(),
                    mcp_manager: std::sync::Arc::clone(&mcp_manager),
                    tool_index: std::sync::Arc::clone(&tool_index),
                    rag_manager: rag_manager.clone(),
                    shared_knowledge: shared_knowledge.clone(),
                };
                let tool_start = std::time::Instant::now();
                let result = tokio::select! {
                    _ = cancel.cancelled() => Err(crate::common::AgentError::Internal("Cancelled".to_string())),
                    r = timeout(
                        tool_timeout,
                        registry::dispatch_with_context(name, args, &working_dir, &dispatch_ctx),
                    ) => match r {
                        Ok(Ok(out)) => Ok(out),
                        Ok(Err(e)) => Err(e),
                        Err(_) => Err(crate::common::AgentError::Timeout(tool_timeout.as_secs())),
                    }
                };
                let elapsed_ms = tool_start.elapsed().as_millis() as u64;
                let (result_str, success) = match result {
                    Ok(s) => {
                        inject_discovered_schemas(&s, &mut tool_defs);
                        (s, true)
                    }
                    Err(e) => {
                        let error_msg = format!("Error: {e}");
                        (
                            crate::tools::error_hint::format_with_hint(name, &error_msg),
                            false,
                        )
                    }
                };
                phase1_results.insert(ts_idx, (result_str, success, elapsed_ms));
            }
            tracing::debug!(
                "Two-phase: tool_search completed, MCP schemas injected for remaining calls"
            );
        }

        let tool_futs: Vec<_> = tool_calls
            .iter()
            .enumerate()
            .map(|(idx, (id, name, args))| {
                let is_approved = approved_tools.get(idx).copied().unwrap_or(false);
                let dispatch_ctx = registry::DispatchContext {
                    skill_registry: std::sync::Arc::clone(&skill_registry),
                    client: client.clone(),
                    config: std::sync::Arc::clone(&config),
                    system_prompt: sys_prompt.clone(),
                    lsp_manager: lsp_manager.clone(),
                    mcp_manager: std::sync::Arc::clone(&mcp_manager),
                    tool_index: std::sync::Arc::clone(&tool_index),
                    rag_manager: rag_manager.clone(),
                    shared_knowledge: shared_knowledge.clone(),
                };
                let id = id.clone();
                let name = name.clone();
                let args = args.clone();
                let working_dir = working_dir.clone();
                let cancel = cancel.clone();
                let phase1 = phase1_results.get(&idx).cloned();
                let plugin_blocked_tools = std::sync::Arc::clone(&plugin_blocked_tools);
                async move {
                    // Use Phase 1 result for tool_search calls already executed.
                    if let Some((result_str, success, elapsed_ms)) = phase1 {
                        return (id, name, args, result_str, success, elapsed_ms);
                    }
                    if plugin_blocked_tools.contains(&name) {
                        return (id, name, args, "Tool execution blocked by plugin hook".to_string(), false, 0u64);
                    }
                    if !is_approved {
                        return (id, name, args, "Tool denied by user".to_string(), false, 0u64);
                    }
                    if cancel.is_cancelled() {
                        return (id, name, args, "Cancelled".to_string(), false, 0u64);
                    }
                    let tool_start = std::time::Instant::now();
                    let result = tokio::select! {
                        _ = cancel.cancelled() => Err(crate::common::AgentError::Internal("Cancelled".to_string())),
                        r = timeout(
                            tool_timeout,
                            registry::dispatch_with_context(&name, &args, &working_dir, &dispatch_ctx),
                        ) => {
                            match r {
                                Ok(Ok(out)) => Ok(out),
                                Ok(Err(e)) => Err(e),
                                Err(_) => Err(crate::common::AgentError::Timeout(
                                    tool_timeout.as_secs()
                                )),
                            }
                        }
                    };
                    let elapsed_ms = tool_start.elapsed().as_millis() as u64;
                    let (result_str, success) = match result {
                        Ok(s) => {
                            // Detect empty-result MCP search tools and attach a hint
                            // so the LLM tries a broader query rather than giving up.
                            let enriched = if name.starts_with("mcp__")
                                && crate::tools::error_hint::is_empty_result(&s)
                            {
                                format!(
                                    "{s}\n\n[Hint]: The search returned no results. \
                                     Try broader or alternative keywords, or use a \
                                     discovery tool to verify available resources."
                                )
                            } else {
                                s
                            };
                            (enriched, true)
                        }
                        Err(e) => {
                            // S6: Enrich error with guided recovery hint
                            let error_msg = format!("Error: {e}");
                            let enriched = crate::tools::error_hint::format_with_hint(&name, &error_msg);
                            (enriched, false)
                        }
                    };
                    (id, name, args, result_str, success, elapsed_ms)
                }
            })
            .collect();

        // 3. Execute all futures concurrently.
        let batch_total = tool_futs.len();
        let _ = event_tx.send(AgentEvent::ToolBatchProgress {
            running: batch_total,
            total: batch_total,
        });
        let results = join_all(tool_futs).await;
        let _ = event_tx.send(AgentEvent::ToolBatchProgress {
            running: 0,
            total: batch_total,
        });

        // 4. Process results in original order.
        total_tool_calls_made += tool_calls.len() as u32;
        let mut executed_tools: Vec<(String, String)> = Vec::new();
        for (id, name, args, result_str, success, elapsed_ms) in results {
            perf.record_tool_call(&name, elapsed_ms, success);

            if success {
                guard.record_success();
            } else {
                guard.record_failure();
                crate::telemetry::track(
                    "tool_error",
                    serde_json::json!({
                        "tool": &name,
                        "model": &config.model,
                    }),
                );
            }

            executed_tools.push((name.clone(), args.clone()));

            // When tool_search returns results, inject discovered MCP tool
            // schemas into the tools array so the LLM can call them next turn.
            if success && name == "tool_search" {
                inject_discovered_schemas(&result_str, &mut tool_defs);
            }

            // Notify about file modifications for repo map invalidation
            if success
                && (name == "file_write" || name == "file_edit" || name == "git_patch")
                && let Ok(parsed) = serde_json::from_str::<serde_json::Value>(&args)
                && let Some(path) = parsed.get("path").and_then(|p| p.as_str())
            {
                let _ = event_tx.send(AgentEvent::FileModified {
                    path: path.to_string(),
                });
            }

            let _ = event_tx.send(AgentEvent::ToolResult {
                name: name.clone(),
                result: result_str.clone(),
                success,
                call_id: Some(id.clone()),
            });

            // Plugin hooks: PostToolUse (async — non-blocking)
            if let Some(ref hook_runtime) = plugin_hook_runtime
                && hook_runtime.has_hooks(crate::plugin::hooks::HookEvent::PostToolUse)
            {
                let tool_name = name.clone();
                // Limit captured result size to avoid copying large tool outputs.
                let tool_result = result_str.chars().take(4096).collect::<String>();
                let wd = working_dir.clone();
                let hook_runtime = hook_runtime.clone();
                let cancel_clone = cancel.clone();
                tokio::spawn(async move {
                    tokio::select! {
                        _ = cancel_clone.cancelled() => {}
                        _ = async {
                            let ctx = crate::plugin::hooks::HookContext::post_tool_use(
                                &tool_name,
                                &tool_result,
                                success,
                                std::path::Path::new(&wd),
                            );
                            let results = hook_runtime
                                .fire(crate::plugin::hooks::HookEvent::PostToolUse, &ctx)
                                .await;
                            for action in &results {
                                if let crate::plugin::hooks::HookAction::Error(e) = action {
                                    tracing::warn!(error = %e, "Plugin PostToolUse hook error");
                                }
                            }
                        } => {}
                    }
                });
            }

            // Truncate large tool results to save context window space.
            let original_len = result_str.len();
            let result_str = registry::truncate_tool_result(&result_str, None);
            if result_str.len() < original_len {
                let _ = event_tx.send(AgentEvent::ToolResultTruncated {
                    tool_name: name.clone(),
                    original_bytes: original_len,
                    truncated_bytes: result_str.len(),
                });
            }

            // Add tool result to context (may trigger compaction)
            let before_tokens = context.used_tokens();
            let compacted = context.push_and_report(Message {
                role: "tool".to_string(),
                content: Some(crate::api::Content::text(result_str)),
                reasoning_content: None,
                tool_calls: None,
                tool_call_id: Some(id),
            });
            if compacted {
                let after_tokens = context.used_tokens();
                let _ = event_tx.send(AgentEvent::CompactionDone {
                    before_tokens,
                    after_tokens,
                });
                // After compaction, re-inject the decision log into the system prompt
                // so implementation continuity is never lost across compaction events.
                let decisions = context.decision_log();
                if !decisions.is_empty() {
                    let mut section = String::with_capacity(decisions.len() * 60);
                    section.push_str("## Implementation History (preserved across compaction)\n\n");
                    for d in decisions {
                        section.push_str("- ");
                        section.push_str(d);
                        section.push('\n');
                    }
                    if !context
                        .system_prompt()
                        .contains("## Implementation History")
                    {
                        context.append_system_prompt(&section);
                    }
                }
            }
        }

        // Record file-modification decisions to preserve implementation continuity
        // across compaction events (decision_log is never compacted away).
        for (name, args) in &executed_tools {
            if matches!(name.as_str(), "file_write" | "file_edit") {
                // Extract file path from args JSON (best-effort).
                if let Ok(v) = serde_json::from_str::<serde_json::Value>(args)
                    && let Some(path) = v
                        .get("file_path")
                        .or(v.get("path"))
                        .and_then(|p| p.as_str())
                {
                    context.record_decision(format!("Modified: {path}"));
                }
            }
        }

        // Journal after tool execution
        journal
            .write_status(
                guard.iteration(),
                "tools_executed",
                &user_msg,
                &executed_tools
                    .iter()
                    .map(|(n, a)| (n.as_str(), a.as_str()))
                    .collect::<Vec<_>>(),
                "",
            )
            .await;

        // Record iteration tokens for perf tracker
        perf.record_iteration(turn_prompt_tokens as u64 + turn_completion_tokens as u64);

        // Send performance update
        let _ = event_tx.send(perf.build_event());

        // Courtesy delay between iterations. The 429 retry logic provides its
        // own exponential backoff, so this only needs to be minimal (~50 ms).
        // Configurable via `iteration_delay_ms` in config.toml or COLLET_ITERATION_DELAY_MS.
        if config.iteration_delay_ms > 0 {
            tokio::time::sleep(std::time::Duration::from_millis(config.iteration_delay_ms)).await;
        }

        if cancel.is_cancelled() {
            final_stop_reason = Some(StopReason::Cancelled);
            break;
        }
    }

    // Shut down MCP servers only if we own them (not shared from parent).
    if owns_mcp {
        mcp_manager.shutdown_all().await;
    }

    // ── Plugin hooks: Stop ─────────────────────────────────────────────────
    // Fire before the final Done event so plugins can send notifications,
    // run final validation, or trigger CI.
    if let Some(ref hook_runtime) = plugin_hook_runtime
        && hook_runtime.has_hooks(crate::plugin::hooks::HookEvent::Stop)
    {
        let reason_str = final_stop_reason
            .as_ref()
            .map(|r| r.to_string())
            .unwrap_or_else(|| "completed".to_string());
        let ctx = crate::plugin::hooks::HookContext::stop(
            &reason_str,
            std::path::Path::new(&working_dir),
        );
        let results = hook_runtime
            .fire(crate::plugin::hooks::HookEvent::Stop, &ctx)
            .await;
        for action in &results {
            if let crate::plugin::hooks::HookAction::Error(e) = action {
                tracing::warn!(error = %e, "Plugin Stop hook error");
            }
        }
    }

    // ── Plugin hooks: SessionEnd ───────────────────────────────────────────
    if let Some(ref hook_runtime) = plugin_hook_runtime
        && hook_runtime.has_hooks(crate::plugin::hooks::HookEvent::SessionEnd)
    {
        let ctx = crate::plugin::hooks::HookContext::simple(std::path::Path::new(&working_dir));
        let results = hook_runtime
            .fire(crate::plugin::hooks::HookEvent::SessionEnd, &ctx)
            .await;
        for action in &results {
            if let crate::plugin::hooks::HookAction::Error(e) = action {
                tracing::warn!(error = %e, "Plugin SessionEnd hook error");
            }
        }
    }

    // Reached here via guard break or cancellation — hand the context and
    // stop reason back to the caller through the final Done event.
    journal.write_completion().await;
    let _ = event_tx.send(AgentEvent::Done {
        context,
        stop_reason: final_stop_reason,
    });
}