Skip to main content

syncable_cli/agent/
mod.rs

1//! Agent module for interactive AI-powered CLI assistance
2//!
3//! This module provides an agent layer using the Rig library that allows users
4//! to interact with the CLI through natural language conversations.
5//!
6//! # Features
7//!
8//! - **Conversation History**: Maintains context across multiple turns
9//! - **Automatic Compaction**: Compresses old history when token count exceeds threshold
10//! - **Tool Tracking**: Records tool calls for better context preservation
11//!
12//! # Usage
13//!
14//! ```bash
15//! # Interactive mode
16//! sync-ctl chat
17//!
18//! # With specific provider
19//! sync-ctl chat --provider openai --model gpt-5.2
20//!
21//! # Single query
22//! sync-ctl chat --query "What security issues does this project have?"
23//! ```
24//!
25//! # Interactive Commands
26//!
27//! - `/model` - Switch to a different AI model
28//! - `/provider` - Switch provider (prompts for API key if needed)
29//! - `/help` - Show available commands
30//! - `/clear` - Clear conversation history
31//! - `/exit` - Exit the chat
32
33pub mod commands;
34pub mod compact;
35pub mod history;
36pub mod ide;
37pub mod persistence;
38pub mod prompts;
39pub mod session;
40pub mod tools;
41pub mod ui;
42use colored::Colorize;
43use commands::TokenUsage;
44use history::{ConversationHistory, ToolCallRecord};
45use ide::IdeClient;
46use rig::{
47    client::{CompletionClient, ProviderClient},
48    completion::Prompt,
49    providers::{anthropic, openai},
50};
51use session::{ChatSession, PlanMode};
52use std::path::Path;
53use std::sync::Arc;
54use tokio::sync::Mutex as TokioMutex;
55use ui::{ResponseFormatter, ToolDisplayHook};
56
/// Provider type for the agent
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ProviderType {
    /// OpenAI (default provider).
    #[default]
    OpenAI,
    /// Anthropic direct API.
    Anthropic,
    /// Anthropic models via AWS Bedrock.
    Bedrock,
}

impl std::fmt::Display for ProviderType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Lowercase names match the strings accepted by `FromStr`.
        let name = match self {
            Self::OpenAI => "openai",
            Self::Anthropic => "anthropic",
            Self::Bedrock => "bedrock",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for ProviderType {
    type Err = String;

    /// Parse a provider name case-insensitively; `aws` and `aws-bedrock`
    /// are accepted aliases for Bedrock.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        match normalized.as_str() {
            "openai" => Ok(Self::OpenAI),
            "anthropic" => Ok(Self::Anthropic),
            "bedrock" | "aws" | "aws-bedrock" => Ok(Self::Bedrock),
            // Report the caller's original (non-lowercased) input.
            _ => Err(format!(
                "Unknown provider: {}. Use: openai, anthropic, or bedrock",
                s
            )),
        }
    }
}
91
/// Error types for the agent
#[derive(Debug, thiserror::Error)]
pub enum AgentError {
    /// No API key found for the selected provider; the payload is the
    /// name of the environment variable the user must set.
    #[error("Missing API key. Set {0} environment variable.")]
    MissingApiKey(String),

    /// An error surfaced by the LLM provider or the AG-UI server layer.
    #[error("Provider error: {0}")]
    ProviderError(String),

    /// An error raised while executing one of the agent's tools.
    #[error("Tool error: {0}")]
    ToolError(String),
}

/// Convenience alias for results that fail with [`AgentError`].
pub type AgentResult<T> = Result<T, AgentError>;
106
107// =============================================================================
108// AG-UI State Types
109// =============================================================================
110
/// Agent state for AG-UI state synchronization
///
/// Serialized to JSON and pushed to connected frontends as a state
/// snapshot (see `build_agent_state` and the `emit_state_snapshot` call
/// in the interactive loop).
#[derive(Debug, Clone, serde::Serialize)]
pub struct AgentState {
    /// Project being analyzed (display form of the session's project path)
    pub project_path: String,
    /// LLM provider name (lowercase, e.g. "openai")
    pub provider: String,
    /// Model being used
    pub model: String,
    /// Whether plan mode is active
    pub plan_mode: bool,
    /// Token usage statistics
    pub token_usage: TokenUsageState,
    /// Conversation state
    pub conversation: ConversationState,
}

/// Token usage state for AG-UI
#[derive(Debug, Clone, serde::Serialize)]
pub struct TokenUsageState {
    /// Estimated input (prompt) tokens
    pub input_tokens: usize,
    /// Estimated output (completion) tokens
    pub output_tokens: usize,
    /// Total tokens (input + output)
    pub total_tokens: usize,
}

/// Conversation state for AG-UI
#[derive(Debug, Clone, serde::Serialize)]
pub struct ConversationState {
    /// Number of conversation turns
    pub turn_count: usize,
    /// Whether history has been compacted
    pub has_compacted: bool,
}
147
148/// Build AgentState from session and conversation history
149fn build_agent_state(session: &ChatSession, history: &ConversationHistory) -> AgentState {
150    // Check if history has been compacted (status contains "compacted")
151    let has_compacted = history.status().contains("compacted");
152    let input = session.token_usage.prompt_tokens as usize;
153    let output = session.token_usage.completion_tokens as usize;
154
155    AgentState {
156        project_path: session.project_path.display().to_string(),
157        provider: session.provider.to_string(),
158        model: session.model.clone(),
159        plan_mode: session.plan_mode.is_planning(),
160        token_usage: TokenUsageState {
161            input_tokens: input,
162            output_tokens: output,
163            total_tokens: input + output,
164        },
165        conversation: ConversationState {
166            turn_count: history.turn_count(),
167            has_compacted,
168        },
169    }
170}
171
172/// Get the system prompt for the agent based on query type and plan mode
173fn get_system_prompt(project_path: &Path, query: Option<&str>, plan_mode: PlanMode) -> String {
174    // In planning mode, use the read-only exploration prompt
175    if plan_mode.is_planning() {
176        return prompts::get_planning_prompt(project_path);
177    }
178
179    if let Some(q) = query {
180        // First check if it's a code development task (highest priority)
181        if prompts::is_code_development_query(q) {
182            return prompts::get_code_development_prompt(project_path);
183        }
184        // Then check if it's DevOps generation (Docker, Terraform, Helm)
185        if prompts::is_generation_query(q) {
186            return prompts::get_devops_prompt(project_path, Some(q));
187        }
188    }
189    // Default to analysis prompt
190    prompts::get_analysis_prompt(project_path)
191}
192
193/// Run the agent as a dedicated AG-UI server (headless mode for containers/deployments).
194///
195/// This starts the AG-UI server without interactive stdin, accepting connections
196/// from frontends via SSE or WebSocket. The agent processes messages received
197/// through the AG-UI protocol.
198///
199/// # Arguments
200///
201/// * `project_path` - Path to the project directory
202/// * `provider` - LLM provider to use
203/// * `model` - Optional model override
204/// * `host` - Host address to bind to
205/// * `port` - Port number to listen on
206pub async fn run_agent_server(
207    project_path: &Path,
208    provider: ProviderType,
209    model: Option<String>,
210    host: &str,
211    port: u16,
212) -> AgentResult<()> {
213    use crate::server::{AgUiConfig, AgUiServer, ProcessorConfig};
214
215    // Configure the agent processor with provider, model, and project path
216    // Use regional model IDs (no global. prefix) for wider availability
217    let default_model = match provider {
218        // Claude 3.5 Sonnet v2 is widely available across regions
219        ProviderType::Bedrock => "anthropic.claude-3-5-sonnet-20241022-v2:0".to_string(),
220        ProviderType::Anthropic => "claude-3-5-sonnet-20241022".to_string(),
221        ProviderType::OpenAI => "gpt-4o".to_string(),
222    };
223    let processor_config = ProcessorConfig::new()
224        .with_provider(&provider.to_string())
225        .with_model(&model.unwrap_or(default_model))
226        .with_project_path(project_path);
227
228    let config = AgUiConfig::new()
229        .port(port)
230        .host(host)
231        .with_processor_config(processor_config);
232    let server = AgUiServer::new(config);
233
234    println!("AG-UI agent server listening on http://{}:{}", host, port);
235    println!("Project path: {}", project_path.display());
236    println!("Connect frontends via SSE (/sse) or WebSocket (/ws)");
237    println!("Press Ctrl+C to stop the server");
238
239    // Run server (blocks until shutdown signal)
240    server
241        .run()
242        .await
243        .map_err(|e| AgentError::ProviderError(e.to_string()))
244}
245
246/// Run the agent in interactive mode with custom REPL supporting /model and /provider commands
247pub async fn run_interactive(
248    project_path: &Path,
249    provider: ProviderType,
250    model: Option<String>,
251    event_bridge: Option<crate::server::EventBridge>,
252) -> AgentResult<()> {
253    use tools::*;
254
255    let mut session = ChatSession::new(project_path, provider, model);
256
257    // Store event bridge for use in tool hooks
258    let event_bridge = event_bridge;
259
260    // Shared background process manager for Prometheus port-forwards
261    let bg_manager = Arc::new(BackgroundProcessManager::new());
262
263    // Terminal layout for split screen is disabled for now - see notes below
264    // let terminal_layout = ui::TerminalLayout::new();
265    // let layout_state = terminal_layout.state();
266
267    // Initialize conversation history with compaction support
268    let mut conversation_history = ConversationHistory::new();
269
270    // Initialize IDE client for native diff viewing
271    let ide_client: Option<Arc<TokioMutex<IdeClient>>> = {
272        let mut client = IdeClient::new().await;
273        if client.is_ide_available() {
274            match client.connect().await {
275                Ok(()) => {
276                    println!(
277                        "{} Connected to {} IDE companion",
278                        "✓".green(),
279                        client.ide_name().unwrap_or("VS Code")
280                    );
281                    Some(Arc::new(TokioMutex::new(client)))
282                }
283                Err(e) => {
284                    // IDE detected but companion not running or connection failed
285                    println!("{} IDE companion not connected: {}", "!".yellow(), e);
286                    None
287                }
288            }
289        } else {
290            println!(
291                "{} No IDE detected (TERM_PROGRAM={})",
292                "·".dimmed(),
293                std::env::var("TERM_PROGRAM").unwrap_or_default()
294            );
295            None
296        }
297    };
298
299    // Load API key from config file to env if not already set
300    ChatSession::load_api_key_to_env(session.provider);
301
302    // Check if API key is configured, prompt if not
303    if !ChatSession::has_api_key(session.provider) {
304        ChatSession::prompt_api_key(session.provider)?;
305    }
306
307    session.print_banner();
308
309    // Display platform context if a project is selected
310    if session.platform_session.is_project_selected() {
311        println!(
312            "{}",
313            format!(
314                "Platform context: {}",
315                session.platform_session.display_context()
316            )
317            .dimmed()
318        );
319    }
320
321    // NOTE: Terminal layout with ANSI scroll regions is disabled for now.
322    // The scroll region approach conflicts with the existing input/output flow.
323    // TODO: Implement proper scroll region support that integrates with the input handler.
324    // For now, we rely on the pause/resume mechanism in progress indicator.
325    //
326    // if let Err(e) = terminal_layout.init() {
327    //     eprintln!(
328    //         "{}",
329    //         format!("Note: Terminal layout initialization failed: {}. Using fallback mode.", e)
330    //             .dimmed()
331    //     );
332    // }
333
334    // Raw Rig messages for multi-turn - preserves Reasoning blocks for thinking
335    // Our ConversationHistory only stores text summaries, but rig needs full Message structure
336    let mut raw_chat_history: Vec<rig::completion::Message> = Vec::new();
337
338    // Pending input for auto-continue after plan creation
339    let mut pending_input: Option<String> = None;
340    // Auto-accept mode for plan execution (skips write confirmations)
341    let mut auto_accept_writes = false;
342
343    // Initialize session recorder for conversation persistence
344    let mut session_recorder = persistence::SessionRecorder::new(project_path);
345
346    // Track if we exit due to an error (for AG-UI error events)
347    let mut exit_error: Option<String> = None;
348
349    // Emit AG-UI RunStarted event and initial state for connected frontends
350    if let Some(ref bridge) = event_bridge {
351        bridge.start_run().await;
352        // Emit initial agent state snapshot
353        let state = build_agent_state(&session, &conversation_history);
354        if let Ok(state_json) = serde_json::to_value(&state) {
355            bridge.emit_state_snapshot(state_json).await;
356        }
357    }
358
359    loop {
360        // Show conversation status if we have history
361        if !conversation_history.is_empty() {
362            println!(
363                "{}",
364                format!("  💬 Context: {}", conversation_history.status()).dimmed()
365            );
366        }
367
368        // Check for pending input (from plan menu selection)
369        let input = if let Some(pending) = pending_input.take() {
370            // Show what we're executing
371            println!("{} {}", "→".cyan(), pending.dimmed());
372            pending
373        } else {
374            // New user turn - reset auto-accept mode from previous plan execution
375            auto_accept_writes = false;
376
377            // Read user input (returns InputResult)
378            let input_result = match session.read_input() {
379                Ok(result) => result,
380                Err(_) => break,
381            };
382
383            // Handle the input result
384            match input_result {
385                ui::InputResult::Submit(text) => ChatSession::process_submitted_text(&text),
386                ui::InputResult::Cancel | ui::InputResult::Exit => break,
387                ui::InputResult::TogglePlanMode => {
388                    // Toggle planning mode - minimal feedback, no extra newlines
389                    let new_mode = session.toggle_plan_mode();
390                    if new_mode.is_planning() {
391                        println!("{}", "★ plan mode".yellow());
392                    } else {
393                        println!("{}", "▶ standard mode".green());
394                    }
395                    // Emit AG-UI state delta for plan mode change
396                    if let Some(ref bridge) = event_bridge {
397                        bridge
398                            .emit_state_delta(vec![serde_json::json!({
399                                "op": "replace",
400                                "path": "/plan_mode",
401                                "value": new_mode.is_planning()
402                            })])
403                            .await;
404                    }
405                    continue;
406                }
407            }
408        };
409
410        if input.is_empty() {
411            continue;
412        }
413
414        // Check for commands
415        if ChatSession::is_command(&input) {
416            // Special handling for /clear to also clear conversation history
417            if input.trim().to_lowercase() == "/clear" || input.trim().to_lowercase() == "/c" {
418                conversation_history.clear();
419                raw_chat_history.clear();
420            }
421            match session.process_command(&input) {
422                Ok(true) => {
423                    // Check if /resume loaded a session
424                    if let Some(record) = session.pending_resume.take() {
425                        // Display previous messages
426                        println!();
427                        println!("{}", "─── Previous Conversation ───".dimmed());
428                        for msg in &record.messages {
429                            match msg.role {
430                                persistence::MessageRole::User => {
431                                    println!();
432                                    println!(
433                                        "{} {}",
434                                        "You:".cyan().bold(),
435                                        truncate_string(&msg.content, 500)
436                                    );
437                                }
438                                persistence::MessageRole::Assistant => {
439                                    println!();
440                                    // Show tool calls if any (same format as live display)
441                                    if let Some(ref tools) = msg.tool_calls {
442                                        for tc in tools {
443                                            // Match live tool display: green dot for completed, cyan bold name
444                                            if tc.args_summary.is_empty() {
445                                                println!(
446                                                    "{} {}",
447                                                    "●".green(),
448                                                    tc.name.cyan().bold()
449                                                );
450                                            } else {
451                                                println!(
452                                                    "{} {}({})",
453                                                    "●".green(),
454                                                    tc.name.cyan().bold(),
455                                                    truncate_string(&tc.args_summary, 50).dimmed()
456                                                );
457                                            }
458                                        }
459                                    }
460                                    // Show response (same ResponseFormatter as live)
461                                    if !msg.content.is_empty() {
462                                        ResponseFormatter::print_response(&truncate_string(
463                                            &msg.content,
464                                            1000,
465                                        ));
466                                    }
467                                }
468                                persistence::MessageRole::System => {
469                                    // Skip system messages in display
470                                }
471                            }
472                        }
473                        println!("{}", "─── End of History ───".dimmed());
474                        println!();
475
476                        // Try to restore from history_snapshot (new format with full context)
477                        let restored_from_snapshot = if let Some(history_json) =
478                            &record.history_snapshot
479                        {
480                            match ConversationHistory::from_json(history_json) {
481                                Ok(restored) => {
482                                    conversation_history = restored;
483                                    // Rebuild raw_chat_history from restored conversation_history
484                                    raw_chat_history = conversation_history.to_messages();
485                                    println!(
486                                            "{}",
487                                            "  ✓ Restored full conversation context (including compacted history)".green()
488                                        );
489                                    true
490                                }
491                                Err(e) => {
492                                    eprintln!(
493                                        "{}",
494                                        format!(
495                                            "  Warning: Failed to restore history snapshot: {}",
496                                            e
497                                        )
498                                        .yellow()
499                                    );
500                                    false
501                                }
502                            }
503                        } else {
504                            false
505                        };
506
507                        // Fallback: Load from messages (old format or if snapshot failed)
508                        if !restored_from_snapshot {
509                            // Load messages into raw_chat_history for AI context
510                            for msg in &record.messages {
511                                match msg.role {
512                                    persistence::MessageRole::User => {
513                                        raw_chat_history.push(rig::completion::Message::User {
514                                            content: rig::one_or_many::OneOrMany::one(
515                                                rig::completion::message::UserContent::text(
516                                                    &msg.content,
517                                                ),
518                                            ),
519                                        });
520                                    }
521                                    persistence::MessageRole::Assistant => {
522                                        raw_chat_history
523                                            .push(rig::completion::Message::Assistant {
524                                            id: Some(msg.id.clone()),
525                                            content: rig::one_or_many::OneOrMany::one(
526                                                rig::completion::message::AssistantContent::text(
527                                                    &msg.content,
528                                                ),
529                                            ),
530                                        });
531                                    }
532                                    persistence::MessageRole::System => {}
533                                }
534                            }
535
536                            // Load into conversation_history with tool calls from message records
537                            for msg in &record.messages {
538                                if msg.role == persistence::MessageRole::User {
539                                    // Find the next assistant message
540                                    let (response, tool_calls) = record
541                                        .messages
542                                        .iter()
543                                        .skip_while(|m| m.id != msg.id)
544                                        .skip(1)
545                                        .find(|m| m.role == persistence::MessageRole::Assistant)
546                                        .map(|m| {
547                                            let tcs = m.tool_calls.as_ref().map(|calls| {
548                                                calls
549                                                    .iter()
550                                                    .map(|tc| history::ToolCallRecord {
551                                                        tool_name: tc.name.clone(),
552                                                        args_summary: tc.args_summary.clone(),
553                                                        result_summary: tc.result_summary.clone(),
554                                                        tool_id: None,
555                                                        droppable: false,
556                                                    })
557                                                    .collect::<Vec<_>>()
558                                            });
559                                            (m.content.clone(), tcs.unwrap_or_default())
560                                        })
561                                        .unwrap_or_default();
562
563                                    conversation_history.add_turn(
564                                        msg.content.clone(),
565                                        response,
566                                        tool_calls,
567                                    );
568                                }
569                            }
570                            println!(
571                                "{}",
572                                format!(
573                                    "  ✓ Loaded {} messages (legacy format).",
574                                    record.messages.len()
575                                )
576                                .green()
577                            );
578                        }
579                        println!();
580                    }
581                    continue;
582                }
583                Ok(false) => break, // /exit
584                Err(e) => {
585                    eprintln!("{}", format!("Error: {}", e).red());
586                    continue;
587                }
588            }
589        }
590
591        // Check API key before making request (in case provider changed)
592        if !ChatSession::has_api_key(session.provider) {
593            eprintln!(
594                "{}",
595                "No API key configured. Use /provider to set one.".yellow()
596            );
597            continue;
598        }
599
600        // Check if compaction is needed before making the request
601        if conversation_history.needs_compaction() {
602            println!("{}", "  📦 Compacting conversation history...".dimmed());
603            if let Some(summary) = conversation_history.compact() {
604                println!(
605                    "{}",
606                    format!("  ✓ Compressed {} turns", summary.matches("Turn").count()).dimmed()
607                );
608            }
609        }
610
611        // Pre-request check: estimate if we're approaching context limit
612        // Check raw_chat_history (actual messages) not conversation_history
613        // because conversation_history may be out of sync
614        let estimated_input_tokens = estimate_raw_history_tokens(&raw_chat_history)
615            + input.len() / 4  // New input
616            + 5000; // System prompt overhead estimate
617
618        if estimated_input_tokens > 150_000 {
619            println!(
620                "{}",
621                "  ⚠ Large context detected. Pre-truncating...".yellow()
622            );
623
624            let old_count = raw_chat_history.len();
625            // Keep last 20 messages when approaching limit
626            if raw_chat_history.len() > 20 {
627                let drain_count = raw_chat_history.len() - 20;
628                raw_chat_history.drain(0..drain_count);
629                // Ensure history starts with User message for OpenAI Responses API compatibility
630                ensure_history_starts_with_user(&mut raw_chat_history);
631                // Preserve compacted summary while clearing turns to stay in sync
632                conversation_history.clear_turns_preserve_context();
633                println!(
634                    "{}",
635                    format!(
636                        "  ✓ Truncated {} → {} messages",
637                        old_count,
638                        raw_chat_history.len()
639                    )
640                    .dimmed()
641                );
642            }
643        }
644
645        // Retry loop for automatic error recovery
646        // MAX_RETRIES is for failures without progress
647        // MAX_CONTINUATIONS is for truncations WITH progress (more generous)
648        // TOOL_CALL_CHECKPOINT is the interval at which we ask user to confirm
649        // MAX_TOOL_CALLS is the absolute maximum (300 = 6 checkpoints x 50)
650        const MAX_RETRIES: u32 = 3;
651        const MAX_CONTINUATIONS: u32 = 10;
652        const _TOOL_CALL_CHECKPOINT: usize = 50;
653        const MAX_TOOL_CALLS: usize = 300;
654        let mut retry_attempt = 0;
655        let mut continuation_count = 0;
656        let mut total_tool_calls: usize = 0;
657        let mut auto_continue_tools = false; // User can select "always" to skip future prompts
658        let mut current_input = input.clone();
659        let mut succeeded = false;
660
661        // Emit AG-UI step event for processing
662        if let Some(ref bridge) = event_bridge {
663            bridge.start_step("processing").await;
664        }
665
666        while retry_attempt < MAX_RETRIES && continuation_count < MAX_CONTINUATIONS && !succeeded {
667            // Log if this is a continuation attempt
668            if continuation_count > 0 {
669                eprintln!("{}", "  📡 Sending continuation request...".dimmed());
670            }
671
672            // Create hook for Claude Code style tool display
673            let hook = ToolDisplayHook::new();
674
675            // Create progress indicator for visual feedback during generation
676            let progress = ui::GenerationIndicator::new();
677            // Layout connection disabled - using inline progress mode
678            // progress.state().set_layout(layout_state.clone());
679            hook.set_progress_state(progress.state()).await;
680
681            // Connect AG-UI EventBridge if provided (for streaming tool events to frontends)
682            if let Some(ref bridge) = event_bridge {
683                hook.set_event_bridge(bridge.clone()).await;
684            }
685
686            let project_path_buf = session.project_path.clone();
687            // Select prompt based on query type (analysis vs generation) and plan mode
688            let preamble = get_system_prompt(
689                &session.project_path,
690                Some(&current_input),
691                session.plan_mode,
692            );
693            let is_planning = session.plan_mode.is_planning();
694            // Inherit generation mode for short follow-up messages ("sure", "yes", "go ahead",
695            // etc.) so the write/shell tool set is not lost between turns.
696            let is_generation = prompts::is_generation_query(&current_input)
697                || (!is_planning && session.last_was_generation && current_input.trim().len() < 60);
698
699            // Note: using raw_chat_history directly which preserves Reasoning blocks
700            // This is needed for extended thinking to work with multi-turn conversations
701
702            // Get progress state for interrupt detection
703            let progress_state = progress.state();
704
705            // Use tokio::select! to race the API call against Ctrl+C
706            // This allows immediate cancellation, not just between tool calls
707            let mut user_interrupted = false;
708
709            // Emit AG-UI thinking event before LLM call
710            if let Some(ref bridge) = event_bridge {
711                bridge.start_thinking(Some("Generating response")).await;
712            }
713
            // API call with Ctrl+C interrupt support
            // When the ctrl_c branch wins, the async block below is dropped, which
            // cancels the in-flight HTTP request / tool loop at the next await point.
            let response = tokio::select! {
                biased; // Check ctrl_c first for faster response

                _ = tokio::signal::ctrl_c() => {
                    user_interrupted = true;
                    // The "cancelled" text is significant: the Err arm below matches on it
                    // to distinguish user interrupts from real API errors.
                    Err::<String, String>("User cancelled".to_string())
                }

                result = async {
                    match session.provider {
                ProviderType::OpenAI => {
                    // Use Responses API (default) for reasoning model support.
                    // rig-core 0.28+ handles Reasoning items properly in multi-turn.
                    let client = openai::Client::from_env();

                    // NOTE(review): this tool-registration list is duplicated verbatim in the
                    // Anthropic and Bedrock arms below — a macro or generic helper would keep
                    // the three arms from drifting apart.
                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(4096)
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        .tool(ListHetznerAvailabilityTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new())
                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools including file writes and plan execution
                        // Prefer IDE-integrated write tools (diff preview in the editor) when an
                        // IDE client is connected; otherwise fall back to plain filesystem writes.
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    // Enable reasoning for OpenAI reasoning models (GPT-5.x, O1, O3, O4)
                    // Detection is a lowercase prefix heuristic on the model name.
                    let model_lower = session.model.to_lowercase();
                    let is_reasoning_model = model_lower.starts_with("gpt-5")
                        || model_lower.starts_with("gpt5")
                        || model_lower.starts_with("o1")
                        || model_lower.starts_with("o3")
                        || model_lower.starts_with("o4");

                    let agent = if is_reasoning_model {
                        let reasoning_params = serde_json::json!({
                            "reasoning": {
                                "effort": "medium",
                                "summary": "detailed"
                            }
                        });
                        builder.additional_params(reasoning_params).build()
                    } else {
                        builder.build()
                    };

                    // Use multi_turn with Responses API
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
                // Anthropic arm: mirrors the OpenAI arm (same tool set, same multi-turn
                // pattern) but without reasoning params — see TODO below.
                ProviderType::Anthropic => {
                    let client = anthropic::Client::from_env();

                    // TODO: Extended thinking for Claude is disabled because rig-bedrock/rig-anthropic
                    // don't properly handle thinking blocks in multi-turn conversations with tool use.
                    // When thinking is enabled, ALL assistant messages must start with thinking blocks
                    // BEFORE tool_use blocks, but rig doesn't preserve/replay these.
                    // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference impl.

                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(4096)
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        .tool(ListHetznerAvailabilityTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new())
                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools including file writes and plan execution
                        // IDE-integrated write tools when an IDE client is connected, plain otherwise.
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    let agent = builder.build();

                    // Allow up to 50 tool call turns for complex generation tasks
                    // Use hook to display tool calls as they happen
                    // Pass conversation history for context continuity
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
                ProviderType::Bedrock => {
                    // Bedrock provider via rig-bedrock - same pattern as OpenAI/Anthropic
                    let client = crate::bedrock::client::Client::from_env();

                    // Extended thinking for Claude models via Bedrock
                    // This enables Claude to show its reasoning process before responding.
                    // Requires vendored rig-bedrock that preserves Reasoning blocks with tool calls.
                    // Extended thinking budget - reduced to help with rate limits
                    // 8000 is enough for most tasks, increase to 16000 for complex analysis
                    let thinking_params = serde_json::json!({
                        "thinking": {
                            "type": "enabled",
                            "budget_tokens": 8000
                        }
                    });

                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        .tool(ListHetznerAvailabilityTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new())
                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools including file writes and plan execution
                        // IDE-integrated write tools when an IDE client is connected, plain otherwise.
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    // Add thinking params for extended reasoning
                    builder = builder.additional_params(thinking_params);

                    let agent = builder.build();

                    // Use same multi-turn pattern as OpenAI/Anthropic
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                    }
                // Flatten each provider's distinct error type to String so both select!
                // branches yield the same Result<String, String>.
                }.map_err(|e| e.to_string())
            } => result
            };
1057
            // Stop the progress indicator before handling the response
            progress.stop().await;

            // End AG-UI thinking event
            if let Some(ref bridge) = event_bridge {
                bridge.end_thinking().await;
            }

            // Suppress unused variable warnings
            // NOTE(review): `user_interrupted` is set in the ctrl_c branch but never read;
            // cancellation is detected via the "cancelled" error string in the Err arm below.
            // Confirm whether these two can be removed outright.
            let _ = (&progress_state, user_interrupted);
1068
            match response {
                Ok(text) => {
                    // Emit AG-UI text message event (for connected frontends)
                    if let Some(ref bridge) = event_bridge {
                        bridge.emit_message(&text).await;
                    }

                    // Show final response
                    println!();
                    ResponseFormatter::print_response(&text);

                    // Track token usage - use actual from hook if available, else estimate
                    let hook_usage = hook.get_usage().await;
                    if hook_usage.has_data() {
                        // Use actual token counts from API response
                        session
                            .token_usage
                            .add_actual(hook_usage.input_tokens, hook_usage.output_tokens);
                    } else {
                        // Fall back to estimation when API doesn't provide usage
                        // NOTE(review): estimates from `input`, but the prompt actually sent was
                        // `current_input` (which may be a plan-menu follow-up) — confirm intended.
                        let prompt_tokens = TokenUsage::estimate_tokens(&input);
                        let completion_tokens = TokenUsage::estimate_tokens(&text);
                        session
                            .token_usage
                            .add_estimated(prompt_tokens, completion_tokens);
                    }
                    // Reset hook usage for next request batch
                    hook.reset_usage().await;

                    // Show context indicator like Forge: [model/~tokens]
                    // Strip any "provider/" prefix and ":tag" suffix from the model id,
                    // e.g. "anthropic/claude-x:latest" -> "claude-x".
                    let model_short = session
                        .model
                        .split('/')
                        .next_back()
                        .unwrap_or(&session.model)
                        .split(':')
                        .next()
                        .unwrap_or(&session.model);
                    println!();
                    println!(
                        "  {}[{}/{}]{}",
                        ui::colors::ansi::DIM,
                        model_short,
                        session.token_usage.format_compact(),
                        ui::colors::ansi::RESET
                    );

                    // Emit AG-UI state update with new token counts
                    if let Some(ref bridge) = event_bridge {
                        let state = build_agent_state(&session, &conversation_history);
                        if let Ok(state_json) = serde_json::to_value(&state) {
                            bridge.emit_state_snapshot(state_json).await;
                        }
                    }

                    // Extract tool calls from the hook state for history tracking
                    let tool_calls = extract_tool_calls_from_hook(&hook).await;
                    let batch_tool_count = tool_calls.len();
                    total_tool_calls += batch_tool_count;

                    // Show tool call summary if significant
                    if batch_tool_count > 10 {
                        println!(
                            "{}",
                            format!(
                                "  ✓ Completed with {} tool calls ({} total this session)",
                                batch_tool_count, total_tool_calls
                            )
                            .dimmed()
                        );
                    }
1140
                    // Add to conversation history with tool call records
                    // NOTE(review): records `input` while the interrupted path below records
                    // `current_input` — confirm which one should be the canonical turn text.
                    conversation_history.add_turn(input.clone(), text.clone(), tool_calls.clone());

                    // Remember whether this turn had generation tools active so short follow-up
                    // messages ("sure", "go ahead", etc.) don't lose write/shell access.
                    session.last_was_generation = is_generation;

                    // Check if this heavy turn requires immediate compaction
                    // This helps prevent context overflow in subsequent requests
                    if conversation_history.needs_compaction() {
                        println!("{}", "  📦 Compacting conversation history...".dimmed());
                        if let Some(summary) = conversation_history.compact() {
                            // NOTE(review): counting "Turn" substrings to report compressed turns
                            // is fragile — it breaks silently if the summary format changes.
                            println!(
                                "{}",
                                format!("  ✓ Compressed {} turns", summary.matches("Turn").count())
                                    .dimmed()
                            );
                        }
                    }

                    // Simplify history for OpenAI Responses API reasoning models
                    // Keep only User text and Assistant text - strip reasoning, tool calls, tool results
                    // This prevents pairing errors like "rs_... without its required following item"
                    // and "fc_... without its required reasoning item"
                    if session.provider == ProviderType::OpenAI {
                        simplify_history_for_openai_reasoning(&mut raw_chat_history);
                    }

                    // Also update legacy session history for compatibility
                    session.history.push(("user".to_string(), input.clone()));
                    session
                        .history
                        .push(("assistant".to_string(), text.clone()));

                    // Record to persistent session storage (includes full history snapshot)
                    session_recorder.record_user_message(&input);
                    session_recorder.record_assistant_message(&text, Some(&tool_calls));
                    if let Err(e) = session_recorder.save_with_history(&conversation_history) {
                        // A failed save is non-fatal: warn and keep the in-memory session going.
                        eprintln!(
                            "{}",
                            format!("  Warning: Failed to save session: {}", e).dimmed()
                        );
                    }

                    // Check if plan_create was called - show interactive menu
                    if let Some(plan_info) = find_plan_create_call(&tool_calls) {
                        println!(); // Space before menu

                        // Show the plan action menu (don't switch modes yet - let user choose)
                        // `pending_input` queues a synthetic follow-up prompt for the next loop
                        // iteration instead of waiting for the user to type one.
                        match ui::show_plan_action_menu(&plan_info.0, plan_info.1) {
                            ui::PlanActionResult::ExecuteAutoAccept => {
                                // Now switch to standard mode for execution
                                if session.plan_mode.is_planning() {
                                    session.plan_mode = session.plan_mode.toggle();
                                }
                                auto_accept_writes = true;
                                pending_input = Some(format!(
                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order. Auto-accept all file writes.",
                                    plan_info.0
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::ExecuteWithReview => {
                                // Now switch to standard mode for execution
                                if session.plan_mode.is_planning() {
                                    session.plan_mode = session.plan_mode.toggle();
                                }
                                pending_input = Some(format!(
                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order.",
                                    plan_info.0
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::ChangePlan(feedback) => {
                                // Stay in plan mode for modifications
                                pending_input = Some(format!(
                                    "Please modify the plan at '{}'. User feedback: {}",
                                    plan_info.0, feedback
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::Cancel => {
                                // Just complete normally, don't execute
                                succeeded = true;
                            }
                        }
                    } else {
                        succeeded = true;
                    }
                }
                Err(e) => {
                    let err_str = e.to_string();

                    println!();

                    // Check if this was a user-initiated cancellation (Ctrl+C)
                    // Matched by substring against the "User cancelled" string produced
                    // in the select! ctrl_c branch above.
                    if err_str.contains("cancelled") || err_str.contains("Cancelled") {
                        // Extract any completed work before cancellation
                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
                        let tool_count = completed_tools.len();

                        eprintln!("{}", "⚠ Generation interrupted.".yellow());
                        if tool_count > 0 {
                            eprintln!(
                                "{}",
                                format!("  {} tool calls completed before interrupt.", tool_count)
                                    .dimmed()
                            );
                            // Add partial progress to history
                            conversation_history.add_turn(
                                current_input.clone(),
                                format!("[Interrupted after {} tool calls]", tool_count),
                                completed_tools,
                            );
                        }
                        eprintln!("{}", "  Type your next message to continue.".dimmed());

                        // Don't retry, don't mark as succeeded - just break to return to prompt
                        break;
                    }

                    // Check if this is a max depth error - handle as checkpoint
                    // (multi_turn(50) above errors out when the turn limit is exhausted)
                    if err_str.contains("MaxDepth")
                        || err_str.contains("max_depth")
                        || err_str.contains("reached limit")
                    {
                        // Extract what was done before hitting the limit
                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
                        let batch_tool_count = completed_tools.len();
                        total_tool_calls += batch_tool_count;

                        eprintln!("{}", format!(
                            "⚠ Reached {} tool calls this batch ({} total). Maximum allowed: {}",
                            batch_tool_count, total_tool_calls, MAX_TOOL_CALLS
                        ).yellow());

                        // Check if we've hit the absolute maximum
                        if total_tool_calls >= MAX_TOOL_CALLS {
                            eprintln!(
                                "{}",
                                format!("Maximum tool call limit ({}) reached.", MAX_TOOL_CALLS)
                                    .red()
                            );
1285                            eprintln!(
1286                                "{}",
1287                                "The task is too complex. Try breaking it into smaller parts."
1288                                    .dimmed()
1289                            );
1290                            break;
1291                        }
1292
1293                        // Ask user if they want to continue (unless auto-continue is enabled)
1294                        let should_continue = if auto_continue_tools {
1295                            eprintln!(
1296                                "{}",
1297                                "  Auto-continuing (you selected 'always')...".dimmed()
1298                            );
1299                            true
1300                        } else {
1301                            eprintln!(
1302                                "{}",
1303                                "Excessive tool calls used. Want to continue?".yellow()
1304                            );
1305                            eprintln!(
1306                                "{}",
1307                                "  [y] Yes, continue  [n] No, stop  [a] Always continue".dimmed()
1308                            );
1309                            print!("  > ");
1310                            let _ = std::io::Write::flush(&mut std::io::stdout());
1311
1312                            // Read user input
1313                            let mut response = String::new();
1314                            match std::io::stdin().read_line(&mut response) {
1315                                Ok(_) => {
1316                                    let resp = response.trim().to_lowercase();
1317                                    if resp == "a" || resp == "always" {
1318                                        auto_continue_tools = true;
1319                                        true
1320                                    } else {
1321                                        resp == "y" || resp == "yes" || resp.is_empty()
1322                                    }
1323                                }
1324                                Err(_) => false,
1325                            }
1326                        };
1327
1328                        if !should_continue {
1329                            eprintln!(
1330                                "{}",
1331                                "Stopped by user. Type 'continue' to resume later.".dimmed()
1332                            );
1333                            // Add partial progress to history
1334                            if !completed_tools.is_empty() {
1335                                conversation_history.add_turn(
1336                                    current_input.clone(),
1337                                    format!(
1338                                        "[Stopped at checkpoint - {} tools completed]",
1339                                        batch_tool_count
1340                                    ),
1341                                    vec![],
1342                                );
1343                            }
1344                            break;
1345                        }
1346
1347                        // Continue from checkpoint
1348                        eprintln!(
1349                            "{}",
1350                            format!(
1351                                "  → Continuing... {} remaining tool calls available",
1352                                MAX_TOOL_CALLS - total_tool_calls
1353                            )
1354                            .dimmed()
1355                        );
1356
1357                        // Add partial progress to history (without duplicating tool calls)
1358                        conversation_history.add_turn(
1359                            current_input.clone(),
1360                            format!(
1361                                "[Checkpoint - {} tools completed, continuing...]",
1362                                batch_tool_count
1363                            ),
1364                            vec![],
1365                        );
1366
1367                        // Build continuation prompt
1368                        current_input =
1369                            build_continuation_prompt(&input, &completed_tools, &agent_thinking);
1370
1371                        // Brief delay before continuation
1372                        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1373                        continue; // Continue the loop without incrementing retry_attempt
1374                    } else if err_str.contains("rate")
1375                        || err_str.contains("Rate")
1376                        || err_str.contains("429")
1377                        || err_str.contains("Too many tokens")
1378                        || err_str.contains("please wait")
1379                        || err_str.contains("throttl")
1380                        || err_str.contains("Throttl")
1381                    {
1382                        eprintln!("{}", "⚠ Rate limited by API provider.".yellow());
1383                        // Wait before retry for rate limits (longer wait for "too many tokens")
1384                        retry_attempt += 1;
1385                        let wait_secs = if err_str.contains("Too many tokens") {
1386                            30
1387                        } else {
1388                            5
1389                        };
1390                        eprintln!(
1391                            "{}",
1392                            format!(
1393                                "  Waiting {} seconds before retry ({}/{})...",
1394                                wait_secs, retry_attempt, MAX_RETRIES
1395                            )
1396                            .dimmed()
1397                        );
1398                        tokio::time::sleep(tokio::time::Duration::from_secs(wait_secs)).await;
1399                    } else if is_input_too_long_error(&err_str) {
1400                        // Context too large - truncate raw_chat_history directly
1401                        // NOTE: We truncate raw_chat_history (actual messages) not conversation_history
1402                        // because conversation_history may be empty/stale during errors
1403                        eprintln!(
1404                            "{}",
1405                            "⚠ Context too large for model. Truncating history...".yellow()
1406                        );
1407
1408                        let old_token_count = estimate_raw_history_tokens(&raw_chat_history);
1409                        let old_msg_count = raw_chat_history.len();
1410
1411                        // Strategy 1: Keep only the last N messages (user/assistant pairs)
1412                        // More aggressive truncation on each retry: 10 → 6 → 4 messages
1413                        let keep_count = match retry_attempt {
1414                            0 => 10,
1415                            1 => 6,
1416                            _ => 4,
1417                        };
1418
1419                        if raw_chat_history.len() > keep_count {
1420                            // Drain older messages, keep the most recent ones
1421                            let drain_count = raw_chat_history.len() - keep_count;
1422                            raw_chat_history.drain(0..drain_count);
1423                            // Ensure history starts with User message for OpenAI Responses API compatibility
1424                            ensure_history_starts_with_user(&mut raw_chat_history);
1425                        }
1426
1427                        // Strategy 2: Compact large tool outputs to temp files + summaries
1428                        // This preserves data (agent can read file if needed) while reducing context
1429                        let max_output_chars = match retry_attempt {
1430                            0 => 50_000, // 50KB on first try
1431                            1 => 20_000, // 20KB on second
1432                            _ => 5_000,  // 5KB on third (aggressive)
1433                        };
1434                        compact_large_tool_outputs(&mut raw_chat_history, max_output_chars);
1435
1436                        let new_token_count = estimate_raw_history_tokens(&raw_chat_history);
1437                        eprintln!("{}", format!(
1438                            "  ✓ Truncated: {} messages (~{} tokens) → {} messages (~{} tokens)",
1439                            old_msg_count, old_token_count, raw_chat_history.len(), new_token_count
1440                        ).green());
1441
1442                        // Preserve compacted summary while clearing turns to stay in sync
1443                        conversation_history.clear_turns_preserve_context();
1444
1445                        // Retry with truncated context
1446                        retry_attempt += 1;
1447                        if retry_attempt < MAX_RETRIES {
1448                            eprintln!(
1449                                "{}",
1450                                format!(
1451                                    "  → Retrying with truncated context ({}/{})...",
1452                                    retry_attempt, MAX_RETRIES
1453                                )
1454                                .dimmed()
1455                            );
1456                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1457                        } else {
1458                            eprintln!(
1459                                "{}",
1460                                "Context still too large after truncation. Try /clear to reset."
1461                                    .red()
1462                            );
1463                            break;
1464                        }
1465                    } else if is_truncation_error(&err_str) {
1466                        // Truncation error - try intelligent continuation
1467                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
1468                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
1469
1470                        // Count actually completed tools (not in-progress)
1471                        let completed_count = completed_tools
1472                            .iter()
1473                            .filter(|t| !t.result_summary.contains("IN PROGRESS"))
1474                            .count();
1475                        let in_progress_count = completed_tools.len() - completed_count;
1476
1477                        if !completed_tools.is_empty() && continuation_count < MAX_CONTINUATIONS {
1478                            // We have partial progress - continue from where we left off
1479                            continuation_count += 1;
1480                            let status_msg = if in_progress_count > 0 {
1481                                format!(
1482                                    "⚠ Response truncated. {} completed, {} in-progress. Auto-continuing ({}/{})...",
1483                                    completed_count,
1484                                    in_progress_count,
1485                                    continuation_count,
1486                                    MAX_CONTINUATIONS
1487                                )
1488                            } else {
1489                                format!(
1490                                    "⚠ Response truncated. {} tool calls completed. Auto-continuing ({}/{})...",
1491                                    completed_count, continuation_count, MAX_CONTINUATIONS
1492                                )
1493                            };
1494                            eprintln!("{}", status_msg.yellow());
1495
1496                            // Add partial progress to conversation history
1497                            // NOTE: We intentionally pass empty tool_calls here because the
1498                            // continuation prompt already contains the detailed file list.
1499                            // Including them in history would duplicate the context and waste tokens.
1500                            conversation_history.add_turn(
1501                                current_input.clone(),
1502                                format!("[Partial response - {} tools completed, {} in-progress before truncation. See continuation prompt for details.]",
1503                                    completed_count, in_progress_count),
1504                                vec![]  // Don't duplicate - continuation prompt has the details
1505                            );
1506
1507                            // Check if we need compaction after adding this heavy turn
1508                            // This is important for long multi-turn sessions with many tool calls
1509                            if conversation_history.needs_compaction() {
1510                                eprintln!(
1511                                    "{}",
1512                                    "  📦 Compacting history before continuation...".dimmed()
1513                                );
1514                                if let Some(summary) = conversation_history.compact() {
1515                                    eprintln!(
1516                                        "{}",
1517                                        format!(
1518                                            "  ✓ Compressed {} turns",
1519                                            summary.matches("Turn").count()
1520                                        )
1521                                        .dimmed()
1522                                    );
1523                                }
1524                            }
1525
1526                            // Build continuation prompt with context
1527                            current_input = build_continuation_prompt(
1528                                &input,
1529                                &completed_tools,
1530                                &agent_thinking,
1531                            );
1532
1533                            // Log continuation details for debugging
1534                            eprintln!("{}", format!(
1535                                "  → Continuing with {} files read, {} written, {} other actions tracked",
1536                                completed_tools.iter().filter(|t| t.tool_name == "read_file").count(),
1537                                completed_tools.iter().filter(|t| t.tool_name == "write_file" || t.tool_name == "write_files").count(),
1538                                completed_tools.iter().filter(|t| t.tool_name != "read_file" && t.tool_name != "write_file" && t.tool_name != "write_files" && t.tool_name != "list_directory").count()
1539                            ).dimmed());
1540
1541                            // Brief delay before continuation
1542                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1543                            // Don't increment retry_attempt - this is progress via continuation
1544                        } else if retry_attempt < MAX_RETRIES {
1545                            // No tool calls completed - simple retry
1546                            retry_attempt += 1;
1547                            eprintln!(
1548                                "{}",
1549                                format!(
1550                                    "⚠ Response error (attempt {}/{}). Retrying...",
1551                                    retry_attempt, MAX_RETRIES
1552                                )
1553                                .yellow()
1554                            );
1555                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1556                        } else {
1557                            // Max retries/continuations reached
1558                            eprintln!("{}", format!("Error: {}", e).red());
1559                            if continuation_count >= MAX_CONTINUATIONS {
1560                                eprintln!("{}", format!("Max continuations ({}) reached. The task is too complex for one request.", MAX_CONTINUATIONS).dimmed());
1561                            } else {
1562                                eprintln!(
1563                                    "{}",
1564                                    "Max retries reached. The response may be too complex."
1565                                        .dimmed()
1566                                );
1567                            }
1568                            eprintln!(
1569                                "{}",
1570                                "Try breaking your request into smaller parts.".dimmed()
1571                            );
1572                            exit_error = Some(e.to_string());
1573                            break;
1574                        }
1575                    } else if err_str.contains("timeout") || err_str.contains("Timeout") {
1576                        // Timeout - simple retry
1577                        retry_attempt += 1;
1578                        if retry_attempt < MAX_RETRIES {
1579                            eprintln!(
1580                                "{}",
1581                                format!(
1582                                    "⚠ Request timed out (attempt {}/{}). Retrying...",
1583                                    retry_attempt, MAX_RETRIES
1584                                )
1585                                .yellow()
1586                            );
1587                            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
1588                        } else {
1589                            eprintln!("{}", "Request timed out. Please try again.".red());
1590                            exit_error = Some("Request timed out".to_string());
1591                            break;
1592                        }
1593                    } else {
1594                        // Unknown error - show details and break
1595                        eprintln!("{}", format!("Error: {}", e).red());
1596                        if continuation_count > 0 {
1597                            eprintln!(
1598                                "{}",
1599                                format!(
1600                                    "  (occurred during continuation attempt {})",
1601                                    continuation_count
1602                                )
1603                                .dimmed()
1604                            );
1605                        }
1606                        eprintln!("{}", "Error details for debugging:".dimmed());
1607                        eprintln!(
1608                            "{}",
1609                            format!("  - retry_attempt: {}/{}", retry_attempt, MAX_RETRIES)
1610                                .dimmed()
1611                        );
1612                        eprintln!(
1613                            "{}",
1614                            format!(
1615                                "  - continuation_count: {}/{}",
1616                                continuation_count, MAX_CONTINUATIONS
1617                            )
1618                            .dimmed()
1619                        );
1620                        exit_error = Some(e.to_string());
1621                        break;
1622                    }
1623                }
1624            }
1625        }
1626
1627        // End AG-UI step event for this turn
1628        if let Some(ref bridge) = event_bridge {
1629            bridge.end_step().await;
1630        }
1631
1632        println!();
1633    }
1634
1635    // Emit AG-UI run completion event for connected frontends
1636    if let Some(ref bridge) = event_bridge {
1637        if let Some(error_msg) = exit_error {
1638            bridge.finish_run_with_error(&error_msg).await;
1639        } else {
1640            bridge.finish_run().await;
1641        }
1642    }
1643
1644    // Clean up terminal layout before exiting (disabled - layout not initialized)
1645    // if let Err(e) = terminal_layout.cleanup() {
1646    //     eprintln!(
1647    //         "{}",
1648    //         format!("Warning: Terminal cleanup failed: {}", e).dimmed()
1649    //     );
1650    // }
1651
1652    Ok(())
1653}
1654
1655// NOTE: wait_for_interrupt function removed - ESC interrupt feature disabled
1656// due to terminal corruption issues with spawn_blocking raw mode handling.
1657// TODO: Re-implement using tool hook callbacks for cleaner interruption.
1658
1659/// Extract tool call records from the hook state for history tracking
1660async fn extract_tool_calls_from_hook(hook: &ToolDisplayHook) -> Vec<ToolCallRecord> {
1661    let state = hook.state();
1662    let guard = state.lock().await;
1663
1664    guard
1665        .tool_calls
1666        .iter()
1667        .enumerate()
1668        .map(|(i, tc)| {
1669            let result = if tc.is_running {
1670                // Tool was in progress when error occurred
1671                "[IN PROGRESS - may need to be re-run]".to_string()
1672            } else if let Some(output) = &tc.output {
1673                truncate_string(output, 200)
1674            } else {
1675                "completed".to_string()
1676            };
1677
1678            ToolCallRecord {
1679                tool_name: tc.name.clone(),
1680                args_summary: truncate_string(&tc.args, 100),
1681                result_summary: result,
1682                // Generate a unique tool ID for proper message pairing
1683                tool_id: Some(format!("tool_{}_{}", tc.name, i)),
1684                // Mark read-only tools as droppable (their results can be re-fetched)
1685                droppable: matches!(
1686                    tc.name.as_str(),
1687                    "read_file" | "list_directory" | "analyze_project"
1688                ),
1689            }
1690        })
1691        .collect()
1692}
1693
1694/// Extract any agent thinking/messages from the hook for context
1695async fn extract_agent_messages_from_hook(hook: &ToolDisplayHook) -> Vec<String> {
1696    let state = hook.state();
1697    let guard = state.lock().await;
1698    guard.agent_messages.clone()
1699}
1700
/// Helper to truncate strings for summaries.
///
/// Returns `s` unchanged when it already fits in `max_len` bytes; otherwise
/// returns a prefix followed by `"..."`. The cut point is walked back to the
/// nearest UTF-8 character boundary — a raw byte slice at
/// `max_len.saturating_sub(3)` would panic on multi-byte input (tool output
/// and arguments are not guaranteed to be ASCII).
fn truncate_string(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }
    // Reserve 3 bytes for the ellipsis, then back up to a valid boundary.
    let mut cut = max_len.saturating_sub(3);
    while cut > 0 && !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &s[..cut])
}
1709
1710/// Compact large tool outputs by saving them to temp files and replacing with summaries.
1711/// This preserves all data (agent can read the file) while reducing context size.
1712fn compact_large_tool_outputs(messages: &mut [rig::completion::Message], max_chars: usize) {
1713    use rig::completion::message::{Text, ToolResultContent, UserContent};
1714    use std::fs;
1715
1716    // Create temp directory for compacted outputs
1717    let temp_dir = std::env::temp_dir().join("syncable-agent-outputs");
1718    let _ = fs::create_dir_all(&temp_dir);
1719
1720    for msg in messages.iter_mut() {
1721        if let rig::completion::Message::User { content } = msg {
1722            for item in content.iter_mut() {
1723                if let UserContent::ToolResult(tr) = item {
1724                    for trc in tr.content.iter_mut() {
1725                        if let ToolResultContent::Text(text) = trc
1726                            && text.text.len() > max_chars
1727                        {
1728                            // Save full output to temp file
1729                            let file_id = format!(
1730                                "{}_{}.txt",
1731                                tr.id,
1732                                std::time::SystemTime::now()
1733                                    .duration_since(std::time::UNIX_EPOCH)
1734                                    .unwrap()
1735                                    .as_millis()
1736                            );
1737                            let file_path = temp_dir.join(&file_id);
1738
1739                            if let Ok(()) = fs::write(&file_path, &text.text) {
1740                                // Create a smart summary
1741                                let summary = create_output_summary(
1742                                    &text.text,
1743                                    &file_path.display().to_string(),
1744                                    max_chars / 2, // Use half max for summary
1745                                );
1746
1747                                // Replace with summary
1748                                *trc = ToolResultContent::Text(Text { text: summary });
1749                            }
1750                        }
1751                    }
1752                }
1753            }
1754        }
1755    }
1756}
1757
1758/// Create a smart summary of a large output using incremental chunk processing.
1759/// Processes output in logical sections, summarizes each, then combines into actionable summary.
1760fn create_output_summary(full_output: &str, file_path: &str, max_summary_len: usize) -> String {
1761    let total_lines = full_output.lines().count();
1762    let total_chars = full_output.len();
1763
1764    let summary_content =
1765        if full_output.trim_start().starts_with('{') || full_output.trim_start().starts_with('[') {
1766            // JSON output - extract structured summary
1767            summarize_json_incrementally(full_output, max_summary_len)
1768        } else {
1769            // Text output - chunk and summarize
1770            summarize_text_incrementally(full_output, max_summary_len)
1771        };
1772
1773    format!(
1774        "[COMPACTED OUTPUT]\n\
1775        Full data: {}\n\
1776        Size: {} chars, {} lines\n\
1777        \n\
1778        {}\n\
1779        \n\
1780        [Read file with offset/limit for specific sections if needed]",
1781        file_path, total_chars, total_lines, summary_content
1782    )
1783}
1784
1785/// Incrementally summarize JSON output, extracting key fields and prioritizing important items.
1786fn summarize_json_incrementally(json_str: &str, max_len: usize) -> String {
1787    let Ok(json) = serde_json::from_str::<serde_json::Value>(json_str) else {
1788        return "Failed to parse JSON".to_string();
1789    };
1790
1791    let mut parts: Vec<String> = Vec::new();
1792    let mut current_len = 0;
1793
1794    match &json {
1795        serde_json::Value::Object(obj) => {
1796            // Priority 1: Summary/stats fields
1797            for key in ["summary", "stats", "metadata", "status"] {
1798                if let Some(v) = obj.get(key) {
1799                    let s = format!("{}:\n{}", key, indent_json(v, 2, 500));
1800                    if current_len + s.len() < max_len {
1801                        parts.push(s.clone());
1802                        current_len += s.len();
1803                    }
1804                }
1805            }
1806
1807            // Priority 2: Error/critical items (summarize each)
1808            for key in [
1809                "errors",
1810                "critical",
1811                "failures",
1812                "issues",
1813                "findings",
1814                "recommendations",
1815            ] {
1816                if let Some(serde_json::Value::Array(arr)) = obj.get(key) {
1817                    if arr.is_empty() {
1818                        continue;
1819                    }
1820                    parts.push(format!("\n{} ({} items):", key, arr.len()));
1821
1822                    // Group by severity/type if present
1823                    let mut by_severity: std::collections::HashMap<
1824                        String,
1825                        Vec<&serde_json::Value>,
1826                    > = std::collections::HashMap::new();
1827
1828                    for item in arr {
1829                        let severity = item
1830                            .get("severity")
1831                            .or_else(|| item.get("level"))
1832                            .or_else(|| item.get("type"))
1833                            .and_then(|v| v.as_str())
1834                            .unwrap_or("other")
1835                            .to_string();
1836                        by_severity.entry(severity).or_default().push(item);
1837                    }
1838
1839                    // Show critical/high first, summarize others
1840                    for sev in [
1841                        "critical", "high", "error", "warning", "medium", "low", "info", "other",
1842                    ] {
1843                        if let Some(items) = by_severity.get(sev) {
1844                            let show_count = match sev {
1845                                "critical" | "high" | "error" => 5.min(items.len()),
1846                                "warning" | "medium" => 3.min(items.len()),
1847                                _ => 2.min(items.len()),
1848                            };
1849
1850                            if !items.is_empty() {
1851                                let s =
1852                                    format!("  [{}] {} items:", sev.to_uppercase(), items.len());
1853                                if current_len + s.len() < max_len {
1854                                    parts.push(s.clone());
1855                                    current_len += s.len();
1856
1857                                    for item in items.iter().take(show_count) {
1858                                        let item_summary = summarize_single_item(item);
1859                                        if current_len + item_summary.len() < max_len {
1860                                            parts.push(format!("    • {}", item_summary));
1861                                            current_len += item_summary.len();
1862                                        }
1863                                    }
1864
1865                                    if items.len() > show_count {
1866                                        parts.push(format!(
1867                                            "    ... and {} more",
1868                                            items.len() - show_count
1869                                        ));
1870                                    }
1871                                }
1872                            }
1873                        }
1874                    }
1875                }
1876            }
1877
1878            // Priority 3: Show remaining top-level keys
1879            let shown_keys: std::collections::HashSet<&str> = [
1880                "summary",
1881                "stats",
1882                "metadata",
1883                "status",
1884                "errors",
1885                "critical",
1886                "failures",
1887                "issues",
1888                "findings",
1889                "recommendations",
1890            ]
1891            .iter()
1892            .cloned()
1893            .collect();
1894
1895            let other_keys: Vec<_> = obj
1896                .keys()
1897                .filter(|k| !shown_keys.contains(k.as_str()))
1898                .collect();
1899            if !other_keys.is_empty() && current_len < max_len - 200 {
1900                parts.push(format!("\nOther fields: {:?}", other_keys));
1901            }
1902        }
1903        serde_json::Value::Array(arr) => {
1904            parts.push(format!("Array with {} items", arr.len()));
1905
1906            // Try to group by type/severity
1907            for (i, item) in arr.iter().take(10).enumerate() {
1908                let s = format!("[{}] {}", i, summarize_single_item(item));
1909                if current_len + s.len() < max_len {
1910                    parts.push(s.clone());
1911                    current_len += s.len();
1912                }
1913            }
1914            if arr.len() > 10 {
1915                parts.push(format!("... and {} more items", arr.len() - 10));
1916            }
1917        }
1918        _ => {
1919            parts.push(truncate_json_value(&json, max_len));
1920        }
1921    }
1922
1923    parts.join("\n")
1924}
1925
1926/// Summarize a single JSON item (issue, error, etc.) into a one-liner.
1927fn summarize_single_item(item: &serde_json::Value) -> String {
1928    let mut parts: Vec<String> = Vec::new();
1929
1930    // Extract common fields
1931    for key in [
1932        "message",
1933        "description",
1934        "title",
1935        "name",
1936        "file",
1937        "path",
1938        "code",
1939        "rule",
1940    ] {
1941        if let Some(v) = item.get(key)
1942            && let Some(s) = v.as_str()
1943        {
1944            parts.push(truncate_string(s, 80));
1945            break; // Only take first descriptive field
1946        }
1947    }
1948
1949    // Add location if present
1950    if let Some(file) = item
1951        .get("file")
1952        .or_else(|| item.get("path"))
1953        .and_then(|v| v.as_str())
1954    {
1955        if let Some(line) = item.get("line").and_then(|v| v.as_u64()) {
1956            parts.push(format!("at {}:{}", file, line));
1957        } else {
1958            parts.push(format!("in {}", truncate_string(file, 40)));
1959        }
1960    }
1961
1962    if parts.is_empty() {
1963        truncate_json_value(item, 100)
1964    } else {
1965        parts.join(" ")
1966    }
1967}
1968
1969/// Indent JSON for display.
1970fn indent_json(v: &serde_json::Value, indent: usize, max_len: usize) -> String {
1971    let s = serde_json::to_string_pretty(v).unwrap_or_else(|_| v.to_string());
1972    let prefix = " ".repeat(indent);
1973    let indented: String = s
1974        .lines()
1975        .map(|l| format!("{}{}", prefix, l))
1976        .collect::<Vec<_>>()
1977        .join("\n");
1978    if indented.len() > max_len {
1979        format!("{}...", &indented[..max_len.saturating_sub(3)])
1980    } else {
1981        indented
1982    }
1983}
1984
1985/// Incrementally summarize text output by processing in chunks.
1986fn summarize_text_incrementally(text: &str, max_len: usize) -> String {
1987    let lines: Vec<&str> = text.lines().collect();
1988    let mut parts: Vec<String> = Vec::new();
1989    let mut current_len = 0;
1990
1991    // Look for section headers or key patterns
1992    let mut sections: Vec<(usize, &str)> = Vec::new();
1993    for (i, line) in lines.iter().enumerate() {
1994        // Detect headers (lines that look like titles)
1995        if line.starts_with('#')
1996            || line.starts_with("==")
1997            || line.starts_with("--")
1998            || (line.ends_with(':') && line.len() < 50)
1999            || line.chars().all(|c| c.is_uppercase() || c.is_whitespace())
2000        {
2001            sections.push((i, line));
2002        }
2003    }
2004
2005    if !sections.is_empty() {
2006        // Summarize by sections
2007        parts.push(format!("Found {} sections:", sections.len()));
2008        for (i, (line_num, header)) in sections.iter().enumerate() {
2009            let next_section = sections.get(i + 1).map(|(n, _)| *n).unwrap_or(lines.len());
2010            let section_lines = next_section - line_num;
2011
2012            let s = format!(
2013                "  [L{}] {} ({} lines)",
2014                line_num + 1,
2015                header.trim(),
2016                section_lines
2017            );
2018            if current_len + s.len() < max_len / 2 {
2019                parts.push(s.clone());
2020                current_len += s.len();
2021            }
2022        }
2023        parts.push("".to_string());
2024    }
2025
2026    // Show first chunk
2027    let preview_lines = 15.min(lines.len());
2028    parts.push("Content preview:".to_string());
2029    for line in lines.iter().take(preview_lines) {
2030        let s = format!("  {}", truncate_string(line, 120));
2031        if current_len + s.len() < max_len * 3 / 4 {
2032            parts.push(s.clone());
2033            current_len += s.len();
2034        }
2035    }
2036
2037    if lines.len() > preview_lines {
2038        parts.push(format!(
2039            "  ... ({} more lines)",
2040            lines.len() - preview_lines
2041        ));
2042    }
2043
2044    // Show last few lines if space permits
2045    if lines.len() > preview_lines * 2 && current_len < max_len - 500 {
2046        parts.push("\nEnd of output:".to_string());
2047        for line in lines.iter().skip(lines.len() - 5) {
2048            let s = format!("  {}", truncate_string(line, 120));
2049            if current_len + s.len() < max_len {
2050                parts.push(s.clone());
2051                current_len += s.len();
2052            }
2053        }
2054    }
2055
2056    parts.join("\n")
2057}
2058
2059/// Truncate a JSON value for display
2060fn truncate_json_value(v: &serde_json::Value, max_len: usize) -> String {
2061    let s = v.to_string();
2062    if s.len() <= max_len {
2063        s
2064    } else {
2065        format!("{}...", &s[..max_len.saturating_sub(3)])
2066    }
2067}
2068
/// Simplify history for OpenAI Responses API compatibility with reasoning models.
///
/// OpenAI's Responses API has strict pairing requirements:
/// - Reasoning items must be followed by their output (text or function_call)
/// - Function_call items must be preceded by their reasoning item
///
/// When passing history across user turns, these pairings get broken, causing errors like:
/// - "Item 'rs_...' of type 'reasoning' was provided without its required following item"
/// - "Item 'fc_...' of type 'function_call' was provided without its required 'reasoning' item"
///
/// Solution: Keep only User messages and final Assistant Text responses.
/// This preserves conversation context without the complex internal tool/reasoning structure.
/// Messages whose content becomes empty after filtering are dropped entirely.
fn simplify_history_for_openai_reasoning(history: &mut Vec<rig::completion::Message>) {
    use rig::completion::message::{AssistantContent, UserContent};
    use rig::one_or_many::OneOrMany;

    // Filter to keep only User text messages and Assistant text messages
    let simplified: Vec<rig::completion::Message> = history
        .iter()
        .filter_map(|msg| match msg {
            // Keep User messages, but only text content (not tool results)
            rig::completion::Message::User { content } => {
                let text_only: Vec<UserContent> = content
                    .iter()
                    .filter(|c| matches!(c, UserContent::Text(_)))
                    .cloned()
                    .collect();
                if text_only.is_empty() {
                    // Message consisted solely of tool results: drop it.
                    None
                } else {
                    // Rebuild the OneOrMany container from the surviving text parts.
                    let mut iter = text_only.into_iter();
                    let first = iter.next().unwrap(); // safe: non-empty checked above
                    let rest: Vec<_> = iter.collect();
                    let new_content = if rest.is_empty() {
                        OneOrMany::one(first)
                    } else {
                        // The iterator is non-empty (starts with `first`), so
                        // constructing `many` cannot fail here.
                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
                    };
                    Some(rig::completion::Message::User {
                        content: new_content,
                    })
                }
            }
            // Keep Assistant messages, but only text content (not reasoning, tool calls)
            rig::completion::Message::Assistant { content, id } => {
                let text_only: Vec<AssistantContent> = content
                    .iter()
                    .filter(|c| matches!(c, AssistantContent::Text(_)))
                    .cloned()
                    .collect();
                if text_only.is_empty() {
                    // Message consisted solely of reasoning/tool calls: drop it.
                    None
                } else {
                    // Same container-rebuild dance as the User branch above.
                    let mut iter = text_only.into_iter();
                    let first = iter.next().unwrap(); // safe: non-empty checked above
                    let rest: Vec<_> = iter.collect();
                    let new_content = if rest.is_empty() {
                        OneOrMany::one(first)
                    } else {
                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
                    };
                    Some(rig::completion::Message::Assistant {
                        content: new_content,
                        // Preserve the provider-assigned message id.
                        id: id.clone(),
                    })
                }
            }
        })
        .collect();

    *history = simplified;
}
2141
2142/// Ensure history starts with a User message for OpenAI Responses API compatibility.
2143///
2144/// OpenAI's Responses API requires that reasoning items are properly structured within
2145/// a conversation. When history truncation leaves an Assistant message (containing
2146/// Reasoning blocks) at the start, OpenAI rejects it with:
2147/// "Item 'rs_...' of type 'reasoning' was provided without its required following item."
2148///
2149/// This function inserts a synthetic User message at the beginning if history starts
2150/// with an Assistant message, preserving the context while maintaining valid structure.
2151fn ensure_history_starts_with_user(history: &mut Vec<rig::completion::Message>) {
2152    if !history.is_empty()
2153        && matches!(
2154            history.first(),
2155            Some(rig::completion::Message::Assistant { .. })
2156        )
2157    {
2158        // Insert synthetic User message at the beginning to maintain valid conversation structure
2159        history.insert(
2160            0,
2161            rig::completion::Message::User {
2162                content: rig::one_or_many::OneOrMany::one(
2163                    rig::completion::message::UserContent::text("(Conversation continued)"),
2164                ),
2165            },
2166        );
2167    }
2168}
2169
2170/// Estimate token count from raw rig Messages
2171/// This is used for context length management to prevent "input too long" errors.
2172/// Estimates ~4 characters per token.
2173fn estimate_raw_history_tokens(messages: &[rig::completion::Message]) -> usize {
2174    use rig::completion::message::{AssistantContent, ToolResultContent, UserContent};
2175
2176    messages
2177        .iter()
2178        .map(|msg| -> usize {
2179            match msg {
2180                rig::completion::Message::User { content } => {
2181                    content
2182                        .iter()
2183                        .map(|c| -> usize {
2184                            match c {
2185                                UserContent::Text(t) => t.text.len() / 4,
2186                                UserContent::ToolResult(tr) => {
2187                                    // Tool results can be HUGE - properly estimate them
2188                                    tr.content
2189                                        .iter()
2190                                        .map(|trc| match trc {
2191                                            ToolResultContent::Text(t) => t.text.len() / 4,
2192                                            _ => 100,
2193                                        })
2194                                        .sum::<usize>()
2195                                }
2196                                _ => 100, // Estimate for images/documents
2197                            }
2198                        })
2199                        .sum::<usize>()
2200                }
2201                rig::completion::Message::Assistant { content, .. } => {
2202                    content
2203                        .iter()
2204                        .map(|c| -> usize {
2205                            match c {
2206                                AssistantContent::Text(t) => t.text.len() / 4,
2207                                AssistantContent::ToolCall(tc) => {
2208                                    // arguments is serde_json::Value, convert to string for length estimate
2209                                    let args_len = tc.function.arguments.to_string().len();
2210                                    (tc.function.name.len() + args_len) / 4
2211                                }
2212                                _ => 100,
2213                            }
2214                        })
2215                        .sum::<usize>()
2216                }
2217            }
2218        })
2219        .sum()
2220}
2221
2222/// Find a plan_create tool call in the list and extract plan info
2223/// Returns (plan_path, task_count) if found
2224fn find_plan_create_call(tool_calls: &[ToolCallRecord]) -> Option<(String, usize)> {
2225    for tc in tool_calls {
2226        if tc.tool_name == "plan_create" {
2227            // Try to parse the result_summary as JSON to extract plan_path
2228            // Note: result_summary may be truncated, so we have multiple fallbacks
2229            let plan_path =
2230                if let Ok(result) = serde_json::from_str::<serde_json::Value>(&tc.result_summary) {
2231                    result
2232                        .get("plan_path")
2233                        .and_then(|v| v.as_str())
2234                        .map(|s| s.to_string())
2235                } else {
2236                    None
2237                };
2238
2239            // If JSON parsing failed, find the most recently created plan file
2240            // This is more reliable than trying to reconstruct the path from truncated args
2241            let plan_path = plan_path.unwrap_or_else(|| {
2242                find_most_recent_plan_file().unwrap_or_else(|| "plans/plan.md".to_string())
2243            });
2244
2245            // Count tasks by reading the plan file directly
2246            let task_count = count_tasks_in_plan_file(&plan_path).unwrap_or(0);
2247
2248            return Some((plan_path, task_count));
2249        }
2250    }
2251    None
2252}
2253
/// Find the most recently created plan file in the plans directory
///
/// Scans `<cwd>/plans` for `.md` files and returns the one with the newest
/// modification time, as a path relative to the current directory when
/// possible. Returns `None` when the directory is absent or unreadable.
fn find_most_recent_plan_file() -> Option<String> {
    let plans_dir = std::env::current_dir().ok()?.join("plans");
    if !plans_dir.exists() {
        return None;
    }

    let mut best: Option<(std::path::PathBuf, std::time::SystemTime)> = None;

    for entry in std::fs::read_dir(&plans_dir).ok()?.flatten() {
        let path = entry.path();
        // Only Markdown plans count.
        if !path.extension().is_some_and(|e| e == "md") {
            continue;
        }
        let Ok(modified) = entry.metadata().and_then(|m| m.modified()) else {
            continue;
        };
        // Keep strictly-newer entries only, so on an mtime tie the first
        // entry encountered wins.
        match &best {
            Some((_, newest)) if modified <= *newest => {}
            _ => best = Some((path, modified)),
        }
    }

    let (path, _) = best?;
    // Report a path relative to the current directory when possible.
    let cwd = std::env::current_dir().unwrap_or_default();
    Some(
        path.strip_prefix(&cwd)
            .map(|p| p.display().to_string())
            .unwrap_or_else(|_| path.display().to_string()),
    )
}
2281
/// Count tasks (checkbox items) in a plan file
///
/// Recognizes Markdown task lines of the form `- [ ]`, `- [x]`, `- [~]`,
/// `- [!]` — optionally indented, with optional whitespace between `-` and
/// `[` (the same set the previous regex `^\s*-\s*\[[ x~!]\]` matched, but
/// without recompiling a regex on every call).
///
/// Returns `None` if the file cannot be read, either at `plan_path` directly
/// or relative to the current directory.
fn count_tasks_in_plan_file(plan_path: &str) -> Option<usize> {
    // Try both relative and absolute paths
    let path = std::path::Path::new(plan_path);
    let content = if path.exists() {
        std::fs::read_to_string(path).ok()?
    } else {
        // Try with current directory
        std::fs::read_to_string(std::env::current_dir().ok()?.join(plan_path)).ok()?
    };

    Some(
        content
            .lines()
            .filter(|line| is_task_checkbox_line(line))
            .count(),
    )
}

/// True if `line` is a Markdown task checkbox: optional leading whitespace,
/// `-`, optional whitespace, then `[STATE]` where STATE is ` `, `x`, `~`, or `!`.
fn is_task_checkbox_line(line: &str) -> bool {
    let Some(rest) = line.trim_start().strip_prefix('-') else {
        return false;
    };
    let mut chars = rest.trim_start().chars();
    matches!(
        (chars.next(), chars.next(), chars.next()),
        (Some('['), Some(' ' | 'x' | '~' | '!'), Some(']'))
    )
}
2304
/// Check if an error is a truncation/JSON parsing error that can be recovered via continuation
///
/// Matches on substrings typical of serde/JSON deserialization failures
/// caused by a response being cut off mid-stream.
fn is_truncation_error(err_str: &str) -> bool {
    const MARKERS: [&str; 4] = [
        "JsonError",
        "EOF while parsing",
        "JSON",
        "unexpected end",
    ];
    MARKERS.iter().any(|marker| err_str.contains(marker))
}
2312
/// Check if error is "input too long" - context exceeds model limit
/// This happens when conversation history grows beyond what the model can handle.
/// Recovery: compact history and retry with reduced context.
fn is_input_too_long_error(err_str: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "too long",
        "Too long",
        "context length",
        "maximum context",
        "exceeds the model",
        "Input is too long",
    ];
    MARKERS.iter().any(|marker| err_str.contains(marker))
}
2324
/// Build a continuation prompt that tells the AI what work was completed
/// and asks it to continue from where it left off
///
/// # Arguments
/// * `original_task` - the user's original request; truncated to 500 chars in the prompt
/// * `completed_tools` - tool calls recorded so far; entries whose `result_summary`
///   contains "IN PROGRESS" are reported as interrupted rather than completed
/// * `agent_thinking` - recorded reasoning snippets; only the last one is echoed back
fn build_continuation_prompt(
    original_task: &str,
    completed_tools: &[ToolCallRecord],
    agent_thinking: &[String],
) -> String {
    use std::collections::HashSet;

    // Group tools by type and extract unique files read.
    // NOTE(review): HashSet iteration order is unspecified, so the listing
    // order of files/dirs below can vary between runs.
    let mut files_read: HashSet<String> = HashSet::new();
    let mut files_written: HashSet<String> = HashSet::new();
    let mut dirs_listed: HashSet<String> = HashSet::new();
    let mut other_tools: Vec<String> = Vec::new();
    let mut in_progress: Vec<String> = Vec::new();

    for tool in completed_tools {
        // Interrupted calls are tracked separately so the model knows they
        // may need re-running.
        let is_in_progress = tool.result_summary.contains("IN PROGRESS");

        if is_in_progress {
            in_progress.push(format!("{}({})", tool.tool_name, tool.args_summary));
            continue;
        }

        match tool.tool_name.as_str() {
            "read_file" => {
                // Extract path from args
                files_read.insert(tool.args_summary.clone());
            }
            "write_file" | "write_files" => {
                files_written.insert(tool.args_summary.clone());
            }
            "list_directory" => {
                dirs_listed.insert(tool.args_summary.clone());
            }
            _ => {
                // Everything else is reported verbatim with abbreviated args.
                other_tools.push(format!(
                    "{}({})",
                    tool.tool_name,
                    truncate_string(&tool.args_summary, 40)
                ));
            }
        }
    }

    let mut prompt = format!(
        "[CONTINUE] Your previous response was interrupted. DO NOT repeat completed work.\n\n\
        Original task: {}\n",
        truncate_string(original_task, 500)
    );

    // Show files already read - CRITICAL for preventing re-reads
    if !files_read.is_empty() {
        prompt.push_str("\n== FILES ALREADY READ (do NOT read again) ==\n");
        for file in &files_read {
            prompt.push_str(&format!("  - {}\n", file));
        }
    }

    if !dirs_listed.is_empty() {
        prompt.push_str("\n== DIRECTORIES ALREADY LISTED ==\n");
        for dir in &dirs_listed {
            prompt.push_str(&format!("  - {}\n", dir));
        }
    }

    if !files_written.is_empty() {
        prompt.push_str("\n== FILES ALREADY WRITTEN ==\n");
        for file in &files_written {
            prompt.push_str(&format!("  - {}\n", file));
        }
    }

    if !other_tools.is_empty() {
        prompt.push_str("\n== OTHER COMPLETED ACTIONS ==\n");
        // Cap at 20 entries to keep the prompt bounded.
        for tool in other_tools.iter().take(20) {
            prompt.push_str(&format!("  - {}\n", tool));
        }
        if other_tools.len() > 20 {
            prompt.push_str(&format!("  ... and {} more\n", other_tools.len() - 20));
        }
    }

    if !in_progress.is_empty() {
        prompt.push_str("\n== INTERRUPTED (may need re-run) ==\n");
        for tool in &in_progress {
            prompt.push_str(&format!("  ⚠ {}\n", tool));
        }
    }

    // Include last thinking context if available
    if let Some(last_thought) = agent_thinking.last() {
        prompt.push_str(&format!(
            "\n== YOUR LAST THOUGHTS ==\n\"{}\"\n",
            truncate_string(last_thought, 300)
        ));
    }

    prompt.push_str("\n== INSTRUCTIONS ==\n");
    prompt.push_str("IMPORTANT: Your previous response was too long and got cut off.\n");
    prompt.push_str("1. Do NOT re-read files listed above - they are already in context.\n");
    prompt.push_str("2. If writing a document, write it in SECTIONS - complete one section now, then continue.\n");
    prompt.push_str("3. Keep your response SHORT and focused. Better to complete small chunks than fail on large ones.\n");
    prompt.push_str("4. If the task involves writing a file, START WRITING NOW - don't explain what you'll do.\n");

    prompt
}
2432
2433/// Run a single query and return the response
2434/// Note: event_bridge is accepted for API consistency but not used in single-query mode
2435pub async fn run_query(
2436    project_path: &Path,
2437    query: &str,
2438    provider: ProviderType,
2439    model: Option<String>,
2440    _event_bridge: Option<crate::server::EventBridge>,
2441) -> AgentResult<String> {
2442    use tools::*;
2443
2444    let project_path_buf = project_path.to_path_buf();
2445
2446    // Background process manager for Prometheus port-forwards (single query context)
2447    let bg_manager = Arc::new(BackgroundProcessManager::new());
2448    // Select prompt based on query type (analysis vs generation)
2449    // For single queries (non-interactive), always use standard mode
2450    let preamble = get_system_prompt(project_path, Some(query), PlanMode::default());
2451    let is_generation = prompts::is_generation_query(query);
2452
2453    match provider {
2454        ProviderType::OpenAI => {
2455            // Use Responses API (default) for reasoning model support
2456            let client = openai::Client::from_env();
2457            let model_name = model.as_deref().unwrap_or("gpt-5.2");
2458
2459            let mut builder = client
2460                .agent(model_name)
2461                .preamble(&preamble)
2462                .max_tokens(4096)
2463                .tool(AnalyzeTool::new(project_path_buf.clone()))
2464                .tool(SecurityScanTool::new(project_path_buf.clone()))
2465                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2466                .tool(HadolintTool::new(project_path_buf.clone()))
2467                .tool(DclintTool::new(project_path_buf.clone()))
2468                .tool(KubelintTool::new(project_path_buf.clone()))
2469                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2470                .tool(K8sCostsTool::new(project_path_buf.clone()))
2471                .tool(K8sDriftTool::new(project_path_buf.clone()))
2472                .tool(HelmlintTool::new(project_path_buf.clone()))
2473                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2474                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2475                .tool(TerraformInstallTool::new())
2476                .tool(ReadFileTool::new(project_path_buf.clone()))
2477                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2478                .tool(WebFetchTool::new())
2479                // Prometheus discovery and connection tools for live K8s analysis
2480                .tool(PrometheusDiscoverTool::new())
2481                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2482                // RAG retrieval tools for compressed tool outputs
2483                .tool(RetrieveOutputTool::new())
2484                .tool(ListOutputsTool::new())
2485                        // Platform tools for project management
2486                        .tool(ListOrganizationsTool::new())
2487                        .tool(ListProjectsTool::new())
2488                        .tool(SelectProjectTool::new())
2489                        .tool(CurrentContextTool::new())
2490                        .tool(OpenProviderSettingsTool::new())
2491                        .tool(CheckProviderConnectionTool::new())
2492                        .tool(ListDeploymentCapabilitiesTool::new())
2493                        .tool(ListHetznerAvailabilityTool::new())
2494                        // Deployment tools for service management
2495                        .tool(CreateDeploymentConfigTool::new())
2496                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
2497                        .tool(ListDeploymentConfigsTool::new())
2498                        .tool(TriggerDeploymentTool::new())
2499                        .tool(GetDeploymentStatusTool::new())
2500                        .tool(ListDeploymentsTool::new())
2501                        .tool(GetServiceLogsTool::new())
2502                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));
2503
2504            // Add generation tools if this is a generation query
2505            if is_generation {
2506                builder = builder
2507                    .tool(WriteFileTool::new(project_path_buf.clone()))
2508                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2509                    .tool(ShellTool::new(project_path_buf.clone()));
2510            }
2511
2512            // Enable reasoning for OpenAI reasoning models
2513            let model_lower = model_name.to_lowercase();
2514            let is_reasoning_model = model_lower.starts_with("gpt-5")
2515                || model_lower.starts_with("gpt5")
2516                || model_lower.starts_with("o1")
2517                || model_lower.starts_with("o3")
2518                || model_lower.starts_with("o4");
2519
2520            let agent = if is_reasoning_model {
2521                let reasoning_params = serde_json::json!({
2522                    "reasoning": {
2523                        "effort": "medium",
2524                        "summary": "detailed"
2525                    }
2526                });
2527                builder.additional_params(reasoning_params).build()
2528            } else {
2529                builder.build()
2530            };
2531
2532            agent
2533                .prompt(query)
2534                .multi_turn(50)
2535                .await
2536                .map_err(|e| AgentError::ProviderError(e.to_string()))
2537        }
2538        ProviderType::Anthropic => {
2539            let client = anthropic::Client::from_env();
2540            let model_name = model.as_deref().unwrap_or("claude-sonnet-4-5-20250929");
2541
2542            // TODO: Extended thinking for Claude is disabled because rig doesn't properly
2543            // handle thinking blocks in multi-turn conversations with tool use.
2544            // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference.
2545
2546            let mut builder = client
2547                .agent(model_name)
2548                .preamble(&preamble)
2549                .max_tokens(4096)
2550                .tool(AnalyzeTool::new(project_path_buf.clone()))
2551                .tool(SecurityScanTool::new(project_path_buf.clone()))
2552                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2553                .tool(HadolintTool::new(project_path_buf.clone()))
2554                .tool(DclintTool::new(project_path_buf.clone()))
2555                .tool(KubelintTool::new(project_path_buf.clone()))
2556                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2557                .tool(K8sCostsTool::new(project_path_buf.clone()))
2558                .tool(K8sDriftTool::new(project_path_buf.clone()))
2559                .tool(HelmlintTool::new(project_path_buf.clone()))
2560                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2561                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2562                .tool(TerraformInstallTool::new())
2563                .tool(ReadFileTool::new(project_path_buf.clone()))
2564                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2565                .tool(WebFetchTool::new())
2566                // Prometheus discovery and connection tools for live K8s analysis
2567                .tool(PrometheusDiscoverTool::new())
2568                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2569                // RAG retrieval tools for compressed tool outputs
2570                .tool(RetrieveOutputTool::new())
2571                .tool(ListOutputsTool::new())
2572                        // Platform tools for project management
2573                        .tool(ListOrganizationsTool::new())
2574                        .tool(ListProjectsTool::new())
2575                        .tool(SelectProjectTool::new())
2576                        .tool(CurrentContextTool::new())
2577                        .tool(OpenProviderSettingsTool::new())
2578                        .tool(CheckProviderConnectionTool::new())
2579                        .tool(ListDeploymentCapabilitiesTool::new())
2580                        .tool(ListHetznerAvailabilityTool::new())
2581                        // Deployment tools for service management
2582                        .tool(CreateDeploymentConfigTool::new())
2583                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
2584                        .tool(ListDeploymentConfigsTool::new())
2585                        .tool(TriggerDeploymentTool::new())
2586                        .tool(GetDeploymentStatusTool::new())
2587                        .tool(ListDeploymentsTool::new())
2588                        .tool(GetServiceLogsTool::new())
2589                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));
2590
2591            // Add generation tools if this is a generation query
2592            if is_generation {
2593                builder = builder
2594                    .tool(WriteFileTool::new(project_path_buf.clone()))
2595                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2596                    .tool(ShellTool::new(project_path_buf.clone()));
2597            }
2598
2599            let agent = builder.build();
2600
2601            agent
2602                .prompt(query)
2603                .multi_turn(50)
2604                .await
2605                .map_err(|e| AgentError::ProviderError(e.to_string()))
2606        }
2607        ProviderType::Bedrock => {
2608            // Bedrock provider via rig-bedrock - same pattern as Anthropic
2609            let client = crate::bedrock::client::Client::from_env();
2610            let model_name = model
2611                .as_deref()
2612                .unwrap_or("global.anthropic.claude-sonnet-4-5-20250929-v1:0");
2613
2614            // Extended thinking for Claude via Bedrock
2615            let thinking_params = serde_json::json!({
2616                "thinking": {
2617                    "type": "enabled",
2618                    "budget_tokens": 16000
2619                }
2620            });
2621
2622            let mut builder = client
2623                .agent(model_name)
2624                .preamble(&preamble)
2625                .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
2626                .tool(AnalyzeTool::new(project_path_buf.clone()))
2627                .tool(SecurityScanTool::new(project_path_buf.clone()))
2628                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2629                .tool(HadolintTool::new(project_path_buf.clone()))
2630                .tool(DclintTool::new(project_path_buf.clone()))
2631                .tool(KubelintTool::new(project_path_buf.clone()))
2632                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2633                .tool(K8sCostsTool::new(project_path_buf.clone()))
2634                .tool(K8sDriftTool::new(project_path_buf.clone()))
2635                .tool(HelmlintTool::new(project_path_buf.clone()))
2636                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2637                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2638                .tool(TerraformInstallTool::new())
2639                .tool(ReadFileTool::new(project_path_buf.clone()))
2640                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2641                .tool(WebFetchTool::new())
2642                // Prometheus discovery and connection tools for live K8s analysis
2643                .tool(PrometheusDiscoverTool::new())
2644                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2645                // RAG retrieval tools for compressed tool outputs
2646                .tool(RetrieveOutputTool::new())
2647                .tool(ListOutputsTool::new())
2648                        // Platform tools for project management
2649                        .tool(ListOrganizationsTool::new())
2650                        .tool(ListProjectsTool::new())
2651                        .tool(SelectProjectTool::new())
2652                        .tool(CurrentContextTool::new())
2653                        .tool(OpenProviderSettingsTool::new())
2654                        .tool(CheckProviderConnectionTool::new())
2655                        .tool(ListDeploymentCapabilitiesTool::new())
2656                        .tool(ListHetznerAvailabilityTool::new())
2657                        // Deployment tools for service management
2658                        .tool(CreateDeploymentConfigTool::new())
2659                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
2660                        .tool(ListDeploymentConfigsTool::new())
2661                        .tool(TriggerDeploymentTool::new())
2662                        .tool(GetDeploymentStatusTool::new())
2663                        .tool(ListDeploymentsTool::new())
2664                        .tool(GetServiceLogsTool::new())
2665                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));
2666
2667            // Add generation tools if this is a generation query
2668            if is_generation {
2669                builder = builder
2670                    .tool(WriteFileTool::new(project_path_buf.clone()))
2671                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2672                    .tool(ShellTool::new(project_path_buf.clone()));
2673            }
2674
2675            let agent = builder.additional_params(thinking_params).build();
2676
2677            agent
2678                .prompt(query)
2679                .multi_turn(50)
2680                .await
2681                .map_err(|e| AgentError::ProviderError(e.to_string()))
2682        }
2683    }
2684}