Skip to main content

syncable_cli/agent/
mod.rs

1//! Agent module for interactive AI-powered CLI assistance
2//!
3//! This module provides an agent layer using the Rig library that allows users
4//! to interact with the CLI through natural language conversations.
5//!
6//! # Features
7//!
8//! - **Conversation History**: Maintains context across multiple turns
9//! - **Automatic Compaction**: Compresses old history when token count exceeds threshold
10//! - **Tool Tracking**: Records tool calls for better context preservation
11//!
12//! # Usage
13//!
14//! ```bash
15//! # Interactive mode
16//! sync-ctl chat
17//!
18//! # With specific provider
19//! sync-ctl chat --provider openai --model gpt-5.2
20//!
21//! # Single query
22//! sync-ctl chat --query "What security issues does this project have?"
23//! ```
24//!
25//! # Interactive Commands
26//!
27//! - `/model` - Switch to a different AI model
28//! - `/provider` - Switch provider (prompts for API key if needed)
29//! - `/help` - Show available commands
30//! - `/clear` - Clear conversation history
31//! - `/exit` - Exit the chat
32
33pub mod commands;
34pub mod compact;
35pub mod history;
36pub mod ide;
37pub mod persistence;
38pub mod prompts;
39pub mod session;
40pub mod tools;
41pub mod ui;
42use colored::Colorize;
43use commands::TokenUsage;
44use history::{ConversationHistory, ToolCallRecord};
45use ide::IdeClient;
46use rig::{
47    client::{CompletionClient, ProviderClient},
48    completion::Prompt,
49    providers::{anthropic, openai},
50};
51use session::{ChatSession, PlanMode};
52use std::path::Path;
53use std::sync::Arc;
54use tokio::sync::Mutex as TokioMutex;
55use ui::{ResponseFormatter, ToolDisplayHook};
56
/// Provider type for the agent
///
/// Parsed from user input via `FromStr` (see the aliases accepted there) and
/// rendered back as a lowercase name via `Display`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ProviderType {
    /// OpenAI (default provider)
    #[default]
    OpenAI,
    /// Anthropic (Claude models)
    Anthropic,
    /// AWS Bedrock
    Bedrock,
}
65
66impl std::fmt::Display for ProviderType {
67    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
68        match self {
69            ProviderType::OpenAI => write!(f, "openai"),
70            ProviderType::Anthropic => write!(f, "anthropic"),
71            ProviderType::Bedrock => write!(f, "bedrock"),
72        }
73    }
74}
75
76impl std::str::FromStr for ProviderType {
77    type Err = String;
78
79    fn from_str(s: &str) -> Result<Self, Self::Err> {
80        match s.to_lowercase().as_str() {
81            "openai" => Ok(ProviderType::OpenAI),
82            "anthropic" => Ok(ProviderType::Anthropic),
83            "bedrock" | "aws" | "aws-bedrock" => Ok(ProviderType::Bedrock),
84            _ => Err(format!(
85                "Unknown provider: {}. Use: openai, anthropic, or bedrock",
86                s
87            )),
88        }
89    }
90}
91
/// Error types for the agent
#[derive(Debug, thiserror::Error)]
pub enum AgentError {
    /// No API key is configured; the payload names the environment variable to set.
    #[error("Missing API key. Set {0} environment variable.")]
    MissingApiKey(String),

    /// Error originating from the LLM provider layer (also used to wrap
    /// AG-UI server failures — see `run_agent_server`).
    #[error("Provider error: {0}")]
    ProviderError(String),

    /// Error raised while executing an agent tool.
    #[error("Tool error: {0}")]
    ToolError(String),
}

/// Convenience alias for results produced by the agent layer.
pub type AgentResult<T> = Result<T, AgentError>;
106
107// =============================================================================
108// AG-UI State Types
109// =============================================================================
110
/// Agent state for AG-UI state synchronization
///
/// Serialized with `serde_json` and emitted as a state snapshot to connected
/// frontends. Field names become the JSON keys; state deltas reference them
/// by JSON-pointer path (e.g. `/plan_mode`), so renaming a field is a
/// protocol-visible change.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AgentState {
    /// Project being analyzed
    pub project_path: String,
    /// LLM provider name
    pub provider: String,
    /// Model being used
    pub model: String,
    /// Whether plan mode is active
    pub plan_mode: bool,
    /// Token usage statistics
    pub token_usage: TokenUsageState,
    /// Conversation state
    pub conversation: ConversationState,
}
127
/// Token usage state for AG-UI
///
/// Populated from the session's token counters in `build_agent_state`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct TokenUsageState {
    /// Estimated input tokens
    pub input_tokens: usize,
    /// Estimated output tokens
    pub output_tokens: usize,
    /// Total tokens (input + output)
    pub total_tokens: usize,
}
138
/// Conversation state for AG-UI
#[derive(Debug, Clone, serde::Serialize)]
pub struct ConversationState {
    /// Number of conversation turns
    pub turn_count: usize,
    /// Whether history has been compacted (derived from the history's
    /// status string in `build_agent_state`)
    pub has_compacted: bool,
}
147
148/// Build AgentState from session and conversation history
149fn build_agent_state(session: &ChatSession, history: &ConversationHistory) -> AgentState {
150    // Check if history has been compacted (status contains "compacted")
151    let has_compacted = history.status().contains("compacted");
152    let input = session.token_usage.prompt_tokens as usize;
153    let output = session.token_usage.completion_tokens as usize;
154
155    AgentState {
156        project_path: session.project_path.display().to_string(),
157        provider: session.provider.to_string(),
158        model: session.model.clone(),
159        plan_mode: session.plan_mode.is_planning(),
160        token_usage: TokenUsageState {
161            input_tokens: input,
162            output_tokens: output,
163            total_tokens: input + output,
164        },
165        conversation: ConversationState {
166            turn_count: history.turn_count(),
167            has_compacted,
168        },
169    }
170}
171
172/// Get the system prompt for the agent based on query type and plan mode
173fn get_system_prompt(project_path: &Path, query: Option<&str>, plan_mode: PlanMode) -> String {
174    // In planning mode, use the read-only exploration prompt
175    if plan_mode.is_planning() {
176        return prompts::get_planning_prompt(project_path);
177    }
178
179    if let Some(q) = query {
180        // First check if it's a code development task (highest priority)
181        if prompts::is_code_development_query(q) {
182            return prompts::get_code_development_prompt(project_path);
183        }
184        // Then check if it's DevOps generation (Docker, Terraform, Helm)
185        if prompts::is_generation_query(q) {
186            return prompts::get_devops_prompt(project_path, Some(q));
187        }
188    }
189    // Default to analysis prompt
190    prompts::get_analysis_prompt(project_path)
191}
192
193/// Run the agent as a dedicated AG-UI server (headless mode for containers/deployments).
194///
195/// This starts the AG-UI server without interactive stdin, accepting connections
196/// from frontends via SSE or WebSocket. The agent processes messages received
197/// through the AG-UI protocol.
198///
199/// # Arguments
200///
201/// * `project_path` - Path to the project directory
202/// * `provider` - LLM provider to use
203/// * `model` - Optional model override
204/// * `host` - Host address to bind to
205/// * `port` - Port number to listen on
206pub async fn run_agent_server(
207    project_path: &Path,
208    provider: ProviderType,
209    model: Option<String>,
210    host: &str,
211    port: u16,
212) -> AgentResult<()> {
213    use crate::server::{AgUiConfig, AgUiServer, ProcessorConfig};
214
215    // Configure the agent processor with provider, model, and project path
216    // Use regional model IDs (no global. prefix) for wider availability
217    let default_model = match provider {
218        // Claude 3.5 Sonnet v2 is widely available across regions
219        ProviderType::Bedrock => "anthropic.claude-3-5-sonnet-20241022-v2:0".to_string(),
220        ProviderType::Anthropic => "claude-3-5-sonnet-20241022".to_string(),
221        ProviderType::OpenAI => "gpt-4o".to_string(),
222    };
223    let processor_config = ProcessorConfig::new()
224        .with_provider(&provider.to_string())
225        .with_model(&model.unwrap_or(default_model))
226        .with_project_path(project_path);
227
228    let config = AgUiConfig::new()
229        .port(port)
230        .host(host)
231        .with_processor_config(processor_config);
232    let server = AgUiServer::new(config);
233
234    println!("AG-UI agent server listening on http://{}:{}", host, port);
235    println!("Project path: {}", project_path.display());
236    println!("Connect frontends via SSE (/sse) or WebSocket (/ws)");
237    println!("Press Ctrl+C to stop the server");
238
239    // Run server (blocks until shutdown signal)
240    server
241        .run()
242        .await
243        .map_err(|e| AgentError::ProviderError(e.to_string()))
244}
245
246/// Run the agent in interactive mode with custom REPL supporting /model and /provider commands
247pub async fn run_interactive(
248    project_path: &Path,
249    provider: ProviderType,
250    model: Option<String>,
251    event_bridge: Option<crate::server::EventBridge>,
252) -> AgentResult<()> {
253    use tools::*;
254
255    let mut session = ChatSession::new(project_path, provider, model);
256
257    // Store event bridge for use in tool hooks
258    let event_bridge = event_bridge;
259
260    // Shared background process manager for Prometheus port-forwards
261    let bg_manager = Arc::new(BackgroundProcessManager::new());
262
263    // Terminal layout for split screen is disabled for now - see notes below
264    // let terminal_layout = ui::TerminalLayout::new();
265    // let layout_state = terminal_layout.state();
266
267    // Initialize conversation history with compaction support
268    let mut conversation_history = ConversationHistory::new();
269
270    // Initialize IDE client for native diff viewing
271    let ide_client: Option<Arc<TokioMutex<IdeClient>>> = {
272        let mut client = IdeClient::new().await;
273        if client.is_ide_available() {
274            match client.connect().await {
275                Ok(()) => {
276                    println!(
277                        "{} Connected to {} IDE companion",
278                        "✓".green(),
279                        client.ide_name().unwrap_or("VS Code")
280                    );
281                    Some(Arc::new(TokioMutex::new(client)))
282                }
283                Err(e) => {
284                    // IDE detected but companion not running or connection failed
285                    println!("{} IDE companion not connected: {}", "!".yellow(), e);
286                    None
287                }
288            }
289        } else {
290            println!(
291                "{} No IDE detected (TERM_PROGRAM={})",
292                "·".dimmed(),
293                std::env::var("TERM_PROGRAM").unwrap_or_default()
294            );
295            None
296        }
297    };
298
299    // Load API key from config file to env if not already set
300    ChatSession::load_api_key_to_env(session.provider);
301
302    // Check if API key is configured, prompt if not
303    if !ChatSession::has_api_key(session.provider) {
304        ChatSession::prompt_api_key(session.provider)?;
305    }
306
307    session.print_banner();
308
309    // Display platform context if a project is selected
310    if session.platform_session.is_project_selected() {
311        println!(
312            "{}",
313            format!(
314                "Platform context: {}",
315                session.platform_session.display_context()
316            )
317            .dimmed()
318        );
319    }
320
321    // NOTE: Terminal layout with ANSI scroll regions is disabled for now.
322    // The scroll region approach conflicts with the existing input/output flow.
323    // TODO: Implement proper scroll region support that integrates with the input handler.
324    // For now, we rely on the pause/resume mechanism in progress indicator.
325    //
326    // if let Err(e) = terminal_layout.init() {
327    //     eprintln!(
328    //         "{}",
329    //         format!("Note: Terminal layout initialization failed: {}. Using fallback mode.", e)
330    //             .dimmed()
331    //     );
332    // }
333
334    // Raw Rig messages for multi-turn - preserves Reasoning blocks for thinking
335    // Our ConversationHistory only stores text summaries, but rig needs full Message structure
336    let mut raw_chat_history: Vec<rig::completion::Message> = Vec::new();
337
338    // Pending input for auto-continue after plan creation
339    let mut pending_input: Option<String> = None;
340    // Auto-accept mode for plan execution (skips write confirmations)
341    let mut auto_accept_writes = false;
342
343    // Initialize session recorder for conversation persistence
344    let mut session_recorder = persistence::SessionRecorder::new(project_path);
345
346    // Track if we exit due to an error (for AG-UI error events)
347    let mut exit_error: Option<String> = None;
348
349    // Emit AG-UI RunStarted event and initial state for connected frontends
350    if let Some(ref bridge) = event_bridge {
351        bridge.start_run().await;
352        // Emit initial agent state snapshot
353        let state = build_agent_state(&session, &conversation_history);
354        if let Ok(state_json) = serde_json::to_value(&state) {
355            bridge.emit_state_snapshot(state_json).await;
356        }
357    }
358
359    loop {
360        // Show conversation status if we have history
361        if !conversation_history.is_empty() {
362            println!(
363                "{}",
364                format!("  💬 Context: {}", conversation_history.status()).dimmed()
365            );
366        }
367
368        // Check for pending input (from plan menu selection)
369        let input = if let Some(pending) = pending_input.take() {
370            // Show what we're executing
371            println!("{} {}", "→".cyan(), pending.dimmed());
372            pending
373        } else {
374            // New user turn - reset auto-accept mode from previous plan execution
375            auto_accept_writes = false;
376
377            // Read user input (returns InputResult)
378            let input_result = match session.read_input() {
379                Ok(result) => result,
380                Err(_) => break,
381            };
382
383            // Handle the input result
384            match input_result {
385                ui::InputResult::Submit(text) => ChatSession::process_submitted_text(&text),
386                ui::InputResult::Cancel | ui::InputResult::Exit => break,
387                ui::InputResult::TogglePlanMode => {
388                    // Toggle planning mode - minimal feedback, no extra newlines
389                    let new_mode = session.toggle_plan_mode();
390                    if new_mode.is_planning() {
391                        println!("{}", "★ plan mode".yellow());
392                    } else {
393                        println!("{}", "▶ standard mode".green());
394                    }
395                    // Emit AG-UI state delta for plan mode change
396                    if let Some(ref bridge) = event_bridge {
397                        bridge
398                            .emit_state_delta(vec![serde_json::json!({
399                                "op": "replace",
400                                "path": "/plan_mode",
401                                "value": new_mode.is_planning()
402                            })])
403                            .await;
404                    }
405                    continue;
406                }
407            }
408        };
409
410        if input.is_empty() {
411            continue;
412        }
413
414        // Check for commands
415        if ChatSession::is_command(&input) {
416            // Special handling for /clear to also clear conversation history
417            if input.trim().to_lowercase() == "/clear" || input.trim().to_lowercase() == "/c" {
418                conversation_history.clear();
419                raw_chat_history.clear();
420            }
421            match session.process_command(&input) {
422                Ok(true) => {
423                    // Check if /resume loaded a session
424                    if let Some(record) = session.pending_resume.take() {
425                        // Display previous messages
426                        println!();
427                        println!("{}", "─── Previous Conversation ───".dimmed());
428                        for msg in &record.messages {
429                            match msg.role {
430                                persistence::MessageRole::User => {
431                                    println!();
432                                    println!(
433                                        "{} {}",
434                                        "You:".cyan().bold(),
435                                        truncate_string(&msg.content, 500)
436                                    );
437                                }
438                                persistence::MessageRole::Assistant => {
439                                    println!();
440                                    // Show tool calls if any (same format as live display)
441                                    if let Some(ref tools) = msg.tool_calls {
442                                        for tc in tools {
443                                            // Match live tool display: green dot for completed, cyan bold name
444                                            if tc.args_summary.is_empty() {
445                                                println!(
446                                                    "{} {}",
447                                                    "●".green(),
448                                                    tc.name.cyan().bold()
449                                                );
450                                            } else {
451                                                println!(
452                                                    "{} {}({})",
453                                                    "●".green(),
454                                                    tc.name.cyan().bold(),
455                                                    truncate_string(&tc.args_summary, 50).dimmed()
456                                                );
457                                            }
458                                        }
459                                    }
460                                    // Show response (same ResponseFormatter as live)
461                                    if !msg.content.is_empty() {
462                                        ResponseFormatter::print_response(&truncate_string(
463                                            &msg.content,
464                                            1000,
465                                        ));
466                                    }
467                                }
468                                persistence::MessageRole::System => {
469                                    // Skip system messages in display
470                                }
471                            }
472                        }
473                        println!("{}", "─── End of History ───".dimmed());
474                        println!();
475
476                        // Try to restore from history_snapshot (new format with full context)
477                        let restored_from_snapshot = if let Some(history_json) =
478                            &record.history_snapshot
479                        {
480                            match ConversationHistory::from_json(history_json) {
481                                Ok(restored) => {
482                                    conversation_history = restored;
483                                    // Rebuild raw_chat_history from restored conversation_history
484                                    raw_chat_history = conversation_history.to_messages();
485                                    println!(
486                                            "{}",
487                                            "  ✓ Restored full conversation context (including compacted history)".green()
488                                        );
489                                    true
490                                }
491                                Err(e) => {
492                                    eprintln!(
493                                        "{}",
494                                        format!(
495                                            "  Warning: Failed to restore history snapshot: {}",
496                                            e
497                                        )
498                                        .yellow()
499                                    );
500                                    false
501                                }
502                            }
503                        } else {
504                            false
505                        };
506
507                        // Fallback: Load from messages (old format or if snapshot failed)
508                        if !restored_from_snapshot {
509                            // Load messages into raw_chat_history for AI context
510                            for msg in &record.messages {
511                                match msg.role {
512                                    persistence::MessageRole::User => {
513                                        raw_chat_history.push(rig::completion::Message::User {
514                                            content: rig::one_or_many::OneOrMany::one(
515                                                rig::completion::message::UserContent::text(
516                                                    &msg.content,
517                                                ),
518                                            ),
519                                        });
520                                    }
521                                    persistence::MessageRole::Assistant => {
522                                        raw_chat_history
523                                            .push(rig::completion::Message::Assistant {
524                                            id: Some(msg.id.clone()),
525                                            content: rig::one_or_many::OneOrMany::one(
526                                                rig::completion::message::AssistantContent::text(
527                                                    &msg.content,
528                                                ),
529                                            ),
530                                        });
531                                    }
532                                    persistence::MessageRole::System => {}
533                                }
534                            }
535
536                            // Load into conversation_history with tool calls from message records
537                            for msg in &record.messages {
538                                if msg.role == persistence::MessageRole::User {
539                                    // Find the next assistant message
540                                    let (response, tool_calls) = record
541                                        .messages
542                                        .iter()
543                                        .skip_while(|m| m.id != msg.id)
544                                        .skip(1)
545                                        .find(|m| m.role == persistence::MessageRole::Assistant)
546                                        .map(|m| {
547                                            let tcs = m.tool_calls.as_ref().map(|calls| {
548                                                calls
549                                                    .iter()
550                                                    .map(|tc| history::ToolCallRecord {
551                                                        tool_name: tc.name.clone(),
552                                                        args_summary: tc.args_summary.clone(),
553                                                        result_summary: tc.result_summary.clone(),
554                                                        tool_id: None,
555                                                        droppable: false,
556                                                    })
557                                                    .collect::<Vec<_>>()
558                                            });
559                                            (m.content.clone(), tcs.unwrap_or_default())
560                                        })
561                                        .unwrap_or_default();
562
563                                    conversation_history.add_turn(
564                                        msg.content.clone(),
565                                        response,
566                                        tool_calls,
567                                    );
568                                }
569                            }
570                            println!(
571                                "{}",
572                                format!(
573                                    "  ✓ Loaded {} messages (legacy format).",
574                                    record.messages.len()
575                                )
576                                .green()
577                            );
578                        }
579                        println!();
580                    }
581                    continue;
582                }
583                Ok(false) => break, // /exit
584                Err(e) => {
585                    eprintln!("{}", format!("Error: {}", e).red());
586                    continue;
587                }
588            }
589        }
590
591        // Check API key before making request (in case provider changed)
592        if !ChatSession::has_api_key(session.provider) {
593            eprintln!(
594                "{}",
595                "No API key configured. Use /provider to set one.".yellow()
596            );
597            continue;
598        }
599
600        // Check if compaction is needed before making the request
601        if conversation_history.needs_compaction() {
602            println!("{}", "  📦 Compacting conversation history...".dimmed());
603            if let Some(summary) = conversation_history.compact() {
604                println!(
605                    "{}",
606                    format!("  ✓ Compressed {} turns", summary.matches("Turn").count()).dimmed()
607                );
608            }
609        }
610
611        // Pre-request check: estimate if we're approaching context limit
612        // Check raw_chat_history (actual messages) not conversation_history
613        // because conversation_history may be out of sync
614        let estimated_input_tokens = estimate_raw_history_tokens(&raw_chat_history)
615            + input.len() / 4  // New input
616            + 5000; // System prompt overhead estimate
617
618        if estimated_input_tokens > 150_000 {
619            println!(
620                "{}",
621                "  ⚠ Large context detected. Pre-truncating...".yellow()
622            );
623
624            let old_count = raw_chat_history.len();
625            // Keep last 20 messages when approaching limit
626            if raw_chat_history.len() > 20 {
627                let drain_count = raw_chat_history.len() - 20;
628                raw_chat_history.drain(0..drain_count);
629                // Ensure history starts with User message for OpenAI Responses API compatibility
630                ensure_history_starts_with_user(&mut raw_chat_history);
631                // Preserve compacted summary while clearing turns to stay in sync
632                conversation_history.clear_turns_preserve_context();
633                println!(
634                    "{}",
635                    format!(
636                        "  ✓ Truncated {} → {} messages",
637                        old_count,
638                        raw_chat_history.len()
639                    )
640                    .dimmed()
641                );
642            }
643        }
644
645        // Retry loop for automatic error recovery
646        // MAX_RETRIES is for failures without progress
647        // MAX_CONTINUATIONS is for truncations WITH progress (more generous)
648        // TOOL_CALL_CHECKPOINT is the interval at which we ask user to confirm
649        // MAX_TOOL_CALLS is the absolute maximum (300 = 6 checkpoints x 50)
650        const MAX_RETRIES: u32 = 3;
651        const MAX_CONTINUATIONS: u32 = 10;
652        const _TOOL_CALL_CHECKPOINT: usize = 50;
653        const MAX_TOOL_CALLS: usize = 300;
654        let mut retry_attempt = 0;
655        let mut continuation_count = 0;
656        let mut total_tool_calls: usize = 0;
657        let mut auto_continue_tools = false; // User can select "always" to skip future prompts
658        let mut current_input = input.clone();
659        let mut succeeded = false;
660
661        // Emit AG-UI step event for processing
662        if let Some(ref bridge) = event_bridge {
663            bridge.start_step("processing").await;
664        }
665
666        while retry_attempt < MAX_RETRIES && continuation_count < MAX_CONTINUATIONS && !succeeded {
667            // Log if this is a continuation attempt
668            if continuation_count > 0 {
669                eprintln!("{}", "  📡 Sending continuation request...".dimmed());
670            }
671
672            // Create hook for Claude Code style tool display
673            let hook = ToolDisplayHook::new();
674
675            // Create progress indicator for visual feedback during generation
676            let progress = ui::GenerationIndicator::new();
677            // Layout connection disabled - using inline progress mode
678            // progress.state().set_layout(layout_state.clone());
679            hook.set_progress_state(progress.state()).await;
680
681            // Connect AG-UI EventBridge if provided (for streaming tool events to frontends)
682            if let Some(ref bridge) = event_bridge {
683                hook.set_event_bridge(bridge.clone()).await;
684            }
685
686            let project_path_buf = session.project_path.clone();
687            // Select prompt based on query type (analysis vs generation) and plan mode
688            let preamble = get_system_prompt(
689                &session.project_path,
690                Some(&current_input),
691                session.plan_mode,
692            );
693            let is_generation = prompts::is_generation_query(&current_input);
694            let is_planning = session.plan_mode.is_planning();
695
696            // Note: using raw_chat_history directly which preserves Reasoning blocks
697            // This is needed for extended thinking to work with multi-turn conversations
698
699            // Get progress state for interrupt detection
700            let progress_state = progress.state();
701
702            // Use tokio::select! to race the API call against Ctrl+C
703            // This allows immediate cancellation, not just between tool calls
704            let mut user_interrupted = false;
705
706            // Emit AG-UI thinking event before LLM call
707            if let Some(ref bridge) = event_bridge {
708                bridge.start_thinking(Some("Generating response")).await;
709            }
710
711            // API call with Ctrl+C interrupt support
            // API call with Ctrl+C interrupt support.
            // Races the whole multi-turn agent run against SIGINT; whichever completes
            // first wins and the other future is dropped.
            let response = tokio::select! {
                biased; // Check ctrl_c first for faster response

                _ = tokio::signal::ctrl_c() => {
                    user_interrupted = true;
                    Err::<String, String>("User cancelled".to_string())
                }

                result = async {
                    // NOTE(review): the three provider arms below register an identical base
                    // toolset and identical planning/generation tool blocks — keep them in
                    // sync when adding tools (a shared helper would need to be generic over
                    // each provider's builder type; TODO consider deduplicating).
                    match session.provider {
                ProviderType::OpenAI => {
                    // Use Responses API (default) for reasoning model support.
                    // rig-core 0.28+ handles Reasoning items properly in multi-turn.
                    let client = openai::Client::from_env();

                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(4096)
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        .tool(ListHetznerAvailabilityTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new())
                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools including file writes and plan execution
                        // Attach the IDE client (when present) so writes can surface as IDE diffs.
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    // Enable reasoning for OpenAI reasoning models (GPT-5.x, O1, O3, O4)
                    // Detection is by model-name prefix on the lowercased id.
                    let model_lower = session.model.to_lowercase();
                    let is_reasoning_model = model_lower.starts_with("gpt-5")
                        || model_lower.starts_with("gpt5")
                        || model_lower.starts_with("o1")
                        || model_lower.starts_with("o3")
                        || model_lower.starts_with("o4");

                    let agent = if is_reasoning_model {
                        let reasoning_params = serde_json::json!({
                            "reasoning": {
                                "effort": "medium",
                                "summary": "detailed"
                            }
                        });
                        builder.additional_params(reasoning_params).build()
                    } else {
                        builder.build()
                    };

                    // Use multi_turn with Responses API
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
                ProviderType::Anthropic => {
                    let client = anthropic::Client::from_env();

                    // TODO: Extended thinking for Claude is disabled because rig-bedrock/rig-anthropic
                    // don't properly handle thinking blocks in multi-turn conversations with tool use.
                    // When thinking is enabled, ALL assistant messages must start with thinking blocks
                    // BEFORE tool_use blocks, but rig doesn't preserve/replay these.
                    // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference impl.

                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(4096)
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        .tool(ListHetznerAvailabilityTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new())
                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools including file writes and plan execution
                        // Attach the IDE client (when present) so writes can surface as IDE diffs.
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    let agent = builder.build();

                    // Allow up to 50 tool call turns for complex generation tasks
                    // Use hook to display tool calls as they happen
                    // Pass conversation history for context continuity
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
                ProviderType::Bedrock => {
                    // Bedrock provider via rig-bedrock - same pattern as OpenAI/Anthropic
                    let client = crate::bedrock::client::Client::from_env();

                    // Extended thinking for Claude models via Bedrock
                    // This enables Claude to show its reasoning process before responding.
                    // Requires vendored rig-bedrock that preserves Reasoning blocks with tool calls.
                    // Extended thinking budget - reduced to help with rate limits
                    // 8000 is enough for most tasks, increase to 16000 for complex analysis
                    let thinking_params = serde_json::json!({
                        "thinking": {
                            "type": "enabled",
                            "budget_tokens": 8000
                        }
                    });

                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        .tool(ListHetznerAvailabilityTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new())
                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools including file writes and plan execution
                        // Attach the IDE client (when present) so writes can surface as IDE diffs.
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    // Add thinking params for extended reasoning
                    builder = builder.additional_params(thinking_params);

                    let agent = builder.build();

                    // Use same multi-turn pattern as OpenAI/Anthropic
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                    }
                // Flatten provider-specific errors to String; downstream handling
                // distinguishes cases by substring matching on this text.
                }.map_err(|e| e.to_string())
            } => result
            };
1054
            // Stop the progress indicator before handling the response
            progress.stop().await;

            // End AG-UI thinking event
            if let Some(ref bridge) = event_bridge {
                bridge.end_thinking().await;
            }

            // Suppress unused variable warnings
            // NOTE(review): user_interrupted is never read for control flow — the Err arm
            // below detects cancellation by matching "cancelled" in the error string.
            let _ = (&progress_state, user_interrupted);

            match response {
                Ok(text) => {
                    // Emit AG-UI text message event (for connected frontends)
                    if let Some(ref bridge) = event_bridge {
                        bridge.emit_message(&text).await;
                    }

                    // Show final response
                    println!();
                    ResponseFormatter::print_response(&text);

                    // Track token usage - use actual from hook if available, else estimate
                    let hook_usage = hook.get_usage().await;
                    if hook_usage.has_data() {
                        // Use actual token counts from API response
                        session
                            .token_usage
                            .add_actual(hook_usage.input_tokens, hook_usage.output_tokens);
                    } else {
                        // Fall back to estimation when API doesn't provide usage
                        // NOTE(review): estimates from `input`, while the API call above used
                        // `current_input` — confirm these are the same value on every path.
                        let prompt_tokens = TokenUsage::estimate_tokens(&input);
                        let completion_tokens = TokenUsage::estimate_tokens(&text);
                        session
                            .token_usage
                            .add_estimated(prompt_tokens, completion_tokens);
                    }
                    // Reset hook usage for next request batch
                    hook.reset_usage().await;

                    // Show context indicator like Forge: [model/~tokens]
                    // e.g. "org/model:tag" -> "model" (last '/' segment, before any ':').
                    let model_short = session
                        .model
                        .split('/')
                        .next_back()
                        .unwrap_or(&session.model)
                        .split(':')
                        .next()
                        .unwrap_or(&session.model);
                    println!();
                    println!(
                        "  {}[{}/{}]{}",
                        ui::colors::ansi::DIM,
                        model_short,
                        session.token_usage.format_compact(),
                        ui::colors::ansi::RESET
                    );

                    // Emit AG-UI state update with new token counts
                    if let Some(ref bridge) = event_bridge {
                        let state = build_agent_state(&session, &conversation_history);
                        if let Ok(state_json) = serde_json::to_value(&state) {
                            bridge.emit_state_snapshot(state_json).await;
                        }
                    }

                    // Extract tool calls from the hook state for history tracking
                    let tool_calls = extract_tool_calls_from_hook(&hook).await;
                    let batch_tool_count = tool_calls.len();
                    total_tool_calls += batch_tool_count;

                    // Show tool call summary if significant
                    if batch_tool_count > 10 {
                        println!(
                            "{}",
                            format!(
                                "  ✓ Completed with {} tool calls ({} total this session)",
                                batch_tool_count, total_tool_calls
                            )
                            .dimmed()
                        );
                    }

                    // Add to conversation history with tool call records
                    conversation_history.add_turn(input.clone(), text.clone(), tool_calls.clone());

                    // Check if this heavy turn requires immediate compaction
                    // This helps prevent context overflow in subsequent requests
                    if conversation_history.needs_compaction() {
                        println!("{}", "  📦 Compacting conversation history...".dimmed());
                        if let Some(summary) = conversation_history.compact() {
                            println!(
                                "{}",
                                format!("  ✓ Compressed {} turns", summary.matches("Turn").count())
                                    .dimmed()
                            );
                        }
                    }

                    // Simplify history for OpenAI Responses API reasoning models
                    // Keep only User text and Assistant text - strip reasoning, tool calls, tool results
                    // This prevents pairing errors like "rs_... without its required following item"
                    // and "fc_... without its required reasoning item"
                    if session.provider == ProviderType::OpenAI {
                        simplify_history_for_openai_reasoning(&mut raw_chat_history);
                    }

                    // Also update legacy session history for compatibility
                    session.history.push(("user".to_string(), input.clone()));
                    session
                        .history
                        .push(("assistant".to_string(), text.clone()));

                    // Record to persistent session storage (includes full history snapshot)
                    session_recorder.record_user_message(&input);
                    session_recorder.record_assistant_message(&text, Some(&tool_calls));
                    // Persistence failure is non-fatal: warn and continue the chat.
                    if let Err(e) = session_recorder.save_with_history(&conversation_history) {
                        eprintln!(
                            "{}",
                            format!("  Warning: Failed to save session: {}", e).dimmed()
                        );
                    }

                    // Check if plan_create was called - show interactive menu
                    if let Some(plan_info) = find_plan_create_call(&tool_calls) {
                        println!(); // Space before menu

                        // Show the plan action menu (don't switch modes yet - let user choose)
                        // Chosen actions queue a follow-up prompt via pending_input so the
                        // next loop iteration executes/modifies the plan automatically.
                        match ui::show_plan_action_menu(&plan_info.0, plan_info.1) {
                            ui::PlanActionResult::ExecuteAutoAccept => {
                                // Now switch to standard mode for execution
                                if session.plan_mode.is_planning() {
                                    session.plan_mode = session.plan_mode.toggle();
                                }
                                auto_accept_writes = true;
                                pending_input = Some(format!(
                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order. Auto-accept all file writes.",
                                    plan_info.0
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::ExecuteWithReview => {
                                // Now switch to standard mode for execution
                                if session.plan_mode.is_planning() {
                                    session.plan_mode = session.plan_mode.toggle();
                                }
                                pending_input = Some(format!(
                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order.",
                                    plan_info.0
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::ChangePlan(feedback) => {
                                // Stay in plan mode for modifications
                                pending_input = Some(format!(
                                    "Please modify the plan at '{}'. User feedback: {}",
                                    plan_info.0, feedback
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::Cancel => {
                                // Just complete normally, don't execute
                                succeeded = true;
                            }
                        }
                    } else {
                        succeeded = true;
                    }
                }
                Err(e) => {
                    // Error classification is substring-based on the stringified error
                    // (cancellation, then max-depth); anything else falls through below.
                    let err_str = e.to_string();

                    println!();

                    // Check if this was a user-initiated cancellation (Ctrl+C)
                    if err_str.contains("cancelled") || err_str.contains("Cancelled") {
                        // Extract any completed work before cancellation
                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
                        let tool_count = completed_tools.len();

                        eprintln!("{}", "⚠ Generation interrupted.".yellow());
                        if tool_count > 0 {
                            eprintln!(
                                "{}",
                                format!("  {} tool calls completed before interrupt.", tool_count)
                                    .dimmed()
                            );
                            // Add partial progress to history
                            conversation_history.add_turn(
                                current_input.clone(),
                                format!("[Interrupted after {} tool calls]", tool_count),
                                completed_tools,
                            );
                        }
                        eprintln!("{}", "  Type your next message to continue.".dimmed());

                        // Don't retry, don't mark as succeeded - just break to return to prompt
                        break;
                    }

                    // Check if this is a max depth error - handle as checkpoint
                    if err_str.contains("MaxDepth")
                        || err_str.contains("max_depth")
                        || err_str.contains("reached limit")
                    {
                        // Extract what was done before hitting the limit
                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
                        let batch_tool_count = completed_tools.len();
                        total_tool_calls += batch_tool_count;

                        eprintln!("{}", format!(
                            "⚠ Reached {} tool calls this batch ({} total). Maximum allowed: {}",
                            batch_tool_count, total_tool_calls, MAX_TOOL_CALLS
                        ).yellow());

                        // Check if we've hit the absolute maximum
                        if total_tool_calls >= MAX_TOOL_CALLS {
                            eprintln!(
                                "{}",
                                format!("Maximum tool call limit ({}) reached.", MAX_TOOL_CALLS)
                                    .red()
                            );
                            eprintln!(
                                "{}",
                                "The task is too complex. Try breaking it into smaller parts."
                                    .dimmed()
                            );
                            break;
                        }

                        // Ask user if they want to continue (unless auto-continue is enabled)
                        let should_continue = if auto_continue_tools {
                            eprintln!(
1289                                "{}",
1290                                "  Auto-continuing (you selected 'always')...".dimmed()
1291                            );
1292                            true
1293                        } else {
1294                            eprintln!(
1295                                "{}",
1296                                "Excessive tool calls used. Want to continue?".yellow()
1297                            );
1298                            eprintln!(
1299                                "{}",
1300                                "  [y] Yes, continue  [n] No, stop  [a] Always continue".dimmed()
1301                            );
1302                            print!("  > ");
1303                            let _ = std::io::Write::flush(&mut std::io::stdout());
1304
1305                            // Read user input
1306                            let mut response = String::new();
1307                            match std::io::stdin().read_line(&mut response) {
1308                                Ok(_) => {
1309                                    let resp = response.trim().to_lowercase();
1310                                    if resp == "a" || resp == "always" {
1311                                        auto_continue_tools = true;
1312                                        true
1313                                    } else {
1314                                        resp == "y" || resp == "yes" || resp.is_empty()
1315                                    }
1316                                }
1317                                Err(_) => false,
1318                            }
1319                        };
1320
1321                        if !should_continue {
1322                            eprintln!(
1323                                "{}",
1324                                "Stopped by user. Type 'continue' to resume later.".dimmed()
1325                            );
1326                            // Add partial progress to history
1327                            if !completed_tools.is_empty() {
1328                                conversation_history.add_turn(
1329                                    current_input.clone(),
1330                                    format!(
1331                                        "[Stopped at checkpoint - {} tools completed]",
1332                                        batch_tool_count
1333                                    ),
1334                                    vec![],
1335                                );
1336                            }
1337                            break;
1338                        }
1339
1340                        // Continue from checkpoint
1341                        eprintln!(
1342                            "{}",
1343                            format!(
1344                                "  → Continuing... {} remaining tool calls available",
1345                                MAX_TOOL_CALLS - total_tool_calls
1346                            )
1347                            .dimmed()
1348                        );
1349
1350                        // Add partial progress to history (without duplicating tool calls)
1351                        conversation_history.add_turn(
1352                            current_input.clone(),
1353                            format!(
1354                                "[Checkpoint - {} tools completed, continuing...]",
1355                                batch_tool_count
1356                            ),
1357                            vec![],
1358                        );
1359
1360                        // Build continuation prompt
1361                        current_input =
1362                            build_continuation_prompt(&input, &completed_tools, &agent_thinking);
1363
1364                        // Brief delay before continuation
1365                        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1366                        continue; // Continue the loop without incrementing retry_attempt
1367                    } else if err_str.contains("rate")
1368                        || err_str.contains("Rate")
1369                        || err_str.contains("429")
1370                        || err_str.contains("Too many tokens")
1371                        || err_str.contains("please wait")
1372                        || err_str.contains("throttl")
1373                        || err_str.contains("Throttl")
1374                    {
1375                        eprintln!("{}", "⚠ Rate limited by API provider.".yellow());
1376                        // Wait before retry for rate limits (longer wait for "too many tokens")
1377                        retry_attempt += 1;
1378                        let wait_secs = if err_str.contains("Too many tokens") {
1379                            30
1380                        } else {
1381                            5
1382                        };
1383                        eprintln!(
1384                            "{}",
1385                            format!(
1386                                "  Waiting {} seconds before retry ({}/{})...",
1387                                wait_secs, retry_attempt, MAX_RETRIES
1388                            )
1389                            .dimmed()
1390                        );
1391                        tokio::time::sleep(tokio::time::Duration::from_secs(wait_secs)).await;
1392                    } else if is_input_too_long_error(&err_str) {
1393                        // Context too large - truncate raw_chat_history directly
1394                        // NOTE: We truncate raw_chat_history (actual messages) not conversation_history
1395                        // because conversation_history may be empty/stale during errors
1396                        eprintln!(
1397                            "{}",
1398                            "⚠ Context too large for model. Truncating history...".yellow()
1399                        );
1400
1401                        let old_token_count = estimate_raw_history_tokens(&raw_chat_history);
1402                        let old_msg_count = raw_chat_history.len();
1403
1404                        // Strategy 1: Keep only the last N messages (user/assistant pairs)
1405                        // More aggressive truncation on each retry: 10 → 6 → 4 messages
1406                        let keep_count = match retry_attempt {
1407                            0 => 10,
1408                            1 => 6,
1409                            _ => 4,
1410                        };
1411
1412                        if raw_chat_history.len() > keep_count {
1413                            // Drain older messages, keep the most recent ones
1414                            let drain_count = raw_chat_history.len() - keep_count;
1415                            raw_chat_history.drain(0..drain_count);
1416                            // Ensure history starts with User message for OpenAI Responses API compatibility
1417                            ensure_history_starts_with_user(&mut raw_chat_history);
1418                        }
1419
1420                        // Strategy 2: Compact large tool outputs to temp files + summaries
1421                        // This preserves data (agent can read file if needed) while reducing context
1422                        let max_output_chars = match retry_attempt {
1423                            0 => 50_000, // 50KB on first try
1424                            1 => 20_000, // 20KB on second
1425                            _ => 5_000,  // 5KB on third (aggressive)
1426                        };
1427                        compact_large_tool_outputs(&mut raw_chat_history, max_output_chars);
1428
1429                        let new_token_count = estimate_raw_history_tokens(&raw_chat_history);
1430                        eprintln!("{}", format!(
1431                            "  ✓ Truncated: {} messages (~{} tokens) → {} messages (~{} tokens)",
1432                            old_msg_count, old_token_count, raw_chat_history.len(), new_token_count
1433                        ).green());
1434
1435                        // Preserve compacted summary while clearing turns to stay in sync
1436                        conversation_history.clear_turns_preserve_context();
1437
1438                        // Retry with truncated context
1439                        retry_attempt += 1;
1440                        if retry_attempt < MAX_RETRIES {
1441                            eprintln!(
1442                                "{}",
1443                                format!(
1444                                    "  → Retrying with truncated context ({}/{})...",
1445                                    retry_attempt, MAX_RETRIES
1446                                )
1447                                .dimmed()
1448                            );
1449                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1450                        } else {
1451                            eprintln!(
1452                                "{}",
1453                                "Context still too large after truncation. Try /clear to reset."
1454                                    .red()
1455                            );
1456                            break;
1457                        }
1458                    } else if is_truncation_error(&err_str) {
1459                        // Truncation error - try intelligent continuation
1460                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
1461                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
1462
1463                        // Count actually completed tools (not in-progress)
1464                        let completed_count = completed_tools
1465                            .iter()
1466                            .filter(|t| !t.result_summary.contains("IN PROGRESS"))
1467                            .count();
1468                        let in_progress_count = completed_tools.len() - completed_count;
1469
1470                        if !completed_tools.is_empty() && continuation_count < MAX_CONTINUATIONS {
1471                            // We have partial progress - continue from where we left off
1472                            continuation_count += 1;
1473                            let status_msg = if in_progress_count > 0 {
1474                                format!(
1475                                    "⚠ Response truncated. {} completed, {} in-progress. Auto-continuing ({}/{})...",
1476                                    completed_count,
1477                                    in_progress_count,
1478                                    continuation_count,
1479                                    MAX_CONTINUATIONS
1480                                )
1481                            } else {
1482                                format!(
1483                                    "⚠ Response truncated. {} tool calls completed. Auto-continuing ({}/{})...",
1484                                    completed_count, continuation_count, MAX_CONTINUATIONS
1485                                )
1486                            };
1487                            eprintln!("{}", status_msg.yellow());
1488
1489                            // Add partial progress to conversation history
1490                            // NOTE: We intentionally pass empty tool_calls here because the
1491                            // continuation prompt already contains the detailed file list.
1492                            // Including them in history would duplicate the context and waste tokens.
1493                            conversation_history.add_turn(
1494                                current_input.clone(),
1495                                format!("[Partial response - {} tools completed, {} in-progress before truncation. See continuation prompt for details.]",
1496                                    completed_count, in_progress_count),
1497                                vec![]  // Don't duplicate - continuation prompt has the details
1498                            );
1499
1500                            // Check if we need compaction after adding this heavy turn
1501                            // This is important for long multi-turn sessions with many tool calls
1502                            if conversation_history.needs_compaction() {
1503                                eprintln!(
1504                                    "{}",
1505                                    "  📦 Compacting history before continuation...".dimmed()
1506                                );
1507                                if let Some(summary) = conversation_history.compact() {
1508                                    eprintln!(
1509                                        "{}",
1510                                        format!(
1511                                            "  ✓ Compressed {} turns",
1512                                            summary.matches("Turn").count()
1513                                        )
1514                                        .dimmed()
1515                                    );
1516                                }
1517                            }
1518
1519                            // Build continuation prompt with context
1520                            current_input = build_continuation_prompt(
1521                                &input,
1522                                &completed_tools,
1523                                &agent_thinking,
1524                            );
1525
1526                            // Log continuation details for debugging
1527                            eprintln!("{}", format!(
1528                                "  → Continuing with {} files read, {} written, {} other actions tracked",
1529                                completed_tools.iter().filter(|t| t.tool_name == "read_file").count(),
1530                                completed_tools.iter().filter(|t| t.tool_name == "write_file" || t.tool_name == "write_files").count(),
1531                                completed_tools.iter().filter(|t| t.tool_name != "read_file" && t.tool_name != "write_file" && t.tool_name != "write_files" && t.tool_name != "list_directory").count()
1532                            ).dimmed());
1533
1534                            // Brief delay before continuation
1535                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1536                            // Don't increment retry_attempt - this is progress via continuation
1537                        } else if retry_attempt < MAX_RETRIES {
1538                            // No tool calls completed - simple retry
1539                            retry_attempt += 1;
1540                            eprintln!(
1541                                "{}",
1542                                format!(
1543                                    "⚠ Response error (attempt {}/{}). Retrying...",
1544                                    retry_attempt, MAX_RETRIES
1545                                )
1546                                .yellow()
1547                            );
1548                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1549                        } else {
1550                            // Max retries/continuations reached
1551                            eprintln!("{}", format!("Error: {}", e).red());
1552                            if continuation_count >= MAX_CONTINUATIONS {
1553                                eprintln!("{}", format!("Max continuations ({}) reached. The task is too complex for one request.", MAX_CONTINUATIONS).dimmed());
1554                            } else {
1555                                eprintln!(
1556                                    "{}",
1557                                    "Max retries reached. The response may be too complex."
1558                                        .dimmed()
1559                                );
1560                            }
1561                            eprintln!(
1562                                "{}",
1563                                "Try breaking your request into smaller parts.".dimmed()
1564                            );
1565                            exit_error = Some(e.to_string());
1566                            break;
1567                        }
1568                    } else if err_str.contains("timeout") || err_str.contains("Timeout") {
1569                        // Timeout - simple retry
1570                        retry_attempt += 1;
1571                        if retry_attempt < MAX_RETRIES {
1572                            eprintln!(
1573                                "{}",
1574                                format!(
1575                                    "⚠ Request timed out (attempt {}/{}). Retrying...",
1576                                    retry_attempt, MAX_RETRIES
1577                                )
1578                                .yellow()
1579                            );
1580                            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
1581                        } else {
1582                            eprintln!("{}", "Request timed out. Please try again.".red());
1583                            exit_error = Some("Request timed out".to_string());
1584                            break;
1585                        }
1586                    } else {
1587                        // Unknown error - show details and break
1588                        eprintln!("{}", format!("Error: {}", e).red());
1589                        if continuation_count > 0 {
1590                            eprintln!(
1591                                "{}",
1592                                format!(
1593                                    "  (occurred during continuation attempt {})",
1594                                    continuation_count
1595                                )
1596                                .dimmed()
1597                            );
1598                        }
1599                        eprintln!("{}", "Error details for debugging:".dimmed());
1600                        eprintln!(
1601                            "{}",
1602                            format!("  - retry_attempt: {}/{}", retry_attempt, MAX_RETRIES)
1603                                .dimmed()
1604                        );
1605                        eprintln!(
1606                            "{}",
1607                            format!(
1608                                "  - continuation_count: {}/{}",
1609                                continuation_count, MAX_CONTINUATIONS
1610                            )
1611                            .dimmed()
1612                        );
1613                        exit_error = Some(e.to_string());
1614                        break;
1615                    }
1616                }
1617            }
1618        }
1619
1620        // End AG-UI step event for this turn
1621        if let Some(ref bridge) = event_bridge {
1622            bridge.end_step().await;
1623        }
1624
1625        println!();
1626    }
1627
1628    // Emit AG-UI run completion event for connected frontends
1629    if let Some(ref bridge) = event_bridge {
1630        if let Some(error_msg) = exit_error {
1631            bridge.finish_run_with_error(&error_msg).await;
1632        } else {
1633            bridge.finish_run().await;
1634        }
1635    }
1636
1637    // Clean up terminal layout before exiting (disabled - layout not initialized)
1638    // if let Err(e) = terminal_layout.cleanup() {
1639    //     eprintln!(
1640    //         "{}",
1641    //         format!("Warning: Terminal cleanup failed: {}", e).dimmed()
1642    //     );
1643    // }
1644
1645    Ok(())
1646}
1647
1648// NOTE: wait_for_interrupt function removed - ESC interrupt feature disabled
1649// due to terminal corruption issues with spawn_blocking raw mode handling.
1650// TODO: Re-implement using tool hook callbacks for cleaner interruption.
1651
1652/// Extract tool call records from the hook state for history tracking
1653async fn extract_tool_calls_from_hook(hook: &ToolDisplayHook) -> Vec<ToolCallRecord> {
1654    let state = hook.state();
1655    let guard = state.lock().await;
1656
1657    guard
1658        .tool_calls
1659        .iter()
1660        .enumerate()
1661        .map(|(i, tc)| {
1662            let result = if tc.is_running {
1663                // Tool was in progress when error occurred
1664                "[IN PROGRESS - may need to be re-run]".to_string()
1665            } else if let Some(output) = &tc.output {
1666                truncate_string(output, 200)
1667            } else {
1668                "completed".to_string()
1669            };
1670
1671            ToolCallRecord {
1672                tool_name: tc.name.clone(),
1673                args_summary: truncate_string(&tc.args, 100),
1674                result_summary: result,
1675                // Generate a unique tool ID for proper message pairing
1676                tool_id: Some(format!("tool_{}_{}", tc.name, i)),
1677                // Mark read-only tools as droppable (their results can be re-fetched)
1678                droppable: matches!(
1679                    tc.name.as_str(),
1680                    "read_file" | "list_directory" | "analyze_project"
1681                ),
1682            }
1683        })
1684        .collect()
1685}
1686
1687/// Extract any agent thinking/messages from the hook for context
1688async fn extract_agent_messages_from_hook(hook: &ToolDisplayHook) -> Vec<String> {
1689    let state = hook.state();
1690    let guard = state.lock().await;
1691    guard.agent_messages.clone()
1692}
1693
/// Helper to truncate strings for summaries.
///
/// Strings at or under `max_len` bytes are returned unchanged; longer strings
/// are cut to at most `max_len` bytes total including a `...` suffix.
///
/// The cut point is backed off to a UTF-8 character boundary: the previous
/// implementation sliced at a raw byte offset (`&s[..max_len - 3]`), which
/// panics when that offset lands inside a multibyte character — easy to hit
/// since tool args/outputs routinely contain non-ASCII text.
fn truncate_string(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }
    // Reserve 3 bytes for the "..." suffix, then back off to a char boundary.
    let mut cut = max_len.saturating_sub(3);
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &s[..cut])
}
1702
1703/// Compact large tool outputs by saving them to temp files and replacing with summaries.
1704/// This preserves all data (agent can read the file) while reducing context size.
1705fn compact_large_tool_outputs(messages: &mut [rig::completion::Message], max_chars: usize) {
1706    use rig::completion::message::{Text, ToolResultContent, UserContent};
1707    use std::fs;
1708
1709    // Create temp directory for compacted outputs
1710    let temp_dir = std::env::temp_dir().join("syncable-agent-outputs");
1711    let _ = fs::create_dir_all(&temp_dir);
1712
1713    for msg in messages.iter_mut() {
1714        if let rig::completion::Message::User { content } = msg {
1715            for item in content.iter_mut() {
1716                if let UserContent::ToolResult(tr) = item {
1717                    for trc in tr.content.iter_mut() {
1718                        if let ToolResultContent::Text(text) = trc
1719                            && text.text.len() > max_chars
1720                        {
1721                            // Save full output to temp file
1722                            let file_id = format!(
1723                                "{}_{}.txt",
1724                                tr.id,
1725                                std::time::SystemTime::now()
1726                                    .duration_since(std::time::UNIX_EPOCH)
1727                                    .unwrap()
1728                                    .as_millis()
1729                            );
1730                            let file_path = temp_dir.join(&file_id);
1731
1732                            if let Ok(()) = fs::write(&file_path, &text.text) {
1733                                // Create a smart summary
1734                                let summary = create_output_summary(
1735                                    &text.text,
1736                                    &file_path.display().to_string(),
1737                                    max_chars / 2, // Use half max for summary
1738                                );
1739
1740                                // Replace with summary
1741                                *trc = ToolResultContent::Text(Text { text: summary });
1742                            }
1743                        }
1744                    }
1745                }
1746            }
1747        }
1748    }
1749}
1750
1751/// Create a smart summary of a large output using incremental chunk processing.
1752/// Processes output in logical sections, summarizes each, then combines into actionable summary.
1753fn create_output_summary(full_output: &str, file_path: &str, max_summary_len: usize) -> String {
1754    let total_lines = full_output.lines().count();
1755    let total_chars = full_output.len();
1756
1757    let summary_content =
1758        if full_output.trim_start().starts_with('{') || full_output.trim_start().starts_with('[') {
1759            // JSON output - extract structured summary
1760            summarize_json_incrementally(full_output, max_summary_len)
1761        } else {
1762            // Text output - chunk and summarize
1763            summarize_text_incrementally(full_output, max_summary_len)
1764        };
1765
1766    format!(
1767        "[COMPACTED OUTPUT]\n\
1768        Full data: {}\n\
1769        Size: {} chars, {} lines\n\
1770        \n\
1771        {}\n\
1772        \n\
1773        [Read file with offset/limit for specific sections if needed]",
1774        file_path, total_chars, total_lines, summary_content
1775    )
1776}
1777
1778/// Incrementally summarize JSON output, extracting key fields and prioritizing important items.
1779fn summarize_json_incrementally(json_str: &str, max_len: usize) -> String {
1780    let Ok(json) = serde_json::from_str::<serde_json::Value>(json_str) else {
1781        return "Failed to parse JSON".to_string();
1782    };
1783
1784    let mut parts: Vec<String> = Vec::new();
1785    let mut current_len = 0;
1786
1787    match &json {
1788        serde_json::Value::Object(obj) => {
1789            // Priority 1: Summary/stats fields
1790            for key in ["summary", "stats", "metadata", "status"] {
1791                if let Some(v) = obj.get(key) {
1792                    let s = format!("{}:\n{}", key, indent_json(v, 2, 500));
1793                    if current_len + s.len() < max_len {
1794                        parts.push(s.clone());
1795                        current_len += s.len();
1796                    }
1797                }
1798            }
1799
1800            // Priority 2: Error/critical items (summarize each)
1801            for key in [
1802                "errors",
1803                "critical",
1804                "failures",
1805                "issues",
1806                "findings",
1807                "recommendations",
1808            ] {
1809                if let Some(serde_json::Value::Array(arr)) = obj.get(key) {
1810                    if arr.is_empty() {
1811                        continue;
1812                    }
1813                    parts.push(format!("\n{} ({} items):", key, arr.len()));
1814
1815                    // Group by severity/type if present
1816                    let mut by_severity: std::collections::HashMap<
1817                        String,
1818                        Vec<&serde_json::Value>,
1819                    > = std::collections::HashMap::new();
1820
1821                    for item in arr {
1822                        let severity = item
1823                            .get("severity")
1824                            .or_else(|| item.get("level"))
1825                            .or_else(|| item.get("type"))
1826                            .and_then(|v| v.as_str())
1827                            .unwrap_or("other")
1828                            .to_string();
1829                        by_severity.entry(severity).or_default().push(item);
1830                    }
1831
1832                    // Show critical/high first, summarize others
1833                    for sev in [
1834                        "critical", "high", "error", "warning", "medium", "low", "info", "other",
1835                    ] {
1836                        if let Some(items) = by_severity.get(sev) {
1837                            let show_count = match sev {
1838                                "critical" | "high" | "error" => 5.min(items.len()),
1839                                "warning" | "medium" => 3.min(items.len()),
1840                                _ => 2.min(items.len()),
1841                            };
1842
1843                            if !items.is_empty() {
1844                                let s =
1845                                    format!("  [{}] {} items:", sev.to_uppercase(), items.len());
1846                                if current_len + s.len() < max_len {
1847                                    parts.push(s.clone());
1848                                    current_len += s.len();
1849
1850                                    for item in items.iter().take(show_count) {
1851                                        let item_summary = summarize_single_item(item);
1852                                        if current_len + item_summary.len() < max_len {
1853                                            parts.push(format!("    • {}", item_summary));
1854                                            current_len += item_summary.len();
1855                                        }
1856                                    }
1857
1858                                    if items.len() > show_count {
1859                                        parts.push(format!(
1860                                            "    ... and {} more",
1861                                            items.len() - show_count
1862                                        ));
1863                                    }
1864                                }
1865                            }
1866                        }
1867                    }
1868                }
1869            }
1870
1871            // Priority 3: Show remaining top-level keys
1872            let shown_keys: std::collections::HashSet<&str> = [
1873                "summary",
1874                "stats",
1875                "metadata",
1876                "status",
1877                "errors",
1878                "critical",
1879                "failures",
1880                "issues",
1881                "findings",
1882                "recommendations",
1883            ]
1884            .iter()
1885            .cloned()
1886            .collect();
1887
1888            let other_keys: Vec<_> = obj
1889                .keys()
1890                .filter(|k| !shown_keys.contains(k.as_str()))
1891                .collect();
1892            if !other_keys.is_empty() && current_len < max_len - 200 {
1893                parts.push(format!("\nOther fields: {:?}", other_keys));
1894            }
1895        }
1896        serde_json::Value::Array(arr) => {
1897            parts.push(format!("Array with {} items", arr.len()));
1898
1899            // Try to group by type/severity
1900            for (i, item) in arr.iter().take(10).enumerate() {
1901                let s = format!("[{}] {}", i, summarize_single_item(item));
1902                if current_len + s.len() < max_len {
1903                    parts.push(s.clone());
1904                    current_len += s.len();
1905                }
1906            }
1907            if arr.len() > 10 {
1908                parts.push(format!("... and {} more items", arr.len() - 10));
1909            }
1910        }
1911        _ => {
1912            parts.push(truncate_json_value(&json, max_len));
1913        }
1914    }
1915
1916    parts.join("\n")
1917}
1918
1919/// Summarize a single JSON item (issue, error, etc.) into a one-liner.
1920fn summarize_single_item(item: &serde_json::Value) -> String {
1921    let mut parts: Vec<String> = Vec::new();
1922
1923    // Extract common fields
1924    for key in [
1925        "message",
1926        "description",
1927        "title",
1928        "name",
1929        "file",
1930        "path",
1931        "code",
1932        "rule",
1933    ] {
1934        if let Some(v) = item.get(key)
1935            && let Some(s) = v.as_str()
1936        {
1937            parts.push(truncate_string(s, 80));
1938            break; // Only take first descriptive field
1939        }
1940    }
1941
1942    // Add location if present
1943    if let Some(file) = item
1944        .get("file")
1945        .or_else(|| item.get("path"))
1946        .and_then(|v| v.as_str())
1947    {
1948        if let Some(line) = item.get("line").and_then(|v| v.as_u64()) {
1949            parts.push(format!("at {}:{}", file, line));
1950        } else {
1951            parts.push(format!("in {}", truncate_string(file, 40)));
1952        }
1953    }
1954
1955    if parts.is_empty() {
1956        truncate_json_value(item, 100)
1957    } else {
1958        parts.join(" ")
1959    }
1960}
1961
1962/// Indent JSON for display.
1963fn indent_json(v: &serde_json::Value, indent: usize, max_len: usize) -> String {
1964    let s = serde_json::to_string_pretty(v).unwrap_or_else(|_| v.to_string());
1965    let prefix = " ".repeat(indent);
1966    let indented: String = s
1967        .lines()
1968        .map(|l| format!("{}{}", prefix, l))
1969        .collect::<Vec<_>>()
1970        .join("\n");
1971    if indented.len() > max_len {
1972        format!("{}...", &indented[..max_len.saturating_sub(3)])
1973    } else {
1974        indented
1975    }
1976}
1977
1978/// Incrementally summarize text output by processing in chunks.
1979fn summarize_text_incrementally(text: &str, max_len: usize) -> String {
1980    let lines: Vec<&str> = text.lines().collect();
1981    let mut parts: Vec<String> = Vec::new();
1982    let mut current_len = 0;
1983
1984    // Look for section headers or key patterns
1985    let mut sections: Vec<(usize, &str)> = Vec::new();
1986    for (i, line) in lines.iter().enumerate() {
1987        // Detect headers (lines that look like titles)
1988        if line.starts_with('#')
1989            || line.starts_with("==")
1990            || line.starts_with("--")
1991            || (line.ends_with(':') && line.len() < 50)
1992            || line.chars().all(|c| c.is_uppercase() || c.is_whitespace())
1993        {
1994            sections.push((i, line));
1995        }
1996    }
1997
1998    if !sections.is_empty() {
1999        // Summarize by sections
2000        parts.push(format!("Found {} sections:", sections.len()));
2001        for (i, (line_num, header)) in sections.iter().enumerate() {
2002            let next_section = sections.get(i + 1).map(|(n, _)| *n).unwrap_or(lines.len());
2003            let section_lines = next_section - line_num;
2004
2005            let s = format!(
2006                "  [L{}] {} ({} lines)",
2007                line_num + 1,
2008                header.trim(),
2009                section_lines
2010            );
2011            if current_len + s.len() < max_len / 2 {
2012                parts.push(s.clone());
2013                current_len += s.len();
2014            }
2015        }
2016        parts.push("".to_string());
2017    }
2018
2019    // Show first chunk
2020    let preview_lines = 15.min(lines.len());
2021    parts.push("Content preview:".to_string());
2022    for line in lines.iter().take(preview_lines) {
2023        let s = format!("  {}", truncate_string(line, 120));
2024        if current_len + s.len() < max_len * 3 / 4 {
2025            parts.push(s.clone());
2026            current_len += s.len();
2027        }
2028    }
2029
2030    if lines.len() > preview_lines {
2031        parts.push(format!(
2032            "  ... ({} more lines)",
2033            lines.len() - preview_lines
2034        ));
2035    }
2036
2037    // Show last few lines if space permits
2038    if lines.len() > preview_lines * 2 && current_len < max_len - 500 {
2039        parts.push("\nEnd of output:".to_string());
2040        for line in lines.iter().skip(lines.len() - 5) {
2041            let s = format!("  {}", truncate_string(line, 120));
2042            if current_len + s.len() < max_len {
2043                parts.push(s.clone());
2044                current_len += s.len();
2045            }
2046        }
2047    }
2048
2049    parts.join("\n")
2050}
2051
2052/// Truncate a JSON value for display
2053fn truncate_json_value(v: &serde_json::Value, max_len: usize) -> String {
2054    let s = v.to_string();
2055    if s.len() <= max_len {
2056        s
2057    } else {
2058        format!("{}...", &s[..max_len.saturating_sub(3)])
2059    }
2060}
2061
2062/// Simplify history for OpenAI Responses API compatibility with reasoning models.
2063///
2064/// OpenAI's Responses API has strict pairing requirements:
2065/// - Reasoning items must be followed by their output (text or function_call)
2066/// - Function_call items must be preceded by their reasoning item
2067///
2068/// When passing history across user turns, these pairings get broken, causing errors like:
2069/// - "Item 'rs_...' of type 'reasoning' was provided without its required following item"
2070/// - "Item 'fc_...' of type 'function_call' was provided without its required 'reasoning' item"
2071///
2072/// Solution: Keep only User messages and final Assistant Text responses.
2073/// This preserves conversation context without the complex internal tool/reasoning structure.
2074fn simplify_history_for_openai_reasoning(history: &mut Vec<rig::completion::Message>) {
2075    use rig::completion::message::{AssistantContent, UserContent};
2076    use rig::one_or_many::OneOrMany;
2077
2078    // Filter to keep only User text messages and Assistant text messages
2079    let simplified: Vec<rig::completion::Message> = history
2080        .iter()
2081        .filter_map(|msg| match msg {
2082            // Keep User messages, but only text content (not tool results)
2083            rig::completion::Message::User { content } => {
2084                let text_only: Vec<UserContent> = content
2085                    .iter()
2086                    .filter(|c| matches!(c, UserContent::Text(_)))
2087                    .cloned()
2088                    .collect();
2089                if text_only.is_empty() {
2090                    None
2091                } else {
2092                    let mut iter = text_only.into_iter();
2093                    let first = iter.next().unwrap();
2094                    let rest: Vec<_> = iter.collect();
2095                    let new_content = if rest.is_empty() {
2096                        OneOrMany::one(first)
2097                    } else {
2098                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
2099                    };
2100                    Some(rig::completion::Message::User {
2101                        content: new_content,
2102                    })
2103                }
2104            }
2105            // Keep Assistant messages, but only text content (not reasoning, tool calls)
2106            rig::completion::Message::Assistant { content, id } => {
2107                let text_only: Vec<AssistantContent> = content
2108                    .iter()
2109                    .filter(|c| matches!(c, AssistantContent::Text(_)))
2110                    .cloned()
2111                    .collect();
2112                if text_only.is_empty() {
2113                    None
2114                } else {
2115                    let mut iter = text_only.into_iter();
2116                    let first = iter.next().unwrap();
2117                    let rest: Vec<_> = iter.collect();
2118                    let new_content = if rest.is_empty() {
2119                        OneOrMany::one(first)
2120                    } else {
2121                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
2122                    };
2123                    Some(rig::completion::Message::Assistant {
2124                        content: new_content,
2125                        id: id.clone(),
2126                    })
2127                }
2128            }
2129        })
2130        .collect();
2131
2132    *history = simplified;
2133}
2134
2135/// Ensure history starts with a User message for OpenAI Responses API compatibility.
2136///
2137/// OpenAI's Responses API requires that reasoning items are properly structured within
2138/// a conversation. When history truncation leaves an Assistant message (containing
2139/// Reasoning blocks) at the start, OpenAI rejects it with:
2140/// "Item 'rs_...' of type 'reasoning' was provided without its required following item."
2141///
2142/// This function inserts a synthetic User message at the beginning if history starts
2143/// with an Assistant message, preserving the context while maintaining valid structure.
2144fn ensure_history_starts_with_user(history: &mut Vec<rig::completion::Message>) {
2145    if !history.is_empty()
2146        && matches!(
2147            history.first(),
2148            Some(rig::completion::Message::Assistant { .. })
2149        )
2150    {
2151        // Insert synthetic User message at the beginning to maintain valid conversation structure
2152        history.insert(
2153            0,
2154            rig::completion::Message::User {
2155                content: rig::one_or_many::OneOrMany::one(
2156                    rig::completion::message::UserContent::text("(Conversation continued)"),
2157                ),
2158            },
2159        );
2160    }
2161}
2162
2163/// Estimate token count from raw rig Messages
2164/// This is used for context length management to prevent "input too long" errors.
2165/// Estimates ~4 characters per token.
2166fn estimate_raw_history_tokens(messages: &[rig::completion::Message]) -> usize {
2167    use rig::completion::message::{AssistantContent, ToolResultContent, UserContent};
2168
2169    messages
2170        .iter()
2171        .map(|msg| -> usize {
2172            match msg {
2173                rig::completion::Message::User { content } => {
2174                    content
2175                        .iter()
2176                        .map(|c| -> usize {
2177                            match c {
2178                                UserContent::Text(t) => t.text.len() / 4,
2179                                UserContent::ToolResult(tr) => {
2180                                    // Tool results can be HUGE - properly estimate them
2181                                    tr.content
2182                                        .iter()
2183                                        .map(|trc| match trc {
2184                                            ToolResultContent::Text(t) => t.text.len() / 4,
2185                                            _ => 100,
2186                                        })
2187                                        .sum::<usize>()
2188                                }
2189                                _ => 100, // Estimate for images/documents
2190                            }
2191                        })
2192                        .sum::<usize>()
2193                }
2194                rig::completion::Message::Assistant { content, .. } => {
2195                    content
2196                        .iter()
2197                        .map(|c| -> usize {
2198                            match c {
2199                                AssistantContent::Text(t) => t.text.len() / 4,
2200                                AssistantContent::ToolCall(tc) => {
2201                                    // arguments is serde_json::Value, convert to string for length estimate
2202                                    let args_len = tc.function.arguments.to_string().len();
2203                                    (tc.function.name.len() + args_len) / 4
2204                                }
2205                                _ => 100,
2206                            }
2207                        })
2208                        .sum::<usize>()
2209                }
2210            }
2211        })
2212        .sum()
2213}
2214
2215/// Find a plan_create tool call in the list and extract plan info
2216/// Returns (plan_path, task_count) if found
2217fn find_plan_create_call(tool_calls: &[ToolCallRecord]) -> Option<(String, usize)> {
2218    for tc in tool_calls {
2219        if tc.tool_name == "plan_create" {
2220            // Try to parse the result_summary as JSON to extract plan_path
2221            // Note: result_summary may be truncated, so we have multiple fallbacks
2222            let plan_path =
2223                if let Ok(result) = serde_json::from_str::<serde_json::Value>(&tc.result_summary) {
2224                    result
2225                        .get("plan_path")
2226                        .and_then(|v| v.as_str())
2227                        .map(|s| s.to_string())
2228                } else {
2229                    None
2230                };
2231
2232            // If JSON parsing failed, find the most recently created plan file
2233            // This is more reliable than trying to reconstruct the path from truncated args
2234            let plan_path = plan_path.unwrap_or_else(|| {
2235                find_most_recent_plan_file().unwrap_or_else(|| "plans/plan.md".to_string())
2236            });
2237
2238            // Count tasks by reading the plan file directly
2239            let task_count = count_tasks_in_plan_file(&plan_path).unwrap_or(0);
2240
2241            return Some((plan_path, task_count));
2242        }
2243    }
2244    None
2245}
2246
/// Find the most recently created plan file in the plans directory
fn find_most_recent_plan_file() -> Option<String> {
    let cwd = std::env::current_dir().ok()?;
    let plans_dir = cwd.join("plans");
    if !plans_dir.exists() {
        return None;
    }

    let mut newest: Option<(std::path::PathBuf, std::time::SystemTime)> = None;

    for entry in std::fs::read_dir(&plans_dir).ok()?.flatten() {
        let path = entry.path();
        if !path.extension().is_some_and(|e| e == "md") {
            continue;
        }
        let Ok(modified) = entry.metadata().and_then(|m| m.modified()) else {
            continue;
        };
        // Strictly-newer wins, so the first entry seen keeps ties.
        let is_newer = newest.as_ref().map(|(_, t)| modified > *t).unwrap_or(true);
        if is_newer {
            newest = Some((path, modified));
        }
    }

    // Prefer a path relative to the working directory for display.
    newest.map(|(path, _)| {
        path.strip_prefix(&cwd)
            .map(|p| p.display().to_string())
            .unwrap_or_else(|_| path.display().to_string())
    })
}
2274
2275/// Count tasks (checkbox items) in a plan file
2276fn count_tasks_in_plan_file(plan_path: &str) -> Option<usize> {
2277    use regex::Regex;
2278
2279    // Try both relative and absolute paths
2280    let path = std::path::Path::new(plan_path);
2281    let content = if path.exists() {
2282        std::fs::read_to_string(path).ok()?
2283    } else {
2284        // Try with current directory
2285        std::fs::read_to_string(std::env::current_dir().ok()?.join(plan_path)).ok()?
2286    };
2287
2288    // Count task checkboxes: - [ ], - [x], - [~], - [!]
2289    let task_regex = Regex::new(r"^\s*-\s*\[[ x~!]\]").ok()?;
2290    let count = content
2291        .lines()
2292        .filter(|line| task_regex.is_match(line))
2293        .count();
2294
2295    Some(count)
2296}
2297
/// Check if an error is a truncation/JSON parsing error that can be recovered via continuation
fn is_truncation_error(err_str: &str) -> bool {
    // Substrings that indicate a response was cut off mid-JSON.
    const MARKERS: [&str; 4] = ["JsonError", "EOF while parsing", "JSON", "unexpected end"];
    MARKERS.iter().any(|marker| err_str.contains(marker))
}
2305
/// Check if error is "input too long" - context exceeds model limit
/// This happens when conversation history grows beyond what the model can handle.
/// Recovery: compact history and retry with reduced context.
fn is_input_too_long_error(err_str: &str) -> bool {
    // Provider-specific phrasings for a context-limit overflow.
    const MARKERS: [&str; 6] = [
        "too long",
        "Too long",
        "context length",
        "maximum context",
        "exceeds the model",
        "Input is too long",
    ];
    MARKERS.iter().any(|marker| err_str.contains(marker))
}
2317
2318/// Build a continuation prompt that tells the AI what work was completed
2319/// and asks it to continue from where it left off
2320fn build_continuation_prompt(
2321    original_task: &str,
2322    completed_tools: &[ToolCallRecord],
2323    agent_thinking: &[String],
2324) -> String {
2325    use std::collections::HashSet;
2326
2327    // Group tools by type and extract unique files read
2328    let mut files_read: HashSet<String> = HashSet::new();
2329    let mut files_written: HashSet<String> = HashSet::new();
2330    let mut dirs_listed: HashSet<String> = HashSet::new();
2331    let mut other_tools: Vec<String> = Vec::new();
2332    let mut in_progress: Vec<String> = Vec::new();
2333
2334    for tool in completed_tools {
2335        let is_in_progress = tool.result_summary.contains("IN PROGRESS");
2336
2337        if is_in_progress {
2338            in_progress.push(format!("{}({})", tool.tool_name, tool.args_summary));
2339            continue;
2340        }
2341
2342        match tool.tool_name.as_str() {
2343            "read_file" => {
2344                // Extract path from args
2345                files_read.insert(tool.args_summary.clone());
2346            }
2347            "write_file" | "write_files" => {
2348                files_written.insert(tool.args_summary.clone());
2349            }
2350            "list_directory" => {
2351                dirs_listed.insert(tool.args_summary.clone());
2352            }
2353            _ => {
2354                other_tools.push(format!(
2355                    "{}({})",
2356                    tool.tool_name,
2357                    truncate_string(&tool.args_summary, 40)
2358                ));
2359            }
2360        }
2361    }
2362
2363    let mut prompt = format!(
2364        "[CONTINUE] Your previous response was interrupted. DO NOT repeat completed work.\n\n\
2365        Original task: {}\n",
2366        truncate_string(original_task, 500)
2367    );
2368
2369    // Show files already read - CRITICAL for preventing re-reads
2370    if !files_read.is_empty() {
2371        prompt.push_str("\n== FILES ALREADY READ (do NOT read again) ==\n");
2372        for file in &files_read {
2373            prompt.push_str(&format!("  - {}\n", file));
2374        }
2375    }
2376
2377    if !dirs_listed.is_empty() {
2378        prompt.push_str("\n== DIRECTORIES ALREADY LISTED ==\n");
2379        for dir in &dirs_listed {
2380            prompt.push_str(&format!("  - {}\n", dir));
2381        }
2382    }
2383
2384    if !files_written.is_empty() {
2385        prompt.push_str("\n== FILES ALREADY WRITTEN ==\n");
2386        for file in &files_written {
2387            prompt.push_str(&format!("  - {}\n", file));
2388        }
2389    }
2390
2391    if !other_tools.is_empty() {
2392        prompt.push_str("\n== OTHER COMPLETED ACTIONS ==\n");
2393        for tool in other_tools.iter().take(20) {
2394            prompt.push_str(&format!("  - {}\n", tool));
2395        }
2396        if other_tools.len() > 20 {
2397            prompt.push_str(&format!("  ... and {} more\n", other_tools.len() - 20));
2398        }
2399    }
2400
2401    if !in_progress.is_empty() {
2402        prompt.push_str("\n== INTERRUPTED (may need re-run) ==\n");
2403        for tool in &in_progress {
2404            prompt.push_str(&format!("  ⚠ {}\n", tool));
2405        }
2406    }
2407
2408    // Include last thinking context if available
2409    if let Some(last_thought) = agent_thinking.last() {
2410        prompt.push_str(&format!(
2411            "\n== YOUR LAST THOUGHTS ==\n\"{}\"\n",
2412            truncate_string(last_thought, 300)
2413        ));
2414    }
2415
2416    prompt.push_str("\n== INSTRUCTIONS ==\n");
2417    prompt.push_str("IMPORTANT: Your previous response was too long and got cut off.\n");
2418    prompt.push_str("1. Do NOT re-read files listed above - they are already in context.\n");
2419    prompt.push_str("2. If writing a document, write it in SECTIONS - complete one section now, then continue.\n");
2420    prompt.push_str("3. Keep your response SHORT and focused. Better to complete small chunks than fail on large ones.\n");
2421    prompt.push_str("4. If the task involves writing a file, START WRITING NOW - don't explain what you'll do.\n");
2422
2423    prompt
2424}
2425
2426/// Run a single query and return the response
2427/// Note: event_bridge is accepted for API consistency but not used in single-query mode
2428pub async fn run_query(
2429    project_path: &Path,
2430    query: &str,
2431    provider: ProviderType,
2432    model: Option<String>,
2433    _event_bridge: Option<crate::server::EventBridge>,
2434) -> AgentResult<String> {
2435    use tools::*;
2436
2437    let project_path_buf = project_path.to_path_buf();
2438
2439    // Background process manager for Prometheus port-forwards (single query context)
2440    let bg_manager = Arc::new(BackgroundProcessManager::new());
2441    // Select prompt based on query type (analysis vs generation)
2442    // For single queries (non-interactive), always use standard mode
2443    let preamble = get_system_prompt(project_path, Some(query), PlanMode::default());
2444    let is_generation = prompts::is_generation_query(query);
2445
2446    match provider {
2447        ProviderType::OpenAI => {
2448            // Use Responses API (default) for reasoning model support
2449            let client = openai::Client::from_env();
2450            let model_name = model.as_deref().unwrap_or("gpt-5.2");
2451
2452            let mut builder = client
2453                .agent(model_name)
2454                .preamble(&preamble)
2455                .max_tokens(4096)
2456                .tool(AnalyzeTool::new(project_path_buf.clone()))
2457                .tool(SecurityScanTool::new(project_path_buf.clone()))
2458                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2459                .tool(HadolintTool::new(project_path_buf.clone()))
2460                .tool(DclintTool::new(project_path_buf.clone()))
2461                .tool(KubelintTool::new(project_path_buf.clone()))
2462                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2463                .tool(K8sCostsTool::new(project_path_buf.clone()))
2464                .tool(K8sDriftTool::new(project_path_buf.clone()))
2465                .tool(HelmlintTool::new(project_path_buf.clone()))
2466                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2467                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2468                .tool(TerraformInstallTool::new())
2469                .tool(ReadFileTool::new(project_path_buf.clone()))
2470                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2471                .tool(WebFetchTool::new())
2472                // Prometheus discovery and connection tools for live K8s analysis
2473                .tool(PrometheusDiscoverTool::new())
2474                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2475                // RAG retrieval tools for compressed tool outputs
2476                .tool(RetrieveOutputTool::new())
2477                .tool(ListOutputsTool::new())
2478                        // Platform tools for project management
2479                        .tool(ListOrganizationsTool::new())
2480                        .tool(ListProjectsTool::new())
2481                        .tool(SelectProjectTool::new())
2482                        .tool(CurrentContextTool::new())
2483                        .tool(OpenProviderSettingsTool::new())
2484                        .tool(CheckProviderConnectionTool::new())
2485                        .tool(ListDeploymentCapabilitiesTool::new())
2486                        .tool(ListHetznerAvailabilityTool::new())
2487                        // Deployment tools for service management
2488                        .tool(CreateDeploymentConfigTool::new())
2489                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
2490                        .tool(ListDeploymentConfigsTool::new())
2491                        .tool(TriggerDeploymentTool::new())
2492                        .tool(GetDeploymentStatusTool::new())
2493                        .tool(ListDeploymentsTool::new())
2494                        .tool(GetServiceLogsTool::new())
2495                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));
2496
2497            // Add generation tools if this is a generation query
2498            if is_generation {
2499                builder = builder
2500                    .tool(WriteFileTool::new(project_path_buf.clone()))
2501                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2502                    .tool(ShellTool::new(project_path_buf.clone()));
2503            }
2504
2505            // Enable reasoning for OpenAI reasoning models
2506            let model_lower = model_name.to_lowercase();
2507            let is_reasoning_model = model_lower.starts_with("gpt-5")
2508                || model_lower.starts_with("gpt5")
2509                || model_lower.starts_with("o1")
2510                || model_lower.starts_with("o3")
2511                || model_lower.starts_with("o4");
2512
2513            let agent = if is_reasoning_model {
2514                let reasoning_params = serde_json::json!({
2515                    "reasoning": {
2516                        "effort": "medium",
2517                        "summary": "detailed"
2518                    }
2519                });
2520                builder.additional_params(reasoning_params).build()
2521            } else {
2522                builder.build()
2523            };
2524
2525            agent
2526                .prompt(query)
2527                .multi_turn(50)
2528                .await
2529                .map_err(|e| AgentError::ProviderError(e.to_string()))
2530        }
2531        ProviderType::Anthropic => {
2532            let client = anthropic::Client::from_env();
2533            let model_name = model.as_deref().unwrap_or("claude-sonnet-4-5-20250929");
2534
2535            // TODO: Extended thinking for Claude is disabled because rig doesn't properly
2536            // handle thinking blocks in multi-turn conversations with tool use.
2537            // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference.
2538
2539            let mut builder = client
2540                .agent(model_name)
2541                .preamble(&preamble)
2542                .max_tokens(4096)
2543                .tool(AnalyzeTool::new(project_path_buf.clone()))
2544                .tool(SecurityScanTool::new(project_path_buf.clone()))
2545                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2546                .tool(HadolintTool::new(project_path_buf.clone()))
2547                .tool(DclintTool::new(project_path_buf.clone()))
2548                .tool(KubelintTool::new(project_path_buf.clone()))
2549                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2550                .tool(K8sCostsTool::new(project_path_buf.clone()))
2551                .tool(K8sDriftTool::new(project_path_buf.clone()))
2552                .tool(HelmlintTool::new(project_path_buf.clone()))
2553                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2554                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2555                .tool(TerraformInstallTool::new())
2556                .tool(ReadFileTool::new(project_path_buf.clone()))
2557                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2558                .tool(WebFetchTool::new())
2559                // Prometheus discovery and connection tools for live K8s analysis
2560                .tool(PrometheusDiscoverTool::new())
2561                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2562                // RAG retrieval tools for compressed tool outputs
2563                .tool(RetrieveOutputTool::new())
2564                .tool(ListOutputsTool::new())
2565                        // Platform tools for project management
2566                        .tool(ListOrganizationsTool::new())
2567                        .tool(ListProjectsTool::new())
2568                        .tool(SelectProjectTool::new())
2569                        .tool(CurrentContextTool::new())
2570                        .tool(OpenProviderSettingsTool::new())
2571                        .tool(CheckProviderConnectionTool::new())
2572                        .tool(ListDeploymentCapabilitiesTool::new())
2573                        .tool(ListHetznerAvailabilityTool::new())
2574                        // Deployment tools for service management
2575                        .tool(CreateDeploymentConfigTool::new())
2576                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
2577                        .tool(ListDeploymentConfigsTool::new())
2578                        .tool(TriggerDeploymentTool::new())
2579                        .tool(GetDeploymentStatusTool::new())
2580                        .tool(ListDeploymentsTool::new())
2581                        .tool(GetServiceLogsTool::new())
2582                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));
2583
2584            // Add generation tools if this is a generation query
2585            if is_generation {
2586                builder = builder
2587                    .tool(WriteFileTool::new(project_path_buf.clone()))
2588                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2589                    .tool(ShellTool::new(project_path_buf.clone()));
2590            }
2591
2592            let agent = builder.build();
2593
2594            agent
2595                .prompt(query)
2596                .multi_turn(50)
2597                .await
2598                .map_err(|e| AgentError::ProviderError(e.to_string()))
2599        }
2600        ProviderType::Bedrock => {
2601            // Bedrock provider via rig-bedrock - same pattern as Anthropic
2602            let client = crate::bedrock::client::Client::from_env();
2603            let model_name = model
2604                .as_deref()
2605                .unwrap_or("global.anthropic.claude-sonnet-4-5-20250929-v1:0");
2606
2607            // Extended thinking for Claude via Bedrock
2608            let thinking_params = serde_json::json!({
2609                "thinking": {
2610                    "type": "enabled",
2611                    "budget_tokens": 16000
2612                }
2613            });
2614
2615            let mut builder = client
2616                .agent(model_name)
2617                .preamble(&preamble)
2618                .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
2619                .tool(AnalyzeTool::new(project_path_buf.clone()))
2620                .tool(SecurityScanTool::new(project_path_buf.clone()))
2621                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2622                .tool(HadolintTool::new(project_path_buf.clone()))
2623                .tool(DclintTool::new(project_path_buf.clone()))
2624                .tool(KubelintTool::new(project_path_buf.clone()))
2625                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2626                .tool(K8sCostsTool::new(project_path_buf.clone()))
2627                .tool(K8sDriftTool::new(project_path_buf.clone()))
2628                .tool(HelmlintTool::new(project_path_buf.clone()))
2629                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2630                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2631                .tool(TerraformInstallTool::new())
2632                .tool(ReadFileTool::new(project_path_buf.clone()))
2633                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2634                .tool(WebFetchTool::new())
2635                // Prometheus discovery and connection tools for live K8s analysis
2636                .tool(PrometheusDiscoverTool::new())
2637                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2638                // RAG retrieval tools for compressed tool outputs
2639                .tool(RetrieveOutputTool::new())
2640                .tool(ListOutputsTool::new())
2641                        // Platform tools for project management
2642                        .tool(ListOrganizationsTool::new())
2643                        .tool(ListProjectsTool::new())
2644                        .tool(SelectProjectTool::new())
2645                        .tool(CurrentContextTool::new())
2646                        .tool(OpenProviderSettingsTool::new())
2647                        .tool(CheckProviderConnectionTool::new())
2648                        .tool(ListDeploymentCapabilitiesTool::new())
2649                        .tool(ListHetznerAvailabilityTool::new())
2650                        // Deployment tools for service management
2651                        .tool(CreateDeploymentConfigTool::new())
2652                        .tool(DeployServiceTool::with_context(project_path_buf.clone(), ExecutionContext::InteractiveCli))
2653                        .tool(ListDeploymentConfigsTool::new())
2654                        .tool(TriggerDeploymentTool::new())
2655                        .tool(GetDeploymentStatusTool::new())
2656                        .tool(ListDeploymentsTool::new())
2657                        .tool(GetServiceLogsTool::new())
2658                        .tool(SetDeploymentSecretsTool::with_context(ExecutionContext::InteractiveCli));
2659
2660            // Add generation tools if this is a generation query
2661            if is_generation {
2662                builder = builder
2663                    .tool(WriteFileTool::new(project_path_buf.clone()))
2664                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2665                    .tool(ShellTool::new(project_path_buf.clone()));
2666            }
2667
2668            let agent = builder.additional_params(thinking_params).build();
2669
2670            agent
2671                .prompt(query)
2672                .multi_turn(50)
2673                .await
2674                .map_err(|e| AgentError::ProviderError(e.to_string()))
2675        }
2676    }
2677}