syncable_cli/agent/
mod.rs

1//! Agent module for interactive AI-powered CLI assistance
2//!
3//! This module provides an agent layer using the Rig library that allows users
4//! to interact with the CLI through natural language conversations.
5//!
6//! # Features
7//!
8//! - **Conversation History**: Maintains context across multiple turns
9//! - **Automatic Compaction**: Compresses old history when token count exceeds threshold
10//! - **Tool Tracking**: Records tool calls for better context preservation
11//!
12//! # Usage
13//!
14//! ```bash
15//! # Interactive mode
16//! sync-ctl chat
17//!
18//! # With specific provider
19//! sync-ctl chat --provider openai --model gpt-5.2
20//!
21//! # Single query
22//! sync-ctl chat --query "What security issues does this project have?"
23//! ```
24//!
25//! # Interactive Commands
26//!
27//! - `/model` - Switch to a different AI model
28//! - `/provider` - Switch provider (prompts for API key if needed)
29//! - `/help` - Show available commands
30//! - `/clear` - Clear conversation history
31//! - `/exit` - Exit the chat
32
33pub mod commands;
34pub mod compact;
35pub mod history;
36pub mod ide;
37pub mod persistence;
38pub mod prompts;
39pub mod session;
40pub mod tools;
41pub mod ui;
42use colored::Colorize;
43use commands::TokenUsage;
44use history::{ConversationHistory, ToolCallRecord};
45use ide::IdeClient;
46use rig::{
47    client::{CompletionClient, ProviderClient},
48    completion::Prompt,
49    providers::{anthropic, openai},
50};
51use session::{ChatSession, PlanMode};
52use std::path::Path;
53use std::sync::Arc;
54use tokio::sync::Mutex as TokioMutex;
55use ui::{ResponseFormatter, ToolDisplayHook};
56
57/// Provider type for the agent
58#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
59pub enum ProviderType {
60    #[default]
61    OpenAI,
62    Anthropic,
63    Bedrock,
64}
65
66impl std::fmt::Display for ProviderType {
67    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
68        match self {
69            ProviderType::OpenAI => write!(f, "openai"),
70            ProviderType::Anthropic => write!(f, "anthropic"),
71            ProviderType::Bedrock => write!(f, "bedrock"),
72        }
73    }
74}
75
76impl std::str::FromStr for ProviderType {
77    type Err = String;
78
79    fn from_str(s: &str) -> Result<Self, Self::Err> {
80        match s.to_lowercase().as_str() {
81            "openai" => Ok(ProviderType::OpenAI),
82            "anthropic" => Ok(ProviderType::Anthropic),
83            "bedrock" | "aws" | "aws-bedrock" => Ok(ProviderType::Bedrock),
84            _ => Err(format!(
85                "Unknown provider: {}. Use: openai, anthropic, or bedrock",
86                s
87            )),
88        }
89    }
90}
91
92/// Error types for the agent
93#[derive(Debug, thiserror::Error)]
94pub enum AgentError {
95    #[error("Missing API key. Set {0} environment variable.")]
96    MissingApiKey(String),
97
98    #[error("Provider error: {0}")]
99    ProviderError(String),
100
101    #[error("Tool error: {0}")]
102    ToolError(String),
103}
104
105pub type AgentResult<T> = Result<T, AgentError>;
106
107/// Get the system prompt for the agent based on query type and plan mode
108fn get_system_prompt(project_path: &Path, query: Option<&str>, plan_mode: PlanMode) -> String {
109    // In planning mode, use the read-only exploration prompt
110    if plan_mode.is_planning() {
111        return prompts::get_planning_prompt(project_path);
112    }
113
114    if let Some(q) = query {
115        // First check if it's a code development task (highest priority)
116        if prompts::is_code_development_query(q) {
117            return prompts::get_code_development_prompt(project_path);
118        }
119        // Then check if it's DevOps generation (Docker, Terraform, Helm)
120        if prompts::is_generation_query(q) {
121            return prompts::get_devops_prompt(project_path, Some(q));
122        }
123    }
124    // Default to analysis prompt
125    prompts::get_analysis_prompt(project_path)
126}
127
128/// Run the agent in interactive mode with custom REPL supporting /model and /provider commands
129pub async fn run_interactive(
130    project_path: &Path,
131    provider: ProviderType,
132    model: Option<String>,
133) -> AgentResult<()> {
134    use tools::*;
135
136    let mut session = ChatSession::new(project_path, provider, model);
137
138    // Shared background process manager for Prometheus port-forwards
139    let bg_manager = Arc::new(BackgroundProcessManager::new());
140
141    // Terminal layout for split screen is disabled for now - see notes below
142    // let terminal_layout = ui::TerminalLayout::new();
143    // let layout_state = terminal_layout.state();
144
145    // Initialize conversation history with compaction support
146    let mut conversation_history = ConversationHistory::new();
147
148    // Initialize IDE client for native diff viewing
149    let ide_client: Option<Arc<TokioMutex<IdeClient>>> = {
150        let mut client = IdeClient::new().await;
151        if client.is_ide_available() {
152            match client.connect().await {
153                Ok(()) => {
154                    println!(
155                        "{} Connected to {} IDE companion",
156                        "โœ“".green(),
157                        client.ide_name().unwrap_or("VS Code")
158                    );
159                    Some(Arc::new(TokioMutex::new(client)))
160                }
161                Err(e) => {
162                    // IDE detected but companion not running or connection failed
163                    println!("{} IDE companion not connected: {}", "!".yellow(), e);
164                    None
165                }
166            }
167        } else {
168            println!(
169                "{} No IDE detected (TERM_PROGRAM={})",
170                "ยท".dimmed(),
171                std::env::var("TERM_PROGRAM").unwrap_or_default()
172            );
173            None
174        }
175    };
176
177    // Load API key from config file to env if not already set
178    ChatSession::load_api_key_to_env(session.provider);
179
180    // Check if API key is configured, prompt if not
181    if !ChatSession::has_api_key(session.provider) {
182        ChatSession::prompt_api_key(session.provider)?;
183    }
184
185    session.print_banner();
186
187    // NOTE: Terminal layout with ANSI scroll regions is disabled for now.
188    // The scroll region approach conflicts with the existing input/output flow.
189    // TODO: Implement proper scroll region support that integrates with the input handler.
190    // For now, we rely on the pause/resume mechanism in progress indicator.
191    //
192    // if let Err(e) = terminal_layout.init() {
193    //     eprintln!(
194    //         "{}",
195    //         format!("Note: Terminal layout initialization failed: {}. Using fallback mode.", e)
196    //             .dimmed()
197    //     );
198    // }
199
200    // Raw Rig messages for multi-turn - preserves Reasoning blocks for thinking
201    // Our ConversationHistory only stores text summaries, but rig needs full Message structure
202    let mut raw_chat_history: Vec<rig::completion::Message> = Vec::new();
203
204    // Pending input for auto-continue after plan creation
205    let mut pending_input: Option<String> = None;
206    // Auto-accept mode for plan execution (skips write confirmations)
207    let mut auto_accept_writes = false;
208
209    // Initialize session recorder for conversation persistence
210    let mut session_recorder = persistence::SessionRecorder::new(project_path);
211
212    loop {
213        // Show conversation status if we have history
214        if !conversation_history.is_empty() {
215            println!(
216                "{}",
217                format!("  ๐Ÿ’ฌ Context: {}", conversation_history.status()).dimmed()
218            );
219        }
220
221        // Check for pending input (from plan menu selection)
222        let input = if let Some(pending) = pending_input.take() {
223            // Show what we're executing
224            println!("{} {}", "โ†’".cyan(), pending.dimmed());
225            pending
226        } else {
227            // New user turn - reset auto-accept mode from previous plan execution
228            auto_accept_writes = false;
229
230            // Read user input (returns InputResult)
231            let input_result = match session.read_input() {
232                Ok(result) => result,
233                Err(_) => break,
234            };
235
236            // Handle the input result
237            match input_result {
238                ui::InputResult::Submit(text) => ChatSession::process_submitted_text(&text),
239                ui::InputResult::Cancel | ui::InputResult::Exit => break,
240                ui::InputResult::TogglePlanMode => {
241                    // Toggle planning mode - minimal feedback, no extra newlines
242                    let new_mode = session.toggle_plan_mode();
243                    if new_mode.is_planning() {
244                        println!("{}", "โ˜… plan mode".yellow());
245                    } else {
246                        println!("{}", "โ–ถ standard mode".green());
247                    }
248                    continue;
249                }
250            }
251        };
252
253        if input.is_empty() {
254            continue;
255        }
256
257        // Check for commands
258        if ChatSession::is_command(&input) {
259            // Special handling for /clear to also clear conversation history
260            if input.trim().to_lowercase() == "/clear" || input.trim().to_lowercase() == "/c" {
261                conversation_history.clear();
262                raw_chat_history.clear();
263            }
264            match session.process_command(&input) {
265                Ok(true) => {
266                    // Check if /resume loaded a session
267                    if let Some(record) = session.pending_resume.take() {
268                        // Display previous messages
269                        println!();
270                        println!("{}", "โ”€โ”€โ”€ Previous Conversation โ”€โ”€โ”€".dimmed());
271                        for msg in &record.messages {
272                            match msg.role {
273                                persistence::MessageRole::User => {
274                                    println!();
275                                    println!(
276                                        "{} {}",
277                                        "You:".cyan().bold(),
278                                        truncate_string(&msg.content, 500)
279                                    );
280                                }
281                                persistence::MessageRole::Assistant => {
282                                    println!();
283                                    // Show tool calls if any (same format as live display)
284                                    if let Some(ref tools) = msg.tool_calls {
285                                        for tc in tools {
286                                            // Match live tool display: green dot for completed, cyan bold name
287                                            if tc.args_summary.is_empty() {
288                                                println!(
289                                                    "{} {}",
290                                                    "โ—".green(),
291                                                    tc.name.cyan().bold()
292                                                );
293                                            } else {
294                                                println!(
295                                                    "{} {}({})",
296                                                    "โ—".green(),
297                                                    tc.name.cyan().bold(),
298                                                    truncate_string(&tc.args_summary, 50).dimmed()
299                                                );
300                                            }
301                                        }
302                                    }
303                                    // Show response (same ResponseFormatter as live)
304                                    if !msg.content.is_empty() {
305                                        ResponseFormatter::print_response(&truncate_string(
306                                            &msg.content,
307                                            1000,
308                                        ));
309                                    }
310                                }
311                                persistence::MessageRole::System => {
312                                    // Skip system messages in display
313                                }
314                            }
315                        }
316                        println!("{}", "โ”€โ”€โ”€ End of History โ”€โ”€โ”€".dimmed());
317                        println!();
318
319                        // Load messages into raw_chat_history for AI context
320                        for msg in &record.messages {
321                            match msg.role {
322                                persistence::MessageRole::User => {
323                                    raw_chat_history.push(rig::completion::Message::User {
324                                        content: rig::one_or_many::OneOrMany::one(
325                                            rig::completion::message::UserContent::text(
326                                                &msg.content,
327                                            ),
328                                        ),
329                                    });
330                                }
331                                persistence::MessageRole::Assistant => {
332                                    raw_chat_history.push(rig::completion::Message::Assistant {
333                                        id: Some(msg.id.clone()),
334                                        content: rig::one_or_many::OneOrMany::one(
335                                            rig::completion::message::AssistantContent::text(
336                                                &msg.content,
337                                            ),
338                                        ),
339                                    });
340                                }
341                                persistence::MessageRole::System => {}
342                            }
343                        }
344
345                        // Load into conversation_history for context tracking
346                        for msg in &record.messages {
347                            if msg.role == persistence::MessageRole::User {
348                                // Find the next assistant message
349                                let response = record
350                                    .messages
351                                    .iter()
352                                    .skip_while(|m| m.id != msg.id)
353                                    .skip(1)
354                                    .find(|m| m.role == persistence::MessageRole::Assistant)
355                                    .map(|m| m.content.clone())
356                                    .unwrap_or_default();
357
358                                conversation_history.add_turn(
359                                    msg.content.clone(),
360                                    response,
361                                    vec![], // Tool calls not loaded for simplicity
362                                );
363                            }
364                        }
365
366                        println!(
367                            "{}",
368                            format!(
369                                "  โœ“ Loaded {} messages. You can now continue the conversation.",
370                                record.messages.len()
371                            )
372                            .green()
373                        );
374                        println!();
375                    }
376                    continue;
377                }
378                Ok(false) => break, // /exit
379                Err(e) => {
380                    eprintln!("{}", format!("Error: {}", e).red());
381                    continue;
382                }
383            }
384        }
385
386        // Check API key before making request (in case provider changed)
387        if !ChatSession::has_api_key(session.provider) {
388            eprintln!(
389                "{}",
390                "No API key configured. Use /provider to set one.".yellow()
391            );
392            continue;
393        }
394
395        // Check if compaction is needed before making the request
396        if conversation_history.needs_compaction() {
397            println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
398            if let Some(summary) = conversation_history.compact() {
399                println!(
400                    "{}",
401                    format!("  โœ“ Compressed {} turns", summary.matches("Turn").count()).dimmed()
402                );
403            }
404        }
405
406        // Pre-request check: estimate if we're approaching context limit
407        // Check raw_chat_history (actual messages) not conversation_history
408        // because conversation_history may be out of sync
409        let estimated_input_tokens = estimate_raw_history_tokens(&raw_chat_history)
410            + input.len() / 4  // New input
411            + 5000; // System prompt overhead estimate
412
413        if estimated_input_tokens > 150_000 {
414            println!(
415                "{}",
416                "  โš  Large context detected. Pre-truncating...".yellow()
417            );
418
419            let old_count = raw_chat_history.len();
420            // Keep last 20 messages when approaching limit
421            if raw_chat_history.len() > 20 {
422                let drain_count = raw_chat_history.len() - 20;
423                raw_chat_history.drain(0..drain_count);
424                // Ensure history starts with User message for OpenAI Responses API compatibility
425                ensure_history_starts_with_user(&mut raw_chat_history);
426                conversation_history.clear(); // Stay in sync
427                println!(
428                    "{}",
429                    format!(
430                        "  โœ“ Truncated {} โ†’ {} messages",
431                        old_count,
432                        raw_chat_history.len()
433                    )
434                    .dimmed()
435                );
436            }
437        }
438
439        // Retry loop for automatic error recovery
440        // MAX_RETRIES is for failures without progress
441        // MAX_CONTINUATIONS is for truncations WITH progress (more generous)
442        // TOOL_CALL_CHECKPOINT is the interval at which we ask user to confirm
443        // MAX_TOOL_CALLS is the absolute maximum (300 = 6 checkpoints x 50)
444        const MAX_RETRIES: u32 = 3;
445        const MAX_CONTINUATIONS: u32 = 10;
446        const _TOOL_CALL_CHECKPOINT: usize = 50;
447        const MAX_TOOL_CALLS: usize = 300;
448        let mut retry_attempt = 0;
449        let mut continuation_count = 0;
450        let mut total_tool_calls: usize = 0;
451        let mut auto_continue_tools = false; // User can select "always" to skip future prompts
452        let mut current_input = input.clone();
453        let mut succeeded = false;
454
455        while retry_attempt < MAX_RETRIES && continuation_count < MAX_CONTINUATIONS && !succeeded {
456            // Log if this is a continuation attempt
457            if continuation_count > 0 {
458                eprintln!("{}", "  ๐Ÿ“ก Sending continuation request...".dimmed());
459            }
460
461            // Create hook for Claude Code style tool display
462            let hook = ToolDisplayHook::new();
463
464            // Create progress indicator for visual feedback during generation
465            let progress = ui::GenerationIndicator::new();
466            // Layout connection disabled - using inline progress mode
467            // progress.state().set_layout(layout_state.clone());
468            hook.set_progress_state(progress.state()).await;
469
470            let project_path_buf = session.project_path.clone();
471            // Select prompt based on query type (analysis vs generation) and plan mode
472            let preamble = get_system_prompt(
473                &session.project_path,
474                Some(&current_input),
475                session.plan_mode,
476            );
477            let is_generation = prompts::is_generation_query(&current_input);
478            let is_planning = session.plan_mode.is_planning();
479
480            // Note: using raw_chat_history directly which preserves Reasoning blocks
481            // This is needed for extended thinking to work with multi-turn conversations
482
483            // Get progress state for interrupt detection
484            let progress_state = progress.state();
485
486            // Use tokio::select! to race the API call against Ctrl+C
487            // This allows immediate cancellation, not just between tool calls
488            let mut user_interrupted = false;
489
490            // API call with Ctrl+C interrupt support
491            let response = tokio::select! {
492                biased; // Check ctrl_c first for faster response
493
494                _ = tokio::signal::ctrl_c() => {
495                    user_interrupted = true;
496                    Err::<String, String>("User cancelled".to_string())
497                }
498
499                result = async {
500                    match session.provider {
501                ProviderType::OpenAI => {
502                    // Use Responses API (default) for reasoning model support.
503                    // rig-core 0.28+ handles Reasoning items properly in multi-turn.
504                    let client = openai::Client::from_env();
505
506                    let mut builder = client
507                        .agent(&session.model)
508                        .preamble(&preamble)
509                        .max_tokens(4096)
510                        .tool(AnalyzeTool::new(project_path_buf.clone()))
511                        .tool(SecurityScanTool::new(project_path_buf.clone()))
512                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
513                        .tool(HadolintTool::new(project_path_buf.clone()))
514                        .tool(DclintTool::new(project_path_buf.clone()))
515                        .tool(KubelintTool::new(project_path_buf.clone()))
516                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
517                        .tool(K8sCostsTool::new(project_path_buf.clone()))
518                        .tool(K8sDriftTool::new(project_path_buf.clone()))
519                        .tool(HelmlintTool::new(project_path_buf.clone()))
520                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
521                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
522                        .tool(TerraformInstallTool::new())
523                        .tool(ReadFileTool::new(project_path_buf.clone()))
524                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
525                        .tool(WebFetchTool::new())
526                        // Prometheus discovery and connection tools for live K8s analysis
527                        .tool(PrometheusDiscoverTool::new())
528                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
529                        // RAG retrieval tools for compressed tool outputs
530                        .tool(RetrieveOutputTool::new())
531                        .tool(ListOutputsTool::new());
532
533                    // Add tools based on mode
534                    if is_planning {
535                        // Plan mode: read-only shell + plan creation tools
536                        builder = builder
537                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
538                            .tool(PlanCreateTool::new(project_path_buf.clone()))
539                            .tool(PlanListTool::new(project_path_buf.clone()));
540                    } else if is_generation {
541                        // Standard mode + generation query: all tools including file writes and plan execution
542                        let (mut write_file_tool, mut write_files_tool) =
543                            if let Some(ref client) = ide_client {
544                                (
545                                    WriteFileTool::new(project_path_buf.clone())
546                                        .with_ide_client(client.clone()),
547                                    WriteFilesTool::new(project_path_buf.clone())
548                                        .with_ide_client(client.clone()),
549                                )
550                            } else {
551                                (
552                                    WriteFileTool::new(project_path_buf.clone()),
553                                    WriteFilesTool::new(project_path_buf.clone()),
554                                )
555                            };
556                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
557                        if auto_accept_writes {
558                            write_file_tool = write_file_tool.without_confirmation();
559                            write_files_tool = write_files_tool.without_confirmation();
560                        }
561                        builder = builder
562                            .tool(write_file_tool)
563                            .tool(write_files_tool)
564                            .tool(ShellTool::new(project_path_buf.clone()))
565                            .tool(PlanListTool::new(project_path_buf.clone()))
566                            .tool(PlanNextTool::new(project_path_buf.clone()))
567                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
568                    }
569
570                    // Enable reasoning for OpenAI reasoning models (GPT-5.x, O1, O3, O4)
571                    let model_lower = session.model.to_lowercase();
572                    let is_reasoning_model = model_lower.starts_with("gpt-5")
573                        || model_lower.starts_with("gpt5")
574                        || model_lower.starts_with("o1")
575                        || model_lower.starts_with("o3")
576                        || model_lower.starts_with("o4");
577
578                    let agent = if is_reasoning_model {
579                        let reasoning_params = serde_json::json!({
580                            "reasoning": {
581                                "effort": "medium",
582                                "summary": "detailed"
583                            }
584                        });
585                        builder.additional_params(reasoning_params).build()
586                    } else {
587                        builder.build()
588                    };
589
590                    // Use multi_turn with Responses API
591                    agent
592                        .prompt(&current_input)
593                        .with_history(&mut raw_chat_history)
594                        .with_hook(hook.clone())
595                        .multi_turn(50)
596                        .await
597                }
598                ProviderType::Anthropic => {
599                    let client = anthropic::Client::from_env();
600
601                    // TODO: Extended thinking for Claude is disabled because rig-bedrock/rig-anthropic
602                    // don't properly handle thinking blocks in multi-turn conversations with tool use.
603                    // When thinking is enabled, ALL assistant messages must start with thinking blocks
604                    // BEFORE tool_use blocks, but rig doesn't preserve/replay these.
605                    // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference impl.
606
607                    let mut builder = client
608                        .agent(&session.model)
609                        .preamble(&preamble)
610                        .max_tokens(4096)
611                        .tool(AnalyzeTool::new(project_path_buf.clone()))
612                        .tool(SecurityScanTool::new(project_path_buf.clone()))
613                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
614                        .tool(HadolintTool::new(project_path_buf.clone()))
615                        .tool(DclintTool::new(project_path_buf.clone()))
616                        .tool(KubelintTool::new(project_path_buf.clone()))
617                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
618                        .tool(K8sCostsTool::new(project_path_buf.clone()))
619                        .tool(K8sDriftTool::new(project_path_buf.clone()))
620                        .tool(HelmlintTool::new(project_path_buf.clone()))
621                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
622                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
623                        .tool(TerraformInstallTool::new())
624                        .tool(ReadFileTool::new(project_path_buf.clone()))
625                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
626                        .tool(WebFetchTool::new())
627                        // Prometheus discovery and connection tools for live K8s analysis
628                        .tool(PrometheusDiscoverTool::new())
629                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
630                        // RAG retrieval tools for compressed tool outputs
631                        .tool(RetrieveOutputTool::new())
632                        .tool(ListOutputsTool::new());
633
634                    // Add tools based on mode
635                    if is_planning {
636                        // Plan mode: read-only shell + plan creation tools
637                        builder = builder
638                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
639                            .tool(PlanCreateTool::new(project_path_buf.clone()))
640                            .tool(PlanListTool::new(project_path_buf.clone()));
641                    } else if is_generation {
642                        // Standard mode + generation query: all tools including file writes and plan execution
643                        let (mut write_file_tool, mut write_files_tool) =
644                            if let Some(ref client) = ide_client {
645                                (
646                                    WriteFileTool::new(project_path_buf.clone())
647                                        .with_ide_client(client.clone()),
648                                    WriteFilesTool::new(project_path_buf.clone())
649                                        .with_ide_client(client.clone()),
650                                )
651                            } else {
652                                (
653                                    WriteFileTool::new(project_path_buf.clone()),
654                                    WriteFilesTool::new(project_path_buf.clone()),
655                                )
656                            };
657                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
658                        if auto_accept_writes {
659                            write_file_tool = write_file_tool.without_confirmation();
660                            write_files_tool = write_files_tool.without_confirmation();
661                        }
662                        builder = builder
663                            .tool(write_file_tool)
664                            .tool(write_files_tool)
665                            .tool(ShellTool::new(project_path_buf.clone()))
666                            .tool(PlanListTool::new(project_path_buf.clone()))
667                            .tool(PlanNextTool::new(project_path_buf.clone()))
668                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
669                    }
670
671                    let agent = builder.build();
672
673                    // Allow up to 50 tool call turns for complex generation tasks
674                    // Use hook to display tool calls as they happen
675                    // Pass conversation history for context continuity
676                    agent
677                        .prompt(&current_input)
678                        .with_history(&mut raw_chat_history)
679                        .with_hook(hook.clone())
680                        .multi_turn(50)
681                        .await
682                }
683                ProviderType::Bedrock => {
684                    // Bedrock provider via rig-bedrock - same pattern as OpenAI/Anthropic
685                    let client = crate::bedrock::client::Client::from_env();
686
687                    // Extended thinking for Claude models via Bedrock
688                    // This enables Claude to show its reasoning process before responding.
689                    // Requires vendored rig-bedrock that preserves Reasoning blocks with tool calls.
690                    // Extended thinking budget - reduced to help with rate limits
691                    // 8000 is enough for most tasks, increase to 16000 for complex analysis
692                    let thinking_params = serde_json::json!({
693                        "thinking": {
694                            "type": "enabled",
695                            "budget_tokens": 8000
696                        }
697                    });
698
699                    let mut builder = client
700                        .agent(&session.model)
701                        .preamble(&preamble)
702                        .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
703                        .tool(AnalyzeTool::new(project_path_buf.clone()))
704                        .tool(SecurityScanTool::new(project_path_buf.clone()))
705                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
706                        .tool(HadolintTool::new(project_path_buf.clone()))
707                        .tool(DclintTool::new(project_path_buf.clone()))
708                        .tool(KubelintTool::new(project_path_buf.clone()))
709                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
710                        .tool(K8sCostsTool::new(project_path_buf.clone()))
711                        .tool(K8sDriftTool::new(project_path_buf.clone()))
712                        .tool(HelmlintTool::new(project_path_buf.clone()))
713                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
714                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
715                        .tool(TerraformInstallTool::new())
716                        .tool(ReadFileTool::new(project_path_buf.clone()))
717                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
718                        .tool(WebFetchTool::new())
719                        // Prometheus discovery and connection tools for live K8s analysis
720                        .tool(PrometheusDiscoverTool::new())
721                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
722                        // RAG retrieval tools for compressed tool outputs
723                        .tool(RetrieveOutputTool::new())
724                        .tool(ListOutputsTool::new());
725
726                    // Add tools based on mode
727                    if is_planning {
728                        // Plan mode: read-only shell + plan creation tools
729                        builder = builder
730                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
731                            .tool(PlanCreateTool::new(project_path_buf.clone()))
732                            .tool(PlanListTool::new(project_path_buf.clone()));
733                    } else if is_generation {
734                        // Standard mode + generation query: all tools including file writes and plan execution
735                        let (mut write_file_tool, mut write_files_tool) =
736                            if let Some(ref client) = ide_client {
737                                (
738                                    WriteFileTool::new(project_path_buf.clone())
739                                        .with_ide_client(client.clone()),
740                                    WriteFilesTool::new(project_path_buf.clone())
741                                        .with_ide_client(client.clone()),
742                                )
743                            } else {
744                                (
745                                    WriteFileTool::new(project_path_buf.clone()),
746                                    WriteFilesTool::new(project_path_buf.clone()),
747                                )
748                            };
749                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
750                        if auto_accept_writes {
751                            write_file_tool = write_file_tool.without_confirmation();
752                            write_files_tool = write_files_tool.without_confirmation();
753                        }
754                        builder = builder
755                            .tool(write_file_tool)
756                            .tool(write_files_tool)
757                            .tool(ShellTool::new(project_path_buf.clone()))
758                            .tool(PlanListTool::new(project_path_buf.clone()))
759                            .tool(PlanNextTool::new(project_path_buf.clone()))
760                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
761                    }
762
763                    // Add thinking params for extended reasoning
764                    builder = builder.additional_params(thinking_params);
765
766                    let agent = builder.build();
767
768                    // Use same multi-turn pattern as OpenAI/Anthropic
769                    agent
770                        .prompt(&current_input)
771                        .with_history(&mut raw_chat_history)
772                        .with_hook(hook.clone())
773                        .multi_turn(50)
774                        .await
775                    }
776                }.map_err(|e| e.to_string())
777            } => result
778            };
779
780            // Stop the progress indicator before handling the response
781            progress.stop().await;
782
783            // Suppress unused variable warnings
784            let _ = (&progress_state, user_interrupted);
785
786            match response {
787                Ok(text) => {
788                    // Show final response
789                    println!();
790                    ResponseFormatter::print_response(&text);
791
792                    // Track token usage - use actual from hook if available, else estimate
793                    let hook_usage = hook.get_usage().await;
794                    if hook_usage.has_data() {
795                        // Use actual token counts from API response
796                        session
797                            .token_usage
798                            .add_actual(hook_usage.input_tokens, hook_usage.output_tokens);
799                    } else {
800                        // Fall back to estimation when API doesn't provide usage
801                        let prompt_tokens = TokenUsage::estimate_tokens(&input);
802                        let completion_tokens = TokenUsage::estimate_tokens(&text);
803                        session
804                            .token_usage
805                            .add_estimated(prompt_tokens, completion_tokens);
806                    }
807                    // Reset hook usage for next request batch
808                    hook.reset_usage().await;
809
810                    // Show context indicator like Forge: [model/~tokens]
811                    let model_short = session
812                        .model
813                        .split('/')
814                        .next_back()
815                        .unwrap_or(&session.model)
816                        .split(':')
817                        .next()
818                        .unwrap_or(&session.model);
819                    println!();
820                    println!(
821                        "  {}[{}/{}]{}",
822                        ui::colors::ansi::DIM,
823                        model_short,
824                        session.token_usage.format_compact(),
825                        ui::colors::ansi::RESET
826                    );
827
828                    // Extract tool calls from the hook state for history tracking
829                    let tool_calls = extract_tool_calls_from_hook(&hook).await;
830                    let batch_tool_count = tool_calls.len();
831                    total_tool_calls += batch_tool_count;
832
833                    // Show tool call summary if significant
834                    if batch_tool_count > 10 {
835                        println!(
836                            "{}",
837                            format!(
838                                "  โœ“ Completed with {} tool calls ({} total this session)",
839                                batch_tool_count, total_tool_calls
840                            )
841                            .dimmed()
842                        );
843                    }
844
845                    // Add to conversation history with tool call records
846                    conversation_history.add_turn(input.clone(), text.clone(), tool_calls.clone());
847
848                    // Check if this heavy turn requires immediate compaction
849                    // This helps prevent context overflow in subsequent requests
850                    if conversation_history.needs_compaction() {
851                        println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
852                        if let Some(summary) = conversation_history.compact() {
853                            println!(
854                                "{}",
855                                format!("  โœ“ Compressed {} turns", summary.matches("Turn").count())
856                                    .dimmed()
857                            );
858                        }
859                    }
860
861                    // Simplify history for OpenAI Responses API reasoning models
862                    // Keep only User text and Assistant text - strip reasoning, tool calls, tool results
863                    // This prevents pairing errors like "rs_... without its required following item"
864                    // and "fc_... without its required reasoning item"
865                    if session.provider == ProviderType::OpenAI {
866                        simplify_history_for_openai_reasoning(&mut raw_chat_history);
867                    }
868
869                    // Also update legacy session history for compatibility
870                    session.history.push(("user".to_string(), input.clone()));
871                    session
872                        .history
873                        .push(("assistant".to_string(), text.clone()));
874
875                    // Record to persistent session storage
876                    session_recorder.record_user_message(&input);
877                    session_recorder.record_assistant_message(&text, Some(&tool_calls));
878                    if let Err(e) = session_recorder.save() {
879                        eprintln!(
880                            "{}",
881                            format!("  Warning: Failed to save session: {}", e).dimmed()
882                        );
883                    }
884
885                    // Check if plan_create was called - show interactive menu
886                    if let Some(plan_info) = find_plan_create_call(&tool_calls) {
887                        println!(); // Space before menu
888
889                        // Show the plan action menu (don't switch modes yet - let user choose)
890                        match ui::show_plan_action_menu(&plan_info.0, plan_info.1) {
891                            ui::PlanActionResult::ExecuteAutoAccept => {
892                                // Now switch to standard mode for execution
893                                if session.plan_mode.is_planning() {
894                                    session.plan_mode = session.plan_mode.toggle();
895                                }
896                                auto_accept_writes = true;
897                                pending_input = Some(format!(
898                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order. Auto-accept all file writes.",
899                                    plan_info.0
900                                ));
901                                succeeded = true;
902                            }
903                            ui::PlanActionResult::ExecuteWithReview => {
904                                // Now switch to standard mode for execution
905                                if session.plan_mode.is_planning() {
906                                    session.plan_mode = session.plan_mode.toggle();
907                                }
908                                pending_input = Some(format!(
909                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order.",
910                                    plan_info.0
911                                ));
912                                succeeded = true;
913                            }
914                            ui::PlanActionResult::ChangePlan(feedback) => {
915                                // Stay in plan mode for modifications
916                                pending_input = Some(format!(
917                                    "Please modify the plan at '{}'. User feedback: {}",
918                                    plan_info.0, feedback
919                                ));
920                                succeeded = true;
921                            }
922                            ui::PlanActionResult::Cancel => {
923                                // Just complete normally, don't execute
924                                succeeded = true;
925                            }
926                        }
927                    } else {
928                        succeeded = true;
929                    }
930                }
931                Err(e) => {
932                    let err_str = e.to_string();
933
934                    println!();
935
936                    // Check if this was a user-initiated cancellation (Ctrl+C)
937                    if err_str.contains("cancelled") || err_str.contains("Cancelled") {
938                        // Extract any completed work before cancellation
939                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
940                        let tool_count = completed_tools.len();
941
942                        eprintln!("{}", "โš  Generation interrupted.".yellow());
943                        if tool_count > 0 {
944                            eprintln!(
945                                "{}",
946                                format!("  {} tool calls completed before interrupt.", tool_count)
947                                    .dimmed()
948                            );
949                            // Add partial progress to history
950                            conversation_history.add_turn(
951                                current_input.clone(),
952                                format!("[Interrupted after {} tool calls]", tool_count),
953                                completed_tools,
954                            );
955                        }
956                        eprintln!("{}", "  Type your next message to continue.".dimmed());
957
958                        // Don't retry, don't mark as succeeded - just break to return to prompt
959                        break;
960                    }
961
962                    // Check if this is a max depth error - handle as checkpoint
963                    if err_str.contains("MaxDepth")
964                        || err_str.contains("max_depth")
965                        || err_str.contains("reached limit")
966                    {
967                        // Extract what was done before hitting the limit
968                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
969                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
970                        let batch_tool_count = completed_tools.len();
971                        total_tool_calls += batch_tool_count;
972
973                        eprintln!("{}", format!(
974                            "โš  Reached {} tool calls this batch ({} total). Maximum allowed: {}",
975                            batch_tool_count, total_tool_calls, MAX_TOOL_CALLS
976                        ).yellow());
977
978                        // Check if we've hit the absolute maximum
979                        if total_tool_calls >= MAX_TOOL_CALLS {
980                            eprintln!(
981                                "{}",
982                                format!("Maximum tool call limit ({}) reached.", MAX_TOOL_CALLS)
983                                    .red()
984                            );
985                            eprintln!(
986                                "{}",
987                                "The task is too complex. Try breaking it into smaller parts."
988                                    .dimmed()
989                            );
990                            break;
991                        }
992
993                        // Ask user if they want to continue (unless auto-continue is enabled)
994                        let should_continue = if auto_continue_tools {
995                            eprintln!(
996                                "{}",
997                                "  Auto-continuing (you selected 'always')...".dimmed()
998                            );
999                            true
1000                        } else {
1001                            eprintln!(
1002                                "{}",
1003                                "Excessive tool calls used. Want to continue?".yellow()
1004                            );
1005                            eprintln!(
1006                                "{}",
1007                                "  [y] Yes, continue  [n] No, stop  [a] Always continue".dimmed()
1008                            );
1009                            print!("  > ");
1010                            let _ = std::io::Write::flush(&mut std::io::stdout());
1011
1012                            // Read user input
1013                            let mut response = String::new();
1014                            match std::io::stdin().read_line(&mut response) {
1015                                Ok(_) => {
1016                                    let resp = response.trim().to_lowercase();
1017                                    if resp == "a" || resp == "always" {
1018                                        auto_continue_tools = true;
1019                                        true
1020                                    } else {
1021                                        resp == "y" || resp == "yes" || resp.is_empty()
1022                                    }
1023                                }
1024                                Err(_) => false,
1025                            }
1026                        };
1027
1028                        if !should_continue {
1029                            eprintln!(
1030                                "{}",
1031                                "Stopped by user. Type 'continue' to resume later.".dimmed()
1032                            );
1033                            // Add partial progress to history
1034                            if !completed_tools.is_empty() {
1035                                conversation_history.add_turn(
1036                                    current_input.clone(),
1037                                    format!(
1038                                        "[Stopped at checkpoint - {} tools completed]",
1039                                        batch_tool_count
1040                                    ),
1041                                    vec![],
1042                                );
1043                            }
1044                            break;
1045                        }
1046
1047                        // Continue from checkpoint
1048                        eprintln!(
1049                            "{}",
1050                            format!(
1051                                "  โ†’ Continuing... {} remaining tool calls available",
1052                                MAX_TOOL_CALLS - total_tool_calls
1053                            )
1054                            .dimmed()
1055                        );
1056
1057                        // Add partial progress to history (without duplicating tool calls)
1058                        conversation_history.add_turn(
1059                            current_input.clone(),
1060                            format!(
1061                                "[Checkpoint - {} tools completed, continuing...]",
1062                                batch_tool_count
1063                            ),
1064                            vec![],
1065                        );
1066
1067                        // Build continuation prompt
1068                        current_input =
1069                            build_continuation_prompt(&input, &completed_tools, &agent_thinking);
1070
1071                        // Brief delay before continuation
1072                        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1073                        continue; // Continue the loop without incrementing retry_attempt
1074                    } else if err_str.contains("rate")
1075                        || err_str.contains("Rate")
1076                        || err_str.contains("429")
1077                        || err_str.contains("Too many tokens")
1078                        || err_str.contains("please wait")
1079                        || err_str.contains("throttl")
1080                        || err_str.contains("Throttl")
1081                    {
1082                        eprintln!("{}", "โš  Rate limited by API provider.".yellow());
1083                        // Wait before retry for rate limits (longer wait for "too many tokens")
1084                        retry_attempt += 1;
1085                        let wait_secs = if err_str.contains("Too many tokens") {
1086                            30
1087                        } else {
1088                            5
1089                        };
1090                        eprintln!(
1091                            "{}",
1092                            format!(
1093                                "  Waiting {} seconds before retry ({}/{})...",
1094                                wait_secs, retry_attempt, MAX_RETRIES
1095                            )
1096                            .dimmed()
1097                        );
1098                        tokio::time::sleep(tokio::time::Duration::from_secs(wait_secs)).await;
1099                    } else if is_input_too_long_error(&err_str) {
1100                        // Context too large - truncate raw_chat_history directly
1101                        // NOTE: We truncate raw_chat_history (actual messages) not conversation_history
1102                        // because conversation_history may be empty/stale during errors
1103                        eprintln!(
1104                            "{}",
1105                            "โš  Context too large for model. Truncating history...".yellow()
1106                        );
1107
1108                        let old_token_count = estimate_raw_history_tokens(&raw_chat_history);
1109                        let old_msg_count = raw_chat_history.len();
1110
1111                        // Strategy 1: Keep only the last N messages (user/assistant pairs)
1112                        // More aggressive truncation on each retry: 10 โ†’ 6 โ†’ 4 messages
1113                        let keep_count = match retry_attempt {
1114                            0 => 10,
1115                            1 => 6,
1116                            _ => 4,
1117                        };
1118
1119                        if raw_chat_history.len() > keep_count {
1120                            // Drain older messages, keep the most recent ones
1121                            let drain_count = raw_chat_history.len() - keep_count;
1122                            raw_chat_history.drain(0..drain_count);
1123                            // Ensure history starts with User message for OpenAI Responses API compatibility
1124                            ensure_history_starts_with_user(&mut raw_chat_history);
1125                        }
1126
1127                        // Strategy 2: Compact large tool outputs to temp files + summaries
1128                        // This preserves data (agent can read file if needed) while reducing context
1129                        let max_output_chars = match retry_attempt {
1130                            0 => 50_000, // 50KB on first try
1131                            1 => 20_000, // 20KB on second
1132                            _ => 5_000,  // 5KB on third (aggressive)
1133                        };
1134                        compact_large_tool_outputs(&mut raw_chat_history, max_output_chars);
1135
1136                        let new_token_count = estimate_raw_history_tokens(&raw_chat_history);
1137                        eprintln!("{}", format!(
1138                            "  โœ“ Truncated: {} messages (~{} tokens) โ†’ {} messages (~{} tokens)",
1139                            old_msg_count, old_token_count, raw_chat_history.len(), new_token_count
1140                        ).green());
1141
1142                        // Also clear conversation_history to stay in sync
1143                        conversation_history.clear();
1144
1145                        // Retry with truncated context
1146                        retry_attempt += 1;
1147                        if retry_attempt < MAX_RETRIES {
1148                            eprintln!(
1149                                "{}",
1150                                format!(
1151                                    "  โ†’ Retrying with truncated context ({}/{})...",
1152                                    retry_attempt, MAX_RETRIES
1153                                )
1154                                .dimmed()
1155                            );
1156                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1157                        } else {
1158                            eprintln!(
1159                                "{}",
1160                                "Context still too large after truncation. Try /clear to reset."
1161                                    .red()
1162                            );
1163                            break;
1164                        }
1165                    } else if is_truncation_error(&err_str) {
1166                        // Truncation error - try intelligent continuation
1167                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
1168                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
1169
1170                        // Count actually completed tools (not in-progress)
1171                        let completed_count = completed_tools
1172                            .iter()
1173                            .filter(|t| !t.result_summary.contains("IN PROGRESS"))
1174                            .count();
1175                        let in_progress_count = completed_tools.len() - completed_count;
1176
1177                        if !completed_tools.is_empty() && continuation_count < MAX_CONTINUATIONS {
1178                            // We have partial progress - continue from where we left off
1179                            continuation_count += 1;
1180                            let status_msg = if in_progress_count > 0 {
1181                                format!(
1182                                    "โš  Response truncated. {} completed, {} in-progress. Auto-continuing ({}/{})...",
1183                                    completed_count,
1184                                    in_progress_count,
1185                                    continuation_count,
1186                                    MAX_CONTINUATIONS
1187                                )
1188                            } else {
1189                                format!(
1190                                    "โš  Response truncated. {} tool calls completed. Auto-continuing ({}/{})...",
1191                                    completed_count, continuation_count, MAX_CONTINUATIONS
1192                                )
1193                            };
1194                            eprintln!("{}", status_msg.yellow());
1195
1196                            // Add partial progress to conversation history
1197                            // NOTE: We intentionally pass empty tool_calls here because the
1198                            // continuation prompt already contains the detailed file list.
1199                            // Including them in history would duplicate the context and waste tokens.
1200                            conversation_history.add_turn(
1201                                current_input.clone(),
1202                                format!("[Partial response - {} tools completed, {} in-progress before truncation. See continuation prompt for details.]",
1203                                    completed_count, in_progress_count),
1204                                vec![]  // Don't duplicate - continuation prompt has the details
1205                            );
1206
1207                            // Check if we need compaction after adding this heavy turn
1208                            // This is important for long multi-turn sessions with many tool calls
1209                            if conversation_history.needs_compaction() {
1210                                eprintln!(
1211                                    "{}",
1212                                    "  ๐Ÿ“ฆ Compacting history before continuation...".dimmed()
1213                                );
1214                                if let Some(summary) = conversation_history.compact() {
1215                                    eprintln!(
1216                                        "{}",
1217                                        format!(
1218                                            "  โœ“ Compressed {} turns",
1219                                            summary.matches("Turn").count()
1220                                        )
1221                                        .dimmed()
1222                                    );
1223                                }
1224                            }
1225
1226                            // Build continuation prompt with context
1227                            current_input = build_continuation_prompt(
1228                                &input,
1229                                &completed_tools,
1230                                &agent_thinking,
1231                            );
1232
1233                            // Log continuation details for debugging
1234                            eprintln!("{}", format!(
1235                                "  โ†’ Continuing with {} files read, {} written, {} other actions tracked",
1236                                completed_tools.iter().filter(|t| t.tool_name == "read_file").count(),
1237                                completed_tools.iter().filter(|t| t.tool_name == "write_file" || t.tool_name == "write_files").count(),
1238                                completed_tools.iter().filter(|t| t.tool_name != "read_file" && t.tool_name != "write_file" && t.tool_name != "write_files" && t.tool_name != "list_directory").count()
1239                            ).dimmed());
1240
1241                            // Brief delay before continuation
1242                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1243                            // Don't increment retry_attempt - this is progress via continuation
1244                        } else if retry_attempt < MAX_RETRIES {
1245                            // No tool calls completed - simple retry
1246                            retry_attempt += 1;
1247                            eprintln!(
1248                                "{}",
1249                                format!(
1250                                    "โš  Response error (attempt {}/{}). Retrying...",
1251                                    retry_attempt, MAX_RETRIES
1252                                )
1253                                .yellow()
1254                            );
1255                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1256                        } else {
1257                            // Max retries/continuations reached
1258                            eprintln!("{}", format!("Error: {}", e).red());
1259                            if continuation_count >= MAX_CONTINUATIONS {
1260                                eprintln!("{}", format!("Max continuations ({}) reached. The task is too complex for one request.", MAX_CONTINUATIONS).dimmed());
1261                            } else {
1262                                eprintln!(
1263                                    "{}",
1264                                    "Max retries reached. The response may be too complex."
1265                                        .dimmed()
1266                                );
1267                            }
1268                            eprintln!(
1269                                "{}",
1270                                "Try breaking your request into smaller parts.".dimmed()
1271                            );
1272                            break;
1273                        }
1274                    } else if err_str.contains("timeout") || err_str.contains("Timeout") {
1275                        // Timeout - simple retry
1276                        retry_attempt += 1;
1277                        if retry_attempt < MAX_RETRIES {
1278                            eprintln!(
1279                                "{}",
1280                                format!(
1281                                    "โš  Request timed out (attempt {}/{}). Retrying...",
1282                                    retry_attempt, MAX_RETRIES
1283                                )
1284                                .yellow()
1285                            );
1286                            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
1287                        } else {
1288                            eprintln!("{}", "Request timed out. Please try again.".red());
1289                            break;
1290                        }
1291                    } else {
1292                        // Unknown error - show details and break
1293                        eprintln!("{}", format!("Error: {}", e).red());
1294                        if continuation_count > 0 {
1295                            eprintln!(
1296                                "{}",
1297                                format!(
1298                                    "  (occurred during continuation attempt {})",
1299                                    continuation_count
1300                                )
1301                                .dimmed()
1302                            );
1303                        }
1304                        eprintln!("{}", "Error details for debugging:".dimmed());
1305                        eprintln!(
1306                            "{}",
1307                            format!("  - retry_attempt: {}/{}", retry_attempt, MAX_RETRIES)
1308                                .dimmed()
1309                        );
1310                        eprintln!(
1311                            "{}",
1312                            format!(
1313                                "  - continuation_count: {}/{}",
1314                                continuation_count, MAX_CONTINUATIONS
1315                            )
1316                            .dimmed()
1317                        );
1318                        break;
1319                    }
1320                }
1321            }
1322        }
1323        println!();
1324    }
1325
1326    // Clean up terminal layout before exiting (disabled - layout not initialized)
1327    // if let Err(e) = terminal_layout.cleanup() {
1328    //     eprintln!(
1329    //         "{}",
1330    //         format!("Warning: Terminal cleanup failed: {}", e).dimmed()
1331    //     );
1332    // }
1333
1334    Ok(())
1335}
1336
1337// NOTE: wait_for_interrupt function removed - ESC interrupt feature disabled
1338// due to terminal corruption issues with spawn_blocking raw mode handling.
1339// TODO: Re-implement using tool hook callbacks for cleaner interruption.
1340
1341/// Extract tool call records from the hook state for history tracking
1342async fn extract_tool_calls_from_hook(hook: &ToolDisplayHook) -> Vec<ToolCallRecord> {
1343    let state = hook.state();
1344    let guard = state.lock().await;
1345
1346    guard
1347        .tool_calls
1348        .iter()
1349        .enumerate()
1350        .map(|(i, tc)| {
1351            let result = if tc.is_running {
1352                // Tool was in progress when error occurred
1353                "[IN PROGRESS - may need to be re-run]".to_string()
1354            } else if let Some(output) = &tc.output {
1355                truncate_string(output, 200)
1356            } else {
1357                "completed".to_string()
1358            };
1359
1360            ToolCallRecord {
1361                tool_name: tc.name.clone(),
1362                args_summary: truncate_string(&tc.args, 100),
1363                result_summary: result,
1364                // Generate a unique tool ID for proper message pairing
1365                tool_id: Some(format!("tool_{}_{}", tc.name, i)),
1366                // Mark read-only tools as droppable (their results can be re-fetched)
1367                droppable: matches!(
1368                    tc.name.as_str(),
1369                    "read_file" | "list_directory" | "analyze_project"
1370                ),
1371            }
1372        })
1373        .collect()
1374}
1375
1376/// Extract any agent thinking/messages from the hook for context
1377async fn extract_agent_messages_from_hook(hook: &ToolDisplayHook) -> Vec<String> {
1378    let state = hook.state();
1379    let guard = state.lock().await;
1380    guard.agent_messages.clone()
1381}
1382
1383/// Helper to truncate strings for summaries
1384fn truncate_string(s: &str, max_len: usize) -> String {
1385    if s.len() <= max_len {
1386        s.to_string()
1387    } else {
1388        format!("{}...", &s[..max_len.saturating_sub(3)])
1389    }
1390}
1391
1392/// Compact large tool outputs by saving them to temp files and replacing with summaries.
1393/// This preserves all data (agent can read the file) while reducing context size.
1394fn compact_large_tool_outputs(messages: &mut [rig::completion::Message], max_chars: usize) {
1395    use rig::completion::message::{Text, ToolResultContent, UserContent};
1396    use std::fs;
1397
1398    // Create temp directory for compacted outputs
1399    let temp_dir = std::env::temp_dir().join("syncable-agent-outputs");
1400    let _ = fs::create_dir_all(&temp_dir);
1401
1402    for msg in messages.iter_mut() {
1403        if let rig::completion::Message::User { content } = msg {
1404            for item in content.iter_mut() {
1405                if let UserContent::ToolResult(tr) = item {
1406                    for trc in tr.content.iter_mut() {
1407                        if let ToolResultContent::Text(text) = trc {
1408                            if text.text.len() > max_chars {
1409                                // Save full output to temp file
1410                                let file_id = format!(
1411                                    "{}_{}.txt",
1412                                    tr.id,
1413                                    std::time::SystemTime::now()
1414                                        .duration_since(std::time::UNIX_EPOCH)
1415                                        .unwrap()
1416                                        .as_millis()
1417                                );
1418                                let file_path = temp_dir.join(&file_id);
1419
1420                                if let Ok(()) = fs::write(&file_path, &text.text) {
1421                                    // Create a smart summary
1422                                    let summary = create_output_summary(
1423                                        &text.text,
1424                                        &file_path.display().to_string(),
1425                                        max_chars / 2, // Use half max for summary
1426                                    );
1427
1428                                    // Replace with summary
1429                                    *trc = ToolResultContent::Text(Text { text: summary });
1430                                }
1431                            }
1432                        }
1433                    }
1434                }
1435            }
1436        }
1437    }
1438}
1439
1440/// Create a smart summary of a large output using incremental chunk processing.
1441/// Processes output in logical sections, summarizes each, then combines into actionable summary.
1442fn create_output_summary(full_output: &str, file_path: &str, max_summary_len: usize) -> String {
1443    let total_lines = full_output.lines().count();
1444    let total_chars = full_output.len();
1445
1446    let summary_content =
1447        if full_output.trim_start().starts_with('{') || full_output.trim_start().starts_with('[') {
1448            // JSON output - extract structured summary
1449            summarize_json_incrementally(full_output, max_summary_len)
1450        } else {
1451            // Text output - chunk and summarize
1452            summarize_text_incrementally(full_output, max_summary_len)
1453        };
1454
1455    format!(
1456        "[COMPACTED OUTPUT]\n\
1457        Full data: {}\n\
1458        Size: {} chars, {} lines\n\
1459        \n\
1460        {}\n\
1461        \n\
1462        [Read file with offset/limit for specific sections if needed]",
1463        file_path, total_chars, total_lines, summary_content
1464    )
1465}
1466
1467/// Incrementally summarize JSON output, extracting key fields and prioritizing important items.
1468fn summarize_json_incrementally(json_str: &str, max_len: usize) -> String {
1469    let Ok(json) = serde_json::from_str::<serde_json::Value>(json_str) else {
1470        return "Failed to parse JSON".to_string();
1471    };
1472
1473    let mut parts: Vec<String> = Vec::new();
1474    let mut current_len = 0;
1475
1476    match &json {
1477        serde_json::Value::Object(obj) => {
1478            // Priority 1: Summary/stats fields
1479            for key in ["summary", "stats", "metadata", "status"] {
1480                if let Some(v) = obj.get(key) {
1481                    let s = format!("{}:\n{}", key, indent_json(v, 2, 500));
1482                    if current_len + s.len() < max_len {
1483                        parts.push(s.clone());
1484                        current_len += s.len();
1485                    }
1486                }
1487            }
1488
1489            // Priority 2: Error/critical items (summarize each)
1490            for key in [
1491                "errors",
1492                "critical",
1493                "failures",
1494                "issues",
1495                "findings",
1496                "recommendations",
1497            ] {
1498                if let Some(serde_json::Value::Array(arr)) = obj.get(key) {
1499                    if arr.is_empty() {
1500                        continue;
1501                    }
1502                    parts.push(format!("\n{} ({} items):", key, arr.len()));
1503
1504                    // Group by severity/type if present
1505                    let mut by_severity: std::collections::HashMap<
1506                        String,
1507                        Vec<&serde_json::Value>,
1508                    > = std::collections::HashMap::new();
1509
1510                    for item in arr {
1511                        let severity = item
1512                            .get("severity")
1513                            .or_else(|| item.get("level"))
1514                            .or_else(|| item.get("type"))
1515                            .and_then(|v| v.as_str())
1516                            .unwrap_or("other")
1517                            .to_string();
1518                        by_severity.entry(severity).or_default().push(item);
1519                    }
1520
1521                    // Show critical/high first, summarize others
1522                    for sev in [
1523                        "critical", "high", "error", "warning", "medium", "low", "info", "other",
1524                    ] {
1525                        if let Some(items) = by_severity.get(sev) {
1526                            let show_count = match sev {
1527                                "critical" | "high" | "error" => 5.min(items.len()),
1528                                "warning" | "medium" => 3.min(items.len()),
1529                                _ => 2.min(items.len()),
1530                            };
1531
1532                            if !items.is_empty() {
1533                                let s =
1534                                    format!("  [{}] {} items:", sev.to_uppercase(), items.len());
1535                                if current_len + s.len() < max_len {
1536                                    parts.push(s.clone());
1537                                    current_len += s.len();
1538
1539                                    for item in items.iter().take(show_count) {
1540                                        let item_summary = summarize_single_item(item);
1541                                        if current_len + item_summary.len() < max_len {
1542                                            parts.push(format!("    โ€ข {}", item_summary));
1543                                            current_len += item_summary.len();
1544                                        }
1545                                    }
1546
1547                                    if items.len() > show_count {
1548                                        parts.push(format!(
1549                                            "    ... and {} more",
1550                                            items.len() - show_count
1551                                        ));
1552                                    }
1553                                }
1554                            }
1555                        }
1556                    }
1557                }
1558            }
1559
1560            // Priority 3: Show remaining top-level keys
1561            let shown_keys: std::collections::HashSet<&str> = [
1562                "summary",
1563                "stats",
1564                "metadata",
1565                "status",
1566                "errors",
1567                "critical",
1568                "failures",
1569                "issues",
1570                "findings",
1571                "recommendations",
1572            ]
1573            .iter()
1574            .cloned()
1575            .collect();
1576
1577            let other_keys: Vec<_> = obj
1578                .keys()
1579                .filter(|k| !shown_keys.contains(k.as_str()))
1580                .collect();
1581            if !other_keys.is_empty() && current_len < max_len - 200 {
1582                parts.push(format!("\nOther fields: {:?}", other_keys));
1583            }
1584        }
1585        serde_json::Value::Array(arr) => {
1586            parts.push(format!("Array with {} items", arr.len()));
1587
1588            // Try to group by type/severity
1589            for (i, item) in arr.iter().take(10).enumerate() {
1590                let s = format!("[{}] {}", i, summarize_single_item(item));
1591                if current_len + s.len() < max_len {
1592                    parts.push(s.clone());
1593                    current_len += s.len();
1594                }
1595            }
1596            if arr.len() > 10 {
1597                parts.push(format!("... and {} more items", arr.len() - 10));
1598            }
1599        }
1600        _ => {
1601            parts.push(truncate_json_value(&json, max_len));
1602        }
1603    }
1604
1605    parts.join("\n")
1606}
1607
1608/// Summarize a single JSON item (issue, error, etc.) into a one-liner.
1609fn summarize_single_item(item: &serde_json::Value) -> String {
1610    let mut parts: Vec<String> = Vec::new();
1611
1612    // Extract common fields
1613    for key in [
1614        "message",
1615        "description",
1616        "title",
1617        "name",
1618        "file",
1619        "path",
1620        "code",
1621        "rule",
1622    ] {
1623        if let Some(v) = item.get(key) {
1624            if let Some(s) = v.as_str() {
1625                parts.push(truncate_string(s, 80));
1626                break; // Only take first descriptive field
1627            }
1628        }
1629    }
1630
1631    // Add location if present
1632    if let Some(file) = item
1633        .get("file")
1634        .or_else(|| item.get("path"))
1635        .and_then(|v| v.as_str())
1636    {
1637        if let Some(line) = item.get("line").and_then(|v| v.as_u64()) {
1638            parts.push(format!("at {}:{}", file, line));
1639        } else {
1640            parts.push(format!("in {}", truncate_string(file, 40)));
1641        }
1642    }
1643
1644    if parts.is_empty() {
1645        truncate_json_value(item, 100)
1646    } else {
1647        parts.join(" ")
1648    }
1649}
1650
1651/// Indent JSON for display.
1652fn indent_json(v: &serde_json::Value, indent: usize, max_len: usize) -> String {
1653    let s = serde_json::to_string_pretty(v).unwrap_or_else(|_| v.to_string());
1654    let prefix = " ".repeat(indent);
1655    let indented: String = s
1656        .lines()
1657        .map(|l| format!("{}{}", prefix, l))
1658        .collect::<Vec<_>>()
1659        .join("\n");
1660    if indented.len() > max_len {
1661        format!("{}...", &indented[..max_len.saturating_sub(3)])
1662    } else {
1663        indented
1664    }
1665}
1666
1667/// Incrementally summarize text output by processing in chunks.
1668fn summarize_text_incrementally(text: &str, max_len: usize) -> String {
1669    let lines: Vec<&str> = text.lines().collect();
1670    let mut parts: Vec<String> = Vec::new();
1671    let mut current_len = 0;
1672
1673    // Look for section headers or key patterns
1674    let mut sections: Vec<(usize, &str)> = Vec::new();
1675    for (i, line) in lines.iter().enumerate() {
1676        // Detect headers (lines that look like titles)
1677        if line.starts_with('#')
1678            || line.starts_with("==")
1679            || line.starts_with("--")
1680            || (line.ends_with(':') && line.len() < 50)
1681            || line.chars().all(|c| c.is_uppercase() || c.is_whitespace())
1682        {
1683            sections.push((i, line));
1684        }
1685    }
1686
1687    if !sections.is_empty() {
1688        // Summarize by sections
1689        parts.push(format!("Found {} sections:", sections.len()));
1690        for (i, (line_num, header)) in sections.iter().enumerate() {
1691            let next_section = sections.get(i + 1).map(|(n, _)| *n).unwrap_or(lines.len());
1692            let section_lines = next_section - line_num;
1693
1694            let s = format!(
1695                "  [L{}] {} ({} lines)",
1696                line_num + 1,
1697                header.trim(),
1698                section_lines
1699            );
1700            if current_len + s.len() < max_len / 2 {
1701                parts.push(s.clone());
1702                current_len += s.len();
1703            }
1704        }
1705        parts.push("".to_string());
1706    }
1707
1708    // Show first chunk
1709    let preview_lines = 15.min(lines.len());
1710    parts.push("Content preview:".to_string());
1711    for line in lines.iter().take(preview_lines) {
1712        let s = format!("  {}", truncate_string(line, 120));
1713        if current_len + s.len() < max_len * 3 / 4 {
1714            parts.push(s.clone());
1715            current_len += s.len();
1716        }
1717    }
1718
1719    if lines.len() > preview_lines {
1720        parts.push(format!(
1721            "  ... ({} more lines)",
1722            lines.len() - preview_lines
1723        ));
1724    }
1725
1726    // Show last few lines if space permits
1727    if lines.len() > preview_lines * 2 && current_len < max_len - 500 {
1728        parts.push("\nEnd of output:".to_string());
1729        for line in lines.iter().skip(lines.len() - 5) {
1730            let s = format!("  {}", truncate_string(line, 120));
1731            if current_len + s.len() < max_len {
1732                parts.push(s.clone());
1733                current_len += s.len();
1734            }
1735        }
1736    }
1737
1738    parts.join("\n")
1739}
1740
1741/// Truncate a JSON value for display
1742fn truncate_json_value(v: &serde_json::Value, max_len: usize) -> String {
1743    let s = v.to_string();
1744    if s.len() <= max_len {
1745        s
1746    } else {
1747        format!("{}...", &s[..max_len.saturating_sub(3)])
1748    }
1749}
1750
1751/// Simplify history for OpenAI Responses API compatibility with reasoning models.
1752///
1753/// OpenAI's Responses API has strict pairing requirements:
1754/// - Reasoning items must be followed by their output (text or function_call)
1755/// - Function_call items must be preceded by their reasoning item
1756///
1757/// When passing history across user turns, these pairings get broken, causing errors like:
1758/// - "Item 'rs_...' of type 'reasoning' was provided without its required following item"
1759/// - "Item 'fc_...' of type 'function_call' was provided without its required 'reasoning' item"
1760///
1761/// Solution: Keep only User messages and final Assistant Text responses.
1762/// This preserves conversation context without the complex internal tool/reasoning structure.
1763fn simplify_history_for_openai_reasoning(history: &mut Vec<rig::completion::Message>) {
1764    use rig::completion::message::{AssistantContent, UserContent};
1765    use rig::one_or_many::OneOrMany;
1766
1767    // Filter to keep only User text messages and Assistant text messages
1768    let simplified: Vec<rig::completion::Message> = history
1769        .iter()
1770        .filter_map(|msg| match msg {
1771            // Keep User messages, but only text content (not tool results)
1772            rig::completion::Message::User { content } => {
1773                let text_only: Vec<UserContent> = content
1774                    .iter()
1775                    .filter(|c| matches!(c, UserContent::Text(_)))
1776                    .cloned()
1777                    .collect();
1778                if text_only.is_empty() {
1779                    None
1780                } else {
1781                    let mut iter = text_only.into_iter();
1782                    let first = iter.next().unwrap();
1783                    let rest: Vec<_> = iter.collect();
1784                    let new_content = if rest.is_empty() {
1785                        OneOrMany::one(first)
1786                    } else {
1787                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
1788                    };
1789                    Some(rig::completion::Message::User {
1790                        content: new_content,
1791                    })
1792                }
1793            }
1794            // Keep Assistant messages, but only text content (not reasoning, tool calls)
1795            rig::completion::Message::Assistant { content, id } => {
1796                let text_only: Vec<AssistantContent> = content
1797                    .iter()
1798                    .filter(|c| matches!(c, AssistantContent::Text(_)))
1799                    .cloned()
1800                    .collect();
1801                if text_only.is_empty() {
1802                    None
1803                } else {
1804                    let mut iter = text_only.into_iter();
1805                    let first = iter.next().unwrap();
1806                    let rest: Vec<_> = iter.collect();
1807                    let new_content = if rest.is_empty() {
1808                        OneOrMany::one(first)
1809                    } else {
1810                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
1811                    };
1812                    Some(rig::completion::Message::Assistant {
1813                        content: new_content,
1814                        id: id.clone(),
1815                    })
1816                }
1817            }
1818        })
1819        .collect();
1820
1821    *history = simplified;
1822}
1823
1824/// Ensure history starts with a User message for OpenAI Responses API compatibility.
1825///
1826/// OpenAI's Responses API requires that reasoning items are properly structured within
1827/// a conversation. When history truncation leaves an Assistant message (containing
1828/// Reasoning blocks) at the start, OpenAI rejects it with:
1829/// "Item 'rs_...' of type 'reasoning' was provided without its required following item."
1830///
1831/// This function inserts a synthetic User message at the beginning if history starts
1832/// with an Assistant message, preserving the context while maintaining valid structure.
1833fn ensure_history_starts_with_user(history: &mut Vec<rig::completion::Message>) {
1834    if !history.is_empty() {
1835        if matches!(
1836            history.first(),
1837            Some(rig::completion::Message::Assistant { .. })
1838        ) {
1839            // Insert synthetic User message at the beginning to maintain valid conversation structure
1840            history.insert(
1841                0,
1842                rig::completion::Message::User {
1843                    content: rig::one_or_many::OneOrMany::one(
1844                        rig::completion::message::UserContent::text("(Conversation continued)"),
1845                    ),
1846                },
1847            );
1848        }
1849    }
1850}
1851
1852/// Estimate token count from raw rig Messages
1853/// This is used for context length management to prevent "input too long" errors.
1854/// Estimates ~4 characters per token.
1855fn estimate_raw_history_tokens(messages: &[rig::completion::Message]) -> usize {
1856    use rig::completion::message::{AssistantContent, ToolResultContent, UserContent};
1857
1858    messages
1859        .iter()
1860        .map(|msg| -> usize {
1861            match msg {
1862                rig::completion::Message::User { content } => {
1863                    content
1864                        .iter()
1865                        .map(|c| -> usize {
1866                            match c {
1867                                UserContent::Text(t) => t.text.len() / 4,
1868                                UserContent::ToolResult(tr) => {
1869                                    // Tool results can be HUGE - properly estimate them
1870                                    tr.content
1871                                        .iter()
1872                                        .map(|trc| match trc {
1873                                            ToolResultContent::Text(t) => t.text.len() / 4,
1874                                            _ => 100,
1875                                        })
1876                                        .sum::<usize>()
1877                                }
1878                                _ => 100, // Estimate for images/documents
1879                            }
1880                        })
1881                        .sum::<usize>()
1882                }
1883                rig::completion::Message::Assistant { content, .. } => {
1884                    content
1885                        .iter()
1886                        .map(|c| -> usize {
1887                            match c {
1888                                AssistantContent::Text(t) => t.text.len() / 4,
1889                                AssistantContent::ToolCall(tc) => {
1890                                    // arguments is serde_json::Value, convert to string for length estimate
1891                                    let args_len = tc.function.arguments.to_string().len();
1892                                    (tc.function.name.len() + args_len) / 4
1893                                }
1894                                _ => 100,
1895                            }
1896                        })
1897                        .sum::<usize>()
1898                }
1899            }
1900        })
1901        .sum()
1902}
1903
1904/// Find a plan_create tool call in the list and extract plan info
1905/// Returns (plan_path, task_count) if found
1906fn find_plan_create_call(tool_calls: &[ToolCallRecord]) -> Option<(String, usize)> {
1907    for tc in tool_calls {
1908        if tc.tool_name == "plan_create" {
1909            // Try to parse the result_summary as JSON to extract plan_path
1910            // Note: result_summary may be truncated, so we have multiple fallbacks
1911            let plan_path =
1912                if let Ok(result) = serde_json::from_str::<serde_json::Value>(&tc.result_summary) {
1913                    result
1914                        .get("plan_path")
1915                        .and_then(|v| v.as_str())
1916                        .map(|s| s.to_string())
1917                } else {
1918                    None
1919                };
1920
1921            // If JSON parsing failed, find the most recently created plan file
1922            // This is more reliable than trying to reconstruct the path from truncated args
1923            let plan_path = plan_path.unwrap_or_else(|| {
1924                find_most_recent_plan_file().unwrap_or_else(|| "plans/plan.md".to_string())
1925            });
1926
1927            // Count tasks by reading the plan file directly
1928            let task_count = count_tasks_in_plan_file(&plan_path).unwrap_or(0);
1929
1930            return Some((plan_path, task_count));
1931        }
1932    }
1933    None
1934}
1935
1936/// Find the most recently created plan file in the plans directory
1937fn find_most_recent_plan_file() -> Option<String> {
1938    let plans_dir = std::env::current_dir().ok()?.join("plans");
1939    if !plans_dir.exists() {
1940        return None;
1941    }
1942
1943    let mut newest: Option<(std::path::PathBuf, std::time::SystemTime)> = None;
1944
1945    for entry in std::fs::read_dir(&plans_dir).ok()?.flatten() {
1946        let path = entry.path();
1947        if path.extension().is_some_and(|e| e == "md")
1948            && let Ok(metadata) = entry.metadata()
1949            && let Ok(modified) = metadata.modified()
1950            && newest.as_ref().map(|(_, t)| modified > *t).unwrap_or(true)
1951        {
1952            newest = Some((path, modified));
1953        }
1954    }
1955
1956    newest.map(|(path, _)| {
1957        // Return relative path
1958        path.strip_prefix(std::env::current_dir().unwrap_or_default())
1959            .map(|p| p.display().to_string())
1960            .unwrap_or_else(|_| path.display().to_string())
1961    })
1962}
1963
1964/// Count tasks (checkbox items) in a plan file
1965fn count_tasks_in_plan_file(plan_path: &str) -> Option<usize> {
1966    use regex::Regex;
1967
1968    // Try both relative and absolute paths
1969    let path = std::path::Path::new(plan_path);
1970    let content = if path.exists() {
1971        std::fs::read_to_string(path).ok()?
1972    } else {
1973        // Try with current directory
1974        std::fs::read_to_string(std::env::current_dir().ok()?.join(plan_path)).ok()?
1975    };
1976
1977    // Count task checkboxes: - [ ], - [x], - [~], - [!]
1978    let task_regex = Regex::new(r"^\s*-\s*\[[ x~!]\]").ok()?;
1979    let count = content
1980        .lines()
1981        .filter(|line| task_regex.is_match(line))
1982        .count();
1983
1984    Some(count)
1985}
1986
1987/// Check if an error is a truncation/JSON parsing error that can be recovered via continuation
1988fn is_truncation_error(err_str: &str) -> bool {
1989    err_str.contains("JsonError")
1990        || err_str.contains("EOF while parsing")
1991        || err_str.contains("JSON")
1992        || err_str.contains("unexpected end")
1993}
1994
1995/// Check if error is "input too long" - context exceeds model limit
1996/// This happens when conversation history grows beyond what the model can handle.
1997/// Recovery: compact history and retry with reduced context.
1998fn is_input_too_long_error(err_str: &str) -> bool {
1999    err_str.contains("too long")
2000        || err_str.contains("Too long")
2001        || err_str.contains("context length")
2002        || err_str.contains("maximum context")
2003        || err_str.contains("exceeds the model")
2004        || err_str.contains("Input is too long")
2005}
2006
2007/// Build a continuation prompt that tells the AI what work was completed
2008/// and asks it to continue from where it left off
2009fn build_continuation_prompt(
2010    original_task: &str,
2011    completed_tools: &[ToolCallRecord],
2012    agent_thinking: &[String],
2013) -> String {
2014    use std::collections::HashSet;
2015
2016    // Group tools by type and extract unique files read
2017    let mut files_read: HashSet<String> = HashSet::new();
2018    let mut files_written: HashSet<String> = HashSet::new();
2019    let mut dirs_listed: HashSet<String> = HashSet::new();
2020    let mut other_tools: Vec<String> = Vec::new();
2021    let mut in_progress: Vec<String> = Vec::new();
2022
2023    for tool in completed_tools {
2024        let is_in_progress = tool.result_summary.contains("IN PROGRESS");
2025
2026        if is_in_progress {
2027            in_progress.push(format!("{}({})", tool.tool_name, tool.args_summary));
2028            continue;
2029        }
2030
2031        match tool.tool_name.as_str() {
2032            "read_file" => {
2033                // Extract path from args
2034                files_read.insert(tool.args_summary.clone());
2035            }
2036            "write_file" | "write_files" => {
2037                files_written.insert(tool.args_summary.clone());
2038            }
2039            "list_directory" => {
2040                dirs_listed.insert(tool.args_summary.clone());
2041            }
2042            _ => {
2043                other_tools.push(format!(
2044                    "{}({})",
2045                    tool.tool_name,
2046                    truncate_string(&tool.args_summary, 40)
2047                ));
2048            }
2049        }
2050    }
2051
2052    let mut prompt = format!(
2053        "[CONTINUE] Your previous response was interrupted. DO NOT repeat completed work.\n\n\
2054        Original task: {}\n",
2055        truncate_string(original_task, 500)
2056    );
2057
2058    // Show files already read - CRITICAL for preventing re-reads
2059    if !files_read.is_empty() {
2060        prompt.push_str("\n== FILES ALREADY READ (do NOT read again) ==\n");
2061        for file in &files_read {
2062            prompt.push_str(&format!("  - {}\n", file));
2063        }
2064    }
2065
2066    if !dirs_listed.is_empty() {
2067        prompt.push_str("\n== DIRECTORIES ALREADY LISTED ==\n");
2068        for dir in &dirs_listed {
2069            prompt.push_str(&format!("  - {}\n", dir));
2070        }
2071    }
2072
2073    if !files_written.is_empty() {
2074        prompt.push_str("\n== FILES ALREADY WRITTEN ==\n");
2075        for file in &files_written {
2076            prompt.push_str(&format!("  - {}\n", file));
2077        }
2078    }
2079
2080    if !other_tools.is_empty() {
2081        prompt.push_str("\n== OTHER COMPLETED ACTIONS ==\n");
2082        for tool in other_tools.iter().take(20) {
2083            prompt.push_str(&format!("  - {}\n", tool));
2084        }
2085        if other_tools.len() > 20 {
2086            prompt.push_str(&format!("  ... and {} more\n", other_tools.len() - 20));
2087        }
2088    }
2089
2090    if !in_progress.is_empty() {
2091        prompt.push_str("\n== INTERRUPTED (may need re-run) ==\n");
2092        for tool in &in_progress {
2093            prompt.push_str(&format!("  โš  {}\n", tool));
2094        }
2095    }
2096
2097    // Include last thinking context if available
2098    if let Some(last_thought) = agent_thinking.last() {
2099        prompt.push_str(&format!(
2100            "\n== YOUR LAST THOUGHTS ==\n\"{}\"\n",
2101            truncate_string(last_thought, 300)
2102        ));
2103    }
2104
2105    prompt.push_str("\n== INSTRUCTIONS ==\n");
2106    prompt.push_str("IMPORTANT: Your previous response was too long and got cut off.\n");
2107    prompt.push_str("1. Do NOT re-read files listed above - they are already in context.\n");
2108    prompt.push_str("2. If writing a document, write it in SECTIONS - complete one section now, then continue.\n");
2109    prompt.push_str("3. Keep your response SHORT and focused. Better to complete small chunks than fail on large ones.\n");
2110    prompt.push_str("4. If the task involves writing a file, START WRITING NOW - don't explain what you'll do.\n");
2111
2112    prompt
2113}
2114
2115/// Run a single query and return the response
2116pub async fn run_query(
2117    project_path: &Path,
2118    query: &str,
2119    provider: ProviderType,
2120    model: Option<String>,
2121) -> AgentResult<String> {
2122    use tools::*;
2123
2124    let project_path_buf = project_path.to_path_buf();
2125
2126    // Background process manager for Prometheus port-forwards (single query context)
2127    let bg_manager = Arc::new(BackgroundProcessManager::new());
2128    // Select prompt based on query type (analysis vs generation)
2129    // For single queries (non-interactive), always use standard mode
2130    let preamble = get_system_prompt(project_path, Some(query), PlanMode::default());
2131    let is_generation = prompts::is_generation_query(query);
2132
2133    match provider {
2134        ProviderType::OpenAI => {
2135            // Use Responses API (default) for reasoning model support
2136            let client = openai::Client::from_env();
2137            let model_name = model.as_deref().unwrap_or("gpt-5.2");
2138
2139            let mut builder = client
2140                .agent(model_name)
2141                .preamble(&preamble)
2142                .max_tokens(4096)
2143                .tool(AnalyzeTool::new(project_path_buf.clone()))
2144                .tool(SecurityScanTool::new(project_path_buf.clone()))
2145                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2146                .tool(HadolintTool::new(project_path_buf.clone()))
2147                .tool(DclintTool::new(project_path_buf.clone()))
2148                .tool(KubelintTool::new(project_path_buf.clone()))
2149                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2150                .tool(K8sCostsTool::new(project_path_buf.clone()))
2151                .tool(K8sDriftTool::new(project_path_buf.clone()))
2152                .tool(HelmlintTool::new(project_path_buf.clone()))
2153                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2154                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2155                .tool(TerraformInstallTool::new())
2156                .tool(ReadFileTool::new(project_path_buf.clone()))
2157                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2158                .tool(WebFetchTool::new())
2159                // Prometheus discovery and connection tools for live K8s analysis
2160                .tool(PrometheusDiscoverTool::new())
2161                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2162                // RAG retrieval tools for compressed tool outputs
2163                .tool(RetrieveOutputTool::new())
2164                .tool(ListOutputsTool::new());
2165
2166            // Add generation tools if this is a generation query
2167            if is_generation {
2168                builder = builder
2169                    .tool(WriteFileTool::new(project_path_buf.clone()))
2170                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2171                    .tool(ShellTool::new(project_path_buf.clone()));
2172            }
2173
2174            // Enable reasoning for OpenAI reasoning models
2175            let model_lower = model_name.to_lowercase();
2176            let is_reasoning_model = model_lower.starts_with("gpt-5")
2177                || model_lower.starts_with("gpt5")
2178                || model_lower.starts_with("o1")
2179                || model_lower.starts_with("o3")
2180                || model_lower.starts_with("o4");
2181
2182            let agent = if is_reasoning_model {
2183                let reasoning_params = serde_json::json!({
2184                    "reasoning": {
2185                        "effort": "medium",
2186                        "summary": "detailed"
2187                    }
2188                });
2189                builder.additional_params(reasoning_params).build()
2190            } else {
2191                builder.build()
2192            };
2193
2194            agent
2195                .prompt(query)
2196                .multi_turn(50)
2197                .await
2198                .map_err(|e| AgentError::ProviderError(e.to_string()))
2199        }
2200        ProviderType::Anthropic => {
2201            let client = anthropic::Client::from_env();
2202            let model_name = model.as_deref().unwrap_or("claude-sonnet-4-5-20250929");
2203
2204            // TODO: Extended thinking for Claude is disabled because rig doesn't properly
2205            // handle thinking blocks in multi-turn conversations with tool use.
2206            // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference.
2207
2208            let mut builder = client
2209                .agent(model_name)
2210                .preamble(&preamble)
2211                .max_tokens(4096)
2212                .tool(AnalyzeTool::new(project_path_buf.clone()))
2213                .tool(SecurityScanTool::new(project_path_buf.clone()))
2214                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2215                .tool(HadolintTool::new(project_path_buf.clone()))
2216                .tool(DclintTool::new(project_path_buf.clone()))
2217                .tool(KubelintTool::new(project_path_buf.clone()))
2218                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2219                .tool(K8sCostsTool::new(project_path_buf.clone()))
2220                .tool(K8sDriftTool::new(project_path_buf.clone()))
2221                .tool(HelmlintTool::new(project_path_buf.clone()))
2222                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2223                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2224                .tool(TerraformInstallTool::new())
2225                .tool(ReadFileTool::new(project_path_buf.clone()))
2226                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2227                .tool(WebFetchTool::new())
2228                // Prometheus discovery and connection tools for live K8s analysis
2229                .tool(PrometheusDiscoverTool::new())
2230                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2231                // RAG retrieval tools for compressed tool outputs
2232                .tool(RetrieveOutputTool::new())
2233                .tool(ListOutputsTool::new());
2234
2235            // Add generation tools if this is a generation query
2236            if is_generation {
2237                builder = builder
2238                    .tool(WriteFileTool::new(project_path_buf.clone()))
2239                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2240                    .tool(ShellTool::new(project_path_buf.clone()));
2241            }
2242
2243            let agent = builder.build();
2244
2245            agent
2246                .prompt(query)
2247                .multi_turn(50)
2248                .await
2249                .map_err(|e| AgentError::ProviderError(e.to_string()))
2250        }
2251        ProviderType::Bedrock => {
2252            // Bedrock provider via rig-bedrock - same pattern as Anthropic
2253            let client = crate::bedrock::client::Client::from_env();
2254            let model_name = model
2255                .as_deref()
2256                .unwrap_or("global.anthropic.claude-sonnet-4-5-20250929-v1:0");
2257
2258            // Extended thinking for Claude via Bedrock
2259            let thinking_params = serde_json::json!({
2260                "thinking": {
2261                    "type": "enabled",
2262                    "budget_tokens": 16000
2263                }
2264            });
2265
2266            let mut builder = client
2267                .agent(model_name)
2268                .preamble(&preamble)
2269                .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
2270                .tool(AnalyzeTool::new(project_path_buf.clone()))
2271                .tool(SecurityScanTool::new(project_path_buf.clone()))
2272                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2273                .tool(HadolintTool::new(project_path_buf.clone()))
2274                .tool(DclintTool::new(project_path_buf.clone()))
2275                .tool(KubelintTool::new(project_path_buf.clone()))
2276                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2277                .tool(K8sCostsTool::new(project_path_buf.clone()))
2278                .tool(K8sDriftTool::new(project_path_buf.clone()))
2279                .tool(HelmlintTool::new(project_path_buf.clone()))
2280                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2281                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2282                .tool(TerraformInstallTool::new())
2283                .tool(ReadFileTool::new(project_path_buf.clone()))
2284                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2285                .tool(WebFetchTool::new())
2286                // Prometheus discovery and connection tools for live K8s analysis
2287                .tool(PrometheusDiscoverTool::new())
2288                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2289                // RAG retrieval tools for compressed tool outputs
2290                .tool(RetrieveOutputTool::new())
2291                .tool(ListOutputsTool::new());
2292
2293            // Add generation tools if this is a generation query
2294            if is_generation {
2295                builder = builder
2296                    .tool(WriteFileTool::new(project_path_buf.clone()))
2297                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2298                    .tool(ShellTool::new(project_path_buf.clone()));
2299            }
2300
2301            let agent = builder.additional_params(thinking_params).build();
2302
2303            agent
2304                .prompt(query)
2305                .multi_turn(50)
2306                .await
2307                .map_err(|e| AgentError::ProviderError(e.to_string()))
2308        }
2309    }
2310}