syncable_cli/agent/
mod.rs

1//! Agent module for interactive AI-powered CLI assistance
2//!
3//! This module provides an agent layer using the Rig library that allows users
4//! to interact with the CLI through natural language conversations.
5//!
6//! # Features
7//!
8//! - **Conversation History**: Maintains context across multiple turns
9//! - **Automatic Compaction**: Compresses old history when token count exceeds threshold
10//! - **Tool Tracking**: Records tool calls for better context preservation
11//!
12//! # Usage
13//!
14//! ```bash
15//! # Interactive mode
16//! sync-ctl chat
17//!
18//! # With specific provider
19//! sync-ctl chat --provider openai --model gpt-5.2
20//!
21//! # Single query
22//! sync-ctl chat --query "What security issues does this project have?"
23//! ```
24//!
25//! # Interactive Commands
26//!
27//! - `/model` - Switch to a different AI model
28//! - `/provider` - Switch provider (prompts for API key if needed)
29//! - `/help` - Show available commands
30//! - `/clear` - Clear conversation history
31//! - `/exit` - Exit the chat
32
33pub mod commands;
34pub mod compact;
35pub mod history;
36pub mod ide;
37pub mod prompts;
38pub mod session;
39pub mod tools;
40pub mod ui;
41use colored::Colorize;
42use commands::TokenUsage;
43use history::{ConversationHistory, ToolCallRecord};
44use ide::IdeClient;
45use rig::{
46    client::{CompletionClient, ProviderClient},
47    completion::Prompt,
48    providers::{anthropic, openai},
49};
50use session::{ChatSession, PlanMode};
51use std::path::Path;
52use std::sync::Arc;
53use tokio::sync::Mutex as TokioMutex;
54use ui::{ResponseFormatter, ToolDisplayHook};
55
/// Which LLM backend the agent sends its requests to.
///
/// `OpenAI` is the value produced by [`Default`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ProviderType {
    /// OpenAI backend; this is the default selection.
    #[default]
    OpenAI,
    /// Anthropic backend.
    Anthropic,
    /// Bedrock backend (also reachable through the `aws` / `aws-bedrock` spellings
    /// when parsing from a string).
    Bedrock,
}
64
65impl std::fmt::Display for ProviderType {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        match self {
68            ProviderType::OpenAI => write!(f, "openai"),
69            ProviderType::Anthropic => write!(f, "anthropic"),
70            ProviderType::Bedrock => write!(f, "bedrock"),
71        }
72    }
73}
74
75impl std::str::FromStr for ProviderType {
76    type Err = String;
77
78    fn from_str(s: &str) -> Result<Self, Self::Err> {
79        match s.to_lowercase().as_str() {
80            "openai" => Ok(ProviderType::OpenAI),
81            "anthropic" => Ok(ProviderType::Anthropic),
82            "bedrock" | "aws" | "aws-bedrock" => Ok(ProviderType::Bedrock),
83            _ => Err(format!(
84                "Unknown provider: {}. Use: openai, anthropic, or bedrock",
85                s
86            )),
87        }
88    }
89}
90
91/// Error types for the agent
92#[derive(Debug, thiserror::Error)]
93pub enum AgentError {
94    #[error("Missing API key. Set {0} environment variable.")]
95    MissingApiKey(String),
96
97    #[error("Provider error: {0}")]
98    ProviderError(String),
99
100    #[error("Tool error: {0}")]
101    ToolError(String),
102}
103
104pub type AgentResult<T> = Result<T, AgentError>;
105
106/// Get the system prompt for the agent based on query type and plan mode
107fn get_system_prompt(project_path: &Path, query: Option<&str>, plan_mode: PlanMode) -> String {
108    // In planning mode, use the read-only exploration prompt
109    if plan_mode.is_planning() {
110        return prompts::get_planning_prompt(project_path);
111    }
112
113    if let Some(q) = query {
114        // First check if it's a code development task (highest priority)
115        if prompts::is_code_development_query(q) {
116            return prompts::get_code_development_prompt(project_path);
117        }
118        // Then check if it's DevOps generation (Docker, Terraform, Helm)
119        if prompts::is_generation_query(q) {
120            return prompts::get_devops_prompt(project_path);
121        }
122    }
123    // Default to analysis prompt
124    prompts::get_analysis_prompt(project_path)
125}
126
127/// Run the agent in interactive mode with custom REPL supporting /model and /provider commands
128pub async fn run_interactive(
129    project_path: &Path,
130    provider: ProviderType,
131    model: Option<String>,
132) -> AgentResult<()> {
133    use tools::*;
134
135    let mut session = ChatSession::new(project_path, provider, model);
136
137    // Initialize conversation history with compaction support
138    let mut conversation_history = ConversationHistory::new();
139
140    // Initialize IDE client for native diff viewing
141    let ide_client: Option<Arc<TokioMutex<IdeClient>>> = {
142        let mut client = IdeClient::new().await;
143        if client.is_ide_available() {
144            match client.connect().await {
145                Ok(()) => {
146                    println!(
147                        "{} Connected to {} IDE companion",
148                        "โœ“".green(),
149                        client.ide_name().unwrap_or("VS Code")
150                    );
151                    Some(Arc::new(TokioMutex::new(client)))
152                }
153                Err(e) => {
154                    // IDE detected but companion not running or connection failed
155                    println!("{} IDE companion not connected: {}", "!".yellow(), e);
156                    None
157                }
158            }
159        } else {
160            println!(
161                "{} No IDE detected (TERM_PROGRAM={})",
162                "ยท".dimmed(),
163                std::env::var("TERM_PROGRAM").unwrap_or_default()
164            );
165            None
166        }
167    };
168
169    // Load API key from config file to env if not already set
170    ChatSession::load_api_key_to_env(session.provider);
171
172    // Check if API key is configured, prompt if not
173    if !ChatSession::has_api_key(session.provider) {
174        ChatSession::prompt_api_key(session.provider)?;
175    }
176
177    session.print_banner();
178
179    // Raw Rig messages for multi-turn - preserves Reasoning blocks for thinking
180    // Our ConversationHistory only stores text summaries, but rig needs full Message structure
181    let mut raw_chat_history: Vec<rig::completion::Message> = Vec::new();
182
183    // Pending input for auto-continue after plan creation
184    let mut pending_input: Option<String> = None;
185    // Auto-accept mode for plan execution (skips write confirmations)
186    let mut auto_accept_writes = false;
187
188    loop {
189        // Show conversation status if we have history
190        if !conversation_history.is_empty() {
191            println!(
192                "{}",
193                format!("  ๐Ÿ’ฌ Context: {}", conversation_history.status()).dimmed()
194            );
195        }
196
197        // Check for pending input (from plan menu selection)
198        let input = if let Some(pending) = pending_input.take() {
199            // Show what we're executing
200            println!("{} {}", "โ†’".cyan(), pending.dimmed());
201            pending
202        } else {
203            // New user turn - reset auto-accept mode from previous plan execution
204            auto_accept_writes = false;
205
206            // Read user input (returns InputResult)
207            let input_result = match session.read_input() {
208                Ok(result) => result,
209                Err(_) => break,
210            };
211
212            // Handle the input result
213            match input_result {
214                ui::InputResult::Submit(text) => ChatSession::process_submitted_text(&text),
215                ui::InputResult::Cancel | ui::InputResult::Exit => break,
216                ui::InputResult::TogglePlanMode => {
217                    // Toggle planning mode - minimal feedback, no extra newlines
218                    let new_mode = session.toggle_plan_mode();
219                    if new_mode.is_planning() {
220                        println!("{}", "โ˜… plan mode".yellow());
221                    } else {
222                        println!("{}", "โ–ถ standard mode".green());
223                    }
224                    continue;
225                }
226            }
227        };
228
229        if input.is_empty() {
230            continue;
231        }
232
233        // Check for commands
234        if ChatSession::is_command(&input) {
235            // Special handling for /clear to also clear conversation history
236            if input.trim().to_lowercase() == "/clear" || input.trim().to_lowercase() == "/c" {
237                conversation_history.clear();
238                raw_chat_history.clear();
239            }
240            match session.process_command(&input) {
241                Ok(true) => continue,
242                Ok(false) => break, // /exit
243                Err(e) => {
244                    eprintln!("{}", format!("Error: {}", e).red());
245                    continue;
246                }
247            }
248        }
249
250        // Check API key before making request (in case provider changed)
251        if !ChatSession::has_api_key(session.provider) {
252            eprintln!(
253                "{}",
254                "No API key configured. Use /provider to set one.".yellow()
255            );
256            continue;
257        }
258
259        // Check if compaction is needed before making the request
260        if conversation_history.needs_compaction() {
261            println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
262            if let Some(summary) = conversation_history.compact() {
263                println!(
264                    "{}",
265                    format!("  โœ“ Compressed {} turns", summary.matches("Turn").count()).dimmed()
266                );
267            }
268        }
269
270        // Pre-request check: estimate if we're approaching context limit
271        // Check raw_chat_history (actual messages) not conversation_history
272        // because conversation_history may be out of sync
273        let estimated_input_tokens = estimate_raw_history_tokens(&raw_chat_history)
274            + input.len() / 4  // New input
275            + 5000; // System prompt overhead estimate
276
277        if estimated_input_tokens > 150_000 {
278            println!(
279                "{}",
280                "  โš  Large context detected. Pre-truncating...".yellow()
281            );
282
283            let old_count = raw_chat_history.len();
284            // Keep last 20 messages when approaching limit
285            if raw_chat_history.len() > 20 {
286                let drain_count = raw_chat_history.len() - 20;
287                raw_chat_history.drain(0..drain_count);
288                conversation_history.clear(); // Stay in sync
289                println!(
290                    "{}",
291                    format!(
292                        "  โœ“ Truncated {} โ†’ {} messages",
293                        old_count,
294                        raw_chat_history.len()
295                    )
296                    .dimmed()
297                );
298            }
299        }
300
301        // Retry loop for automatic error recovery
302        // MAX_RETRIES is for failures without progress
303        // MAX_CONTINUATIONS is for truncations WITH progress (more generous)
304        // TOOL_CALL_CHECKPOINT is the interval at which we ask user to confirm
305        // MAX_TOOL_CALLS is the absolute maximum (300 = 6 checkpoints x 50)
306        const MAX_RETRIES: u32 = 3;
307        const MAX_CONTINUATIONS: u32 = 10;
308        const _TOOL_CALL_CHECKPOINT: usize = 50;
309        const MAX_TOOL_CALLS: usize = 300;
310        let mut retry_attempt = 0;
311        let mut continuation_count = 0;
312        let mut total_tool_calls: usize = 0;
313        let mut auto_continue_tools = false; // User can select "always" to skip future prompts
314        let mut current_input = input.clone();
315        let mut succeeded = false;
316
317        while retry_attempt < MAX_RETRIES && continuation_count < MAX_CONTINUATIONS && !succeeded {
318            // Log if this is a continuation attempt
319            if continuation_count > 0 {
320                eprintln!("{}", "  ๐Ÿ“ก Sending continuation request...".dimmed());
321            }
322
323            // Create hook for Claude Code style tool display
324            let hook = ToolDisplayHook::new();
325
326            let project_path_buf = session.project_path.clone();
327            // Select prompt based on query type (analysis vs generation) and plan mode
328            let preamble = get_system_prompt(
329                &session.project_path,
330                Some(&current_input),
331                session.plan_mode,
332            );
333            let is_generation = prompts::is_generation_query(&current_input);
334            let is_planning = session.plan_mode.is_planning();
335
336            // Note: using raw_chat_history directly which preserves Reasoning blocks
337            // This is needed for extended thinking to work with multi-turn conversations
338
339            let response = match session.provider {
340                ProviderType::OpenAI => {
341                    let client = openai::Client::from_env();
342                    // For GPT-5.x reasoning models, enable reasoning with summary output
343                    // so we can see the model's thinking process
344                    let reasoning_params =
345                        if session.model.starts_with("gpt-5") || session.model.starts_with("o1") {
346                            Some(serde_json::json!({
347                                "reasoning": {
348                                    "effort": "medium",
349                                    "summary": "detailed"
350                                }
351                            }))
352                        } else {
353                            None
354                        };
355
356                    let mut builder = client
357                        .agent(&session.model)
358                        .preamble(&preamble)
359                        .max_tokens(4096)
360                        .tool(AnalyzeTool::new(project_path_buf.clone()))
361                        .tool(SecurityScanTool::new(project_path_buf.clone()))
362                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
363                        .tool(HadolintTool::new(project_path_buf.clone()))
364                        .tool(DclintTool::new(project_path_buf.clone()))
365                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
366                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
367                        .tool(TerraformInstallTool::new())
368                        .tool(ReadFileTool::new(project_path_buf.clone()))
369                        .tool(ListDirectoryTool::new(project_path_buf.clone()));
370
371                    // Add tools based on mode
372                    if is_planning {
373                        // Plan mode: read-only shell + plan creation tools
374                        builder = builder
375                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
376                            .tool(PlanCreateTool::new(project_path_buf.clone()))
377                            .tool(PlanListTool::new(project_path_buf.clone()));
378                    } else if is_generation {
379                        // Standard mode + generation query: all tools including file writes and plan execution
380                        let (mut write_file_tool, mut write_files_tool) =
381                            if let Some(ref client) = ide_client {
382                                (
383                                    WriteFileTool::new(project_path_buf.clone())
384                                        .with_ide_client(client.clone()),
385                                    WriteFilesTool::new(project_path_buf.clone())
386                                        .with_ide_client(client.clone()),
387                                )
388                            } else {
389                                (
390                                    WriteFileTool::new(project_path_buf.clone()),
391                                    WriteFilesTool::new(project_path_buf.clone()),
392                                )
393                            };
394                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
395                        if auto_accept_writes {
396                            write_file_tool = write_file_tool.without_confirmation();
397                            write_files_tool = write_files_tool.without_confirmation();
398                        }
399                        builder = builder
400                            .tool(write_file_tool)
401                            .tool(write_files_tool)
402                            .tool(ShellTool::new(project_path_buf.clone()))
403                            .tool(PlanListTool::new(project_path_buf.clone()))
404                            .tool(PlanNextTool::new(project_path_buf.clone()))
405                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
406                    }
407
408                    if let Some(params) = reasoning_params {
409                        builder = builder.additional_params(params);
410                    }
411
412                    let agent = builder.build();
413                    // Allow up to 50 tool call turns for complex generation tasks
414                    // Use hook to display tool calls as they happen
415                    // Pass conversation history for context continuity
416                    agent
417                        .prompt(&current_input)
418                        .with_history(&mut raw_chat_history)
419                        .with_hook(hook.clone())
420                        .multi_turn(50)
421                        .await
422                }
423                ProviderType::Anthropic => {
424                    let client = anthropic::Client::from_env();
425
426                    // TODO: Extended thinking for Claude is disabled because rig-bedrock/rig-anthropic
427                    // don't properly handle thinking blocks in multi-turn conversations with tool use.
428                    // When thinking is enabled, ALL assistant messages must start with thinking blocks
429                    // BEFORE tool_use blocks, but rig doesn't preserve/replay these.
430                    // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference impl.
431
432                    let mut builder = client
433                        .agent(&session.model)
434                        .preamble(&preamble)
435                        .max_tokens(4096)
436                        .tool(AnalyzeTool::new(project_path_buf.clone()))
437                        .tool(SecurityScanTool::new(project_path_buf.clone()))
438                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
439                        .tool(HadolintTool::new(project_path_buf.clone()))
440                        .tool(DclintTool::new(project_path_buf.clone()))
441                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
442                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
443                        .tool(TerraformInstallTool::new())
444                        .tool(ReadFileTool::new(project_path_buf.clone()))
445                        .tool(ListDirectoryTool::new(project_path_buf.clone()));
446
447                    // Add tools based on mode
448                    if is_planning {
449                        // Plan mode: read-only shell + plan creation tools
450                        builder = builder
451                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
452                            .tool(PlanCreateTool::new(project_path_buf.clone()))
453                            .tool(PlanListTool::new(project_path_buf.clone()));
454                    } else if is_generation {
455                        // Standard mode + generation query: all tools including file writes and plan execution
456                        let (mut write_file_tool, mut write_files_tool) =
457                            if let Some(ref client) = ide_client {
458                                (
459                                    WriteFileTool::new(project_path_buf.clone())
460                                        .with_ide_client(client.clone()),
461                                    WriteFilesTool::new(project_path_buf.clone())
462                                        .with_ide_client(client.clone()),
463                                )
464                            } else {
465                                (
466                                    WriteFileTool::new(project_path_buf.clone()),
467                                    WriteFilesTool::new(project_path_buf.clone()),
468                                )
469                            };
470                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
471                        if auto_accept_writes {
472                            write_file_tool = write_file_tool.without_confirmation();
473                            write_files_tool = write_files_tool.without_confirmation();
474                        }
475                        builder = builder
476                            .tool(write_file_tool)
477                            .tool(write_files_tool)
478                            .tool(ShellTool::new(project_path_buf.clone()))
479                            .tool(PlanListTool::new(project_path_buf.clone()))
480                            .tool(PlanNextTool::new(project_path_buf.clone()))
481                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
482                    }
483
484                    let agent = builder.build();
485
486                    // Allow up to 50 tool call turns for complex generation tasks
487                    // Use hook to display tool calls as they happen
488                    // Pass conversation history for context continuity
489                    agent
490                        .prompt(&current_input)
491                        .with_history(&mut raw_chat_history)
492                        .with_hook(hook.clone())
493                        .multi_turn(50)
494                        .await
495                }
496                ProviderType::Bedrock => {
497                    // Bedrock provider via rig-bedrock - same pattern as OpenAI/Anthropic
498                    let client = rig_bedrock::client::Client::from_env();
499
500                    // Extended thinking for Claude models via Bedrock
501                    // This enables Claude to show its reasoning process before responding.
502                    // Requires vendored rig-bedrock that preserves Reasoning blocks with tool calls.
503                    // Extended thinking budget - reduced to help with rate limits
504                    // 8000 is enough for most tasks, increase to 16000 for complex analysis
505                    let thinking_params = serde_json::json!({
506                        "thinking": {
507                            "type": "enabled",
508                            "budget_tokens": 8000
509                        }
510                    });
511
512                    let mut builder = client
513                        .agent(&session.model)
514                        .preamble(&preamble)
515                        .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
516                        .tool(AnalyzeTool::new(project_path_buf.clone()))
517                        .tool(SecurityScanTool::new(project_path_buf.clone()))
518                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
519                        .tool(HadolintTool::new(project_path_buf.clone()))
520                        .tool(DclintTool::new(project_path_buf.clone()))
521                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
522                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
523                        .tool(TerraformInstallTool::new())
524                        .tool(ReadFileTool::new(project_path_buf.clone()))
525                        .tool(ListDirectoryTool::new(project_path_buf.clone()));
526
527                    // Add tools based on mode
528                    if is_planning {
529                        // Plan mode: read-only shell + plan creation tools
530                        builder = builder
531                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
532                            .tool(PlanCreateTool::new(project_path_buf.clone()))
533                            .tool(PlanListTool::new(project_path_buf.clone()));
534                    } else if is_generation {
535                        // Standard mode + generation query: all tools including file writes and plan execution
536                        let (mut write_file_tool, mut write_files_tool) =
537                            if let Some(ref client) = ide_client {
538                                (
539                                    WriteFileTool::new(project_path_buf.clone())
540                                        .with_ide_client(client.clone()),
541                                    WriteFilesTool::new(project_path_buf.clone())
542                                        .with_ide_client(client.clone()),
543                                )
544                            } else {
545                                (
546                                    WriteFileTool::new(project_path_buf.clone()),
547                                    WriteFilesTool::new(project_path_buf.clone()),
548                                )
549                            };
550                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
551                        if auto_accept_writes {
552                            write_file_tool = write_file_tool.without_confirmation();
553                            write_files_tool = write_files_tool.without_confirmation();
554                        }
555                        builder = builder
556                            .tool(write_file_tool)
557                            .tool(write_files_tool)
558                            .tool(ShellTool::new(project_path_buf.clone()))
559                            .tool(PlanListTool::new(project_path_buf.clone()))
560                            .tool(PlanNextTool::new(project_path_buf.clone()))
561                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
562                    }
563
564                    // Add thinking params for extended reasoning
565                    builder = builder.additional_params(thinking_params);
566
567                    let agent = builder.build();
568
569                    // Use same multi-turn pattern as OpenAI/Anthropic
570                    agent
571                        .prompt(&current_input)
572                        .with_history(&mut raw_chat_history)
573                        .with_hook(hook.clone())
574                        .multi_turn(50)
575                        .await
576                }
577            };
578
579            match response {
580                Ok(text) => {
581                    // Show final response
582                    println!();
583                    ResponseFormatter::print_response(&text);
584
585                    // Track token usage - use actual from hook if available, else estimate
586                    let hook_usage = hook.get_usage().await;
587                    if hook_usage.has_data() {
588                        // Use actual token counts from API response
589                        session
590                            .token_usage
591                            .add_actual(hook_usage.input_tokens, hook_usage.output_tokens);
592                    } else {
593                        // Fall back to estimation when API doesn't provide usage
594                        let prompt_tokens = TokenUsage::estimate_tokens(&input);
595                        let completion_tokens = TokenUsage::estimate_tokens(&text);
596                        session
597                            .token_usage
598                            .add_estimated(prompt_tokens, completion_tokens);
599                    }
600                    // Reset hook usage for next request batch
601                    hook.reset_usage().await;
602
603                    // Show context indicator like Forge: [model/~tokens]
604                    let model_short = session
605                        .model
606                        .split('/')
607                        .next_back()
608                        .unwrap_or(&session.model)
609                        .split(':')
610                        .next()
611                        .unwrap_or(&session.model);
612                    println!();
613                    println!(
614                        "  {}[{}/{}]{}",
615                        ui::colors::ansi::DIM,
616                        model_short,
617                        session.token_usage.format_compact(),
618                        ui::colors::ansi::RESET
619                    );
620
621                    // Extract tool calls from the hook state for history tracking
622                    let tool_calls = extract_tool_calls_from_hook(&hook).await;
623                    let batch_tool_count = tool_calls.len();
624                    total_tool_calls += batch_tool_count;
625
626                    // Show tool call summary if significant
627                    if batch_tool_count > 10 {
628                        println!(
629                            "{}",
630                            format!(
631                                "  โœ“ Completed with {} tool calls ({} total this session)",
632                                batch_tool_count, total_tool_calls
633                            )
634                            .dimmed()
635                        );
636                    }
637
638                    // Add to conversation history with tool call records
639                    conversation_history.add_turn(input.clone(), text.clone(), tool_calls.clone());
640
641                    // Check if this heavy turn requires immediate compaction
642                    // This helps prevent context overflow in subsequent requests
643                    if conversation_history.needs_compaction() {
644                        println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
645                        if let Some(summary) = conversation_history.compact() {
646                            println!(
647                                "{}",
648                                format!("  โœ“ Compressed {} turns", summary.matches("Turn").count())
649                                    .dimmed()
650                            );
651                        }
652                    }
653
654                    // Also update legacy session history for compatibility
655                    session.history.push(("user".to_string(), input.clone()));
656                    session
657                        .history
658                        .push(("assistant".to_string(), text.clone()));
659
660                    // Check if plan_create was called - show interactive menu
661                    if let Some(plan_info) = find_plan_create_call(&tool_calls) {
662                        println!(); // Space before menu
663
664                        // Show the plan action menu (don't switch modes yet - let user choose)
665                        match ui::show_plan_action_menu(&plan_info.0, plan_info.1) {
666                            ui::PlanActionResult::ExecuteAutoAccept => {
667                                // Now switch to standard mode for execution
668                                if session.plan_mode.is_planning() {
669                                    session.plan_mode = session.plan_mode.toggle();
670                                }
671                                auto_accept_writes = true;
672                                pending_input = Some(format!(
673                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order. Auto-accept all file writes.",
674                                    plan_info.0
675                                ));
676                                succeeded = true;
677                            }
678                            ui::PlanActionResult::ExecuteWithReview => {
679                                // Now switch to standard mode for execution
680                                if session.plan_mode.is_planning() {
681                                    session.plan_mode = session.plan_mode.toggle();
682                                }
683                                pending_input = Some(format!(
684                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order.",
685                                    plan_info.0
686                                ));
687                                succeeded = true;
688                            }
689                            ui::PlanActionResult::ChangePlan(feedback) => {
690                                // Stay in plan mode for modifications
691                                pending_input = Some(format!(
692                                    "Please modify the plan at '{}'. User feedback: {}",
693                                    plan_info.0, feedback
694                                ));
695                                succeeded = true;
696                            }
697                            ui::PlanActionResult::Cancel => {
698                                // Just complete normally, don't execute
699                                succeeded = true;
700                            }
701                        }
702                    } else {
703                        succeeded = true;
704                    }
705                }
706                Err(e) => {
707                    let err_str = e.to_string();
708
709                    println!();
710
711                    // Check if this is a max depth error - handle as checkpoint
712                    if err_str.contains("MaxDepth")
713                        || err_str.contains("max_depth")
714                        || err_str.contains("reached limit")
715                    {
716                        // Extract what was done before hitting the limit
717                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
718                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
719                        let batch_tool_count = completed_tools.len();
720                        total_tool_calls += batch_tool_count;
721
722                        eprintln!("{}", format!(
723                            "โš  Reached {} tool calls this batch ({} total). Maximum allowed: {}",
724                            batch_tool_count, total_tool_calls, MAX_TOOL_CALLS
725                        ).yellow());
726
727                        // Check if we've hit the absolute maximum
728                        if total_tool_calls >= MAX_TOOL_CALLS {
729                            eprintln!(
730                                "{}",
731                                format!("Maximum tool call limit ({}) reached.", MAX_TOOL_CALLS)
732                                    .red()
733                            );
734                            eprintln!(
735                                "{}",
736                                "The task is too complex. Try breaking it into smaller parts."
737                                    .dimmed()
738                            );
739                            break;
740                        }
741
742                        // Ask user if they want to continue (unless auto-continue is enabled)
743                        let should_continue = if auto_continue_tools {
744                            eprintln!(
745                                "{}",
746                                "  Auto-continuing (you selected 'always')...".dimmed()
747                            );
748                            true
749                        } else {
750                            eprintln!(
751                                "{}",
752                                "Excessive tool calls used. Want to continue?".yellow()
753                            );
754                            eprintln!(
755                                "{}",
756                                "  [y] Yes, continue  [n] No, stop  [a] Always continue".dimmed()
757                            );
758                            print!("  > ");
759                            let _ = std::io::Write::flush(&mut std::io::stdout());
760
761                            // Read user input
762                            let mut response = String::new();
763                            match std::io::stdin().read_line(&mut response) {
764                                Ok(_) => {
765                                    let resp = response.trim().to_lowercase();
766                                    if resp == "a" || resp == "always" {
767                                        auto_continue_tools = true;
768                                        true
769                                    } else {
770                                        resp == "y" || resp == "yes" || resp.is_empty()
771                                    }
772                                }
773                                Err(_) => false,
774                            }
775                        };
776
777                        if !should_continue {
778                            eprintln!(
779                                "{}",
780                                "Stopped by user. Type 'continue' to resume later.".dimmed()
781                            );
782                            // Add partial progress to history
783                            if !completed_tools.is_empty() {
784                                conversation_history.add_turn(
785                                    current_input.clone(),
786                                    format!(
787                                        "[Stopped at checkpoint - {} tools completed]",
788                                        batch_tool_count
789                                    ),
790                                    vec![],
791                                );
792                            }
793                            break;
794                        }
795
796                        // Continue from checkpoint
797                        eprintln!(
798                            "{}",
799                            format!(
800                                "  โ†’ Continuing... {} remaining tool calls available",
801                                MAX_TOOL_CALLS - total_tool_calls
802                            )
803                            .dimmed()
804                        );
805
806                        // Add partial progress to history (without duplicating tool calls)
807                        conversation_history.add_turn(
808                            current_input.clone(),
809                            format!(
810                                "[Checkpoint - {} tools completed, continuing...]",
811                                batch_tool_count
812                            ),
813                            vec![],
814                        );
815
816                        // Build continuation prompt
817                        current_input =
818                            build_continuation_prompt(&input, &completed_tools, &agent_thinking);
819
820                        // Brief delay before continuation
821                        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
822                        continue; // Continue the loop without incrementing retry_attempt
823                    } else if err_str.contains("rate")
824                        || err_str.contains("Rate")
825                        || err_str.contains("429")
826                        || err_str.contains("Too many tokens")
827                        || err_str.contains("please wait")
828                        || err_str.contains("throttl")
829                        || err_str.contains("Throttl")
830                    {
831                        eprintln!("{}", "โš  Rate limited by API provider.".yellow());
832                        // Wait before retry for rate limits (longer wait for "too many tokens")
833                        retry_attempt += 1;
834                        let wait_secs = if err_str.contains("Too many tokens") {
835                            30
836                        } else {
837                            5
838                        };
839                        eprintln!(
840                            "{}",
841                            format!(
842                                "  Waiting {} seconds before retry ({}/{})...",
843                                wait_secs, retry_attempt, MAX_RETRIES
844                            )
845                            .dimmed()
846                        );
847                        tokio::time::sleep(tokio::time::Duration::from_secs(wait_secs)).await;
848                    } else if is_input_too_long_error(&err_str) {
849                        // Context too large - truncate raw_chat_history directly
850                        // NOTE: We truncate raw_chat_history (actual messages) not conversation_history
851                        // because conversation_history may be empty/stale during errors
852                        eprintln!(
853                            "{}",
854                            "โš  Context too large for model. Truncating history...".yellow()
855                        );
856
857                        let old_token_count = estimate_raw_history_tokens(&raw_chat_history);
858                        let old_msg_count = raw_chat_history.len();
859
860                        // Strategy: Keep only the last N messages (user/assistant pairs)
861                        // More aggressive truncation on each retry: 10 โ†’ 6 โ†’ 4 messages
862                        let keep_count = match retry_attempt {
863                            0 => 10,
864                            1 => 6,
865                            _ => 4,
866                        };
867
868                        if raw_chat_history.len() > keep_count {
869                            // Drain older messages, keep the most recent ones
870                            let drain_count = raw_chat_history.len() - keep_count;
871                            raw_chat_history.drain(0..drain_count);
872                        }
873
874                        let new_token_count = estimate_raw_history_tokens(&raw_chat_history);
875                        eprintln!("{}", format!(
876                            "  โœ“ Truncated: {} messages (~{} tokens) โ†’ {} messages (~{} tokens)",
877                            old_msg_count, old_token_count, raw_chat_history.len(), new_token_count
878                        ).green());
879
880                        // Also clear conversation_history to stay in sync
881                        conversation_history.clear();
882
883                        // Retry with truncated context
884                        retry_attempt += 1;
885                        if retry_attempt < MAX_RETRIES {
886                            eprintln!(
887                                "{}",
888                                format!(
889                                    "  โ†’ Retrying with truncated context ({}/{})...",
890                                    retry_attempt, MAX_RETRIES
891                                )
892                                .dimmed()
893                            );
894                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
895                        } else {
896                            eprintln!(
897                                "{}",
898                                "Context still too large after truncation. Try /clear to reset."
899                                    .red()
900                            );
901                            break;
902                        }
903                    } else if is_truncation_error(&err_str) {
904                        // Truncation error - try intelligent continuation
905                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
906                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
907
908                        // Count actually completed tools (not in-progress)
909                        let completed_count = completed_tools
910                            .iter()
911                            .filter(|t| !t.result_summary.contains("IN PROGRESS"))
912                            .count();
913                        let in_progress_count = completed_tools.len() - completed_count;
914
915                        if !completed_tools.is_empty() && continuation_count < MAX_CONTINUATIONS {
916                            // We have partial progress - continue from where we left off
917                            continuation_count += 1;
918                            let status_msg = if in_progress_count > 0 {
919                                format!(
920                                    "โš  Response truncated. {} completed, {} in-progress. Auto-continuing ({}/{})...",
921                                    completed_count,
922                                    in_progress_count,
923                                    continuation_count,
924                                    MAX_CONTINUATIONS
925                                )
926                            } else {
927                                format!(
928                                    "โš  Response truncated. {} tool calls completed. Auto-continuing ({}/{})...",
929                                    completed_count, continuation_count, MAX_CONTINUATIONS
930                                )
931                            };
932                            eprintln!("{}", status_msg.yellow());
933
934                            // Add partial progress to conversation history
935                            // NOTE: We intentionally pass empty tool_calls here because the
936                            // continuation prompt already contains the detailed file list.
937                            // Including them in history would duplicate the context and waste tokens.
938                            conversation_history.add_turn(
939                                current_input.clone(),
940                                format!("[Partial response - {} tools completed, {} in-progress before truncation. See continuation prompt for details.]",
941                                    completed_count, in_progress_count),
942                                vec![]  // Don't duplicate - continuation prompt has the details
943                            );
944
945                            // Check if we need compaction after adding this heavy turn
946                            // This is important for long multi-turn sessions with many tool calls
947                            if conversation_history.needs_compaction() {
948                                eprintln!(
949                                    "{}",
950                                    "  ๐Ÿ“ฆ Compacting history before continuation...".dimmed()
951                                );
952                                if let Some(summary) = conversation_history.compact() {
953                                    eprintln!(
954                                        "{}",
955                                        format!(
956                                            "  โœ“ Compressed {} turns",
957                                            summary.matches("Turn").count()
958                                        )
959                                        .dimmed()
960                                    );
961                                }
962                            }
963
964                            // Build continuation prompt with context
965                            current_input = build_continuation_prompt(
966                                &input,
967                                &completed_tools,
968                                &agent_thinking,
969                            );
970
971                            // Log continuation details for debugging
972                            eprintln!("{}", format!(
973                                "  โ†’ Continuing with {} files read, {} written, {} other actions tracked",
974                                completed_tools.iter().filter(|t| t.tool_name == "read_file").count(),
975                                completed_tools.iter().filter(|t| t.tool_name == "write_file" || t.tool_name == "write_files").count(),
976                                completed_tools.iter().filter(|t| t.tool_name != "read_file" && t.tool_name != "write_file" && t.tool_name != "write_files" && t.tool_name != "list_directory").count()
977                            ).dimmed());
978
979                            // Brief delay before continuation
980                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
981                            // Don't increment retry_attempt - this is progress via continuation
982                        } else if retry_attempt < MAX_RETRIES {
983                            // No tool calls completed - simple retry
984                            retry_attempt += 1;
985                            eprintln!(
986                                "{}",
987                                format!(
988                                    "โš  Response error (attempt {}/{}). Retrying...",
989                                    retry_attempt, MAX_RETRIES
990                                )
991                                .yellow()
992                            );
993                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
994                        } else {
995                            // Max retries/continuations reached
996                            eprintln!("{}", format!("Error: {}", e).red());
997                            if continuation_count >= MAX_CONTINUATIONS {
998                                eprintln!("{}", format!("Max continuations ({}) reached. The task is too complex for one request.", MAX_CONTINUATIONS).dimmed());
999                            } else {
1000                                eprintln!(
1001                                    "{}",
1002                                    "Max retries reached. The response may be too complex."
1003                                        .dimmed()
1004                                );
1005                            }
1006                            eprintln!(
1007                                "{}",
1008                                "Try breaking your request into smaller parts.".dimmed()
1009                            );
1010                            break;
1011                        }
1012                    } else if err_str.contains("timeout") || err_str.contains("Timeout") {
1013                        // Timeout - simple retry
1014                        retry_attempt += 1;
1015                        if retry_attempt < MAX_RETRIES {
1016                            eprintln!(
1017                                "{}",
1018                                format!(
1019                                    "โš  Request timed out (attempt {}/{}). Retrying...",
1020                                    retry_attempt, MAX_RETRIES
1021                                )
1022                                .yellow()
1023                            );
1024                            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
1025                        } else {
1026                            eprintln!("{}", "Request timed out. Please try again.".red());
1027                            break;
1028                        }
1029                    } else {
1030                        // Unknown error - show details and break
1031                        eprintln!("{}", format!("Error: {}", e).red());
1032                        if continuation_count > 0 {
1033                            eprintln!(
1034                                "{}",
1035                                format!(
1036                                    "  (occurred during continuation attempt {})",
1037                                    continuation_count
1038                                )
1039                                .dimmed()
1040                            );
1041                        }
1042                        eprintln!("{}", "Error details for debugging:".dimmed());
1043                        eprintln!(
1044                            "{}",
1045                            format!("  - retry_attempt: {}/{}", retry_attempt, MAX_RETRIES)
1046                                .dimmed()
1047                        );
1048                        eprintln!(
1049                            "{}",
1050                            format!(
1051                                "  - continuation_count: {}/{}",
1052                                continuation_count, MAX_CONTINUATIONS
1053                            )
1054                            .dimmed()
1055                        );
1056                        break;
1057                    }
1058                }
1059            }
1060        }
1061        println!();
1062    }
1063
1064    Ok(())
1065}
1066
1067/// Extract tool call records from the hook state for history tracking
1068async fn extract_tool_calls_from_hook(hook: &ToolDisplayHook) -> Vec<ToolCallRecord> {
1069    let state = hook.state();
1070    let guard = state.lock().await;
1071
1072    guard
1073        .tool_calls
1074        .iter()
1075        .enumerate()
1076        .map(|(i, tc)| {
1077            let result = if tc.is_running {
1078                // Tool was in progress when error occurred
1079                "[IN PROGRESS - may need to be re-run]".to_string()
1080            } else if let Some(output) = &tc.output {
1081                truncate_string(output, 200)
1082            } else {
1083                "completed".to_string()
1084            };
1085
1086            ToolCallRecord {
1087                tool_name: tc.name.clone(),
1088                args_summary: truncate_string(&tc.args, 100),
1089                result_summary: result,
1090                // Generate a unique tool ID for proper message pairing
1091                tool_id: Some(format!("tool_{}_{}", tc.name, i)),
1092                // Mark read-only tools as droppable (their results can be re-fetched)
1093                droppable: matches!(
1094                    tc.name.as_str(),
1095                    "read_file" | "list_directory" | "analyze_project"
1096                ),
1097            }
1098        })
1099        .collect()
1100}
1101
1102/// Extract any agent thinking/messages from the hook for context
1103async fn extract_agent_messages_from_hook(hook: &ToolDisplayHook) -> Vec<String> {
1104    let state = hook.state();
1105    let guard = state.lock().await;
1106    guard.agent_messages.clone()
1107}
1108
/// Shorten `s` for use in summaries.
///
/// Lengths are measured in bytes. When `s` fits within `max_len` it is
/// returned unchanged. Otherwise the result is a prefix of at most
/// `max_len - 3` bytes followed by `"..."`. The cut is moved back to the
/// nearest UTF-8 character boundary so that multi-byte characters are never
/// split (slicing in the middle of one would panic).
///
/// Note: for `max_len < 3` a truncated result is just `"..."`, which is longer
/// than `max_len`.
fn truncate_string(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }

    // `cut < s.len()` here, and offset 0 is always a boundary, so this loop
    // terminates without underflow.
    let mut cut = max_len.saturating_sub(3);
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }

    format!("{}...", &s[..cut])
}
1117
1118/// Estimate token count from raw rig Messages
1119/// This is used for context length management to prevent "input too long" errors.
1120/// Estimates ~4 characters per token.
1121fn estimate_raw_history_tokens(messages: &[rig::completion::Message]) -> usize {
1122    use rig::completion::message::{AssistantContent, UserContent};
1123
1124    messages
1125        .iter()
1126        .map(|msg| -> usize {
1127            match msg {
1128                rig::completion::Message::User { content } => {
1129                    content
1130                        .iter()
1131                        .map(|c| -> usize {
1132                            match c {
1133                                UserContent::Text(t) => t.text.len() / 4,
1134                                _ => 100, // Estimate for images/documents
1135                            }
1136                        })
1137                        .sum::<usize>()
1138                }
1139                rig::completion::Message::Assistant { content, .. } => {
1140                    content
1141                        .iter()
1142                        .map(|c| -> usize {
1143                            match c {
1144                                AssistantContent::Text(t) => t.text.len() / 4,
1145                                AssistantContent::ToolCall(tc) => {
1146                                    // arguments is serde_json::Value, convert to string for length estimate
1147                                    let args_len = tc.function.arguments.to_string().len();
1148                                    (tc.function.name.len() + args_len) / 4
1149                                }
1150                                _ => 100,
1151                            }
1152                        })
1153                        .sum::<usize>()
1154                }
1155            }
1156        })
1157        .sum()
1158}
1159
1160/// Find a plan_create tool call in the list and extract plan info
1161/// Returns (plan_path, task_count) if found
1162fn find_plan_create_call(tool_calls: &[ToolCallRecord]) -> Option<(String, usize)> {
1163    for tc in tool_calls {
1164        if tc.tool_name == "plan_create" {
1165            // Try to parse the result_summary as JSON to extract plan_path
1166            // Note: result_summary may be truncated, so we have multiple fallbacks
1167            let plan_path =
1168                if let Ok(result) = serde_json::from_str::<serde_json::Value>(&tc.result_summary) {
1169                    result
1170                        .get("plan_path")
1171                        .and_then(|v| v.as_str())
1172                        .map(|s| s.to_string())
1173                } else {
1174                    None
1175                };
1176
1177            // If JSON parsing failed, find the most recently created plan file
1178            // This is more reliable than trying to reconstruct the path from truncated args
1179            let plan_path = plan_path.unwrap_or_else(|| {
1180                find_most_recent_plan_file().unwrap_or_else(|| "plans/plan.md".to_string())
1181            });
1182
1183            // Count tasks by reading the plan file directly
1184            let task_count = count_tasks_in_plan_file(&plan_path).unwrap_or(0);
1185
1186            return Some((plan_path, task_count));
1187        }
1188    }
1189    None
1190}
1191
/// Locate the `.md` file under `./plans` with the latest modification time.
///
/// Returns `None` when the current directory cannot be determined, when the
/// `plans` directory does not exist or cannot be read, or when it contains no
/// `.md` entry whose metadata and modification time are readable.
///
/// The returned path is relative to the current directory when that prefix can
/// be stripped, and the full path otherwise.
fn find_most_recent_plan_file() -> Option<String> {
    let plans_dir = std::env::current_dir().ok()?.join("plans");
    if !plans_dir.exists() {
        return None;
    }

    let (newest_path, _) = std::fs::read_dir(&plans_dir)
        .ok()?
        .flatten()
        .filter_map(|entry| {
            let path = entry.path();
            if !path.extension().is_some_and(|ext| ext == "md") {
                return None;
            }
            let modified = entry.metadata().ok()?.modified().ok()?;
            Some((path, modified))
        })
        // Strict `>` keeps the earliest-seen entry when timestamps tie.
        .reduce(|best, candidate| {
            if candidate.1 > best.1 {
                candidate
            } else {
                best
            }
        })?;

    // Prefer a path relative to the working directory.
    let cwd = std::env::current_dir().unwrap_or_default();
    Some(match newest_path.strip_prefix(cwd) {
        Ok(relative) => relative.display().to_string(),
        Err(_) => newest_path.display().to_string(),
    })
}
1219
/// Count task checkbox lines in a plan file.
///
/// A line counts as a task when, after optional leading whitespace, it starts
/// with `-`, optional whitespace, and one of the markers `[ ]`, `[x]`, `[~]`
/// or `[!]`. Anything may follow the closing bracket.
///
/// `plan_path` may be absolute or relative; a relative path is resolved by the
/// OS against the current working directory, so no separate fallback lookup is
/// needed. Returns `None` when the file cannot be read.
fn count_tasks_in_plan_file(plan_path: &str) -> Option<usize> {
    /// True when `line` begins with a task checkbox as described above.
    fn is_task_line(line: &str) -> bool {
        let Some(after_dash) = line.trim_start().strip_prefix('-') else {
            return false;
        };
        let mut chars = after_dash.trim_start().chars();
        chars.next() == Some('[')
            && matches!(chars.next(), Some(' ' | 'x' | '~' | '!'))
            && chars.next() == Some(']')
    }

    // Read directly instead of probing with `exists()` first: the probe added
    // nothing (both lookups resolve to the same file) and opened a small race.
    let content = std::fs::read_to_string(plan_path).ok()?;

    Some(content.lines().filter(|line| is_task_line(line)).count())
}
1242
/// Report whether an error message looks like a truncated-response / JSON
/// parsing failure, i.e. one the caller may recover from by continuing.
///
/// Matching is a case-sensitive substring search over a fixed marker list.
fn is_truncation_error(err_str: &str) -> bool {
    const MARKERS: [&str; 4] = [
        "JsonError",
        "EOF while parsing",
        "JSON",
        "unexpected end",
    ];

    MARKERS.iter().any(|marker| err_str.contains(*marker))
}
1250
/// Report whether an error message indicates that the request exceeded the
/// model's context limit ("input too long").
///
/// This typically shows up once conversation history has grown too large; the
/// caller is expected to shrink the context and retry. Matching is a
/// case-sensitive substring search over a fixed marker list.
fn is_input_too_long_error(err_str: &str) -> bool {
    const MARKERS: [&str; 6] = [
        "too long",
        "Too long",
        "context length",
        "maximum context",
        "exceeds the model",
        "Input is too long",
    ];

    MARKERS.iter().any(|marker| err_str.contains(*marker))
}
1262
1263/// Build a continuation prompt that tells the AI what work was completed
1264/// and asks it to continue from where it left off
1265fn build_continuation_prompt(
1266    original_task: &str,
1267    completed_tools: &[ToolCallRecord],
1268    agent_thinking: &[String],
1269) -> String {
1270    use std::collections::HashSet;
1271
1272    // Group tools by type and extract unique files read
1273    let mut files_read: HashSet<String> = HashSet::new();
1274    let mut files_written: HashSet<String> = HashSet::new();
1275    let mut dirs_listed: HashSet<String> = HashSet::new();
1276    let mut other_tools: Vec<String> = Vec::new();
1277    let mut in_progress: Vec<String> = Vec::new();
1278
1279    for tool in completed_tools {
1280        let is_in_progress = tool.result_summary.contains("IN PROGRESS");
1281
1282        if is_in_progress {
1283            in_progress.push(format!("{}({})", tool.tool_name, tool.args_summary));
1284            continue;
1285        }
1286
1287        match tool.tool_name.as_str() {
1288            "read_file" => {
1289                // Extract path from args
1290                files_read.insert(tool.args_summary.clone());
1291            }
1292            "write_file" | "write_files" => {
1293                files_written.insert(tool.args_summary.clone());
1294            }
1295            "list_directory" => {
1296                dirs_listed.insert(tool.args_summary.clone());
1297            }
1298            _ => {
1299                other_tools.push(format!(
1300                    "{}({})",
1301                    tool.tool_name,
1302                    truncate_string(&tool.args_summary, 40)
1303                ));
1304            }
1305        }
1306    }
1307
1308    let mut prompt = format!(
1309        "[CONTINUE] Your previous response was interrupted. DO NOT repeat completed work.\n\n\
1310        Original task: {}\n",
1311        truncate_string(original_task, 500)
1312    );
1313
1314    // Show files already read - CRITICAL for preventing re-reads
1315    if !files_read.is_empty() {
1316        prompt.push_str("\n== FILES ALREADY READ (do NOT read again) ==\n");
1317        for file in &files_read {
1318            prompt.push_str(&format!("  - {}\n", file));
1319        }
1320    }
1321
1322    if !dirs_listed.is_empty() {
1323        prompt.push_str("\n== DIRECTORIES ALREADY LISTED ==\n");
1324        for dir in &dirs_listed {
1325            prompt.push_str(&format!("  - {}\n", dir));
1326        }
1327    }
1328
1329    if !files_written.is_empty() {
1330        prompt.push_str("\n== FILES ALREADY WRITTEN ==\n");
1331        for file in &files_written {
1332            prompt.push_str(&format!("  - {}\n", file));
1333        }
1334    }
1335
1336    if !other_tools.is_empty() {
1337        prompt.push_str("\n== OTHER COMPLETED ACTIONS ==\n");
1338        for tool in other_tools.iter().take(20) {
1339            prompt.push_str(&format!("  - {}\n", tool));
1340        }
1341        if other_tools.len() > 20 {
1342            prompt.push_str(&format!("  ... and {} more\n", other_tools.len() - 20));
1343        }
1344    }
1345
1346    if !in_progress.is_empty() {
1347        prompt.push_str("\n== INTERRUPTED (may need re-run) ==\n");
1348        for tool in &in_progress {
1349            prompt.push_str(&format!("  โš  {}\n", tool));
1350        }
1351    }
1352
1353    // Include last thinking context if available
1354    if let Some(last_thought) = agent_thinking.last() {
1355        prompt.push_str(&format!(
1356            "\n== YOUR LAST THOUGHTS ==\n\"{}\"\n",
1357            truncate_string(last_thought, 300)
1358        ));
1359    }
1360
1361    prompt.push_str("\n== INSTRUCTIONS ==\n");
1362    prompt.push_str("IMPORTANT: Your previous response was too long and got cut off.\n");
1363    prompt.push_str("1. Do NOT re-read files listed above - they are already in context.\n");
1364    prompt.push_str("2. If writing a document, write it in SECTIONS - complete one section now, then continue.\n");
1365    prompt.push_str("3. Keep your response SHORT and focused. Better to complete small chunks than fail on large ones.\n");
1366    prompt.push_str("4. If the task involves writing a file, START WRITING NOW - don't explain what you'll do.\n");
1367
1368    prompt
1369}
1370
/// Run a single (non-interactive) query against the selected provider and return the response.
///
/// The system prompt comes from `get_system_prompt` (called with `PlanMode::default()`).
/// `prompts::is_generation_query` decides whether the write/shell tools are registered in
/// addition to the analysis tools, which are always registered.
///
/// # Arguments
/// * `project_path` - Passed to the prompt builder and, as an owned `PathBuf`, to each
///   tool constructor that takes a path.
/// * `query` - The user prompt sent to the agent.
/// * `provider` - Selects which provider arm builds and runs the agent.
/// * `model` - Optional model name; each provider arm falls back to its own default.
///
/// # Errors
/// A failure from the prompt call is converted to `AgentError::ProviderError` carrying
/// the error's string form.
///
/// NOTE(review): each `Client::from_env()` presumably reads credentials from the
/// environment and may panic instead of returning an error when they are missing —
/// confirm against the provider client documentation.
pub async fn run_query(
    project_path: &Path,
    query: &str,
    provider: ProviderType,
    model: Option<String>,
) -> AgentResult<String> {
    use tools::*;

    // Owned copy of the project path; cloned into every tool constructor below.
    let project_path_buf = project_path.to_path_buf();
    // Select prompt based on query type (analysis vs generation)
    // For single queries (non-interactive), always use standard mode
    let preamble = get_system_prompt(project_path, Some(query), PlanMode::default());
    // Gates registration of the write/shell tools in every provider arm.
    let is_generation = prompts::is_generation_query(query);

    match provider {
        ProviderType::OpenAI => {
            let client = openai::Client::from_env();
            // Default model when the caller did not specify one.
            let model_name = model.as_deref().unwrap_or("gpt-5.2");

            // For models whose name starts with "gpt-5" or "o1", enable reasoning
            // with summary output; other models get no additional parameters.
            let reasoning_params =
                if model_name.starts_with("gpt-5") || model_name.starts_with("o1") {
                    Some(serde_json::json!({
                        "reasoning": {
                            "effort": "medium",
                            "summary": "detailed"
                        }
                    }))
                } else {
                    None
                };

            // Analysis tools are always registered.
            let mut builder = client
                .agent(model_name)
                .preamble(&preamble)
                .max_tokens(4096)
                .tool(AnalyzeTool::new(project_path_buf.clone()))
                .tool(SecurityScanTool::new(project_path_buf.clone()))
                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                .tool(HadolintTool::new(project_path_buf.clone()))
                .tool(DclintTool::new(project_path_buf.clone()))
                .tool(TerraformFmtTool::new(project_path_buf.clone()))
                .tool(TerraformValidateTool::new(project_path_buf.clone()))
                .tool(TerraformInstallTool::new())
                .tool(ReadFileTool::new(project_path_buf.clone()))
                .tool(ListDirectoryTool::new(project_path_buf.clone()));

            // Add generation tools if this is a generation query
            if is_generation {
                builder = builder
                    .tool(WriteFileTool::new(project_path_buf.clone()))
                    .tool(WriteFilesTool::new(project_path_buf.clone()))
                    .tool(ShellTool::new(project_path_buf.clone()));
            }

            // Only attach the reasoning parameters when they were built above.
            if let Some(params) = reasoning_params {
                builder = builder.additional_params(params);
            }

            let agent = builder.build();

            // NOTE(review): `multi_turn(50)` presumably caps the number of tool-calling
            // turns at 50 — confirm against the rig prompt request docs.
            agent
                .prompt(query)
                .multi_turn(50)
                .await
                .map_err(|e| AgentError::ProviderError(e.to_string()))
        }
        ProviderType::Anthropic => {
            let client = anthropic::Client::from_env();
            // Default model when the caller did not specify one.
            let model_name = model.as_deref().unwrap_or("claude-sonnet-4-5-20250929");

            // TODO: Extended thinking for Claude is disabled because rig doesn't properly
            // handle thinking blocks in multi-turn conversations with tool use.
            // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference.

            // Same tool set and registration order as the OpenAI arm.
            let mut builder = client
                .agent(model_name)
                .preamble(&preamble)
                .max_tokens(4096)
                .tool(AnalyzeTool::new(project_path_buf.clone()))
                .tool(SecurityScanTool::new(project_path_buf.clone()))
                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                .tool(HadolintTool::new(project_path_buf.clone()))
                .tool(DclintTool::new(project_path_buf.clone()))
                .tool(TerraformFmtTool::new(project_path_buf.clone()))
                .tool(TerraformValidateTool::new(project_path_buf.clone()))
                .tool(TerraformInstallTool::new())
                .tool(ReadFileTool::new(project_path_buf.clone()))
                .tool(ListDirectoryTool::new(project_path_buf.clone()));

            // Add generation tools if this is a generation query
            if is_generation {
                builder = builder
                    .tool(WriteFileTool::new(project_path_buf.clone()))
                    .tool(WriteFilesTool::new(project_path_buf.clone()))
                    .tool(ShellTool::new(project_path_buf.clone()));
            }

            // No additional parameters are attached in this arm (see TODO above).
            let agent = builder.build();

            agent
                .prompt(query)
                .multi_turn(50)
                .await
                .map_err(|e| AgentError::ProviderError(e.to_string()))
        }
        ProviderType::Bedrock => {
            // Bedrock provider via rig-bedrock - same pattern as Anthropic
            let client = rig_bedrock::client::Client::from_env();
            // Default model when the caller did not specify one.
            let model_name = model
                .as_deref()
                .unwrap_or("global.anthropic.claude-sonnet-4-5-20250929-v1:0");

            // Extended thinking for Claude via Bedrock; unlike the OpenAI arm these
            // parameters are attached unconditionally, whatever the model name.
            let thinking_params = serde_json::json!({
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": 16000
                }
            });

            // Same tool set and registration order as the other arms; only the
            // output token limit differs.
            let mut builder = client
                .agent(model_name)
                .preamble(&preamble)
                .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
                .tool(AnalyzeTool::new(project_path_buf.clone()))
                .tool(SecurityScanTool::new(project_path_buf.clone()))
                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                .tool(HadolintTool::new(project_path_buf.clone()))
                .tool(DclintTool::new(project_path_buf.clone()))
                .tool(TerraformFmtTool::new(project_path_buf.clone()))
                .tool(TerraformValidateTool::new(project_path_buf.clone()))
                .tool(TerraformInstallTool::new())
                .tool(ReadFileTool::new(project_path_buf.clone()))
                .tool(ListDirectoryTool::new(project_path_buf.clone()));

            // Add generation tools if this is a generation query
            if is_generation {
                builder = builder
                    .tool(WriteFileTool::new(project_path_buf.clone()))
                    .tool(WriteFilesTool::new(project_path_buf.clone()))
                    .tool(ShellTool::new(project_path_buf.clone()));
            }

            // Thinking parameters are attached last, just before the agent is built.
            let agent = builder.additional_params(thinking_params).build();

            agent
                .prompt(query)
                .multi_turn(50)
                .await
                .map_err(|e| AgentError::ProviderError(e.to_string()))
        }
    }
}