syncable_cli/agent/
mod.rs

1//! Agent module for interactive AI-powered CLI assistance
2//!
3//! This module provides an agent layer using the Rig library that allows users
4//! to interact with the CLI through natural language conversations.
5//!
6//! # Features
7//!
8//! - **Conversation History**: Maintains context across multiple turns
9//! - **Automatic Compaction**: Compresses old history when token count exceeds threshold
10//! - **Tool Tracking**: Records tool calls for better context preservation
11//!
12//! # Usage
13//!
14//! ```bash
15//! # Interactive mode
16//! sync-ctl chat
17//!
18//! # With specific provider
19//! sync-ctl chat --provider openai --model gpt-5.2
20//!
21//! # Single query
22//! sync-ctl chat --query "What security issues does this project have?"
23//! ```
24//!
25//! # Interactive Commands
26//!
27//! - `/model` - Switch to a different AI model
28//! - `/provider` - Switch provider (prompts for API key if needed)
29//! - `/help` - Show available commands
30//! - `/clear` - Clear conversation history
31//! - `/exit` - Exit the chat
32
33pub mod commands;
34pub mod compact;
35pub mod history;
36pub mod ide;
37pub mod prompts;
38pub mod session;
39pub mod tools;
40pub mod ui;
41use colored::Colorize;
42use commands::TokenUsage;
43use history::{ConversationHistory, ToolCallRecord};
44use ide::IdeClient;
45use rig::{
46    client::{CompletionClient, ProviderClient},
47    completion::Prompt,
48    providers::{anthropic, openai},
49};
50use session::{ChatSession, PlanMode};
51use std::path::Path;
52use std::sync::Arc;
53use tokio::sync::Mutex as TokioMutex;
54use ui::{ResponseFormatter, ToolDisplayHook};
55
/// Provider type for the agent.
///
/// Identifies which LLM backend a chat session talks to.
/// Defaults to [`ProviderType::OpenAI`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ProviderType {
    /// OpenAI API (GPT models); the default provider.
    #[default]
    OpenAI,
    /// Anthropic API (Claude models).
    Anthropic,
    /// AWS Bedrock (also parseable from the "aws"/"aws-bedrock" aliases).
    Bedrock,
}
64
65impl std::fmt::Display for ProviderType {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        match self {
68            ProviderType::OpenAI => write!(f, "openai"),
69            ProviderType::Anthropic => write!(f, "anthropic"),
70            ProviderType::Bedrock => write!(f, "bedrock"),
71        }
72    }
73}
74
75impl std::str::FromStr for ProviderType {
76    type Err = String;
77
78    fn from_str(s: &str) -> Result<Self, Self::Err> {
79        match s.to_lowercase().as_str() {
80            "openai" => Ok(ProviderType::OpenAI),
81            "anthropic" => Ok(ProviderType::Anthropic),
82            "bedrock" | "aws" | "aws-bedrock" => Ok(ProviderType::Bedrock),
83            _ => Err(format!(
84                "Unknown provider: {}. Use: openai, anthropic, or bedrock",
85                s
86            )),
87        }
88    }
89}
90
/// Error types for the agent.
///
/// Display text for each variant is produced by the `thiserror` derive
/// from the `#[error(...)]` attributes below.
#[derive(Debug, thiserror::Error)]
pub enum AgentError {
    /// No API key is configured for the selected provider; the payload is
    /// the name of the environment variable the user should set.
    #[error("Missing API key. Set {0} environment variable.")]
    MissingApiKey(String),

    /// Failure reported by the underlying LLM provider.
    #[error("Provider error: {0}")]
    ProviderError(String),

    /// Failure raised while running one of the agent's tools.
    #[error("Tool error: {0}")]
    ToolError(String),
}
103
/// Convenience alias for results produced by the agent layer.
pub type AgentResult<T> = Result<T, AgentError>;
105
106/// Get the system prompt for the agent based on query type and plan mode
107fn get_system_prompt(project_path: &Path, query: Option<&str>, plan_mode: PlanMode) -> String {
108    // In planning mode, use the read-only exploration prompt
109    if plan_mode.is_planning() {
110        return prompts::get_planning_prompt(project_path);
111    }
112
113    if let Some(q) = query {
114        // First check if it's a code development task (highest priority)
115        if prompts::is_code_development_query(q) {
116            return prompts::get_code_development_prompt(project_path);
117        }
118        // Then check if it's DevOps generation (Docker, Terraform, Helm)
119        if prompts::is_generation_query(q) {
120            return prompts::get_devops_prompt(project_path);
121        }
122    }
123    // Default to analysis prompt
124    prompts::get_analysis_prompt(project_path)
125}
126
127/// Run the agent in interactive mode with custom REPL supporting /model and /provider commands
128pub async fn run_interactive(
129    project_path: &Path,
130    provider: ProviderType,
131    model: Option<String>,
132) -> AgentResult<()> {
133    use tools::*;
134
135    let mut session = ChatSession::new(project_path, provider, model);
136
137    // Initialize conversation history with compaction support
138    let mut conversation_history = ConversationHistory::new();
139
140    // Initialize IDE client for native diff viewing
141    let ide_client: Option<Arc<TokioMutex<IdeClient>>> = {
142        let mut client = IdeClient::new().await;
143        if client.is_ide_available() {
144            match client.connect().await {
145                Ok(()) => {
146                    println!(
147                        "{} Connected to {} IDE companion",
148                        "โœ“".green(),
149                        client.ide_name().unwrap_or("VS Code")
150                    );
151                    Some(Arc::new(TokioMutex::new(client)))
152                }
153                Err(e) => {
154                    // IDE detected but companion not running or connection failed
155                    println!("{} IDE companion not connected: {}", "!".yellow(), e);
156                    None
157                }
158            }
159        } else {
160            println!(
161                "{} No IDE detected (TERM_PROGRAM={})",
162                "ยท".dimmed(),
163                std::env::var("TERM_PROGRAM").unwrap_or_default()
164            );
165            None
166        }
167    };
168
169    // Load API key from config file to env if not already set
170    ChatSession::load_api_key_to_env(session.provider);
171
172    // Check if API key is configured, prompt if not
173    if !ChatSession::has_api_key(session.provider) {
174        ChatSession::prompt_api_key(session.provider)?;
175    }
176
177    session.print_banner();
178
179    // Raw Rig messages for multi-turn - preserves Reasoning blocks for thinking
180    // Our ConversationHistory only stores text summaries, but rig needs full Message structure
181    let mut raw_chat_history: Vec<rig::completion::Message> = Vec::new();
182
183    // Pending input for auto-continue after plan creation
184    let mut pending_input: Option<String> = None;
185    // Auto-accept mode for plan execution (skips write confirmations)
186    let mut auto_accept_writes = false;
187
188    loop {
189        // Show conversation status if we have history
190        if !conversation_history.is_empty() {
191            println!(
192                "{}",
193                format!("  ๐Ÿ’ฌ Context: {}", conversation_history.status()).dimmed()
194            );
195        }
196
197        // Check for pending input (from plan menu selection)
198        let input = if let Some(pending) = pending_input.take() {
199            // Show what we're executing
200            println!("{} {}", "โ†’".cyan(), pending.dimmed());
201            pending
202        } else {
203            // New user turn - reset auto-accept mode from previous plan execution
204            auto_accept_writes = false;
205
206            // Read user input (returns InputResult)
207            let input_result = match session.read_input() {
208                Ok(result) => result,
209                Err(_) => break,
210            };
211
212            // Handle the input result
213            match input_result {
214                ui::InputResult::Submit(text) => ChatSession::process_submitted_text(&text),
215                ui::InputResult::Cancel | ui::InputResult::Exit => break,
216                ui::InputResult::TogglePlanMode => {
217                    // Toggle planning mode - minimal feedback, no extra newlines
218                    let new_mode = session.toggle_plan_mode();
219                    if new_mode.is_planning() {
220                        println!("{}", "โ˜… plan mode".yellow());
221                    } else {
222                        println!("{}", "โ–ถ standard mode".green());
223                    }
224                    continue;
225                }
226            }
227        };
228
229        if input.is_empty() {
230            continue;
231        }
232
233        // Check for commands
234        if ChatSession::is_command(&input) {
235            // Special handling for /clear to also clear conversation history
236            if input.trim().to_lowercase() == "/clear" || input.trim().to_lowercase() == "/c" {
237                conversation_history.clear();
238                raw_chat_history.clear();
239            }
240            match session.process_command(&input) {
241                Ok(true) => continue,
242                Ok(false) => break, // /exit
243                Err(e) => {
244                    eprintln!("{}", format!("Error: {}", e).red());
245                    continue;
246                }
247            }
248        }
249
250        // Check API key before making request (in case provider changed)
251        if !ChatSession::has_api_key(session.provider) {
252            eprintln!(
253                "{}",
254                "No API key configured. Use /provider to set one.".yellow()
255            );
256            continue;
257        }
258
259        // Check if compaction is needed before making the request
260        if conversation_history.needs_compaction() {
261            println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
262            if let Some(summary) = conversation_history.compact() {
263                println!(
264                    "{}",
265                    format!("  โœ“ Compressed {} turns", summary.matches("Turn").count()).dimmed()
266                );
267            }
268        }
269
270        // Pre-request check: estimate if we're approaching context limit
271        // Check raw_chat_history (actual messages) not conversation_history
272        // because conversation_history may be out of sync
273        let estimated_input_tokens = estimate_raw_history_tokens(&raw_chat_history)
274            + input.len() / 4  // New input
275            + 5000; // System prompt overhead estimate
276
277        if estimated_input_tokens > 150_000 {
278            println!(
279                "{}",
280                "  โš  Large context detected. Pre-truncating...".yellow()
281            );
282
283            let old_count = raw_chat_history.len();
284            // Keep last 20 messages when approaching limit
285            if raw_chat_history.len() > 20 {
286                let drain_count = raw_chat_history.len() - 20;
287                raw_chat_history.drain(0..drain_count);
288                conversation_history.clear(); // Stay in sync
289                println!(
290                    "{}",
291                    format!(
292                        "  โœ“ Truncated {} โ†’ {} messages",
293                        old_count,
294                        raw_chat_history.len()
295                    )
296                    .dimmed()
297                );
298            }
299        }
300
301        // Retry loop for automatic error recovery
302        // MAX_RETRIES is for failures without progress
303        // MAX_CONTINUATIONS is for truncations WITH progress (more generous)
304        // TOOL_CALL_CHECKPOINT is the interval at which we ask user to confirm
305        // MAX_TOOL_CALLS is the absolute maximum (300 = 6 checkpoints x 50)
306        const MAX_RETRIES: u32 = 3;
307        const MAX_CONTINUATIONS: u32 = 10;
308        const TOOL_CALL_CHECKPOINT: usize = 50;
309        const MAX_TOOL_CALLS: usize = 300;
310        let mut retry_attempt = 0;
311        let mut continuation_count = 0;
312        let mut total_tool_calls: usize = 0;
313        let mut auto_continue_tools = false; // User can select "always" to skip future prompts
314        let mut current_input = input.clone();
315        let mut succeeded = false;
316
317        while retry_attempt < MAX_RETRIES && continuation_count < MAX_CONTINUATIONS && !succeeded {
318            // Log if this is a continuation attempt
319            if continuation_count > 0 {
320                eprintln!(
321                    "{}",
322                    format!("  ๐Ÿ“ก Sending continuation request...").dimmed()
323                );
324            }
325
326            // Create hook for Claude Code style tool display
327            let hook = ToolDisplayHook::new();
328
329            let project_path_buf = session.project_path.clone();
330            // Select prompt based on query type (analysis vs generation) and plan mode
331            let preamble = get_system_prompt(
332                &session.project_path,
333                Some(&current_input),
334                session.plan_mode,
335            );
336            let is_generation = prompts::is_generation_query(&current_input);
337            let is_planning = session.plan_mode.is_planning();
338
339            // Note: using raw_chat_history directly which preserves Reasoning blocks
340            // This is needed for extended thinking to work with multi-turn conversations
341
342            let response = match session.provider {
343                ProviderType::OpenAI => {
344                    let client = openai::Client::from_env();
345                    // For GPT-5.x reasoning models, enable reasoning with summary output
346                    // so we can see the model's thinking process
347                    let reasoning_params =
348                        if session.model.starts_with("gpt-5") || session.model.starts_with("o1") {
349                            Some(serde_json::json!({
350                                "reasoning": {
351                                    "effort": "medium",
352                                    "summary": "detailed"
353                                }
354                            }))
355                        } else {
356                            None
357                        };
358
359                    let mut builder = client
360                        .agent(&session.model)
361                        .preamble(&preamble)
362                        .max_tokens(4096)
363                        .tool(AnalyzeTool::new(project_path_buf.clone()))
364                        .tool(SecurityScanTool::new(project_path_buf.clone()))
365                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
366                        .tool(HadolintTool::new(project_path_buf.clone()))
367                        .tool(DclintTool::new(project_path_buf.clone()))
368                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
369                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
370                        .tool(TerraformInstallTool::new())
371                        .tool(ReadFileTool::new(project_path_buf.clone()))
372                        .tool(ListDirectoryTool::new(project_path_buf.clone()));
373
374                    // Add tools based on mode
375                    if is_planning {
376                        // Plan mode: read-only shell + plan creation tools
377                        builder = builder
378                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
379                            .tool(PlanCreateTool::new(project_path_buf.clone()))
380                            .tool(PlanListTool::new(project_path_buf.clone()));
381                    } else if is_generation {
382                        // Standard mode + generation query: all tools including file writes and plan execution
383                        let (mut write_file_tool, mut write_files_tool) =
384                            if let Some(ref client) = ide_client {
385                                (
386                                    WriteFileTool::new(project_path_buf.clone())
387                                        .with_ide_client(client.clone()),
388                                    WriteFilesTool::new(project_path_buf.clone())
389                                        .with_ide_client(client.clone()),
390                                )
391                            } else {
392                                (
393                                    WriteFileTool::new(project_path_buf.clone()),
394                                    WriteFilesTool::new(project_path_buf.clone()),
395                                )
396                            };
397                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
398                        if auto_accept_writes {
399                            write_file_tool = write_file_tool.without_confirmation();
400                            write_files_tool = write_files_tool.without_confirmation();
401                        }
402                        builder = builder
403                            .tool(write_file_tool)
404                            .tool(write_files_tool)
405                            .tool(ShellTool::new(project_path_buf.clone()))
406                            .tool(PlanListTool::new(project_path_buf.clone()))
407                            .tool(PlanNextTool::new(project_path_buf.clone()))
408                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
409                    }
410
411                    if let Some(params) = reasoning_params {
412                        builder = builder.additional_params(params);
413                    }
414
415                    let agent = builder.build();
416                    // Allow up to 50 tool call turns for complex generation tasks
417                    // Use hook to display tool calls as they happen
418                    // Pass conversation history for context continuity
419                    agent
420                        .prompt(&current_input)
421                        .with_history(&mut raw_chat_history)
422                        .with_hook(hook.clone())
423                        .multi_turn(50)
424                        .await
425                }
426                ProviderType::Anthropic => {
427                    let client = anthropic::Client::from_env();
428
429                    // TODO: Extended thinking for Claude is disabled because rig-bedrock/rig-anthropic
430                    // don't properly handle thinking blocks in multi-turn conversations with tool use.
431                    // When thinking is enabled, ALL assistant messages must start with thinking blocks
432                    // BEFORE tool_use blocks, but rig doesn't preserve/replay these.
433                    // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference impl.
434
435                    let mut builder = client
436                        .agent(&session.model)
437                        .preamble(&preamble)
438                        .max_tokens(4096)
439                        .tool(AnalyzeTool::new(project_path_buf.clone()))
440                        .tool(SecurityScanTool::new(project_path_buf.clone()))
441                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
442                        .tool(HadolintTool::new(project_path_buf.clone()))
443                        .tool(DclintTool::new(project_path_buf.clone()))
444                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
445                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
446                        .tool(TerraformInstallTool::new())
447                        .tool(ReadFileTool::new(project_path_buf.clone()))
448                        .tool(ListDirectoryTool::new(project_path_buf.clone()));
449
450                    // Add tools based on mode
451                    if is_planning {
452                        // Plan mode: read-only shell + plan creation tools
453                        builder = builder
454                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
455                            .tool(PlanCreateTool::new(project_path_buf.clone()))
456                            .tool(PlanListTool::new(project_path_buf.clone()));
457                    } else if is_generation {
458                        // Standard mode + generation query: all tools including file writes and plan execution
459                        let (mut write_file_tool, mut write_files_tool) =
460                            if let Some(ref client) = ide_client {
461                                (
462                                    WriteFileTool::new(project_path_buf.clone())
463                                        .with_ide_client(client.clone()),
464                                    WriteFilesTool::new(project_path_buf.clone())
465                                        .with_ide_client(client.clone()),
466                                )
467                            } else {
468                                (
469                                    WriteFileTool::new(project_path_buf.clone()),
470                                    WriteFilesTool::new(project_path_buf.clone()),
471                                )
472                            };
473                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
474                        if auto_accept_writes {
475                            write_file_tool = write_file_tool.without_confirmation();
476                            write_files_tool = write_files_tool.without_confirmation();
477                        }
478                        builder = builder
479                            .tool(write_file_tool)
480                            .tool(write_files_tool)
481                            .tool(ShellTool::new(project_path_buf.clone()))
482                            .tool(PlanListTool::new(project_path_buf.clone()))
483                            .tool(PlanNextTool::new(project_path_buf.clone()))
484                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
485                    }
486
487                    let agent = builder.build();
488
489                    // Allow up to 50 tool call turns for complex generation tasks
490                    // Use hook to display tool calls as they happen
491                    // Pass conversation history for context continuity
492                    agent
493                        .prompt(&current_input)
494                        .with_history(&mut raw_chat_history)
495                        .with_hook(hook.clone())
496                        .multi_turn(50)
497                        .await
498                }
499                ProviderType::Bedrock => {
500                    // Bedrock provider via rig-bedrock - same pattern as OpenAI/Anthropic
501                    let client = rig_bedrock::client::Client::from_env();
502
503                    // Extended thinking for Claude models via Bedrock
504                    // This enables Claude to show its reasoning process before responding.
505                    // Requires vendored rig-bedrock that preserves Reasoning blocks with tool calls.
506                    // Extended thinking budget - reduced to help with rate limits
507                    // 8000 is enough for most tasks, increase to 16000 for complex analysis
508                    let thinking_params = serde_json::json!({
509                        "thinking": {
510                            "type": "enabled",
511                            "budget_tokens": 8000
512                        }
513                    });
514
515                    let mut builder = client
516                        .agent(&session.model)
517                        .preamble(&preamble)
518                        .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
519                        .tool(AnalyzeTool::new(project_path_buf.clone()))
520                        .tool(SecurityScanTool::new(project_path_buf.clone()))
521                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
522                        .tool(HadolintTool::new(project_path_buf.clone()))
523                        .tool(DclintTool::new(project_path_buf.clone()))
524                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
525                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
526                        .tool(TerraformInstallTool::new())
527                        .tool(ReadFileTool::new(project_path_buf.clone()))
528                        .tool(ListDirectoryTool::new(project_path_buf.clone()));
529
530                    // Add tools based on mode
531                    if is_planning {
532                        // Plan mode: read-only shell + plan creation tools
533                        builder = builder
534                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
535                            .tool(PlanCreateTool::new(project_path_buf.clone()))
536                            .tool(PlanListTool::new(project_path_buf.clone()));
537                    } else if is_generation {
538                        // Standard mode + generation query: all tools including file writes and plan execution
539                        let (mut write_file_tool, mut write_files_tool) =
540                            if let Some(ref client) = ide_client {
541                                (
542                                    WriteFileTool::new(project_path_buf.clone())
543                                        .with_ide_client(client.clone()),
544                                    WriteFilesTool::new(project_path_buf.clone())
545                                        .with_ide_client(client.clone()),
546                                )
547                            } else {
548                                (
549                                    WriteFileTool::new(project_path_buf.clone()),
550                                    WriteFilesTool::new(project_path_buf.clone()),
551                                )
552                            };
553                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
554                        if auto_accept_writes {
555                            write_file_tool = write_file_tool.without_confirmation();
556                            write_files_tool = write_files_tool.without_confirmation();
557                        }
558                        builder = builder
559                            .tool(write_file_tool)
560                            .tool(write_files_tool)
561                            .tool(ShellTool::new(project_path_buf.clone()))
562                            .tool(PlanListTool::new(project_path_buf.clone()))
563                            .tool(PlanNextTool::new(project_path_buf.clone()))
564                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
565                    }
566
567                    // Add thinking params for extended reasoning
568                    builder = builder.additional_params(thinking_params);
569
570                    let agent = builder.build();
571
572                    // Use same multi-turn pattern as OpenAI/Anthropic
573                    agent
574                        .prompt(&current_input)
575                        .with_history(&mut raw_chat_history)
576                        .with_hook(hook.clone())
577                        .multi_turn(50)
578                        .await
579                }
580            };
581
582            match response {
583                Ok(text) => {
584                    // Show final response
585                    println!();
586                    ResponseFormatter::print_response(&text);
587
588                    // Track token usage - use actual from hook if available, else estimate
589                    let hook_usage = hook.get_usage().await;
590                    if hook_usage.has_data() {
591                        // Use actual token counts from API response
592                        session
593                            .token_usage
594                            .add_actual(hook_usage.input_tokens, hook_usage.output_tokens);
595                    } else {
596                        // Fall back to estimation when API doesn't provide usage
597                        let prompt_tokens = TokenUsage::estimate_tokens(&input);
598                        let completion_tokens = TokenUsage::estimate_tokens(&text);
599                        session
600                            .token_usage
601                            .add_estimated(prompt_tokens, completion_tokens);
602                    }
603                    // Reset hook usage for next request batch
604                    hook.reset_usage().await;
605
606                    // Show context indicator like Forge: [model/~tokens]
607                    let model_short = session
608                        .model
609                        .split('/')
610                        .last()
611                        .unwrap_or(&session.model)
612                        .split(':')
613                        .next()
614                        .unwrap_or(&session.model);
615                    println!();
616                    println!(
617                        "  {}[{}/{}]{}",
618                        ui::colors::ansi::DIM,
619                        model_short,
620                        session.token_usage.format_compact(),
621                        ui::colors::ansi::RESET
622                    );
623
624                    // Extract tool calls from the hook state for history tracking
625                    let tool_calls = extract_tool_calls_from_hook(&hook).await;
626                    let batch_tool_count = tool_calls.len();
627                    total_tool_calls += batch_tool_count;
628
629                    // Show tool call summary if significant
630                    if batch_tool_count > 10 {
631                        println!(
632                            "{}",
633                            format!(
634                                "  โœ“ Completed with {} tool calls ({} total this session)",
635                                batch_tool_count, total_tool_calls
636                            )
637                            .dimmed()
638                        );
639                    }
640
641                    // Add to conversation history with tool call records
642                    conversation_history.add_turn(input.clone(), text.clone(), tool_calls.clone());
643
644                    // Check if this heavy turn requires immediate compaction
645                    // This helps prevent context overflow in subsequent requests
646                    if conversation_history.needs_compaction() {
647                        println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
648                        if let Some(summary) = conversation_history.compact() {
649                            println!(
650                                "{}",
651                                format!("  โœ“ Compressed {} turns", summary.matches("Turn").count())
652                                    .dimmed()
653                            );
654                        }
655                    }
656
657                    // Also update legacy session history for compatibility
658                    session.history.push(("user".to_string(), input.clone()));
659                    session
660                        .history
661                        .push(("assistant".to_string(), text.clone()));
662
663                    // Check if plan_create was called - show interactive menu
664                    if let Some(plan_info) = find_plan_create_call(&tool_calls) {
665                        println!(); // Space before menu
666
667                        // Show the plan action menu (don't switch modes yet - let user choose)
668                        match ui::show_plan_action_menu(&plan_info.0, plan_info.1) {
669                            ui::PlanActionResult::ExecuteAutoAccept => {
670                                // Now switch to standard mode for execution
671                                if session.plan_mode.is_planning() {
672                                    session.plan_mode = session.plan_mode.toggle();
673                                }
674                                auto_accept_writes = true;
675                                pending_input = Some(format!(
676                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order. Auto-accept all file writes.",
677                                    plan_info.0
678                                ));
679                                succeeded = true;
680                            }
681                            ui::PlanActionResult::ExecuteWithReview => {
682                                // Now switch to standard mode for execution
683                                if session.plan_mode.is_planning() {
684                                    session.plan_mode = session.plan_mode.toggle();
685                                }
686                                pending_input = Some(format!(
687                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order.",
688                                    plan_info.0
689                                ));
690                                succeeded = true;
691                            }
692                            ui::PlanActionResult::ChangePlan(feedback) => {
693                                // Stay in plan mode for modifications
694                                pending_input = Some(format!(
695                                    "Please modify the plan at '{}'. User feedback: {}",
696                                    plan_info.0, feedback
697                                ));
698                                succeeded = true;
699                            }
700                            ui::PlanActionResult::Cancel => {
701                                // Just complete normally, don't execute
702                                succeeded = true;
703                            }
704                        }
705                    } else {
706                        succeeded = true;
707                    }
708                }
709                Err(e) => {
710                    let err_str = e.to_string();
711
712                    println!();
713
714                    // Check if this is a max depth error - handle as checkpoint
715                    if err_str.contains("MaxDepth")
716                        || err_str.contains("max_depth")
717                        || err_str.contains("reached limit")
718                    {
719                        // Extract what was done before hitting the limit
720                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
721                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
722                        let batch_tool_count = completed_tools.len();
723                        total_tool_calls += batch_tool_count;
724
725                        eprintln!("{}", format!(
726                            "โš  Reached {} tool calls this batch ({} total). Maximum allowed: {}",
727                            batch_tool_count, total_tool_calls, MAX_TOOL_CALLS
728                        ).yellow());
729
730                        // Check if we've hit the absolute maximum
731                        if total_tool_calls >= MAX_TOOL_CALLS {
732                            eprintln!(
733                                "{}",
734                                format!("Maximum tool call limit ({}) reached.", MAX_TOOL_CALLS)
735                                    .red()
736                            );
737                            eprintln!(
738                                "{}",
739                                "The task is too complex. Try breaking it into smaller parts."
740                                    .dimmed()
741                            );
742                            break;
743                        }
744
745                        // Ask user if they want to continue (unless auto-continue is enabled)
746                        let should_continue = if auto_continue_tools {
747                            eprintln!(
748                                "{}",
749                                "  Auto-continuing (you selected 'always')...".dimmed()
750                            );
751                            true
752                        } else {
753                            eprintln!(
754                                "{}",
755                                "Excessive tool calls used. Want to continue?".yellow()
756                            );
757                            eprintln!(
758                                "{}",
759                                "  [y] Yes, continue  [n] No, stop  [a] Always continue".dimmed()
760                            );
761                            print!("  > ");
762                            let _ = std::io::Write::flush(&mut std::io::stdout());
763
764                            // Read user input
765                            let mut response = String::new();
766                            match std::io::stdin().read_line(&mut response) {
767                                Ok(_) => {
768                                    let resp = response.trim().to_lowercase();
769                                    if resp == "a" || resp == "always" {
770                                        auto_continue_tools = true;
771                                        true
772                                    } else {
773                                        resp == "y" || resp == "yes" || resp.is_empty()
774                                    }
775                                }
776                                Err(_) => false,
777                            }
778                        };
779
780                        if !should_continue {
781                            eprintln!(
782                                "{}",
783                                "Stopped by user. Type 'continue' to resume later.".dimmed()
784                            );
785                            // Add partial progress to history
786                            if !completed_tools.is_empty() {
787                                conversation_history.add_turn(
788                                    current_input.clone(),
789                                    format!(
790                                        "[Stopped at checkpoint - {} tools completed]",
791                                        batch_tool_count
792                                    ),
793                                    vec![],
794                                );
795                            }
796                            break;
797                        }
798
799                        // Continue from checkpoint
800                        eprintln!(
801                            "{}",
802                            format!(
803                                "  โ†’ Continuing... {} remaining tool calls available",
804                                MAX_TOOL_CALLS - total_tool_calls
805                            )
806                            .dimmed()
807                        );
808
809                        // Add partial progress to history (without duplicating tool calls)
810                        conversation_history.add_turn(
811                            current_input.clone(),
812                            format!(
813                                "[Checkpoint - {} tools completed, continuing...]",
814                                batch_tool_count
815                            ),
816                            vec![],
817                        );
818
819                        // Build continuation prompt
820                        current_input =
821                            build_continuation_prompt(&input, &completed_tools, &agent_thinking);
822
823                        // Brief delay before continuation
824                        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
825                        continue; // Continue the loop without incrementing retry_attempt
826                    } else if err_str.contains("rate")
827                        || err_str.contains("Rate")
828                        || err_str.contains("429")
829                        || err_str.contains("Too many tokens")
830                        || err_str.contains("please wait")
831                        || err_str.contains("throttl")
832                        || err_str.contains("Throttl")
833                    {
834                        eprintln!("{}", "โš  Rate limited by API provider.".yellow());
835                        // Wait before retry for rate limits (longer wait for "too many tokens")
836                        retry_attempt += 1;
837                        let wait_secs = if err_str.contains("Too many tokens") {
838                            30
839                        } else {
840                            5
841                        };
842                        eprintln!(
843                            "{}",
844                            format!(
845                                "  Waiting {} seconds before retry ({}/{})...",
846                                wait_secs, retry_attempt, MAX_RETRIES
847                            )
848                            .dimmed()
849                        );
850                        tokio::time::sleep(tokio::time::Duration::from_secs(wait_secs)).await;
851                    } else if is_input_too_long_error(&err_str) {
852                        // Context too large - truncate raw_chat_history directly
853                        // NOTE: We truncate raw_chat_history (actual messages) not conversation_history
854                        // because conversation_history may be empty/stale during errors
855                        eprintln!(
856                            "{}",
857                            "โš  Context too large for model. Truncating history...".yellow()
858                        );
859
860                        let old_token_count = estimate_raw_history_tokens(&raw_chat_history);
861                        let old_msg_count = raw_chat_history.len();
862
863                        // Strategy: Keep only the last N messages (user/assistant pairs)
864                        // More aggressive truncation on each retry: 10 โ†’ 6 โ†’ 4 messages
865                        let keep_count = match retry_attempt {
866                            0 => 10,
867                            1 => 6,
868                            _ => 4,
869                        };
870
871                        if raw_chat_history.len() > keep_count {
872                            // Drain older messages, keep the most recent ones
873                            let drain_count = raw_chat_history.len() - keep_count;
874                            raw_chat_history.drain(0..drain_count);
875                        }
876
877                        let new_token_count = estimate_raw_history_tokens(&raw_chat_history);
878                        eprintln!("{}", format!(
879                            "  โœ“ Truncated: {} messages (~{} tokens) โ†’ {} messages (~{} tokens)",
880                            old_msg_count, old_token_count, raw_chat_history.len(), new_token_count
881                        ).green());
882
883                        // Also clear conversation_history to stay in sync
884                        conversation_history.clear();
885
886                        // Retry with truncated context
887                        retry_attempt += 1;
888                        if retry_attempt < MAX_RETRIES {
889                            eprintln!(
890                                "{}",
891                                format!(
892                                    "  โ†’ Retrying with truncated context ({}/{})...",
893                                    retry_attempt, MAX_RETRIES
894                                )
895                                .dimmed()
896                            );
897                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
898                        } else {
899                            eprintln!(
900                                "{}",
901                                "Context still too large after truncation. Try /clear to reset."
902                                    .red()
903                            );
904                            break;
905                        }
906                    } else if is_truncation_error(&err_str) {
907                        // Truncation error - try intelligent continuation
908                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
909                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
910
911                        // Count actually completed tools (not in-progress)
912                        let completed_count = completed_tools
913                            .iter()
914                            .filter(|t| !t.result_summary.contains("IN PROGRESS"))
915                            .count();
916                        let in_progress_count = completed_tools.len() - completed_count;
917
918                        if !completed_tools.is_empty() && continuation_count < MAX_CONTINUATIONS {
919                            // We have partial progress - continue from where we left off
920                            continuation_count += 1;
921                            let status_msg = if in_progress_count > 0 {
922                                format!(
923                                    "โš  Response truncated. {} completed, {} in-progress. Auto-continuing ({}/{})...",
924                                    completed_count,
925                                    in_progress_count,
926                                    continuation_count,
927                                    MAX_CONTINUATIONS
928                                )
929                            } else {
930                                format!(
931                                    "โš  Response truncated. {} tool calls completed. Auto-continuing ({}/{})...",
932                                    completed_count, continuation_count, MAX_CONTINUATIONS
933                                )
934                            };
935                            eprintln!("{}", status_msg.yellow());
936
937                            // Add partial progress to conversation history
938                            // NOTE: We intentionally pass empty tool_calls here because the
939                            // continuation prompt already contains the detailed file list.
940                            // Including them in history would duplicate the context and waste tokens.
941                            conversation_history.add_turn(
942                                current_input.clone(),
943                                format!("[Partial response - {} tools completed, {} in-progress before truncation. See continuation prompt for details.]",
944                                    completed_count, in_progress_count),
945                                vec![]  // Don't duplicate - continuation prompt has the details
946                            );
947
948                            // Check if we need compaction after adding this heavy turn
949                            // This is important for long multi-turn sessions with many tool calls
950                            if conversation_history.needs_compaction() {
951                                eprintln!(
952                                    "{}",
953                                    "  ๐Ÿ“ฆ Compacting history before continuation...".dimmed()
954                                );
955                                if let Some(summary) = conversation_history.compact() {
956                                    eprintln!(
957                                        "{}",
958                                        format!(
959                                            "  โœ“ Compressed {} turns",
960                                            summary.matches("Turn").count()
961                                        )
962                                        .dimmed()
963                                    );
964                                }
965                            }
966
967                            // Build continuation prompt with context
968                            current_input = build_continuation_prompt(
969                                &input,
970                                &completed_tools,
971                                &agent_thinking,
972                            );
973
974                            // Log continuation details for debugging
975                            eprintln!("{}", format!(
976                                "  โ†’ Continuing with {} files read, {} written, {} other actions tracked",
977                                completed_tools.iter().filter(|t| t.tool_name == "read_file").count(),
978                                completed_tools.iter().filter(|t| t.tool_name == "write_file" || t.tool_name == "write_files").count(),
979                                completed_tools.iter().filter(|t| t.tool_name != "read_file" && t.tool_name != "write_file" && t.tool_name != "write_files" && t.tool_name != "list_directory").count()
980                            ).dimmed());
981
982                            // Brief delay before continuation
983                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
984                            // Don't increment retry_attempt - this is progress via continuation
985                        } else if retry_attempt < MAX_RETRIES {
986                            // No tool calls completed - simple retry
987                            retry_attempt += 1;
988                            eprintln!(
989                                "{}",
990                                format!(
991                                    "โš  Response error (attempt {}/{}). Retrying...",
992                                    retry_attempt, MAX_RETRIES
993                                )
994                                .yellow()
995                            );
996                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
997                        } else {
998                            // Max retries/continuations reached
999                            eprintln!("{}", format!("Error: {}", e).red());
1000                            if continuation_count >= MAX_CONTINUATIONS {
1001                                eprintln!("{}", format!("Max continuations ({}) reached. The task is too complex for one request.", MAX_CONTINUATIONS).dimmed());
1002                            } else {
1003                                eprintln!(
1004                                    "{}",
1005                                    "Max retries reached. The response may be too complex."
1006                                        .dimmed()
1007                                );
1008                            }
1009                            eprintln!(
1010                                "{}",
1011                                "Try breaking your request into smaller parts.".dimmed()
1012                            );
1013                            break;
1014                        }
1015                    } else if err_str.contains("timeout") || err_str.contains("Timeout") {
1016                        // Timeout - simple retry
1017                        retry_attempt += 1;
1018                        if retry_attempt < MAX_RETRIES {
1019                            eprintln!(
1020                                "{}",
1021                                format!(
1022                                    "โš  Request timed out (attempt {}/{}). Retrying...",
1023                                    retry_attempt, MAX_RETRIES
1024                                )
1025                                .yellow()
1026                            );
1027                            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
1028                        } else {
1029                            eprintln!("{}", "Request timed out. Please try again.".red());
1030                            break;
1031                        }
1032                    } else {
1033                        // Unknown error - show details and break
1034                        eprintln!("{}", format!("Error: {}", e).red());
1035                        if continuation_count > 0 {
1036                            eprintln!(
1037                                "{}",
1038                                format!(
1039                                    "  (occurred during continuation attempt {})",
1040                                    continuation_count
1041                                )
1042                                .dimmed()
1043                            );
1044                        }
1045                        eprintln!("{}", "Error details for debugging:".dimmed());
1046                        eprintln!(
1047                            "{}",
1048                            format!("  - retry_attempt: {}/{}", retry_attempt, MAX_RETRIES)
1049                                .dimmed()
1050                        );
1051                        eprintln!(
1052                            "{}",
1053                            format!(
1054                                "  - continuation_count: {}/{}",
1055                                continuation_count, MAX_CONTINUATIONS
1056                            )
1057                            .dimmed()
1058                        );
1059                        break;
1060                    }
1061                }
1062            }
1063        }
1064        println!();
1065    }
1066
1067    Ok(())
1068}
1069
1070/// Extract tool call records from the hook state for history tracking
1071async fn extract_tool_calls_from_hook(hook: &ToolDisplayHook) -> Vec<ToolCallRecord> {
1072    let state = hook.state();
1073    let guard = state.lock().await;
1074
1075    guard
1076        .tool_calls
1077        .iter()
1078        .enumerate()
1079        .map(|(i, tc)| {
1080            let result = if tc.is_running {
1081                // Tool was in progress when error occurred
1082                "[IN PROGRESS - may need to be re-run]".to_string()
1083            } else if let Some(output) = &tc.output {
1084                truncate_string(output, 200)
1085            } else {
1086                "completed".to_string()
1087            };
1088
1089            ToolCallRecord {
1090                tool_name: tc.name.clone(),
1091                args_summary: truncate_string(&tc.args, 100),
1092                result_summary: result,
1093                // Generate a unique tool ID for proper message pairing
1094                tool_id: Some(format!("tool_{}_{}", tc.name, i)),
1095                // Mark read-only tools as droppable (their results can be re-fetched)
1096                droppable: matches!(
1097                    tc.name.as_str(),
1098                    "read_file" | "list_directory" | "analyze_project"
1099                ),
1100            }
1101        })
1102        .collect()
1103}
1104
1105/// Extract any agent thinking/messages from the hook for context
1106async fn extract_agent_messages_from_hook(hook: &ToolDisplayHook) -> Vec<String> {
1107    let state = hook.state();
1108    let guard = state.lock().await;
1109    guard.agent_messages.clone()
1110}
1111
/// Helper to truncate strings for summaries.
///
/// Returns `s` unchanged when it already fits within `max_len` bytes;
/// otherwise returns a prefix followed by `"..."`, keeping the total within
/// `max_len` bytes. The cut point is backed off to the nearest UTF-8 char
/// boundary: the previous version sliced at an arbitrary byte offset, which
/// panics on multi-byte (non-ASCII) input such as tool output containing
/// accented characters or emoji.
fn truncate_string(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }
    // Reserve room for the "..." suffix, then step back to a char boundary
    // so the slice below can never panic.
    let mut cut = max_len.saturating_sub(3);
    while cut > 0 && !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &s[..cut])
}
1120
1121/// Estimate token count from raw rig Messages
1122/// This is used for context length management to prevent "input too long" errors.
1123/// Estimates ~4 characters per token.
1124fn estimate_raw_history_tokens(messages: &[rig::completion::Message]) -> usize {
1125    use rig::completion::message::{AssistantContent, UserContent};
1126
1127    messages
1128        .iter()
1129        .map(|msg| -> usize {
1130            match msg {
1131                rig::completion::Message::User { content } => {
1132                    content
1133                        .iter()
1134                        .map(|c| -> usize {
1135                            match c {
1136                                UserContent::Text(t) => t.text.len() / 4,
1137                                _ => 100, // Estimate for images/documents
1138                            }
1139                        })
1140                        .sum::<usize>()
1141                }
1142                rig::completion::Message::Assistant { content, .. } => {
1143                    content
1144                        .iter()
1145                        .map(|c| -> usize {
1146                            match c {
1147                                AssistantContent::Text(t) => t.text.len() / 4,
1148                                AssistantContent::ToolCall(tc) => {
1149                                    // arguments is serde_json::Value, convert to string for length estimate
1150                                    let args_len = tc.function.arguments.to_string().len();
1151                                    (tc.function.name.len() + args_len) / 4
1152                                }
1153                                _ => 100,
1154                            }
1155                        })
1156                        .sum::<usize>()
1157                }
1158            }
1159        })
1160        .sum()
1161}
1162
1163/// Find a plan_create tool call in the list and extract plan info
1164/// Returns (plan_path, task_count) if found
1165fn find_plan_create_call(tool_calls: &[ToolCallRecord]) -> Option<(String, usize)> {
1166    for tc in tool_calls {
1167        if tc.tool_name == "plan_create" {
1168            // Try to parse the result_summary as JSON to extract plan_path
1169            // Note: result_summary may be truncated, so we have multiple fallbacks
1170            let plan_path =
1171                if let Ok(result) = serde_json::from_str::<serde_json::Value>(&tc.result_summary) {
1172                    result
1173                        .get("plan_path")
1174                        .and_then(|v| v.as_str())
1175                        .map(|s| s.to_string())
1176                } else {
1177                    None
1178                };
1179
1180            // If JSON parsing failed, find the most recently created plan file
1181            // This is more reliable than trying to reconstruct the path from truncated args
1182            let plan_path = plan_path.unwrap_or_else(|| {
1183                find_most_recent_plan_file().unwrap_or_else(|| "plans/plan.md".to_string())
1184            });
1185
1186            // Count tasks by reading the plan file directly
1187            let task_count = count_tasks_in_plan_file(&plan_path).unwrap_or(0);
1188
1189            return Some((plan_path, task_count));
1190        }
1191    }
1192    None
1193}
1194
/// Find the most recently created plan file in the plans directory
fn find_most_recent_plan_file() -> Option<String> {
    let cwd = std::env::current_dir().ok()?;
    let plans_dir = cwd.join("plans");
    if !plans_dir.exists() {
        return None;
    }

    // Select the `.md` file with the latest modification time. The comparison
    // is strictly-greater, so on a timestamp tie the first entry seen wins
    // (matching directory iteration order).
    let newest = std::fs::read_dir(&plans_dir)
        .ok()?
        .flatten()
        .filter_map(|entry| {
            let path = entry.path();
            if path.extension()? != "md" {
                return None;
            }
            let modified = entry.metadata().ok()?.modified().ok()?;
            Some((path, modified))
        })
        .fold(
            None::<(std::path::PathBuf, std::time::SystemTime)>,
            |best, candidate| match &best {
                Some((_, best_time)) if candidate.1 <= *best_time => best,
                _ => Some(candidate),
            },
        );

    // Report the path relative to the working directory when possible.
    newest.map(|(path, _)| {
        path.strip_prefix(&cwd)
            .map(|p| p.display().to_string())
            .unwrap_or_else(|_| path.display().to_string())
    })
}
1224
/// Count tasks (checkbox items) in a plan file.
///
/// A task line has the shape `- [ ]`, `- [x]`, `- [~]`, or `- [!]`,
/// optionally indented and with optional whitespace between the dash and the
/// opening bracket — the same lines the previous `^\s*-\s*\[[ x~!]\]` regex
/// matched. The regex was recompiled on every call; this version uses plain
/// string parsing, which avoids both the per-call compilation cost and the
/// fallible `Regex::new` path.
///
/// Returns `None` if the file cannot be read (tries the path as given, then
/// relative to the current directory).
fn count_tasks_in_plan_file(plan_path: &str) -> Option<usize> {
    // Try both relative and absolute paths.
    let path = std::path::Path::new(plan_path);
    let content = if path.exists() {
        std::fs::read_to_string(path).ok()?
    } else {
        // Try with current directory.
        std::fs::read_to_string(std::env::current_dir().ok()?.join(plan_path)).ok()?
    };

    // Checkbox states: ' ' pending, 'x' done, '~' in-progress, '!' blocked.
    fn is_task_line(line: &str) -> bool {
        let rest = line.trim_start();
        let rest = match rest.strip_prefix('-') {
            Some(r) => r,
            None => return false,
        };
        let mut chars = rest.trim_start().chars();
        chars.next() == Some('[')
            && matches!(chars.next(), Some(' ' | 'x' | '~' | '!'))
            && chars.next() == Some(']')
    }

    Some(content.lines().filter(|line| is_task_line(line)).count())
}
1247
/// Check if an error is a truncation/JSON parsing error that can be recovered via continuation
fn is_truncation_error(err_str: &str) -> bool {
    // Any of these substrings indicates the model's response was cut off
    // mid-JSON and a continuation prompt may recover the work.
    const MARKERS: [&str; 4] = ["JsonError", "EOF while parsing", "JSON", "unexpected end"];
    MARKERS.iter().any(|marker| err_str.contains(marker))
}
1255
/// Check if error is "input too long" - context exceeds model limit
/// This happens when conversation history grows beyond what the model can handle.
/// Recovery: compact history and retry with reduced context.
fn is_input_too_long_error(err_str: &str) -> bool {
    // Provider-specific phrasings for a context-length overflow.
    const MARKERS: [&str; 6] = [
        "too long",
        "Too long",
        "context length",
        "maximum context",
        "exceeds the model",
        "Input is too long",
    ];
    MARKERS.iter().any(|marker| err_str.contains(marker))
}
1267
1268/// Build a continuation prompt that tells the AI what work was completed
1269/// and asks it to continue from where it left off
1270fn build_continuation_prompt(
1271    original_task: &str,
1272    completed_tools: &[ToolCallRecord],
1273    agent_thinking: &[String],
1274) -> String {
1275    use std::collections::HashSet;
1276
1277    // Group tools by type and extract unique files read
1278    let mut files_read: HashSet<String> = HashSet::new();
1279    let mut files_written: HashSet<String> = HashSet::new();
1280    let mut dirs_listed: HashSet<String> = HashSet::new();
1281    let mut other_tools: Vec<String> = Vec::new();
1282    let mut in_progress: Vec<String> = Vec::new();
1283
1284    for tool in completed_tools {
1285        let is_in_progress = tool.result_summary.contains("IN PROGRESS");
1286
1287        if is_in_progress {
1288            in_progress.push(format!("{}({})", tool.tool_name, tool.args_summary));
1289            continue;
1290        }
1291
1292        match tool.tool_name.as_str() {
1293            "read_file" => {
1294                // Extract path from args
1295                files_read.insert(tool.args_summary.clone());
1296            }
1297            "write_file" | "write_files" => {
1298                files_written.insert(tool.args_summary.clone());
1299            }
1300            "list_directory" => {
1301                dirs_listed.insert(tool.args_summary.clone());
1302            }
1303            _ => {
1304                other_tools.push(format!(
1305                    "{}({})",
1306                    tool.tool_name,
1307                    truncate_string(&tool.args_summary, 40)
1308                ));
1309            }
1310        }
1311    }
1312
1313    let mut prompt = format!(
1314        "[CONTINUE] Your previous response was interrupted. DO NOT repeat completed work.\n\n\
1315        Original task: {}\n",
1316        truncate_string(original_task, 500)
1317    );
1318
1319    // Show files already read - CRITICAL for preventing re-reads
1320    if !files_read.is_empty() {
1321        prompt.push_str("\n== FILES ALREADY READ (do NOT read again) ==\n");
1322        for file in &files_read {
1323            prompt.push_str(&format!("  - {}\n", file));
1324        }
1325    }
1326
1327    if !dirs_listed.is_empty() {
1328        prompt.push_str("\n== DIRECTORIES ALREADY LISTED ==\n");
1329        for dir in &dirs_listed {
1330            prompt.push_str(&format!("  - {}\n", dir));
1331        }
1332    }
1333
1334    if !files_written.is_empty() {
1335        prompt.push_str("\n== FILES ALREADY WRITTEN ==\n");
1336        for file in &files_written {
1337            prompt.push_str(&format!("  - {}\n", file));
1338        }
1339    }
1340
1341    if !other_tools.is_empty() {
1342        prompt.push_str("\n== OTHER COMPLETED ACTIONS ==\n");
1343        for tool in other_tools.iter().take(20) {
1344            prompt.push_str(&format!("  - {}\n", tool));
1345        }
1346        if other_tools.len() > 20 {
1347            prompt.push_str(&format!("  ... and {} more\n", other_tools.len() - 20));
1348        }
1349    }
1350
1351    if !in_progress.is_empty() {
1352        prompt.push_str("\n== INTERRUPTED (may need re-run) ==\n");
1353        for tool in &in_progress {
1354            prompt.push_str(&format!("  โš  {}\n", tool));
1355        }
1356    }
1357
1358    // Include last thinking context if available
1359    if !agent_thinking.is_empty() {
1360        if let Some(last_thought) = agent_thinking.last() {
1361            prompt.push_str(&format!(
1362                "\n== YOUR LAST THOUGHTS ==\n\"{}\"\n",
1363                truncate_string(last_thought, 300)
1364            ));
1365        }
1366    }
1367
1368    prompt.push_str("\n== INSTRUCTIONS ==\n");
1369    prompt.push_str("IMPORTANT: Your previous response was too long and got cut off.\n");
1370    prompt.push_str("1. Do NOT re-read files listed above - they are already in context.\n");
1371    prompt.push_str("2. If writing a document, write it in SECTIONS - complete one section now, then continue.\n");
1372    prompt.push_str("3. Keep your response SHORT and focused. Better to complete small chunks than fail on large ones.\n");
1373    prompt.push_str("4. If the task involves writing a file, START WRITING NOW - don't explain what you'll do.\n");
1374
1375    prompt
1376}
1377
1378/// Run a single query and return the response
1379pub async fn run_query(
1380    project_path: &Path,
1381    query: &str,
1382    provider: ProviderType,
1383    model: Option<String>,
1384) -> AgentResult<String> {
1385    use tools::*;
1386
1387    let project_path_buf = project_path.to_path_buf();
1388    // Select prompt based on query type (analysis vs generation)
1389    // For single queries (non-interactive), always use standard mode
1390    let preamble = get_system_prompt(project_path, Some(query), PlanMode::default());
1391    let is_generation = prompts::is_generation_query(query);
1392
1393    match provider {
1394        ProviderType::OpenAI => {
1395            let client = openai::Client::from_env();
1396            let model_name = model.as_deref().unwrap_or("gpt-5.2");
1397
1398            // For GPT-5.x reasoning models, enable reasoning with summary output
1399            let reasoning_params =
1400                if model_name.starts_with("gpt-5") || model_name.starts_with("o1") {
1401                    Some(serde_json::json!({
1402                        "reasoning": {
1403                            "effort": "medium",
1404                            "summary": "detailed"
1405                        }
1406                    }))
1407                } else {
1408                    None
1409                };
1410
1411            let mut builder = client
1412                .agent(model_name)
1413                .preamble(&preamble)
1414                .max_tokens(4096)
1415                .tool(AnalyzeTool::new(project_path_buf.clone()))
1416                .tool(SecurityScanTool::new(project_path_buf.clone()))
1417                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
1418                .tool(HadolintTool::new(project_path_buf.clone()))
1419                .tool(DclintTool::new(project_path_buf.clone()))
1420                .tool(TerraformFmtTool::new(project_path_buf.clone()))
1421                .tool(TerraformValidateTool::new(project_path_buf.clone()))
1422                .tool(TerraformInstallTool::new())
1423                .tool(ReadFileTool::new(project_path_buf.clone()))
1424                .tool(ListDirectoryTool::new(project_path_buf.clone()));
1425
1426            // Add generation tools if this is a generation query
1427            if is_generation {
1428                builder = builder
1429                    .tool(WriteFileTool::new(project_path_buf.clone()))
1430                    .tool(WriteFilesTool::new(project_path_buf.clone()))
1431                    .tool(ShellTool::new(project_path_buf.clone()));
1432            }
1433
1434            if let Some(params) = reasoning_params {
1435                builder = builder.additional_params(params);
1436            }
1437
1438            let agent = builder.build();
1439
1440            agent
1441                .prompt(query)
1442                .multi_turn(50)
1443                .await
1444                .map_err(|e| AgentError::ProviderError(e.to_string()))
1445        }
1446        ProviderType::Anthropic => {
1447            let client = anthropic::Client::from_env();
1448            let model_name = model.as_deref().unwrap_or("claude-sonnet-4-5-20250929");
1449
1450            // TODO: Extended thinking for Claude is disabled because rig doesn't properly
1451            // handle thinking blocks in multi-turn conversations with tool use.
1452            // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference.
1453
1454            let mut builder = client
1455                .agent(model_name)
1456                .preamble(&preamble)
1457                .max_tokens(4096)
1458                .tool(AnalyzeTool::new(project_path_buf.clone()))
1459                .tool(SecurityScanTool::new(project_path_buf.clone()))
1460                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
1461                .tool(HadolintTool::new(project_path_buf.clone()))
1462                .tool(DclintTool::new(project_path_buf.clone()))
1463                .tool(TerraformFmtTool::new(project_path_buf.clone()))
1464                .tool(TerraformValidateTool::new(project_path_buf.clone()))
1465                .tool(TerraformInstallTool::new())
1466                .tool(ReadFileTool::new(project_path_buf.clone()))
1467                .tool(ListDirectoryTool::new(project_path_buf.clone()));
1468
1469            // Add generation tools if this is a generation query
1470            if is_generation {
1471                builder = builder
1472                    .tool(WriteFileTool::new(project_path_buf.clone()))
1473                    .tool(WriteFilesTool::new(project_path_buf.clone()))
1474                    .tool(ShellTool::new(project_path_buf.clone()));
1475            }
1476
1477            let agent = builder.build();
1478
1479            agent
1480                .prompt(query)
1481                .multi_turn(50)
1482                .await
1483                .map_err(|e| AgentError::ProviderError(e.to_string()))
1484        }
1485        ProviderType::Bedrock => {
1486            // Bedrock provider via rig-bedrock - same pattern as Anthropic
1487            let client = rig_bedrock::client::Client::from_env();
1488            let model_name = model
1489                .as_deref()
1490                .unwrap_or("global.anthropic.claude-sonnet-4-5-20250929-v1:0");
1491
1492            // Extended thinking for Claude via Bedrock
1493            let thinking_params = serde_json::json!({
1494                "thinking": {
1495                    "type": "enabled",
1496                    "budget_tokens": 16000
1497                }
1498            });
1499
1500            let mut builder = client
1501                .agent(model_name)
1502                .preamble(&preamble)
1503                .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
1504                .tool(AnalyzeTool::new(project_path_buf.clone()))
1505                .tool(SecurityScanTool::new(project_path_buf.clone()))
1506                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
1507                .tool(HadolintTool::new(project_path_buf.clone()))
1508                .tool(DclintTool::new(project_path_buf.clone()))
1509                .tool(TerraformFmtTool::new(project_path_buf.clone()))
1510                .tool(TerraformValidateTool::new(project_path_buf.clone()))
1511                .tool(TerraformInstallTool::new())
1512                .tool(ReadFileTool::new(project_path_buf.clone()))
1513                .tool(ListDirectoryTool::new(project_path_buf.clone()));
1514
1515            // Add generation tools if this is a generation query
1516            if is_generation {
1517                builder = builder
1518                    .tool(WriteFileTool::new(project_path_buf.clone()))
1519                    .tool(WriteFilesTool::new(project_path_buf.clone()))
1520                    .tool(ShellTool::new(project_path_buf.clone()));
1521            }
1522
1523            let agent = builder.additional_params(thinking_params).build();
1524
1525            agent
1526                .prompt(query)
1527                .multi_turn(50)
1528                .await
1529                .map_err(|e| AgentError::ProviderError(e.to_string()))
1530        }
1531    }
1532}