syncable_cli/agent/
mod.rs

1//! Agent module for interactive AI-powered CLI assistance
2//!
3//! This module provides an agent layer using the Rig library that allows users
4//! to interact with the CLI through natural language conversations.
5//!
6//! # Features
7//!
8//! - **Conversation History**: Maintains context across multiple turns
9//! - **Automatic Compaction**: Compresses old history when token count exceeds threshold
10//! - **Tool Tracking**: Records tool calls for better context preservation
11//!
12//! # Usage
13//!
14//! ```bash
15//! # Interactive mode
16//! sync-ctl chat
17//!
18//! # With specific provider
19//! sync-ctl chat --provider openai --model gpt-5.2
20//!
21//! # Single query
22//! sync-ctl chat --query "What security issues does this project have?"
23//! ```
24//!
25//! # Interactive Commands
26//!
27//! - `/model` - Switch to a different AI model
28//! - `/provider` - Switch provider (prompts for API key if needed)
29//! - `/help` - Show available commands
30//! - `/clear` - Clear conversation history
31//! - `/exit` - Exit the chat
32
33pub mod commands;
34pub mod compact;
35pub mod history;
36pub mod ide;
37pub mod persistence;
38pub mod prompts;
39pub mod session;
40pub mod tools;
41pub mod ui;
42use colored::Colorize;
43use commands::TokenUsage;
44use history::{ConversationHistory, ToolCallRecord};
45use ide::IdeClient;
46use rig::{
47    client::{CompletionClient, ProviderClient},
48    completion::Prompt,
49    providers::{anthropic, openai},
50};
51use session::{ChatSession, PlanMode};
52use std::path::Path;
53use std::sync::Arc;
54use tokio::sync::Mutex as TokioMutex;
55use ui::{ResponseFormatter, ToolDisplayHook};
56
57/// Provider type for the agent
/// Provider type for the agent
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ProviderType {
    #[default]
    OpenAI,
    Anthropic,
    Bedrock,
}

impl std::fmt::Display for ProviderType {
    /// Renders the provider as its lowercase CLI identifier.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Self::OpenAI => "openai",
            Self::Anthropic => "anthropic",
            Self::Bedrock => "bedrock",
        };
        f.write_str(name)
    }
}

impl std::str::FromStr for ProviderType {
    type Err = String;

    /// Parses a provider name case-insensitively; "aws" and "aws-bedrock"
    /// are accepted as aliases for Bedrock.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        match normalized.as_str() {
            "openai" => Ok(Self::OpenAI),
            "anthropic" => Ok(Self::Anthropic),
            "bedrock" | "aws" | "aws-bedrock" => Ok(Self::Bedrock),
            _ => Err(format!(
                "Unknown provider: {}. Use: openai, anthropic, or bedrock",
                s
            )),
        }
    }
}
91
/// Error types for the agent
#[derive(Debug, thiserror::Error)]
pub enum AgentError {
    /// The required API key is not configured; payload names the
    /// environment variable the user should set (e.g. `OPENAI_API_KEY`).
    #[error("Missing API key. Set {0} environment variable.")]
    MissingApiKey(String),

    /// An error surfaced by the underlying LLM provider client.
    #[error("Provider error: {0}")]
    ProviderError(String),

    /// A failure raised while running one of the agent's tools.
    #[error("Tool error: {0}")]
    ToolError(String),
}

/// Convenience alias for results produced by agent operations.
pub type AgentResult<T> = Result<T, AgentError>;
106
107/// Get the system prompt for the agent based on query type and plan mode
108fn get_system_prompt(project_path: &Path, query: Option<&str>, plan_mode: PlanMode) -> String {
109    // In planning mode, use the read-only exploration prompt
110    if plan_mode.is_planning() {
111        return prompts::get_planning_prompt(project_path);
112    }
113
114    if let Some(q) = query {
115        // First check if it's a code development task (highest priority)
116        if prompts::is_code_development_query(q) {
117            return prompts::get_code_development_prompt(project_path);
118        }
119        // Then check if it's DevOps generation (Docker, Terraform, Helm)
120        if prompts::is_generation_query(q) {
121            return prompts::get_devops_prompt(project_path, Some(q));
122        }
123    }
124    // Default to analysis prompt
125    prompts::get_analysis_prompt(project_path)
126}
127
128/// Run the agent in interactive mode with custom REPL supporting /model and /provider commands
129pub async fn run_interactive(
130    project_path: &Path,
131    provider: ProviderType,
132    model: Option<String>,
133) -> AgentResult<()> {
134    use tools::*;
135
136    let mut session = ChatSession::new(project_path, provider, model);
137
138    // Shared background process manager for Prometheus port-forwards
139    let bg_manager = Arc::new(BackgroundProcessManager::new());
140
141    // Terminal layout for split screen is disabled for now - see notes below
142    // let terminal_layout = ui::TerminalLayout::new();
143    // let layout_state = terminal_layout.state();
144
145    // Initialize conversation history with compaction support
146    let mut conversation_history = ConversationHistory::new();
147
148    // Initialize IDE client for native diff viewing
149    let ide_client: Option<Arc<TokioMutex<IdeClient>>> = {
150        let mut client = IdeClient::new().await;
151        if client.is_ide_available() {
152            match client.connect().await {
153                Ok(()) => {
154                    println!(
155                        "{} Connected to {} IDE companion",
156                        "โœ“".green(),
157                        client.ide_name().unwrap_or("VS Code")
158                    );
159                    Some(Arc::new(TokioMutex::new(client)))
160                }
161                Err(e) => {
162                    // IDE detected but companion not running or connection failed
163                    println!("{} IDE companion not connected: {}", "!".yellow(), e);
164                    None
165                }
166            }
167        } else {
168            println!(
169                "{} No IDE detected (TERM_PROGRAM={})",
170                "ยท".dimmed(),
171                std::env::var("TERM_PROGRAM").unwrap_or_default()
172            );
173            None
174        }
175    };
176
177    // Load API key from config file to env if not already set
178    ChatSession::load_api_key_to_env(session.provider);
179
180    // Check if API key is configured, prompt if not
181    if !ChatSession::has_api_key(session.provider) {
182        ChatSession::prompt_api_key(session.provider)?;
183    }
184
185    session.print_banner();
186
187    // NOTE: Terminal layout with ANSI scroll regions is disabled for now.
188    // The scroll region approach conflicts with the existing input/output flow.
189    // TODO: Implement proper scroll region support that integrates with the input handler.
190    // For now, we rely on the pause/resume mechanism in progress indicator.
191    //
192    // if let Err(e) = terminal_layout.init() {
193    //     eprintln!(
194    //         "{}",
195    //         format!("Note: Terminal layout initialization failed: {}. Using fallback mode.", e)
196    //             .dimmed()
197    //     );
198    // }
199
200    // Raw Rig messages for multi-turn - preserves Reasoning blocks for thinking
201    // Our ConversationHistory only stores text summaries, but rig needs full Message structure
202    let mut raw_chat_history: Vec<rig::completion::Message> = Vec::new();
203
204    // Pending input for auto-continue after plan creation
205    let mut pending_input: Option<String> = None;
206    // Auto-accept mode for plan execution (skips write confirmations)
207    let mut auto_accept_writes = false;
208
209    // Initialize session recorder for conversation persistence
210    let mut session_recorder = persistence::SessionRecorder::new(project_path);
211
212    loop {
213        // Show conversation status if we have history
214        if !conversation_history.is_empty() {
215            println!(
216                "{}",
217                format!("  ๐Ÿ’ฌ Context: {}", conversation_history.status()).dimmed()
218            );
219        }
220
221        // Check for pending input (from plan menu selection)
222        let input = if let Some(pending) = pending_input.take() {
223            // Show what we're executing
224            println!("{} {}", "โ†’".cyan(), pending.dimmed());
225            pending
226        } else {
227            // New user turn - reset auto-accept mode from previous plan execution
228            auto_accept_writes = false;
229
230            // Read user input (returns InputResult)
231            let input_result = match session.read_input() {
232                Ok(result) => result,
233                Err(_) => break,
234            };
235
236            // Handle the input result
237            match input_result {
238                ui::InputResult::Submit(text) => ChatSession::process_submitted_text(&text),
239                ui::InputResult::Cancel | ui::InputResult::Exit => break,
240                ui::InputResult::TogglePlanMode => {
241                    // Toggle planning mode - minimal feedback, no extra newlines
242                    let new_mode = session.toggle_plan_mode();
243                    if new_mode.is_planning() {
244                        println!("{}", "โ˜… plan mode".yellow());
245                    } else {
246                        println!("{}", "โ–ถ standard mode".green());
247                    }
248                    continue;
249                }
250            }
251        };
252
253        if input.is_empty() {
254            continue;
255        }
256
257        // Check for commands
258        if ChatSession::is_command(&input) {
259            // Special handling for /clear to also clear conversation history
260            if input.trim().to_lowercase() == "/clear" || input.trim().to_lowercase() == "/c" {
261                conversation_history.clear();
262                raw_chat_history.clear();
263            }
264            match session.process_command(&input) {
265                Ok(true) => {
266                    // Check if /resume loaded a session
267                    if let Some(record) = session.pending_resume.take() {
268                        // Display previous messages
269                        println!();
270                        println!("{}", "โ”€โ”€โ”€ Previous Conversation โ”€โ”€โ”€".dimmed());
271                        for msg in &record.messages {
272                            match msg.role {
273                                persistence::MessageRole::User => {
274                                    println!();
275                                    println!(
276                                        "{} {}",
277                                        "You:".cyan().bold(),
278                                        truncate_string(&msg.content, 500)
279                                    );
280                                }
281                                persistence::MessageRole::Assistant => {
282                                    println!();
283                                    // Show tool calls if any (same format as live display)
284                                    if let Some(ref tools) = msg.tool_calls {
285                                        for tc in tools {
286                                            // Match live tool display: green dot for completed, cyan bold name
287                                            if tc.args_summary.is_empty() {
288                                                println!(
289                                                    "{} {}",
290                                                    "โ—".green(),
291                                                    tc.name.cyan().bold()
292                                                );
293                                            } else {
294                                                println!(
295                                                    "{} {}({})",
296                                                    "โ—".green(),
297                                                    tc.name.cyan().bold(),
298                                                    truncate_string(&tc.args_summary, 50).dimmed()
299                                                );
300                                            }
301                                        }
302                                    }
303                                    // Show response (same ResponseFormatter as live)
304                                    if !msg.content.is_empty() {
305                                        ResponseFormatter::print_response(&truncate_string(
306                                            &msg.content,
307                                            1000,
308                                        ));
309                                    }
310                                }
311                                persistence::MessageRole::System => {
312                                    // Skip system messages in display
313                                }
314                            }
315                        }
316                        println!("{}", "โ”€โ”€โ”€ End of History โ”€โ”€โ”€".dimmed());
317                        println!();
318
319                        // Try to restore from history_snapshot (new format with full context)
320                        let restored_from_snapshot = if let Some(history_json) =
321                            &record.history_snapshot
322                        {
323                            match ConversationHistory::from_json(history_json) {
324                                Ok(restored) => {
325                                    conversation_history = restored;
326                                    // Rebuild raw_chat_history from restored conversation_history
327                                    raw_chat_history = conversation_history.to_messages();
328                                    println!(
329                                            "{}",
330                                            "  โœ“ Restored full conversation context (including compacted history)".green()
331                                        );
332                                    true
333                                }
334                                Err(e) => {
335                                    eprintln!(
336                                        "{}",
337                                        format!(
338                                            "  Warning: Failed to restore history snapshot: {}",
339                                            e
340                                        )
341                                        .yellow()
342                                    );
343                                    false
344                                }
345                            }
346                        } else {
347                            false
348                        };
349
350                        // Fallback: Load from messages (old format or if snapshot failed)
351                        if !restored_from_snapshot {
352                            // Load messages into raw_chat_history for AI context
353                            for msg in &record.messages {
354                                match msg.role {
355                                    persistence::MessageRole::User => {
356                                        raw_chat_history.push(rig::completion::Message::User {
357                                            content: rig::one_or_many::OneOrMany::one(
358                                                rig::completion::message::UserContent::text(
359                                                    &msg.content,
360                                                ),
361                                            ),
362                                        });
363                                    }
364                                    persistence::MessageRole::Assistant => {
365                                        raw_chat_history
366                                            .push(rig::completion::Message::Assistant {
367                                            id: Some(msg.id.clone()),
368                                            content: rig::one_or_many::OneOrMany::one(
369                                                rig::completion::message::AssistantContent::text(
370                                                    &msg.content,
371                                                ),
372                                            ),
373                                        });
374                                    }
375                                    persistence::MessageRole::System => {}
376                                }
377                            }
378
379                            // Load into conversation_history with tool calls from message records
380                            for msg in &record.messages {
381                                if msg.role == persistence::MessageRole::User {
382                                    // Find the next assistant message
383                                    let (response, tool_calls) = record
384                                        .messages
385                                        .iter()
386                                        .skip_while(|m| m.id != msg.id)
387                                        .skip(1)
388                                        .find(|m| m.role == persistence::MessageRole::Assistant)
389                                        .map(|m| {
390                                            let tcs = m.tool_calls.as_ref().map(|calls| {
391                                                calls
392                                                    .iter()
393                                                    .map(|tc| history::ToolCallRecord {
394                                                        tool_name: tc.name.clone(),
395                                                        args_summary: tc.args_summary.clone(),
396                                                        result_summary: tc.result_summary.clone(),
397                                                        tool_id: None,
398                                                        droppable: false,
399                                                    })
400                                                    .collect::<Vec<_>>()
401                                            });
402                                            (m.content.clone(), tcs.unwrap_or_default())
403                                        })
404                                        .unwrap_or_default();
405
406                                    conversation_history.add_turn(
407                                        msg.content.clone(),
408                                        response,
409                                        tool_calls,
410                                    );
411                                }
412                            }
413                            println!(
414                                "{}",
415                                format!(
416                                    "  โœ“ Loaded {} messages (legacy format).",
417                                    record.messages.len()
418                                )
419                                .green()
420                            );
421                        }
422                        println!();
423                    }
424                    continue;
425                }
426                Ok(false) => break, // /exit
427                Err(e) => {
428                    eprintln!("{}", format!("Error: {}", e).red());
429                    continue;
430                }
431            }
432        }
433
434        // Check API key before making request (in case provider changed)
435        if !ChatSession::has_api_key(session.provider) {
436            eprintln!(
437                "{}",
438                "No API key configured. Use /provider to set one.".yellow()
439            );
440            continue;
441        }
442
443        // Check if compaction is needed before making the request
444        if conversation_history.needs_compaction() {
445            println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
446            if let Some(summary) = conversation_history.compact() {
447                println!(
448                    "{}",
449                    format!("  โœ“ Compressed {} turns", summary.matches("Turn").count()).dimmed()
450                );
451            }
452        }
453
454        // Pre-request check: estimate if we're approaching context limit
455        // Check raw_chat_history (actual messages) not conversation_history
456        // because conversation_history may be out of sync
457        let estimated_input_tokens = estimate_raw_history_tokens(&raw_chat_history)
458            + input.len() / 4  // New input
459            + 5000; // System prompt overhead estimate
460
461        if estimated_input_tokens > 150_000 {
462            println!(
463                "{}",
464                "  โš  Large context detected. Pre-truncating...".yellow()
465            );
466
467            let old_count = raw_chat_history.len();
468            // Keep last 20 messages when approaching limit
469            if raw_chat_history.len() > 20 {
470                let drain_count = raw_chat_history.len() - 20;
471                raw_chat_history.drain(0..drain_count);
472                // Ensure history starts with User message for OpenAI Responses API compatibility
473                ensure_history_starts_with_user(&mut raw_chat_history);
474                // Preserve compacted summary while clearing turns to stay in sync
475                conversation_history.clear_turns_preserve_context();
476                println!(
477                    "{}",
478                    format!(
479                        "  โœ“ Truncated {} โ†’ {} messages",
480                        old_count,
481                        raw_chat_history.len()
482                    )
483                    .dimmed()
484                );
485            }
486        }
487
488        // Retry loop for automatic error recovery
489        // MAX_RETRIES is for failures without progress
490        // MAX_CONTINUATIONS is for truncations WITH progress (more generous)
491        // TOOL_CALL_CHECKPOINT is the interval at which we ask user to confirm
492        // MAX_TOOL_CALLS is the absolute maximum (300 = 6 checkpoints x 50)
493        const MAX_RETRIES: u32 = 3;
494        const MAX_CONTINUATIONS: u32 = 10;
495        const _TOOL_CALL_CHECKPOINT: usize = 50;
496        const MAX_TOOL_CALLS: usize = 300;
497        let mut retry_attempt = 0;
498        let mut continuation_count = 0;
499        let mut total_tool_calls: usize = 0;
500        let mut auto_continue_tools = false; // User can select "always" to skip future prompts
501        let mut current_input = input.clone();
502        let mut succeeded = false;
503
504        while retry_attempt < MAX_RETRIES && continuation_count < MAX_CONTINUATIONS && !succeeded {
505            // Log if this is a continuation attempt
506            if continuation_count > 0 {
507                eprintln!("{}", "  ๐Ÿ“ก Sending continuation request...".dimmed());
508            }
509
510            // Create hook for Claude Code style tool display
511            let hook = ToolDisplayHook::new();
512
513            // Create progress indicator for visual feedback during generation
514            let progress = ui::GenerationIndicator::new();
515            // Layout connection disabled - using inline progress mode
516            // progress.state().set_layout(layout_state.clone());
517            hook.set_progress_state(progress.state()).await;
518
519            let project_path_buf = session.project_path.clone();
520            // Select prompt based on query type (analysis vs generation) and plan mode
521            let preamble = get_system_prompt(
522                &session.project_path,
523                Some(&current_input),
524                session.plan_mode,
525            );
526            let is_generation = prompts::is_generation_query(&current_input);
527            let is_planning = session.plan_mode.is_planning();
528
529            // Note: using raw_chat_history directly which preserves Reasoning blocks
530            // This is needed for extended thinking to work with multi-turn conversations
531
532            // Get progress state for interrupt detection
533            let progress_state = progress.state();
534
535            // Use tokio::select! to race the API call against Ctrl+C
536            // This allows immediate cancellation, not just between tool calls
537            let mut user_interrupted = false;
538
539            // API call with Ctrl+C interrupt support
540            let response = tokio::select! {
541                biased; // Check ctrl_c first for faster response
542
543                _ = tokio::signal::ctrl_c() => {
544                    user_interrupted = true;
545                    Err::<String, String>("User cancelled".to_string())
546                }
547
548                result = async {
549                    match session.provider {
550                ProviderType::OpenAI => {
551                    // Use Responses API (default) for reasoning model support.
552                    // rig-core 0.28+ handles Reasoning items properly in multi-turn.
553                    let client = openai::Client::from_env();
554
555                    let mut builder = client
556                        .agent(&session.model)
557                        .preamble(&preamble)
558                        .max_tokens(4096)
559                        .tool(AnalyzeTool::new(project_path_buf.clone()))
560                        .tool(SecurityScanTool::new(project_path_buf.clone()))
561                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
562                        .tool(HadolintTool::new(project_path_buf.clone()))
563                        .tool(DclintTool::new(project_path_buf.clone()))
564                        .tool(KubelintTool::new(project_path_buf.clone()))
565                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
566                        .tool(K8sCostsTool::new(project_path_buf.clone()))
567                        .tool(K8sDriftTool::new(project_path_buf.clone()))
568                        .tool(HelmlintTool::new(project_path_buf.clone()))
569                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
570                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
571                        .tool(TerraformInstallTool::new())
572                        .tool(ReadFileTool::new(project_path_buf.clone()))
573                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
574                        .tool(WebFetchTool::new())
575                        // Prometheus discovery and connection tools for live K8s analysis
576                        .tool(PrometheusDiscoverTool::new())
577                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
578                        // RAG retrieval tools for compressed tool outputs
579                        .tool(RetrieveOutputTool::new())
580                        .tool(ListOutputsTool::new());
581
582                    // Add tools based on mode
583                    if is_planning {
584                        // Plan mode: read-only shell + plan creation tools
585                        builder = builder
586                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
587                            .tool(PlanCreateTool::new(project_path_buf.clone()))
588                            .tool(PlanListTool::new(project_path_buf.clone()));
589                    } else if is_generation {
590                        // Standard mode + generation query: all tools including file writes and plan execution
591                        let (mut write_file_tool, mut write_files_tool) =
592                            if let Some(ref client) = ide_client {
593                                (
594                                    WriteFileTool::new(project_path_buf.clone())
595                                        .with_ide_client(client.clone()),
596                                    WriteFilesTool::new(project_path_buf.clone())
597                                        .with_ide_client(client.clone()),
598                                )
599                            } else {
600                                (
601                                    WriteFileTool::new(project_path_buf.clone()),
602                                    WriteFilesTool::new(project_path_buf.clone()),
603                                )
604                            };
605                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
606                        if auto_accept_writes {
607                            write_file_tool = write_file_tool.without_confirmation();
608                            write_files_tool = write_files_tool.without_confirmation();
609                        }
610                        builder = builder
611                            .tool(write_file_tool)
612                            .tool(write_files_tool)
613                            .tool(ShellTool::new(project_path_buf.clone()))
614                            .tool(PlanListTool::new(project_path_buf.clone()))
615                            .tool(PlanNextTool::new(project_path_buf.clone()))
616                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
617                    }
618
619                    // Enable reasoning for OpenAI reasoning models (GPT-5.x, O1, O3, O4)
620                    let model_lower = session.model.to_lowercase();
621                    let is_reasoning_model = model_lower.starts_with("gpt-5")
622                        || model_lower.starts_with("gpt5")
623                        || model_lower.starts_with("o1")
624                        || model_lower.starts_with("o3")
625                        || model_lower.starts_with("o4");
626
627                    let agent = if is_reasoning_model {
628                        let reasoning_params = serde_json::json!({
629                            "reasoning": {
630                                "effort": "medium",
631                                "summary": "detailed"
632                            }
633                        });
634                        builder.additional_params(reasoning_params).build()
635                    } else {
636                        builder.build()
637                    };
638
639                    // Use multi_turn with Responses API
640                    agent
641                        .prompt(&current_input)
642                        .with_history(&mut raw_chat_history)
643                        .with_hook(hook.clone())
644                        .multi_turn(50)
645                        .await
646                }
                ProviderType::Anthropic => {
                    // Reads ANTHROPIC_* credentials from the environment.
                    let client = anthropic::Client::from_env();

                    // TODO: Extended thinking for Claude is disabled because rig-bedrock/rig-anthropic
                    // don't properly handle thinking blocks in multi-turn conversations with tool use.
                    // When thinking is enabled, ALL assistant messages must start with thinking blocks
                    // BEFORE tool_use blocks, but rig doesn't preserve/replay these.
                    // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference impl.

                    // Base toolset: analysis, linting, K8s/Terraform inspection,
                    // read-only file access, web fetch, Prometheus discovery, and
                    // RAG retrieval of compressed tool outputs. Write-capable tools
                    // are added conditionally below based on the session mode.
                    // NOTE(review): this builder chain is duplicated almost verbatim
                    // in the Bedrock arm — consider extracting a shared helper; verify
                    // the two builder types permit it before refactoring.
                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(4096)
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new());

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools including file writes and plan execution
                        // When an IDE client is connected, write tools route diffs
                        // through the IDE for review; otherwise they write directly.
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    let agent = builder.build();

                    // Allow up to 50 tool call turns for complex generation tasks
                    // Use hook to display tool calls as they happen
                    // Pass conversation history for context continuity
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
                ProviderType::Bedrock => {
                    // Bedrock provider via rig-bedrock - same pattern as OpenAI/Anthropic
                    // Reads AWS credentials/region from the environment.
                    let client = crate::bedrock::client::Client::from_env();

                    // Extended thinking for Claude models via Bedrock
                    // This enables Claude to show its reasoning process before responding.
                    // Requires vendored rig-bedrock that preserves Reasoning blocks with tool calls.
                    // Extended thinking budget - reduced to help with rate limits
                    // 8000 is enough for most tasks, increase to 16000 for complex analysis
                    let thinking_params = serde_json::json!({
                        "thinking": {
                            "type": "enabled",
                            "budget_tokens": 8000
                        }
                    });

                    // Same base toolset as the Anthropic arm (analysis, linting,
                    // K8s/Terraform, read-only FS, web fetch, Prometheus, RAG retrieval).
                    // NOTE(review): duplicated with the Anthropic arm — candidate for a
                    // shared helper if the builder types allow; confirm before changing.
                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new());

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools including file writes and plan execution
                        // IDE-connected write tools route diffs through the IDE for review.
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    // Add thinking params for extended reasoning
                    builder = builder.additional_params(thinking_params);

                    let agent = builder.build();

                    // Use same multi-turn pattern as OpenAI/Anthropic
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
825                }.map_err(|e| e.to_string())
826            } => result
827            };
828
829            // Stop the progress indicator before handling the response
830            progress.stop().await;
831
832            // Suppress unused variable warnings
833            let _ = (&progress_state, user_interrupted);
834
835            match response {
                Ok(text) => {
                    // Show final response
                    println!();
                    ResponseFormatter::print_response(&text);

                    // Track token usage - use actual from hook if available, else estimate
                    let hook_usage = hook.get_usage().await;
                    if hook_usage.has_data() {
                        // Use actual token counts from API response
                        session
                            .token_usage
                            .add_actual(hook_usage.input_tokens, hook_usage.output_tokens);
                    } else {
                        // Fall back to estimation when API doesn't provide usage
                        let prompt_tokens = TokenUsage::estimate_tokens(&input);
                        let completion_tokens = TokenUsage::estimate_tokens(&text);
                        session
                            .token_usage
                            .add_estimated(prompt_tokens, completion_tokens);
                    }
                    // Reset hook usage for next request batch
                    hook.reset_usage().await;

                    // Show context indicator like Forge: [model/~tokens]
                    // Shorten e.g. "anthropic/claude-x:latest" to "claude-x" by taking
                    // the last '/'-segment, then dropping any ':'-suffix.
                    let model_short = session
                        .model
                        .split('/')
                        .next_back()
                        .unwrap_or(&session.model)
                        .split(':')
                        .next()
                        .unwrap_or(&session.model);
                    println!();
                    println!(
                        "  {}[{}/{}]{}",
                        ui::colors::ansi::DIM,
                        model_short,
                        session.token_usage.format_compact(),
                        ui::colors::ansi::RESET
                    );

                    // Extract tool calls from the hook state for history tracking
                    let tool_calls = extract_tool_calls_from_hook(&hook).await;
                    let batch_tool_count = tool_calls.len();
                    total_tool_calls += batch_tool_count;

                    // Show tool call summary if significant
                    if batch_tool_count > 10 {
                        println!(
                            "{}",
                            format!(
                                "  โœ“ Completed with {} tool calls ({} total this session)",
                                batch_tool_count, total_tool_calls
                            )
                            .dimmed()
                        );
                    }

                    // Add to conversation history with tool call records
                    conversation_history.add_turn(input.clone(), text.clone(), tool_calls.clone());

                    // Check if this heavy turn requires immediate compaction
                    // This helps prevent context overflow in subsequent requests
                    if conversation_history.needs_compaction() {
                        println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
                        if let Some(summary) = conversation_history.compact() {
                            println!(
                                "{}",
                                format!("  โœ“ Compressed {} turns", summary.matches("Turn").count())
                                    .dimmed()
                            );
                        }
                    }

                    // Simplify history for OpenAI Responses API reasoning models
                    // Keep only User text and Assistant text - strip reasoning, tool calls, tool results
                    // This prevents pairing errors like "rs_... without its required following item"
                    // and "fc_... without its required reasoning item"
                    if session.provider == ProviderType::OpenAI {
                        simplify_history_for_openai_reasoning(&mut raw_chat_history);
                    }

                    // Also update legacy session history for compatibility
                    session.history.push(("user".to_string(), input.clone()));
                    session
                        .history
                        .push(("assistant".to_string(), text.clone()));

                    // Record to persistent session storage (includes full history snapshot)
                    // A save failure is non-fatal: warn and keep the in-memory session.
                    session_recorder.record_user_message(&input);
                    session_recorder.record_assistant_message(&text, Some(&tool_calls));
                    if let Err(e) = session_recorder.save_with_history(&conversation_history) {
                        eprintln!(
                            "{}",
                            format!("  Warning: Failed to save session: {}", e).dimmed()
                        );
                    }

                    // Check if plan_create was called - show interactive menu
                    // plan_info is (plan_path, task_count) from the tool-call records.
                    if let Some(plan_info) = find_plan_create_call(&tool_calls) {
                        println!(); // Space before menu

                        // Show the plan action menu (don't switch modes yet - let user choose)
                        // Execute* choices queue a follow-up prompt via `pending_input`,
                        // which the outer loop feeds back in as the next user turn.
                        match ui::show_plan_action_menu(&plan_info.0, plan_info.1) {
                            ui::PlanActionResult::ExecuteAutoAccept => {
                                // Now switch to standard mode for execution
                                if session.plan_mode.is_planning() {
                                    session.plan_mode = session.plan_mode.toggle();
                                }
                                auto_accept_writes = true;
                                pending_input = Some(format!(
                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order. Auto-accept all file writes.",
                                    plan_info.0
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::ExecuteWithReview => {
                                // Now switch to standard mode for execution
                                if session.plan_mode.is_planning() {
                                    session.plan_mode = session.plan_mode.toggle();
                                }
                                pending_input = Some(format!(
                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order.",
                                    plan_info.0
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::ChangePlan(feedback) => {
                                // Stay in plan mode for modifications
                                pending_input = Some(format!(
                                    "Please modify the plan at '{}'. User feedback: {}",
                                    plan_info.0, feedback
                                ));
                                succeeded = true;
                            }
                            ui::PlanActionResult::Cancel => {
                                // Just complete normally, don't execute
                                succeeded = true;
                            }
                        }
                    } else {
                        succeeded = true;
                    }
                }
                Err(e) => {
                    let err_str = e.to_string();

                    println!();

                    // Check if this was a user-initiated cancellation (Ctrl+C)
                    // NOTE(review): substring matching on the error text is fragile —
                    // confirm whether rig exposes a typed cancellation error to match on.
                    if err_str.contains("cancelled") || err_str.contains("Cancelled") {
                        // Extract any completed work before cancellation
                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
                        let tool_count = completed_tools.len();

                        eprintln!("{}", "โš  Generation interrupted.".yellow());
                        if tool_count > 0 {
                            eprintln!(
                                "{}",
                                format!("  {} tool calls completed before interrupt.", tool_count)
                                    .dimmed()
                            );
                            // Add partial progress to history
                            conversation_history.add_turn(
                                current_input.clone(),
                                format!("[Interrupted after {} tool calls]", tool_count),
                                completed_tools,
                            );
                        }
                        eprintln!("{}", "  Type your next message to continue.".dimmed());

                        // Don't retry, don't mark as succeeded - just break to return to prompt
                        break;
                    }
1010
1011                    // Check if this is a max depth error - handle as checkpoint
1012                    if err_str.contains("MaxDepth")
1013                        || err_str.contains("max_depth")
1014                        || err_str.contains("reached limit")
1015                    {
1016                        // Extract what was done before hitting the limit
1017                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
1018                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
1019                        let batch_tool_count = completed_tools.len();
1020                        total_tool_calls += batch_tool_count;
1021
1022                        eprintln!("{}", format!(
1023                            "โš  Reached {} tool calls this batch ({} total). Maximum allowed: {}",
1024                            batch_tool_count, total_tool_calls, MAX_TOOL_CALLS
1025                        ).yellow());
1026
1027                        // Check if we've hit the absolute maximum
1028                        if total_tool_calls >= MAX_TOOL_CALLS {
1029                            eprintln!(
1030                                "{}",
1031                                format!("Maximum tool call limit ({}) reached.", MAX_TOOL_CALLS)
1032                                    .red()
1033                            );
1034                            eprintln!(
1035                                "{}",
1036                                "The task is too complex. Try breaking it into smaller parts."
1037                                    .dimmed()
1038                            );
1039                            break;
1040                        }
1041
1042                        // Ask user if they want to continue (unless auto-continue is enabled)
1043                        let should_continue = if auto_continue_tools {
1044                            eprintln!(
1045                                "{}",
1046                                "  Auto-continuing (you selected 'always')...".dimmed()
1047                            );
1048                            true
1049                        } else {
1050                            eprintln!(
1051                                "{}",
1052                                "Excessive tool calls used. Want to continue?".yellow()
1053                            );
1054                            eprintln!(
1055                                "{}",
1056                                "  [y] Yes, continue  [n] No, stop  [a] Always continue".dimmed()
1057                            );
1058                            print!("  > ");
1059                            let _ = std::io::Write::flush(&mut std::io::stdout());
1060
1061                            // Read user input
1062                            let mut response = String::new();
1063                            match std::io::stdin().read_line(&mut response) {
1064                                Ok(_) => {
1065                                    let resp = response.trim().to_lowercase();
1066                                    if resp == "a" || resp == "always" {
1067                                        auto_continue_tools = true;
1068                                        true
1069                                    } else {
1070                                        resp == "y" || resp == "yes" || resp.is_empty()
1071                                    }
1072                                }
1073                                Err(_) => false,
1074                            }
1075                        };
1076
1077                        if !should_continue {
1078                            eprintln!(
1079                                "{}",
1080                                "Stopped by user. Type 'continue' to resume later.".dimmed()
1081                            );
1082                            // Add partial progress to history
1083                            if !completed_tools.is_empty() {
1084                                conversation_history.add_turn(
1085                                    current_input.clone(),
1086                                    format!(
1087                                        "[Stopped at checkpoint - {} tools completed]",
1088                                        batch_tool_count
1089                                    ),
1090                                    vec![],
1091                                );
1092                            }
1093                            break;
1094                        }
1095
1096                        // Continue from checkpoint
1097                        eprintln!(
1098                            "{}",
1099                            format!(
1100                                "  โ†’ Continuing... {} remaining tool calls available",
1101                                MAX_TOOL_CALLS - total_tool_calls
1102                            )
1103                            .dimmed()
1104                        );
1105
1106                        // Add partial progress to history (without duplicating tool calls)
1107                        conversation_history.add_turn(
1108                            current_input.clone(),
1109                            format!(
1110                                "[Checkpoint - {} tools completed, continuing...]",
1111                                batch_tool_count
1112                            ),
1113                            vec![],
1114                        );
1115
1116                        // Build continuation prompt
1117                        current_input =
1118                            build_continuation_prompt(&input, &completed_tools, &agent_thinking);
1119
1120                        // Brief delay before continuation
1121                        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1122                        continue; // Continue the loop without incrementing retry_attempt
1123                    } else if err_str.contains("rate")
1124                        || err_str.contains("Rate")
1125                        || err_str.contains("429")
1126                        || err_str.contains("Too many tokens")
1127                        || err_str.contains("please wait")
1128                        || err_str.contains("throttl")
1129                        || err_str.contains("Throttl")
1130                    {
1131                        eprintln!("{}", "โš  Rate limited by API provider.".yellow());
1132                        // Wait before retry for rate limits (longer wait for "too many tokens")
1133                        retry_attempt += 1;
1134                        let wait_secs = if err_str.contains("Too many tokens") {
1135                            30
1136                        } else {
1137                            5
1138                        };
1139                        eprintln!(
1140                            "{}",
1141                            format!(
1142                                "  Waiting {} seconds before retry ({}/{})...",
1143                                wait_secs, retry_attempt, MAX_RETRIES
1144                            )
1145                            .dimmed()
1146                        );
1147                        tokio::time::sleep(tokio::time::Duration::from_secs(wait_secs)).await;
1148                    } else if is_input_too_long_error(&err_str) {
1149                        // Context too large - truncate raw_chat_history directly
1150                        // NOTE: We truncate raw_chat_history (actual messages) not conversation_history
1151                        // because conversation_history may be empty/stale during errors
1152                        eprintln!(
1153                            "{}",
1154                            "โš  Context too large for model. Truncating history...".yellow()
1155                        );
1156
1157                        let old_token_count = estimate_raw_history_tokens(&raw_chat_history);
1158                        let old_msg_count = raw_chat_history.len();
1159
1160                        // Strategy 1: Keep only the last N messages (user/assistant pairs)
1161                        // More aggressive truncation on each retry: 10 โ†’ 6 โ†’ 4 messages
1162                        let keep_count = match retry_attempt {
1163                            0 => 10,
1164                            1 => 6,
1165                            _ => 4,
1166                        };
1167
1168                        if raw_chat_history.len() > keep_count {
1169                            // Drain older messages, keep the most recent ones
1170                            let drain_count = raw_chat_history.len() - keep_count;
1171                            raw_chat_history.drain(0..drain_count);
1172                            // Ensure history starts with User message for OpenAI Responses API compatibility
1173                            ensure_history_starts_with_user(&mut raw_chat_history);
1174                        }
1175
1176                        // Strategy 2: Compact large tool outputs to temp files + summaries
1177                        // This preserves data (agent can read file if needed) while reducing context
1178                        let max_output_chars = match retry_attempt {
1179                            0 => 50_000, // 50KB on first try
1180                            1 => 20_000, // 20KB on second
1181                            _ => 5_000,  // 5KB on third (aggressive)
1182                        };
1183                        compact_large_tool_outputs(&mut raw_chat_history, max_output_chars);
1184
1185                        let new_token_count = estimate_raw_history_tokens(&raw_chat_history);
1186                        eprintln!("{}", format!(
1187                            "  โœ“ Truncated: {} messages (~{} tokens) โ†’ {} messages (~{} tokens)",
1188                            old_msg_count, old_token_count, raw_chat_history.len(), new_token_count
1189                        ).green());
1190
1191                        // Preserve compacted summary while clearing turns to stay in sync
1192                        conversation_history.clear_turns_preserve_context();
1193
1194                        // Retry with truncated context
1195                        retry_attempt += 1;
1196                        if retry_attempt < MAX_RETRIES {
1197                            eprintln!(
1198                                "{}",
1199                                format!(
1200                                    "  โ†’ Retrying with truncated context ({}/{})...",
1201                                    retry_attempt, MAX_RETRIES
1202                                )
1203                                .dimmed()
1204                            );
1205                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1206                        } else {
1207                            eprintln!(
1208                                "{}",
1209                                "Context still too large after truncation. Try /clear to reset."
1210                                    .red()
1211                            );
1212                            break;
1213                        }
1214                    } else if is_truncation_error(&err_str) {
1215                        // Truncation error - try intelligent continuation
1216                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
1217                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
1218
1219                        // Count actually completed tools (not in-progress)
1220                        let completed_count = completed_tools
1221                            .iter()
1222                            .filter(|t| !t.result_summary.contains("IN PROGRESS"))
1223                            .count();
1224                        let in_progress_count = completed_tools.len() - completed_count;
1225
1226                        if !completed_tools.is_empty() && continuation_count < MAX_CONTINUATIONS {
1227                            // We have partial progress - continue from where we left off
1228                            continuation_count += 1;
1229                            let status_msg = if in_progress_count > 0 {
1230                                format!(
1231                                    "โš  Response truncated. {} completed, {} in-progress. Auto-continuing ({}/{})...",
1232                                    completed_count,
1233                                    in_progress_count,
1234                                    continuation_count,
1235                                    MAX_CONTINUATIONS
1236                                )
1237                            } else {
1238                                format!(
1239                                    "โš  Response truncated. {} tool calls completed. Auto-continuing ({}/{})...",
1240                                    completed_count, continuation_count, MAX_CONTINUATIONS
1241                                )
1242                            };
1243                            eprintln!("{}", status_msg.yellow());
1244
1245                            // Add partial progress to conversation history
1246                            // NOTE: We intentionally pass empty tool_calls here because the
1247                            // continuation prompt already contains the detailed file list.
1248                            // Including them in history would duplicate the context and waste tokens.
1249                            conversation_history.add_turn(
1250                                current_input.clone(),
1251                                format!("[Partial response - {} tools completed, {} in-progress before truncation. See continuation prompt for details.]",
1252                                    completed_count, in_progress_count),
1253                                vec![]  // Don't duplicate - continuation prompt has the details
1254                            );
1255
1256                            // Check if we need compaction after adding this heavy turn
1257                            // This is important for long multi-turn sessions with many tool calls
1258                            if conversation_history.needs_compaction() {
1259                                eprintln!(
1260                                    "{}",
1261                                    "  ๐Ÿ“ฆ Compacting history before continuation...".dimmed()
1262                                );
1263                                if let Some(summary) = conversation_history.compact() {
1264                                    eprintln!(
1265                                        "{}",
1266                                        format!(
1267                                            "  โœ“ Compressed {} turns",
1268                                            summary.matches("Turn").count()
1269                                        )
1270                                        .dimmed()
1271                                    );
1272                                }
1273                            }
1274
1275                            // Build continuation prompt with context
1276                            current_input = build_continuation_prompt(
1277                                &input,
1278                                &completed_tools,
1279                                &agent_thinking,
1280                            );
1281
1282                            // Log continuation details for debugging
1283                            eprintln!("{}", format!(
1284                                "  โ†’ Continuing with {} files read, {} written, {} other actions tracked",
1285                                completed_tools.iter().filter(|t| t.tool_name == "read_file").count(),
1286                                completed_tools.iter().filter(|t| t.tool_name == "write_file" || t.tool_name == "write_files").count(),
1287                                completed_tools.iter().filter(|t| t.tool_name != "read_file" && t.tool_name != "write_file" && t.tool_name != "write_files" && t.tool_name != "list_directory").count()
1288                            ).dimmed());
1289
1290                            // Brief delay before continuation
1291                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1292                            // Don't increment retry_attempt - this is progress via continuation
1293                        } else if retry_attempt < MAX_RETRIES {
1294                            // No tool calls completed - simple retry
1295                            retry_attempt += 1;
1296                            eprintln!(
1297                                "{}",
1298                                format!(
1299                                    "โš  Response error (attempt {}/{}). Retrying...",
1300                                    retry_attempt, MAX_RETRIES
1301                                )
1302                                .yellow()
1303                            );
1304                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1305                        } else {
1306                            // Max retries/continuations reached
1307                            eprintln!("{}", format!("Error: {}", e).red());
1308                            if continuation_count >= MAX_CONTINUATIONS {
1309                                eprintln!("{}", format!("Max continuations ({}) reached. The task is too complex for one request.", MAX_CONTINUATIONS).dimmed());
1310                            } else {
1311                                eprintln!(
1312                                    "{}",
1313                                    "Max retries reached. The response may be too complex."
1314                                        .dimmed()
1315                                );
1316                            }
1317                            eprintln!(
1318                                "{}",
1319                                "Try breaking your request into smaller parts.".dimmed()
1320                            );
1321                            break;
1322                        }
1323                    } else if err_str.contains("timeout") || err_str.contains("Timeout") {
1324                        // Timeout - simple retry
1325                        retry_attempt += 1;
1326                        if retry_attempt < MAX_RETRIES {
1327                            eprintln!(
1328                                "{}",
1329                                format!(
1330                                    "โš  Request timed out (attempt {}/{}). Retrying...",
1331                                    retry_attempt, MAX_RETRIES
1332                                )
1333                                .yellow()
1334                            );
1335                            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
1336                        } else {
1337                            eprintln!("{}", "Request timed out. Please try again.".red());
1338                            break;
1339                        }
1340                    } else {
1341                        // Unknown error - show details and break
1342                        eprintln!("{}", format!("Error: {}", e).red());
1343                        if continuation_count > 0 {
1344                            eprintln!(
1345                                "{}",
1346                                format!(
1347                                    "  (occurred during continuation attempt {})",
1348                                    continuation_count
1349                                )
1350                                .dimmed()
1351                            );
1352                        }
1353                        eprintln!("{}", "Error details for debugging:".dimmed());
1354                        eprintln!(
1355                            "{}",
1356                            format!("  - retry_attempt: {}/{}", retry_attempt, MAX_RETRIES)
1357                                .dimmed()
1358                        );
1359                        eprintln!(
1360                            "{}",
1361                            format!(
1362                                "  - continuation_count: {}/{}",
1363                                continuation_count, MAX_CONTINUATIONS
1364                            )
1365                            .dimmed()
1366                        );
1367                        break;
1368                    }
1369                }
1370            }
1371        }
1372        println!();
1373    }
1374
1375    // Clean up terminal layout before exiting (disabled - layout not initialized)
1376    // if let Err(e) = terminal_layout.cleanup() {
1377    //     eprintln!(
1378    //         "{}",
1379    //         format!("Warning: Terminal cleanup failed: {}", e).dimmed()
1380    //     );
1381    // }
1382
1383    Ok(())
1384}
1385
1386// NOTE: wait_for_interrupt function removed - ESC interrupt feature disabled
1387// due to terminal corruption issues with spawn_blocking raw mode handling.
1388// TODO: Re-implement using tool hook callbacks for cleaner interruption.
1389
1390/// Extract tool call records from the hook state for history tracking
1391async fn extract_tool_calls_from_hook(hook: &ToolDisplayHook) -> Vec<ToolCallRecord> {
1392    let state = hook.state();
1393    let guard = state.lock().await;
1394
1395    guard
1396        .tool_calls
1397        .iter()
1398        .enumerate()
1399        .map(|(i, tc)| {
1400            let result = if tc.is_running {
1401                // Tool was in progress when error occurred
1402                "[IN PROGRESS - may need to be re-run]".to_string()
1403            } else if let Some(output) = &tc.output {
1404                truncate_string(output, 200)
1405            } else {
1406                "completed".to_string()
1407            };
1408
1409            ToolCallRecord {
1410                tool_name: tc.name.clone(),
1411                args_summary: truncate_string(&tc.args, 100),
1412                result_summary: result,
1413                // Generate a unique tool ID for proper message pairing
1414                tool_id: Some(format!("tool_{}_{}", tc.name, i)),
1415                // Mark read-only tools as droppable (their results can be re-fetched)
1416                droppable: matches!(
1417                    tc.name.as_str(),
1418                    "read_file" | "list_directory" | "analyze_project"
1419                ),
1420            }
1421        })
1422        .collect()
1423}
1424
1425/// Extract any agent thinking/messages from the hook for context
1426async fn extract_agent_messages_from_hook(hook: &ToolDisplayHook) -> Vec<String> {
1427    let state = hook.state();
1428    let guard = state.lock().await;
1429    guard.agent_messages.clone()
1430}
1431
/// Helper to truncate strings for summaries.
///
/// Returns `s` unchanged when it fits within `max_len` bytes; otherwise
/// returns a prefix followed by `"..."`. The cut point is moved back to the
/// nearest UTF-8 character boundary before slicing, because `&s[..n]` panics
/// when `n` falls inside a multi-byte character (paths, JSON and log text
/// passing through here are not guaranteed to be ASCII).
fn truncate_string(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        s.to_string()
    } else {
        // Reserve 3 bytes for the ellipsis, then back off to a char boundary.
        let mut cut = max_len.saturating_sub(3);
        while cut > 0 && !s.is_char_boundary(cut) {
            cut -= 1;
        }
        format!("{}...", &s[..cut])
    }
}
1440
/// Compact large tool outputs by saving them to temp files and replacing with summaries.
/// This preserves all data (agent can read the file) while reducing context size.
///
/// Scans every `User` message for `ToolResult` text parts longer than
/// `max_chars` bytes. Each oversized output is written in full to a file under
/// the OS temp directory, then replaced in-place with a summary produced by
/// `create_output_summary` (which embeds the file path so the agent can
/// re-read specific sections later).
///
/// Failure handling is deliberately best-effort: if the temp directory or
/// file cannot be created/written, the original text is left untouched.
fn compact_large_tool_outputs(messages: &mut [rig::completion::Message], max_chars: usize) {
    use rig::completion::message::{Text, ToolResultContent, UserContent};
    use std::fs;

    // Create temp directory for compacted outputs. Errors are ignored on
    // purpose: the fs::write below simply fails and the output stays inline.
    let temp_dir = std::env::temp_dir().join("syncable-agent-outputs");
    let _ = fs::create_dir_all(&temp_dir);

    for msg in messages.iter_mut() {
        if let rig::completion::Message::User { content } = msg {
            for item in content.iter_mut() {
                if let UserContent::ToolResult(tr) = item {
                    for trc in tr.content.iter_mut() {
                        if let ToolResultContent::Text(text) = trc
                            && text.text.len() > max_chars
                        {
                            // Save full output to temp file. The millisecond
                            // timestamp keeps filenames unique across repeated
                            // compactions of the same tool-result id.
                            // (unwrap: only fails if the clock is pre-1970.)
                            let file_id = format!(
                                "{}_{}.txt",
                                tr.id,
                                std::time::SystemTime::now()
                                    .duration_since(std::time::UNIX_EPOCH)
                                    .unwrap()
                                    .as_millis()
                            );
                            let file_path = temp_dir.join(&file_id);

                            if let Ok(()) = fs::write(&file_path, &text.text) {
                                // Create a smart summary pointing at the saved file
                                let summary = create_output_summary(
                                    &text.text,
                                    &file_path.display().to_string(),
                                    max_chars / 2, // Use half max for summary
                                );

                                // Replace with summary
                                *trc = ToolResultContent::Text(Text { text: summary });
                            }
                        }
                    }
                }
            }
        }
    }
}
1488
1489/// Create a smart summary of a large output using incremental chunk processing.
1490/// Processes output in logical sections, summarizes each, then combines into actionable summary.
1491fn create_output_summary(full_output: &str, file_path: &str, max_summary_len: usize) -> String {
1492    let total_lines = full_output.lines().count();
1493    let total_chars = full_output.len();
1494
1495    let summary_content =
1496        if full_output.trim_start().starts_with('{') || full_output.trim_start().starts_with('[') {
1497            // JSON output - extract structured summary
1498            summarize_json_incrementally(full_output, max_summary_len)
1499        } else {
1500            // Text output - chunk and summarize
1501            summarize_text_incrementally(full_output, max_summary_len)
1502        };
1503
1504    format!(
1505        "[COMPACTED OUTPUT]\n\
1506        Full data: {}\n\
1507        Size: {} chars, {} lines\n\
1508        \n\
1509        {}\n\
1510        \n\
1511        [Read file with offset/limit for specific sections if needed]",
1512        file_path, total_chars, total_lines, summary_content
1513    )
1514}
1515
1516/// Incrementally summarize JSON output, extracting key fields and prioritizing important items.
1517fn summarize_json_incrementally(json_str: &str, max_len: usize) -> String {
1518    let Ok(json) = serde_json::from_str::<serde_json::Value>(json_str) else {
1519        return "Failed to parse JSON".to_string();
1520    };
1521
1522    let mut parts: Vec<String> = Vec::new();
1523    let mut current_len = 0;
1524
1525    match &json {
1526        serde_json::Value::Object(obj) => {
1527            // Priority 1: Summary/stats fields
1528            for key in ["summary", "stats", "metadata", "status"] {
1529                if let Some(v) = obj.get(key) {
1530                    let s = format!("{}:\n{}", key, indent_json(v, 2, 500));
1531                    if current_len + s.len() < max_len {
1532                        parts.push(s.clone());
1533                        current_len += s.len();
1534                    }
1535                }
1536            }
1537
1538            // Priority 2: Error/critical items (summarize each)
1539            for key in [
1540                "errors",
1541                "critical",
1542                "failures",
1543                "issues",
1544                "findings",
1545                "recommendations",
1546            ] {
1547                if let Some(serde_json::Value::Array(arr)) = obj.get(key) {
1548                    if arr.is_empty() {
1549                        continue;
1550                    }
1551                    parts.push(format!("\n{} ({} items):", key, arr.len()));
1552
1553                    // Group by severity/type if present
1554                    let mut by_severity: std::collections::HashMap<
1555                        String,
1556                        Vec<&serde_json::Value>,
1557                    > = std::collections::HashMap::new();
1558
1559                    for item in arr {
1560                        let severity = item
1561                            .get("severity")
1562                            .or_else(|| item.get("level"))
1563                            .or_else(|| item.get("type"))
1564                            .and_then(|v| v.as_str())
1565                            .unwrap_or("other")
1566                            .to_string();
1567                        by_severity.entry(severity).or_default().push(item);
1568                    }
1569
1570                    // Show critical/high first, summarize others
1571                    for sev in [
1572                        "critical", "high", "error", "warning", "medium", "low", "info", "other",
1573                    ] {
1574                        if let Some(items) = by_severity.get(sev) {
1575                            let show_count = match sev {
1576                                "critical" | "high" | "error" => 5.min(items.len()),
1577                                "warning" | "medium" => 3.min(items.len()),
1578                                _ => 2.min(items.len()),
1579                            };
1580
1581                            if !items.is_empty() {
1582                                let s =
1583                                    format!("  [{}] {} items:", sev.to_uppercase(), items.len());
1584                                if current_len + s.len() < max_len {
1585                                    parts.push(s.clone());
1586                                    current_len += s.len();
1587
1588                                    for item in items.iter().take(show_count) {
1589                                        let item_summary = summarize_single_item(item);
1590                                        if current_len + item_summary.len() < max_len {
1591                                            parts.push(format!("    โ€ข {}", item_summary));
1592                                            current_len += item_summary.len();
1593                                        }
1594                                    }
1595
1596                                    if items.len() > show_count {
1597                                        parts.push(format!(
1598                                            "    ... and {} more",
1599                                            items.len() - show_count
1600                                        ));
1601                                    }
1602                                }
1603                            }
1604                        }
1605                    }
1606                }
1607            }
1608
1609            // Priority 3: Show remaining top-level keys
1610            let shown_keys: std::collections::HashSet<&str> = [
1611                "summary",
1612                "stats",
1613                "metadata",
1614                "status",
1615                "errors",
1616                "critical",
1617                "failures",
1618                "issues",
1619                "findings",
1620                "recommendations",
1621            ]
1622            .iter()
1623            .cloned()
1624            .collect();
1625
1626            let other_keys: Vec<_> = obj
1627                .keys()
1628                .filter(|k| !shown_keys.contains(k.as_str()))
1629                .collect();
1630            if !other_keys.is_empty() && current_len < max_len - 200 {
1631                parts.push(format!("\nOther fields: {:?}", other_keys));
1632            }
1633        }
1634        serde_json::Value::Array(arr) => {
1635            parts.push(format!("Array with {} items", arr.len()));
1636
1637            // Try to group by type/severity
1638            for (i, item) in arr.iter().take(10).enumerate() {
1639                let s = format!("[{}] {}", i, summarize_single_item(item));
1640                if current_len + s.len() < max_len {
1641                    parts.push(s.clone());
1642                    current_len += s.len();
1643                }
1644            }
1645            if arr.len() > 10 {
1646                parts.push(format!("... and {} more items", arr.len() - 10));
1647            }
1648        }
1649        _ => {
1650            parts.push(truncate_json_value(&json, max_len));
1651        }
1652    }
1653
1654    parts.join("\n")
1655}
1656
1657/// Summarize a single JSON item (issue, error, etc.) into a one-liner.
1658fn summarize_single_item(item: &serde_json::Value) -> String {
1659    let mut parts: Vec<String> = Vec::new();
1660
1661    // Extract common fields
1662    for key in [
1663        "message",
1664        "description",
1665        "title",
1666        "name",
1667        "file",
1668        "path",
1669        "code",
1670        "rule",
1671    ] {
1672        if let Some(v) = item.get(key)
1673            && let Some(s) = v.as_str()
1674        {
1675            parts.push(truncate_string(s, 80));
1676            break; // Only take first descriptive field
1677        }
1678    }
1679
1680    // Add location if present
1681    if let Some(file) = item
1682        .get("file")
1683        .or_else(|| item.get("path"))
1684        .and_then(|v| v.as_str())
1685    {
1686        if let Some(line) = item.get("line").and_then(|v| v.as_u64()) {
1687            parts.push(format!("at {}:{}", file, line));
1688        } else {
1689            parts.push(format!("in {}", truncate_string(file, 40)));
1690        }
1691    }
1692
1693    if parts.is_empty() {
1694        truncate_json_value(item, 100)
1695    } else {
1696        parts.join(" ")
1697    }
1698}
1699
1700/// Indent JSON for display.
1701fn indent_json(v: &serde_json::Value, indent: usize, max_len: usize) -> String {
1702    let s = serde_json::to_string_pretty(v).unwrap_or_else(|_| v.to_string());
1703    let prefix = " ".repeat(indent);
1704    let indented: String = s
1705        .lines()
1706        .map(|l| format!("{}{}", prefix, l))
1707        .collect::<Vec<_>>()
1708        .join("\n");
1709    if indented.len() > max_len {
1710        format!("{}...", &indented[..max_len.saturating_sub(3)])
1711    } else {
1712        indented
1713    }
1714}
1715
1716/// Incrementally summarize text output by processing in chunks.
1717fn summarize_text_incrementally(text: &str, max_len: usize) -> String {
1718    let lines: Vec<&str> = text.lines().collect();
1719    let mut parts: Vec<String> = Vec::new();
1720    let mut current_len = 0;
1721
1722    // Look for section headers or key patterns
1723    let mut sections: Vec<(usize, &str)> = Vec::new();
1724    for (i, line) in lines.iter().enumerate() {
1725        // Detect headers (lines that look like titles)
1726        if line.starts_with('#')
1727            || line.starts_with("==")
1728            || line.starts_with("--")
1729            || (line.ends_with(':') && line.len() < 50)
1730            || line.chars().all(|c| c.is_uppercase() || c.is_whitespace())
1731        {
1732            sections.push((i, line));
1733        }
1734    }
1735
1736    if !sections.is_empty() {
1737        // Summarize by sections
1738        parts.push(format!("Found {} sections:", sections.len()));
1739        for (i, (line_num, header)) in sections.iter().enumerate() {
1740            let next_section = sections.get(i + 1).map(|(n, _)| *n).unwrap_or(lines.len());
1741            let section_lines = next_section - line_num;
1742
1743            let s = format!(
1744                "  [L{}] {} ({} lines)",
1745                line_num + 1,
1746                header.trim(),
1747                section_lines
1748            );
1749            if current_len + s.len() < max_len / 2 {
1750                parts.push(s.clone());
1751                current_len += s.len();
1752            }
1753        }
1754        parts.push("".to_string());
1755    }
1756
1757    // Show first chunk
1758    let preview_lines = 15.min(lines.len());
1759    parts.push("Content preview:".to_string());
1760    for line in lines.iter().take(preview_lines) {
1761        let s = format!("  {}", truncate_string(line, 120));
1762        if current_len + s.len() < max_len * 3 / 4 {
1763            parts.push(s.clone());
1764            current_len += s.len();
1765        }
1766    }
1767
1768    if lines.len() > preview_lines {
1769        parts.push(format!(
1770            "  ... ({} more lines)",
1771            lines.len() - preview_lines
1772        ));
1773    }
1774
1775    // Show last few lines if space permits
1776    if lines.len() > preview_lines * 2 && current_len < max_len - 500 {
1777        parts.push("\nEnd of output:".to_string());
1778        for line in lines.iter().skip(lines.len() - 5) {
1779            let s = format!("  {}", truncate_string(line, 120));
1780            if current_len + s.len() < max_len {
1781                parts.push(s.clone());
1782                current_len += s.len();
1783            }
1784        }
1785    }
1786
1787    parts.join("\n")
1788}
1789
1790/// Truncate a JSON value for display
1791fn truncate_json_value(v: &serde_json::Value, max_len: usize) -> String {
1792    let s = v.to_string();
1793    if s.len() <= max_len {
1794        s
1795    } else {
1796        format!("{}...", &s[..max_len.saturating_sub(3)])
1797    }
1798}
1799
/// Simplify history for OpenAI Responses API compatibility with reasoning models.
///
/// OpenAI's Responses API has strict pairing requirements:
/// - Reasoning items must be followed by their output (text or function_call)
/// - Function_call items must be preceded by their reasoning item
///
/// When passing history across user turns, these pairings get broken, causing errors like:
/// - "Item 'rs_...' of type 'reasoning' was provided without its required following item"
/// - "Item 'fc_...' of type 'function_call' was provided without its required 'reasoning' item"
///
/// Solution: Keep only User messages and final Assistant Text responses.
/// This preserves conversation context without the complex internal tool/reasoning structure.
fn simplify_history_for_openai_reasoning(history: &mut Vec<rig::completion::Message>) {
    use rig::completion::message::{AssistantContent, UserContent};
    use rig::one_or_many::OneOrMany;

    // Filter to keep only User text messages and Assistant text messages.
    // Messages whose content becomes empty after filtering (e.g. pure
    // tool-result or pure reasoning messages) are dropped entirely.
    let simplified: Vec<rig::completion::Message> = history
        .iter()
        .filter_map(|msg| match msg {
            // Keep User messages, but only text content (not tool results)
            rig::completion::Message::User { content } => {
                let text_only: Vec<UserContent> = content
                    .iter()
                    .filter(|c| matches!(c, UserContent::Text(_)))
                    .cloned()
                    .collect();
                if text_only.is_empty() {
                    None
                } else {
                    // OneOrMany can't be built from a possibly-empty Vec in a
                    // single call, so split off the guaranteed first element.
                    // (unwrap is safe: text_only is non-empty here.)
                    let mut iter = text_only.into_iter();
                    let first = iter.next().unwrap();
                    let rest: Vec<_> = iter.collect();
                    let new_content = if rest.is_empty() {
                        OneOrMany::one(first)
                    } else {
                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
                    };
                    Some(rig::completion::Message::User {
                        content: new_content,
                    })
                }
            }
            // Keep Assistant messages, but only text content (not reasoning, tool calls)
            rig::completion::Message::Assistant { content, id } => {
                let text_only: Vec<AssistantContent> = content
                    .iter()
                    .filter(|c| matches!(c, AssistantContent::Text(_)))
                    .cloned()
                    .collect();
                if text_only.is_empty() {
                    None
                } else {
                    // Same non-empty OneOrMany reconstruction as the User
                    // branch; the original message id is preserved.
                    let mut iter = text_only.into_iter();
                    let first = iter.next().unwrap();
                    let rest: Vec<_> = iter.collect();
                    let new_content = if rest.is_empty() {
                        OneOrMany::one(first)
                    } else {
                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
                    };
                    Some(rig::completion::Message::Assistant {
                        content: new_content,
                        id: id.clone(),
                    })
                }
            }
        })
        .collect();

    *history = simplified;
}
1872
1873/// Ensure history starts with a User message for OpenAI Responses API compatibility.
1874///
1875/// OpenAI's Responses API requires that reasoning items are properly structured within
1876/// a conversation. When history truncation leaves an Assistant message (containing
1877/// Reasoning blocks) at the start, OpenAI rejects it with:
1878/// "Item 'rs_...' of type 'reasoning' was provided without its required following item."
1879///
1880/// This function inserts a synthetic User message at the beginning if history starts
1881/// with an Assistant message, preserving the context while maintaining valid structure.
1882fn ensure_history_starts_with_user(history: &mut Vec<rig::completion::Message>) {
1883    if !history.is_empty()
1884        && matches!(
1885            history.first(),
1886            Some(rig::completion::Message::Assistant { .. })
1887        )
1888    {
1889        // Insert synthetic User message at the beginning to maintain valid conversation structure
1890        history.insert(
1891            0,
1892            rig::completion::Message::User {
1893                content: rig::one_or_many::OneOrMany::one(
1894                    rig::completion::message::UserContent::text("(Conversation continued)"),
1895                ),
1896            },
1897        );
1898    }
1899}
1900
1901/// Estimate token count from raw rig Messages
1902/// This is used for context length management to prevent "input too long" errors.
1903/// Estimates ~4 characters per token.
1904fn estimate_raw_history_tokens(messages: &[rig::completion::Message]) -> usize {
1905    use rig::completion::message::{AssistantContent, ToolResultContent, UserContent};
1906
1907    messages
1908        .iter()
1909        .map(|msg| -> usize {
1910            match msg {
1911                rig::completion::Message::User { content } => {
1912                    content
1913                        .iter()
1914                        .map(|c| -> usize {
1915                            match c {
1916                                UserContent::Text(t) => t.text.len() / 4,
1917                                UserContent::ToolResult(tr) => {
1918                                    // Tool results can be HUGE - properly estimate them
1919                                    tr.content
1920                                        .iter()
1921                                        .map(|trc| match trc {
1922                                            ToolResultContent::Text(t) => t.text.len() / 4,
1923                                            _ => 100,
1924                                        })
1925                                        .sum::<usize>()
1926                                }
1927                                _ => 100, // Estimate for images/documents
1928                            }
1929                        })
1930                        .sum::<usize>()
1931                }
1932                rig::completion::Message::Assistant { content, .. } => {
1933                    content
1934                        .iter()
1935                        .map(|c| -> usize {
1936                            match c {
1937                                AssistantContent::Text(t) => t.text.len() / 4,
1938                                AssistantContent::ToolCall(tc) => {
1939                                    // arguments is serde_json::Value, convert to string for length estimate
1940                                    let args_len = tc.function.arguments.to_string().len();
1941                                    (tc.function.name.len() + args_len) / 4
1942                                }
1943                                _ => 100,
1944                            }
1945                        })
1946                        .sum::<usize>()
1947                }
1948            }
1949        })
1950        .sum()
1951}
1952
1953/// Find a plan_create tool call in the list and extract plan info
1954/// Returns (plan_path, task_count) if found
1955fn find_plan_create_call(tool_calls: &[ToolCallRecord]) -> Option<(String, usize)> {
1956    for tc in tool_calls {
1957        if tc.tool_name == "plan_create" {
1958            // Try to parse the result_summary as JSON to extract plan_path
1959            // Note: result_summary may be truncated, so we have multiple fallbacks
1960            let plan_path =
1961                if let Ok(result) = serde_json::from_str::<serde_json::Value>(&tc.result_summary) {
1962                    result
1963                        .get("plan_path")
1964                        .and_then(|v| v.as_str())
1965                        .map(|s| s.to_string())
1966                } else {
1967                    None
1968                };
1969
1970            // If JSON parsing failed, find the most recently created plan file
1971            // This is more reliable than trying to reconstruct the path from truncated args
1972            let plan_path = plan_path.unwrap_or_else(|| {
1973                find_most_recent_plan_file().unwrap_or_else(|| "plans/plan.md".to_string())
1974            });
1975
1976            // Count tasks by reading the plan file directly
1977            let task_count = count_tasks_in_plan_file(&plan_path).unwrap_or(0);
1978
1979            return Some((plan_path, task_count));
1980        }
1981    }
1982    None
1983}
1984
/// Find the most recently created plan file in the plans directory
fn find_most_recent_plan_file() -> Option<String> {
    let plans_dir = std::env::current_dir().ok()?.join("plans");
    if !plans_dir.exists() {
        return None;
    }

    // Scan `plans/*.md` and keep the entry with the latest mtime. The strict `>`
    // keeps the first-seen entry on a timestamp tie, matching a manual scan.
    let newest = std::fs::read_dir(&plans_dir)
        .ok()?
        .flatten()
        .filter(|entry| entry.path().extension().is_some_and(|e| e == "md"))
        .filter_map(|entry| {
            let modified = entry.metadata().ok()?.modified().ok()?;
            Some((entry.path(), modified))
        })
        .fold(
            None::<(std::path::PathBuf, std::time::SystemTime)>,
            |best, (path, modified)| {
                if best.as_ref().map(|(_, t)| modified > *t).unwrap_or(true) {
                    Some((path, modified))
                } else {
                    best
                }
            },
        )?;

    // Report the winner relative to the working directory when possible.
    let (path, _) = newest;
    let display = path
        .strip_prefix(std::env::current_dir().unwrap_or_default())
        .map(|p| p.display().to_string())
        .unwrap_or_else(|_| path.display().to_string());
    Some(display)
}
2012
/// Count tasks (checkbox items) in a plan file
///
/// A task line is `- [ ]`, `- [x]`, `- [~]`, or `- [!]`, optionally indented
/// (the former regex `^\s*-\s*\[[ x~!]\]`). Returns `None` if the file cannot
/// be read, `Some(count)` otherwise.
fn count_tasks_in_plan_file(plan_path: &str) -> Option<usize> {
    // Stdlib matcher equivalent to `^\s*-\s*\[[ x~!]\]`. Avoids recompiling a
    // Regex on every call (and the regex dependency for this check entirely).
    fn is_task_line(line: &str) -> bool {
        let rest = line.trim_start();
        let Some(rest) = rest.strip_prefix('-') else {
            return false;
        };
        let rest = rest.trim_start();
        let Some(rest) = rest.strip_prefix('[') else {
            return false;
        };
        let mut chars = rest.chars();
        matches!(chars.next(), Some(' ' | 'x' | '~' | '!')) && chars.next() == Some(']')
    }

    // Try the path as given first, then relative to the current directory.
    let path = std::path::Path::new(plan_path);
    let content = if path.exists() {
        std::fs::read_to_string(path).ok()?
    } else {
        std::fs::read_to_string(std::env::current_dir().ok()?.join(plan_path)).ok()?
    };

    Some(content.lines().filter(|line| is_task_line(line)).count())
}
2035
/// Check if an error is a truncation/JSON parsing error that can be recovered via continuation
fn is_truncation_error(err_str: &str) -> bool {
    // Substrings emitted by serde/rig when a streamed response is cut off mid-JSON.
    const MARKERS: [&str; 4] = ["JsonError", "EOF while parsing", "JSON", "unexpected end"];
    MARKERS.iter().any(|marker| err_str.contains(marker))
}
2043
/// Check if error is "input too long" - context exceeds model limit
/// This happens when conversation history grows beyond what the model can handle.
/// Recovery: compact history and retry with reduced context.
fn is_input_too_long_error(err_str: &str) -> bool {
    // Note: a check for "Input is too long" would be dead code — any message
    // containing it already matches the "too long" marker below.
    const MARKERS: [&str; 5] = [
        "too long",
        "Too long",
        "context length",
        "maximum context",
        "exceeds the model",
    ];
    MARKERS.iter().any(|marker| err_str.contains(marker))
}
2055
2056/// Build a continuation prompt that tells the AI what work was completed
2057/// and asks it to continue from where it left off
2058fn build_continuation_prompt(
2059    original_task: &str,
2060    completed_tools: &[ToolCallRecord],
2061    agent_thinking: &[String],
2062) -> String {
2063    use std::collections::HashSet;
2064
2065    // Group tools by type and extract unique files read
2066    let mut files_read: HashSet<String> = HashSet::new();
2067    let mut files_written: HashSet<String> = HashSet::new();
2068    let mut dirs_listed: HashSet<String> = HashSet::new();
2069    let mut other_tools: Vec<String> = Vec::new();
2070    let mut in_progress: Vec<String> = Vec::new();
2071
2072    for tool in completed_tools {
2073        let is_in_progress = tool.result_summary.contains("IN PROGRESS");
2074
2075        if is_in_progress {
2076            in_progress.push(format!("{}({})", tool.tool_name, tool.args_summary));
2077            continue;
2078        }
2079
2080        match tool.tool_name.as_str() {
2081            "read_file" => {
2082                // Extract path from args
2083                files_read.insert(tool.args_summary.clone());
2084            }
2085            "write_file" | "write_files" => {
2086                files_written.insert(tool.args_summary.clone());
2087            }
2088            "list_directory" => {
2089                dirs_listed.insert(tool.args_summary.clone());
2090            }
2091            _ => {
2092                other_tools.push(format!(
2093                    "{}({})",
2094                    tool.tool_name,
2095                    truncate_string(&tool.args_summary, 40)
2096                ));
2097            }
2098        }
2099    }
2100
2101    let mut prompt = format!(
2102        "[CONTINUE] Your previous response was interrupted. DO NOT repeat completed work.\n\n\
2103        Original task: {}\n",
2104        truncate_string(original_task, 500)
2105    );
2106
2107    // Show files already read - CRITICAL for preventing re-reads
2108    if !files_read.is_empty() {
2109        prompt.push_str("\n== FILES ALREADY READ (do NOT read again) ==\n");
2110        for file in &files_read {
2111            prompt.push_str(&format!("  - {}\n", file));
2112        }
2113    }
2114
2115    if !dirs_listed.is_empty() {
2116        prompt.push_str("\n== DIRECTORIES ALREADY LISTED ==\n");
2117        for dir in &dirs_listed {
2118            prompt.push_str(&format!("  - {}\n", dir));
2119        }
2120    }
2121
2122    if !files_written.is_empty() {
2123        prompt.push_str("\n== FILES ALREADY WRITTEN ==\n");
2124        for file in &files_written {
2125            prompt.push_str(&format!("  - {}\n", file));
2126        }
2127    }
2128
2129    if !other_tools.is_empty() {
2130        prompt.push_str("\n== OTHER COMPLETED ACTIONS ==\n");
2131        for tool in other_tools.iter().take(20) {
2132            prompt.push_str(&format!("  - {}\n", tool));
2133        }
2134        if other_tools.len() > 20 {
2135            prompt.push_str(&format!("  ... and {} more\n", other_tools.len() - 20));
2136        }
2137    }
2138
2139    if !in_progress.is_empty() {
2140        prompt.push_str("\n== INTERRUPTED (may need re-run) ==\n");
2141        for tool in &in_progress {
2142            prompt.push_str(&format!("  โš  {}\n", tool));
2143        }
2144    }
2145
2146    // Include last thinking context if available
2147    if let Some(last_thought) = agent_thinking.last() {
2148        prompt.push_str(&format!(
2149            "\n== YOUR LAST THOUGHTS ==\n\"{}\"\n",
2150            truncate_string(last_thought, 300)
2151        ));
2152    }
2153
2154    prompt.push_str("\n== INSTRUCTIONS ==\n");
2155    prompt.push_str("IMPORTANT: Your previous response was too long and got cut off.\n");
2156    prompt.push_str("1. Do NOT re-read files listed above - they are already in context.\n");
2157    prompt.push_str("2. If writing a document, write it in SECTIONS - complete one section now, then continue.\n");
2158    prompt.push_str("3. Keep your response SHORT and focused. Better to complete small chunks than fail on large ones.\n");
2159    prompt.push_str("4. If the task involves writing a file, START WRITING NOW - don't explain what you'll do.\n");
2160
2161    prompt
2162}
2163
2164/// Run a single query and return the response
2165pub async fn run_query(
2166    project_path: &Path,
2167    query: &str,
2168    provider: ProviderType,
2169    model: Option<String>,
2170) -> AgentResult<String> {
2171    use tools::*;
2172
2173    let project_path_buf = project_path.to_path_buf();
2174
2175    // Background process manager for Prometheus port-forwards (single query context)
2176    let bg_manager = Arc::new(BackgroundProcessManager::new());
2177    // Select prompt based on query type (analysis vs generation)
2178    // For single queries (non-interactive), always use standard mode
2179    let preamble = get_system_prompt(project_path, Some(query), PlanMode::default());
2180    let is_generation = prompts::is_generation_query(query);
2181
2182    match provider {
2183        ProviderType::OpenAI => {
2184            // Use Responses API (default) for reasoning model support
2185            let client = openai::Client::from_env();
2186            let model_name = model.as_deref().unwrap_or("gpt-5.2");
2187
2188            let mut builder = client
2189                .agent(model_name)
2190                .preamble(&preamble)
2191                .max_tokens(4096)
2192                .tool(AnalyzeTool::new(project_path_buf.clone()))
2193                .tool(SecurityScanTool::new(project_path_buf.clone()))
2194                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2195                .tool(HadolintTool::new(project_path_buf.clone()))
2196                .tool(DclintTool::new(project_path_buf.clone()))
2197                .tool(KubelintTool::new(project_path_buf.clone()))
2198                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2199                .tool(K8sCostsTool::new(project_path_buf.clone()))
2200                .tool(K8sDriftTool::new(project_path_buf.clone()))
2201                .tool(HelmlintTool::new(project_path_buf.clone()))
2202                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2203                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2204                .tool(TerraformInstallTool::new())
2205                .tool(ReadFileTool::new(project_path_buf.clone()))
2206                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2207                .tool(WebFetchTool::new())
2208                // Prometheus discovery and connection tools for live K8s analysis
2209                .tool(PrometheusDiscoverTool::new())
2210                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2211                // RAG retrieval tools for compressed tool outputs
2212                .tool(RetrieveOutputTool::new())
2213                .tool(ListOutputsTool::new());
2214
2215            // Add generation tools if this is a generation query
2216            if is_generation {
2217                builder = builder
2218                    .tool(WriteFileTool::new(project_path_buf.clone()))
2219                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2220                    .tool(ShellTool::new(project_path_buf.clone()));
2221            }
2222
2223            // Enable reasoning for OpenAI reasoning models
2224            let model_lower = model_name.to_lowercase();
2225            let is_reasoning_model = model_lower.starts_with("gpt-5")
2226                || model_lower.starts_with("gpt5")
2227                || model_lower.starts_with("o1")
2228                || model_lower.starts_with("o3")
2229                || model_lower.starts_with("o4");
2230
2231            let agent = if is_reasoning_model {
2232                let reasoning_params = serde_json::json!({
2233                    "reasoning": {
2234                        "effort": "medium",
2235                        "summary": "detailed"
2236                    }
2237                });
2238                builder.additional_params(reasoning_params).build()
2239            } else {
2240                builder.build()
2241            };
2242
2243            agent
2244                .prompt(query)
2245                .multi_turn(50)
2246                .await
2247                .map_err(|e| AgentError::ProviderError(e.to_string()))
2248        }
2249        ProviderType::Anthropic => {
2250            let client = anthropic::Client::from_env();
2251            let model_name = model.as_deref().unwrap_or("claude-sonnet-4-5-20250929");
2252
2253            // TODO: Extended thinking for Claude is disabled because rig doesn't properly
2254            // handle thinking blocks in multi-turn conversations with tool use.
2255            // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference.
2256
2257            let mut builder = client
2258                .agent(model_name)
2259                .preamble(&preamble)
2260                .max_tokens(4096)
2261                .tool(AnalyzeTool::new(project_path_buf.clone()))
2262                .tool(SecurityScanTool::new(project_path_buf.clone()))
2263                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2264                .tool(HadolintTool::new(project_path_buf.clone()))
2265                .tool(DclintTool::new(project_path_buf.clone()))
2266                .tool(KubelintTool::new(project_path_buf.clone()))
2267                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2268                .tool(K8sCostsTool::new(project_path_buf.clone()))
2269                .tool(K8sDriftTool::new(project_path_buf.clone()))
2270                .tool(HelmlintTool::new(project_path_buf.clone()))
2271                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2272                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2273                .tool(TerraformInstallTool::new())
2274                .tool(ReadFileTool::new(project_path_buf.clone()))
2275                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2276                .tool(WebFetchTool::new())
2277                // Prometheus discovery and connection tools for live K8s analysis
2278                .tool(PrometheusDiscoverTool::new())
2279                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2280                // RAG retrieval tools for compressed tool outputs
2281                .tool(RetrieveOutputTool::new())
2282                .tool(ListOutputsTool::new());
2283
2284            // Add generation tools if this is a generation query
2285            if is_generation {
2286                builder = builder
2287                    .tool(WriteFileTool::new(project_path_buf.clone()))
2288                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2289                    .tool(ShellTool::new(project_path_buf.clone()));
2290            }
2291
2292            let agent = builder.build();
2293
2294            agent
2295                .prompt(query)
2296                .multi_turn(50)
2297                .await
2298                .map_err(|e| AgentError::ProviderError(e.to_string()))
2299        }
2300        ProviderType::Bedrock => {
2301            // Bedrock provider via rig-bedrock - same pattern as Anthropic
2302            let client = crate::bedrock::client::Client::from_env();
2303            let model_name = model
2304                .as_deref()
2305                .unwrap_or("global.anthropic.claude-sonnet-4-5-20250929-v1:0");
2306
2307            // Extended thinking for Claude via Bedrock
2308            let thinking_params = serde_json::json!({
2309                "thinking": {
2310                    "type": "enabled",
2311                    "budget_tokens": 16000
2312                }
2313            });
2314
2315            let mut builder = client
2316                .agent(model_name)
2317                .preamble(&preamble)
2318                .max_tokens(64000)  // Max output tokens for Claude Sonnet on Bedrock
2319                .tool(AnalyzeTool::new(project_path_buf.clone()))
2320                .tool(SecurityScanTool::new(project_path_buf.clone()))
2321                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2322                .tool(HadolintTool::new(project_path_buf.clone()))
2323                .tool(DclintTool::new(project_path_buf.clone()))
2324                .tool(KubelintTool::new(project_path_buf.clone()))
2325                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2326                .tool(K8sCostsTool::new(project_path_buf.clone()))
2327                .tool(K8sDriftTool::new(project_path_buf.clone()))
2328                .tool(HelmlintTool::new(project_path_buf.clone()))
2329                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2330                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2331                .tool(TerraformInstallTool::new())
2332                .tool(ReadFileTool::new(project_path_buf.clone()))
2333                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2334                .tool(WebFetchTool::new())
2335                // Prometheus discovery and connection tools for live K8s analysis
2336                .tool(PrometheusDiscoverTool::new())
2337                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2338                // RAG retrieval tools for compressed tool outputs
2339                .tool(RetrieveOutputTool::new())
2340                .tool(ListOutputsTool::new());
2341
2342            // Add generation tools if this is a generation query
2343            if is_generation {
2344                builder = builder
2345                    .tool(WriteFileTool::new(project_path_buf.clone()))
2346                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2347                    .tool(ShellTool::new(project_path_buf.clone()));
2348            }
2349
2350            let agent = builder.additional_params(thinking_params).build();
2351
2352            agent
2353                .prompt(query)
2354                .multi_turn(50)
2355                .await
2356                .map_err(|e| AgentError::ProviderError(e.to_string()))
2357        }
2358    }
2359}