syncable_cli/agent/mod.rs

//! Agent module for interactive AI-powered CLI assistance
//!
//! This module provides an agent layer, built on the Rig library, that lets users
//! interact with the CLI through natural language conversations.
//!
//! # Features
//!
//! - **Conversation History**: Maintains context across multiple turns
//! - **Automatic Compaction**: Compresses old history when the token count exceeds a threshold
//! - **Tool Tracking**: Records tool calls for better context preservation
//!
//! # Usage
//!
//! ```bash
//! # Interactive mode
//! sync-ctl chat
//!
//! # With a specific provider
//! sync-ctl chat --provider openai --model gpt-5.2
//!
//! # Single query
//! sync-ctl chat --query "What security issues does this project have?"
//! ```
//!
//! # Interactive Commands
//!
//! - `/model` - Switch to a different AI model
//! - `/provider` - Switch provider (prompts for API key if needed)
//! - `/help` - Show available commands
//! - `/clear` - Clear conversation history
//! - `/exit` - Exit the chat
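//!
//! # Programmatic Use
//!
//! A minimal sketch of launching the interactive agent from Rust instead of the
//! CLI. The `syncable_cli` crate path, the presence of a tokio runtime, and
//! `None` falling back to the provider's default model are assumptions here:
//!
//! ```rust,ignore
//! use std::path::Path;
//! use syncable_cli::agent::{run_interactive, ProviderType};
//!
//! async fn chat_here() -> Result<(), Box<dyn std::error::Error>> {
//!     // Runs the REPL against the current directory.
//!     run_interactive(Path::new("."), ProviderType::Anthropic, None).await?;
//!     Ok(())
//! }
//! ```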

pub mod commands;
pub mod compact;
pub mod history;
pub mod ide;
pub mod persistence;
pub mod prompts;
pub mod session;
pub mod tools;
pub mod ui;

use colored::Colorize;
use commands::TokenUsage;
use history::{ConversationHistory, ToolCallRecord};
use ide::IdeClient;
use rig::{
    client::{CompletionClient, ProviderClient},
    completion::Prompt,
    providers::{anthropic, openai},
};
use session::{ChatSession, PlanMode};
use std::path::Path;
use std::sync::Arc;
use tokio::sync::Mutex as TokioMutex;
use ui::{ResponseFormatter, ToolDisplayHook};

/// Provider type for the agent
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ProviderType {
    #[default]
    OpenAI,
    Anthropic,
    Bedrock,
}

impl std::fmt::Display for ProviderType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ProviderType::OpenAI => write!(f, "openai"),
            ProviderType::Anthropic => write!(f, "anthropic"),
            ProviderType::Bedrock => write!(f, "bedrock"),
        }
    }
}

impl std::str::FromStr for ProviderType {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.to_lowercase().as_str() {
            "openai" => Ok(ProviderType::OpenAI),
            "anthropic" => Ok(ProviderType::Anthropic),
            "bedrock" | "aws" | "aws-bedrock" => Ok(ProviderType::Bedrock),
            _ => Err(format!(
                "Unknown provider: {}. Use: openai, anthropic, or bedrock",
                s
            )),
        }
    }
}
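
// Parsing sketch - case-insensitive, with aliases for Bedrock (these
// assertions follow directly from the match arms above):
//
//     assert_eq!("OpenAI".parse::<ProviderType>(), Ok(ProviderType::OpenAI));
//     assert_eq!("aws".parse::<ProviderType>(), Ok(ProviderType::Bedrock));
//     assert!("azure".parse::<ProviderType>().is_err());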

/// Error types for the agent
#[derive(Debug, thiserror::Error)]
pub enum AgentError {
    #[error("Missing API key. Set {0} environment variable.")]
    MissingApiKey(String),

    #[error("Provider error: {0}")]
    ProviderError(String),

    #[error("Tool error: {0}")]
    ToolError(String),
}

pub type AgentResult<T> = Result<T, AgentError>;
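
// Display sketch: `AgentError::MissingApiKey("OPENAI_API_KEY".into())` renders
// via the thiserror attribute above as
// "Missing API key. Set OPENAI_API_KEY environment variable."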

/// Get the system prompt for the agent based on query type and plan mode
fn get_system_prompt(project_path: &Path, query: Option<&str>, plan_mode: PlanMode) -> String {
    // In planning mode, use the read-only exploration prompt
    if plan_mode.is_planning() {
        return prompts::get_planning_prompt(project_path);
    }

    if let Some(q) = query {
        // First check if it's a code development task (highest priority)
        if prompts::is_code_development_query(q) {
            return prompts::get_code_development_prompt(project_path);
        }
        // Then check if it's DevOps generation (Docker, Terraform, Helm)
        if prompts::is_generation_query(q) {
            return prompts::get_devops_prompt(project_path, Some(q));
        }
    }
    // Default to analysis prompt
    prompts::get_analysis_prompt(project_path)
}
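
// Routing sketch, assuming the `prompts` classifiers match their names
// (plan mode wins, then code development, then DevOps generation);
// the example queries are illustrative:
//
//     planning mode, any query       -> get_planning_prompt
//     "fix the failing unit test"    -> get_code_development_prompt
//     "generate a Dockerfile"        -> get_devops_prompt
//     anything else (or no query)    -> get_analysis_prompt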

/// Run the agent in interactive mode with a custom REPL supporting /model and /provider commands
pub async fn run_interactive(
    project_path: &Path,
    provider: ProviderType,
    model: Option<String>,
) -> AgentResult<()> {
    use tools::*;

    let mut session = ChatSession::new(project_path, provider, model);

    // Shared background process manager for Prometheus port-forwards
    let bg_manager = Arc::new(BackgroundProcessManager::new());

    // Terminal layout for split screen is disabled for now - see notes below
    // let terminal_layout = ui::TerminalLayout::new();
    // let layout_state = terminal_layout.state();

    // Initialize conversation history with compaction support
    let mut conversation_history = ConversationHistory::new();

    // Initialize IDE client for native diff viewing
    let ide_client: Option<Arc<TokioMutex<IdeClient>>> = {
        let mut client = IdeClient::new().await;
        if client.is_ide_available() {
            match client.connect().await {
                Ok(()) => {
                    println!(
                        "{} Connected to {} IDE companion",
                        "✓".green(),
                        client.ide_name().unwrap_or("VS Code")
                    );
                    Some(Arc::new(TokioMutex::new(client)))
                }
                Err(e) => {
                    // IDE detected but companion not running or connection failed
                    println!("{} IDE companion not connected: {}", "!".yellow(), e);
                    None
                }
            }
        } else {
            println!(
                "{} No IDE detected (TERM_PROGRAM={})",
                "·".dimmed(),
                std::env::var("TERM_PROGRAM").unwrap_or_default()
            );
            None
        }
    };

    // Load API key from config file into the environment if not already set
    ChatSession::load_api_key_to_env(session.provider);

    // Check if an API key is configured; prompt if not
    if !ChatSession::has_api_key(session.provider) {
        ChatSession::prompt_api_key(session.provider)?;
    }

    session.print_banner();

    // Display platform context if a project is selected
    if session.platform_session.is_project_selected() {
        println!(
            "{}",
            format!("Platform context: {}", session.platform_session.display_context()).dimmed()
        );
    }

    // NOTE: Terminal layout with ANSI scroll regions is disabled for now.
    // The scroll region approach conflicts with the existing input/output flow.
    // TODO: Implement proper scroll region support that integrates with the input handler.
    // For now, we rely on the pause/resume mechanism in the progress indicator.
    //
    // if let Err(e) = terminal_layout.init() {
    //     eprintln!(
    //         "{}",
    //         format!("Note: Terminal layout initialization failed: {}. Using fallback mode.", e)
    //             .dimmed()
    //     );
    // }

    // Raw Rig messages for multi-turn - preserves Reasoning blocks for thinking.
    // Our ConversationHistory only stores text summaries, but rig needs the full Message structure.
    let mut raw_chat_history: Vec<rig::completion::Message> = Vec::new();

    // Pending input for auto-continue after plan creation
    let mut pending_input: Option<String> = None;
    // Auto-accept mode for plan execution (skips write confirmations)
    let mut auto_accept_writes = false;

    // Initialize the session recorder for conversation persistence
    let mut session_recorder = persistence::SessionRecorder::new(project_path);

    loop {
        // Show conversation status if we have history
        if !conversation_history.is_empty() {
            println!(
                "{}",
                format!("  💬 Context: {}", conversation_history.status()).dimmed()
            );
        }

        // Check for pending input (from plan menu selection)
        let input = if let Some(pending) = pending_input.take() {
            // Show what we're executing
            println!("{} {}", "→".cyan(), pending.dimmed());
            pending
        } else {
            // New user turn - reset auto-accept mode from previous plan execution
            auto_accept_writes = false;

            // Read user input (returns InputResult)
            let input_result = match session.read_input() {
                Ok(result) => result,
                Err(_) => break,
            };

            // Handle the input result
            match input_result {
                ui::InputResult::Submit(text) => ChatSession::process_submitted_text(&text),
                ui::InputResult::Cancel | ui::InputResult::Exit => break,
                ui::InputResult::TogglePlanMode => {
                    // Toggle planning mode - minimal feedback, no extra newlines
                    let new_mode = session.toggle_plan_mode();
                    if new_mode.is_planning() {
                        println!("{}", "★ plan mode".yellow());
                    } else {
                        println!("{}", "▶ standard mode".green());
                    }
                    continue;
                }
            }
        };

        if input.is_empty() {
            continue;
        }

        // Check for commands
        if ChatSession::is_command(&input) {
            // Special handling for /clear to also clear conversation history
            if input.trim().to_lowercase() == "/clear" || input.trim().to_lowercase() == "/c" {
                conversation_history.clear();
                raw_chat_history.clear();
            }
            match session.process_command(&input) {
                Ok(true) => {
                    // Check if /resume loaded a session
                    if let Some(record) = session.pending_resume.take() {
                        // Display previous messages
                        println!();
                        println!("{}", "─── Previous Conversation ───".dimmed());
                        for msg in &record.messages {
                            match msg.role {
                                persistence::MessageRole::User => {
                                    println!();
                                    println!(
                                        "{} {}",
                                        "You:".cyan().bold(),
                                        truncate_string(&msg.content, 500)
                                    );
                                }
                                persistence::MessageRole::Assistant => {
                                    println!();
                                    // Show tool calls if any (same format as live display)
                                    if let Some(ref tools) = msg.tool_calls {
                                        for tc in tools {
                                            // Match live tool display: green dot for completed, cyan bold name
                                            if tc.args_summary.is_empty() {
                                                println!(
                                                    "{} {}",
                                                    "●".green(),
                                                    tc.name.cyan().bold()
                                                );
                                            } else {
                                                println!(
                                                    "{} {}({})",
                                                    "●".green(),
                                                    tc.name.cyan().bold(),
                                                    truncate_string(&tc.args_summary, 50).dimmed()
                                                );
                                            }
                                        }
                                    }
                                    // Show the response (same ResponseFormatter as live)
                                    if !msg.content.is_empty() {
                                        ResponseFormatter::print_response(&truncate_string(
                                            &msg.content,
                                            1000,
                                        ));
                                    }
                                }
                                persistence::MessageRole::System => {
                                    // Skip system messages in display
                                }
                            }
                        }
                        println!("{}", "─── End of History ───".dimmed());
                        println!();

                        // Try to restore from history_snapshot (new format with full context)
                        let restored_from_snapshot = if let Some(history_json) =
                            &record.history_snapshot
                        {
                            match ConversationHistory::from_json(history_json) {
                                Ok(restored) => {
                                    conversation_history = restored;
                                    // Rebuild raw_chat_history from the restored conversation_history
                                    raw_chat_history = conversation_history.to_messages();
                                    println!(
                                        "{}",
                                        "  ✓ Restored full conversation context (including compacted history)".green()
                                    );
                                    true
                                }
                                Err(e) => {
                                    eprintln!(
                                        "{}",
                                        format!(
                                            "  Warning: Failed to restore history snapshot: {}",
                                            e
                                        )
                                        .yellow()
                                    );
                                    false
                                }
                            }
                        } else {
                            false
                        };

                        // Fallback: load from messages (old format, or if the snapshot failed)
                        if !restored_from_snapshot {
                            // Load messages into raw_chat_history for AI context
                            for msg in &record.messages {
                                match msg.role {
                                    persistence::MessageRole::User => {
                                        raw_chat_history.push(rig::completion::Message::User {
                                            content: rig::one_or_many::OneOrMany::one(
                                                rig::completion::message::UserContent::text(
                                                    &msg.content,
                                                ),
                                            ),
                                        });
                                    }
                                    persistence::MessageRole::Assistant => {
                                        raw_chat_history
                                            .push(rig::completion::Message::Assistant {
                                                id: Some(msg.id.clone()),
                                                content: rig::one_or_many::OneOrMany::one(
                                                    rig::completion::message::AssistantContent::text(
                                                        &msg.content,
                                                    ),
                                                ),
                                            });
                                    }
                                    persistence::MessageRole::System => {}
                                }
                            }

                            // Load into conversation_history with tool calls from message records
                            for msg in &record.messages {
                                if msg.role == persistence::MessageRole::User {
                                    // Find the next assistant message
                                    let (response, tool_calls) = record
                                        .messages
                                        .iter()
                                        .skip_while(|m| m.id != msg.id)
                                        .skip(1)
                                        .find(|m| m.role == persistence::MessageRole::Assistant)
                                        .map(|m| {
                                            let tcs = m.tool_calls.as_ref().map(|calls| {
                                                calls
                                                    .iter()
                                                    .map(|tc| history::ToolCallRecord {
                                                        tool_name: tc.name.clone(),
                                                        args_summary: tc.args_summary.clone(),
                                                        result_summary: tc.result_summary.clone(),
                                                        tool_id: None,
                                                        droppable: false,
                                                    })
                                                    .collect::<Vec<_>>()
                                            });
                                            (m.content.clone(), tcs.unwrap_or_default())
                                        })
                                        .unwrap_or_default();

                                    conversation_history.add_turn(
                                        msg.content.clone(),
                                        response,
                                        tool_calls,
                                    );
                                }
                            }
                            println!(
                                "{}",
                                format!(
                                    "  ✓ Loaded {} messages (legacy format).",
                                    record.messages.len()
                                )
                                .green()
                            );
                        }
                        println!();
                    }
                    continue;
                }
                Ok(false) => break, // /exit
                Err(e) => {
                    eprintln!("{}", format!("Error: {}", e).red());
                    continue;
                }
            }
        }

        // Check the API key before making a request (in case the provider changed)
        if !ChatSession::has_api_key(session.provider) {
            eprintln!(
                "{}",
                "No API key configured. Use /provider to set one.".yellow()
            );
            continue;
        }

        // Check if compaction is needed before making the request
        if conversation_history.needs_compaction() {
            println!("{}", "  📦 Compacting conversation history...".dimmed());
            if let Some(summary) = conversation_history.compact() {
                println!(
                    "{}",
                    format!("  ✓ Compressed {} turns", summary.matches("Turn").count()).dimmed()
                );
            }
        }

        // Pre-request check: estimate whether we're approaching the context limit.
        // Check raw_chat_history (the actual messages), not conversation_history,
        // because conversation_history may be out of sync.
        let estimated_input_tokens = estimate_raw_history_tokens(&raw_chat_history)
            + input.len() / 4 // New input (~4 chars per token)
            + 5000; // System prompt overhead estimate
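
        // Worked example of the estimate: ~560k chars of history (~140k tokens)
        // plus a 2,000-char input (~500 tokens) and the 5k preamble allowance
        // comes to ~145.5k, just under the 150k pre-truncation threshold below.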

        if estimated_input_tokens > 150_000 {
            println!(
                "{}",
                "  ⚠ Large context detected. Pre-truncating...".yellow()
            );

            let old_count = raw_chat_history.len();
            // Keep the last 20 messages when approaching the limit
            if raw_chat_history.len() > 20 {
                let drain_count = raw_chat_history.len() - 20;
                raw_chat_history.drain(0..drain_count);
                // Ensure the history starts with a User message for OpenAI Responses API compatibility
                ensure_history_starts_with_user(&mut raw_chat_history);
                // Preserve the compacted summary while clearing turns to stay in sync
                conversation_history.clear_turns_preserve_context();
                println!(
                    "{}",
                    format!(
                        "  ✓ Truncated {} → {} messages",
                        old_count,
                        raw_chat_history.len()
                    )
                    .dimmed()
                );
            }
        }

        // Retry loop for automatic error recovery.
        // MAX_RETRIES is for failures without progress.
        // MAX_CONTINUATIONS is for truncations WITH progress (more generous).
        // TOOL_CALL_CHECKPOINT is the interval at which we ask the user to confirm.
        // MAX_TOOL_CALLS is the absolute maximum (300 = 6 checkpoints x 50).
        const MAX_RETRIES: u32 = 3;
        const MAX_CONTINUATIONS: u32 = 10;
        const _TOOL_CALL_CHECKPOINT: usize = 50;
        const MAX_TOOL_CALLS: usize = 300;
        let mut retry_attempt = 0;
        let mut continuation_count = 0;
        let mut total_tool_calls: usize = 0;
        let mut auto_continue_tools = false; // User can select "always" to skip future prompts
        let mut current_input = input.clone();
        let mut succeeded = false;

        while retry_attempt < MAX_RETRIES && continuation_count < MAX_CONTINUATIONS && !succeeded {
            // Log if this is a continuation attempt
            if continuation_count > 0 {
                eprintln!("{}", "  📡 Sending continuation request...".dimmed());
            }

            // Create a hook for Claude Code style tool display
            let hook = ToolDisplayHook::new();

            // Create a progress indicator for visual feedback during generation
            let progress = ui::GenerationIndicator::new();
            // Layout connection disabled - using inline progress mode
            // progress.state().set_layout(layout_state.clone());
            hook.set_progress_state(progress.state()).await;

            let project_path_buf = session.project_path.clone();
            // Select the prompt based on query type (analysis vs generation) and plan mode
            let preamble = get_system_prompt(
                &session.project_path,
                Some(&current_input),
                session.plan_mode,
            );
            let is_generation = prompts::is_generation_query(&current_input);
            let is_planning = session.plan_mode.is_planning();

            // Note: we use raw_chat_history directly, which preserves Reasoning blocks.
            // This is needed for extended thinking to work in multi-turn conversations.

            // Get the progress state for interrupt detection
            let progress_state = progress.state();

            // Use tokio::select! to race the API call against Ctrl+C.
            // This allows immediate cancellation, not just between tool calls.
            let mut user_interrupted = false;

            // API call with Ctrl+C interrupt support
            let response = tokio::select! {
                biased; // Check ctrl_c first for faster response

                _ = tokio::signal::ctrl_c() => {
                    user_interrupted = true;
                    Err::<String, String>("User cancelled".to_string())
                }

                result = async {
                    match session.provider {
                ProviderType::OpenAI => {
                    // Use the Responses API (default) for reasoning model support.
                    // rig-core 0.28+ handles Reasoning items properly in multi-turn.
                    let client = openai::Client::from_env();

                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(4096)
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::new(project_path_buf.clone()))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new());

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools, including file writes and plan execution
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from the plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    // Enable reasoning for OpenAI reasoning models (GPT-5.x, O1, O3, O4)
                    let model_lower = session.model.to_lowercase();
                    let is_reasoning_model = model_lower.starts_with("gpt-5")
                        || model_lower.starts_with("gpt5")
                        || model_lower.starts_with("o1")
                        || model_lower.starts_with("o3")
                        || model_lower.starts_with("o4");

                    let agent = if is_reasoning_model {
                        let reasoning_params = serde_json::json!({
                            "reasoning": {
                                "effort": "medium",
                                "summary": "detailed"
                            }
                        });
                        builder.additional_params(reasoning_params).build()
                    } else {
                        builder.build()
                    };

                    // Use multi_turn with the Responses API
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
                ProviderType::Anthropic => {
                    let client = anthropic::Client::from_env();

                    // TODO: Extended thinking for Claude is disabled because rig-bedrock/rig-anthropic
                    // don't properly handle thinking blocks in multi-turn conversations with tool use.
                    // When thinking is enabled, ALL assistant messages must start with thinking blocks
                    // BEFORE tool_use blocks, but rig doesn't preserve/replay these.
                    // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for a reference impl.

                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(4096)
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::new(project_path_buf.clone()))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new());

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools, including file writes and plan execution
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from the plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    let agent = builder.build();

                    // Allow up to 50 tool call turns for complex generation tasks.
                    // Use the hook to display tool calls as they happen.
                    // Pass conversation history for context continuity.
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
                ProviderType::Bedrock => {
                    // Bedrock provider via rig-bedrock - same pattern as OpenAI/Anthropic
                    let client = crate::bedrock::client::Client::from_env();

                    // Extended thinking for Claude models via Bedrock.
                    // This enables Claude to show its reasoning process before responding.
                    // Requires a vendored rig-bedrock that preserves Reasoning blocks with tool calls.
                    // Extended thinking budget - reduced to help with rate limits;
                    // 8000 is enough for most tasks, increase to 16000 for complex analysis.
                    let thinking_params = serde_json::json!({
                        "thinking": {
                            "type": "enabled",
                            "budget_tokens": 8000
                        }
                    });

                    let mut builder = client
                        .agent(&session.model)
                        .preamble(&preamble)
                        .max_tokens(64000) // Max output tokens for Claude Sonnet on Bedrock
                        .tool(AnalyzeTool::new(project_path_buf.clone()))
                        .tool(SecurityScanTool::new(project_path_buf.clone()))
                        .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                        .tool(HadolintTool::new(project_path_buf.clone()))
                        .tool(DclintTool::new(project_path_buf.clone()))
                        .tool(KubelintTool::new(project_path_buf.clone()))
                        .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                        .tool(K8sCostsTool::new(project_path_buf.clone()))
                        .tool(K8sDriftTool::new(project_path_buf.clone()))
                        .tool(HelmlintTool::new(project_path_buf.clone()))
                        .tool(TerraformFmtTool::new(project_path_buf.clone()))
                        .tool(TerraformValidateTool::new(project_path_buf.clone()))
                        .tool(TerraformInstallTool::new())
                        .tool(ReadFileTool::new(project_path_buf.clone()))
                        .tool(ListDirectoryTool::new(project_path_buf.clone()))
                        .tool(WebFetchTool::new())
                        // Prometheus discovery and connection tools for live K8s analysis
                        .tool(PrometheusDiscoverTool::new())
                        .tool(PrometheusConnectTool::new(bg_manager.clone()))
                        // RAG retrieval tools for compressed tool outputs
                        .tool(RetrieveOutputTool::new())
                        .tool(ListOutputsTool::new())
                        // Platform tools for project management
                        .tool(ListOrganizationsTool::new())
                        .tool(ListProjectsTool::new())
                        .tool(SelectProjectTool::new())
                        .tool(CurrentContextTool::new())
                        .tool(OpenProviderSettingsTool::new())
                        .tool(CheckProviderConnectionTool::new())
                        .tool(ListDeploymentCapabilitiesTool::new())
                        // Deployment tools for service management
                        .tool(CreateDeploymentConfigTool::new())
                        .tool(DeployServiceTool::new(project_path_buf.clone()))
                        .tool(ListDeploymentConfigsTool::new())
                        .tool(TriggerDeploymentTool::new())
                        .tool(GetDeploymentStatusTool::new())
                        .tool(ListDeploymentsTool::new())
                        .tool(GetServiceLogsTool::new());

                    // Add tools based on mode
                    if is_planning {
                        // Plan mode: read-only shell + plan creation tools
                        builder = builder
                            .tool(ShellTool::new(project_path_buf.clone()).with_read_only(true))
                            .tool(PlanCreateTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()));
                    } else if is_generation {
                        // Standard mode + generation query: all tools, including file writes and plan execution
                        let (mut write_file_tool, mut write_files_tool) =
                            if let Some(ref client) = ide_client {
                                (
                                    WriteFileTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                    WriteFilesTool::new(project_path_buf.clone())
                                        .with_ide_client(client.clone()),
                                )
                            } else {
                                (
                                    WriteFileTool::new(project_path_buf.clone()),
                                    WriteFilesTool::new(project_path_buf.clone()),
                                )
                            };
                        // Disable confirmations if auto-accept mode is enabled (from the plan menu)
                        if auto_accept_writes {
                            write_file_tool = write_file_tool.without_confirmation();
                            write_files_tool = write_files_tool.without_confirmation();
                        }
                        builder = builder
                            .tool(write_file_tool)
                            .tool(write_files_tool)
                            .tool(ShellTool::new(project_path_buf.clone()))
                            .tool(PlanListTool::new(project_path_buf.clone()))
                            .tool(PlanNextTool::new(project_path_buf.clone()))
                            .tool(PlanUpdateTool::new(project_path_buf.clone()));
                    }

                    // Add thinking params for extended reasoning
                    builder = builder.additional_params(thinking_params);

                    let agent = builder.build();

                    // Use the same multi-turn pattern as OpenAI/Anthropic
                    agent
                        .prompt(&current_input)
                        .with_history(&mut raw_chat_history)
                        .with_hook(hook.clone())
                        .multi_turn(50)
                        .await
                }
                }.map_err(|e| e.to_string())
            } => result
            };

            // Stop the progress indicator before handling the response
            progress.stop().await;

            // Suppress unused variable warnings
            let _ = (&progress_state, user_interrupted);

            match response {
                Ok(text) => {
                    // Show the final response
                    println!();
                    ResponseFormatter::print_response(&text);

                    // Track token usage - use actual counts from the hook if available, else estimate
                    let hook_usage = hook.get_usage().await;
                    if hook_usage.has_data() {
                        // Use actual token counts from the API response
                        session
                            .token_usage
                            .add_actual(hook_usage.input_tokens, hook_usage.output_tokens);
                    } else {
                        // Fall back to estimation when the API doesn't provide usage
                        let prompt_tokens = TokenUsage::estimate_tokens(&input);
                        let completion_tokens = TokenUsage::estimate_tokens(&text);
                        session
                            .token_usage
                            .add_estimated(prompt_tokens, completion_tokens);
                    }
                    // Reset hook usage for the next request batch
                    hook.reset_usage().await;

                    // Show a context indicator like Forge: [model/~tokens]
                    let model_short = session
                        .model
                        .split('/')
                        .next_back()
                        .unwrap_or(&session.model)
                        .split(':')
                        .next()
                        .unwrap_or(&session.model);
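                    // e.g. "provider/claude-sonnet-4:beta" -> "claude-sonnet-4"
                    // (last '/' segment, then everything before the first ':';
                    // the model id here is illustrative)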
924                    println!();
925                    println!(
926                        "  {}[{}/{}]{}",
927                        ui::colors::ansi::DIM,
928                        model_short,
929                        session.token_usage.format_compact(),
930                        ui::colors::ansi::RESET
931                    );
932
933                    // Extract tool calls from the hook state for history tracking
934                    let tool_calls = extract_tool_calls_from_hook(&hook).await;
935                    let batch_tool_count = tool_calls.len();
936                    total_tool_calls += batch_tool_count;
937
938                    // Show tool call summary if significant
939                    if batch_tool_count > 10 {
940                        println!(
941                            "{}",
942                            format!(
943                                "  โœ“ Completed with {} tool calls ({} total this session)",
944                                batch_tool_count, total_tool_calls
945                            )
946                            .dimmed()
947                        );
948                    }
949
950                    // Add to conversation history with tool call records
951                    conversation_history.add_turn(input.clone(), text.clone(), tool_calls.clone());
952
953                    // Check if this heavy turn requires immediate compaction
954                    // This helps prevent context overflow in subsequent requests
955                    if conversation_history.needs_compaction() {
956                        println!("{}", "  ๐Ÿ“ฆ Compacting conversation history...".dimmed());
957                        if let Some(summary) = conversation_history.compact() {
958                            println!(
959                                "{}",
960                                format!("  โœ“ Compressed {} turns", summary.matches("Turn").count())
961                                    .dimmed()
962                            );
963                        }
964                    }
965
966                    // Simplify history for OpenAI Responses API reasoning models
967                    // Keep only User text and Assistant text - strip reasoning, tool calls, tool results
968                    // This prevents pairing errors like "rs_... without its required following item"
969                    // and "fc_... without its required reasoning item"
970                    if session.provider == ProviderType::OpenAI {
971                        simplify_history_for_openai_reasoning(&mut raw_chat_history);
972                    }
973
974                    // Also update legacy session history for compatibility
975                    session.history.push(("user".to_string(), input.clone()));
976                    session
977                        .history
978                        .push(("assistant".to_string(), text.clone()));
979
980                    // Record to persistent session storage (includes full history snapshot)
981                    session_recorder.record_user_message(&input);
982                    session_recorder.record_assistant_message(&text, Some(&tool_calls));
983                    if let Err(e) = session_recorder.save_with_history(&conversation_history) {
984                        eprintln!(
985                            "{}",
986                            format!("  Warning: Failed to save session: {}", e).dimmed()
987                        );
988                    }
989
990                    // Check if plan_create was called - show interactive menu
991                    if let Some(plan_info) = find_plan_create_call(&tool_calls) {
992                        println!(); // Space before menu
993
994                        // Show the plan action menu (don't switch modes yet - let user choose)
995                        match ui::show_plan_action_menu(&plan_info.0, plan_info.1) {
996                            ui::PlanActionResult::ExecuteAutoAccept => {
997                                // Now switch to standard mode for execution
998                                if session.plan_mode.is_planning() {
999                                    session.plan_mode = session.plan_mode.toggle();
1000                                }
1001                                auto_accept_writes = true;
1002                                pending_input = Some(format!(
1003                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order. Auto-accept all file writes.",
1004                                    plan_info.0
1005                                ));
1006                                succeeded = true;
1007                            }
1008                            ui::PlanActionResult::ExecuteWithReview => {
1009                                // Now switch to standard mode for execution
1010                                if session.plan_mode.is_planning() {
1011                                    session.plan_mode = session.plan_mode.toggle();
1012                                }
1013                                pending_input = Some(format!(
1014                                    "Execute the plan at '{}'. Use plan_next to get tasks and execute them in order.",
1015                                    plan_info.0
1016                                ));
1017                                succeeded = true;
1018                            }
1019                            ui::PlanActionResult::ChangePlan(feedback) => {
1020                                // Stay in plan mode for modifications
1021                                pending_input = Some(format!(
1022                                    "Please modify the plan at '{}'. User feedback: {}",
1023                                    plan_info.0, feedback
1024                                ));
1025                                succeeded = true;
1026                            }
1027                            ui::PlanActionResult::Cancel => {
1028                                // Just complete normally, don't execute
1029                                succeeded = true;
1030                            }
1031                        }
1032                    } else {
1033                        succeeded = true;
1034                    }
1035                }
1036                Err(e) => {
1037                    let err_str = e.to_string();
1038
1039                    println!();
1040
1041                    // Check if this was a user-initiated cancellation (Ctrl+C)
1042                    if err_str.contains("cancelled") || err_str.contains("Cancelled") {
1043                        // Extract any completed work before cancellation
1044                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
1045                        let tool_count = completed_tools.len();
1046
1047                        eprintln!("{}", "โš  Generation interrupted.".yellow());
1048                        if tool_count > 0 {
1049                            eprintln!(
1050                                "{}",
1051                                format!("  {} tool calls completed before interrupt.", tool_count)
1052                                    .dimmed()
1053                            );
1054                            // Add partial progress to history
1055                            conversation_history.add_turn(
1056                                current_input.clone(),
1057                                format!("[Interrupted after {} tool calls]", tool_count),
1058                                completed_tools,
1059                            );
1060                        }
1061                        eprintln!("{}", "  Type your next message to continue.".dimmed());
1062
1063                        // Don't retry, don't mark as succeeded - just break to return to prompt
1064                        break;
1065                    }
1066
1067                    // Check if this is a max depth error - handle as checkpoint
1068                    if err_str.contains("MaxDepth")
1069                        || err_str.contains("max_depth")
1070                        || err_str.contains("reached limit")
1071                    {
1072                        // Extract what was done before hitting the limit
1073                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
1074                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
1075                        let batch_tool_count = completed_tools.len();
1076                        total_tool_calls += batch_tool_count;
1077
1078                        eprintln!("{}", format!(
1079                            "โš  Reached {} tool calls this batch ({} total). Maximum allowed: {}",
1080                            batch_tool_count, total_tool_calls, MAX_TOOL_CALLS
1081                        ).yellow());
1082
1083                        // Check if we've hit the absolute maximum
1084                        if total_tool_calls >= MAX_TOOL_CALLS {
1085                            eprintln!(
1086                                "{}",
1087                                format!("Maximum tool call limit ({}) reached.", MAX_TOOL_CALLS)
1088                                    .red()
1089                            );
1090                            eprintln!(
1091                                "{}",
1092                                "The task is too complex. Try breaking it into smaller parts."
1093                                    .dimmed()
1094                            );
1095                            break;
1096                        }
1097
1098                        // Ask user if they want to continue (unless auto-continue is enabled)
1099                        let should_continue = if auto_continue_tools {
1100                            eprintln!(
1101                                "{}",
1102                                "  Auto-continuing (you selected 'always')...".dimmed()
1103                            );
1104                            true
1105                        } else {
1106                            eprintln!(
1107                                "{}",
1108                                "The tool-call limit for this batch was reached. Continue?".yellow()
1109                            );
1110                            eprintln!(
1111                                "{}",
1112                                "  [y] Yes, continue  [n] No, stop  [a] Always continue".dimmed()
1113                            );
1114                            print!("  > ");
1115                            let _ = std::io::Write::flush(&mut std::io::stdout());
1116
1117                            // Read user input
1118                            let mut response = String::new();
1119                            match std::io::stdin().read_line(&mut response) {
1120                                Ok(_) => {
1121                                    let resp = response.trim().to_lowercase();
1122                                    if resp == "a" || resp == "always" {
1123                                        auto_continue_tools = true;
1124                                        true
1125                                    } else {
1126                                        resp == "y" || resp == "yes" || resp.is_empty()
1127                                    }
1128                                }
1129                                Err(_) => false,
1130                            }
1131                        };
1132
1133                        if !should_continue {
1134                            eprintln!(
1135                                "{}",
1136                                "Stopped by user. Type 'continue' to resume later.".dimmed()
1137                            );
1138                            // Record a checkpoint note (tool calls omitted to avoid duplicating context)
1139                            if !completed_tools.is_empty() {
1140                                conversation_history.add_turn(
1141                                    current_input.clone(),
1142                                    format!(
1143                                        "[Stopped at checkpoint - {} tools completed]",
1144                                        batch_tool_count
1145                                    ),
1146                                    vec![],
1147                                );
1148                            }
1149                            break;
1150                        }
1151
1152                        // Continue from checkpoint
1153                        eprintln!(
1154                            "{}",
1155                            format!(
1156                                "  → Continuing... {} remaining tool calls available",
1157                                MAX_TOOL_CALLS - total_tool_calls
1158                            )
1159                            .dimmed()
1160                        );
1161
1162                        // Add partial progress to history (without duplicating tool calls)
1163                        conversation_history.add_turn(
1164                            current_input.clone(),
1165                            format!(
1166                                "[Checkpoint - {} tools completed, continuing...]",
1167                                batch_tool_count
1168                            ),
1169                            vec![],
1170                        );
1171
1172                        // Build continuation prompt
1173                        current_input =
1174                            build_continuation_prompt(&input, &completed_tools, &agent_thinking);
1175
1176                        // Brief delay before continuation
1177                        tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1178                        continue; // Continue the loop without incrementing retry_attempt
1179                    } else if err_str.contains("rate")
1180                        || err_str.contains("Rate")
1181                        || err_str.contains("429")
1182                        || err_str.contains("Too many tokens")
1183                        || err_str.contains("please wait")
1184                        || err_str.contains("throttl")
1185                        || err_str.contains("Throttl")
1186                    {
1187                        eprintln!("{}", "⚠ Rate limited by API provider.".yellow());
1188                        // Wait before retry for rate limits (longer wait for "too many tokens")
1189                        retry_attempt += 1;
1190                        let wait_secs = if err_str.contains("Too many tokens") {
1191                            30
1192                        } else {
1193                            5
1194                        };
1195                        eprintln!(
1196                            "{}",
1197                            format!(
1198                                "  Waiting {} seconds before retry ({}/{})...",
1199                                wait_secs, retry_attempt, MAX_RETRIES
1200                            )
1201                            .dimmed()
1202                        );
1203                        tokio::time::sleep(tokio::time::Duration::from_secs(wait_secs)).await;
1204                    } else if is_input_too_long_error(&err_str) {
1205                        // Context too large - truncate raw_chat_history directly
1206                        // NOTE: We truncate raw_chat_history (actual messages) not conversation_history
1207                        // because conversation_history may be empty/stale during errors
1208                        eprintln!(
1209                            "{}",
1210                            "⚠ Context too large for model. Truncating history...".yellow()
1211                        );
1212
1213                        let old_token_count = estimate_raw_history_tokens(&raw_chat_history);
1214                        let old_msg_count = raw_chat_history.len();
1215
1216                        // Strategy 1: Keep only the last N messages (user/assistant pairs)
1217                        // More aggressive truncation on each retry: 10 → 6 → 4 messages
1218                        let keep_count = match retry_attempt {
1219                            0 => 10,
1220                            1 => 6,
1221                            _ => 4,
1222                        };
1223
1224                        if raw_chat_history.len() > keep_count {
1225                            // Drain older messages, keep the most recent ones
1226                            let drain_count = raw_chat_history.len() - keep_count;
1227                            raw_chat_history.drain(0..drain_count);
1228                            // Ensure history starts with User message for OpenAI Responses API compatibility
1229                            ensure_history_starts_with_user(&mut raw_chat_history);
1230                        }
1231
1232                        // Strategy 2: Compact large tool outputs to temp files + summaries
1233                        // This preserves data (agent can read file if needed) while reducing context
1234                        let max_output_chars = match retry_attempt {
1235                            0 => 50_000, // 50KB on first try
1236                            1 => 20_000, // 20KB on second
1237                            _ => 5_000,  // 5KB on third (aggressive)
1238                        };
1239                        compact_large_tool_outputs(&mut raw_chat_history, max_output_chars);
1240
1241                        let new_token_count = estimate_raw_history_tokens(&raw_chat_history);
1242                        eprintln!("{}", format!(
1243                            "  ✓ Truncated: {} messages (~{} tokens) → {} messages (~{} tokens)",
1244                            old_msg_count, old_token_count, raw_chat_history.len(), new_token_count
1245                        ).green());
1246
1247                        // Preserve compacted summary while clearing turns to stay in sync
1248                        conversation_history.clear_turns_preserve_context();
1249
1250                        // Retry with truncated context
1251                        retry_attempt += 1;
1252                        if retry_attempt < MAX_RETRIES {
1253                            eprintln!(
1254                                "{}",
1255                                format!(
1256                                    "  → Retrying with truncated context ({}/{})...",
1257                                    retry_attempt, MAX_RETRIES
1258                                )
1259                                .dimmed()
1260                            );
1261                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1262                        } else {
1263                            eprintln!(
1264                                "{}",
1265                                "Context still too large after truncation. Try /clear to reset."
1266                                    .red()
1267                            );
1268                            break;
1269                        }
1270                    } else if is_truncation_error(&err_str) {
1271                        // Truncation error - try intelligent continuation
1272                        let completed_tools = extract_tool_calls_from_hook(&hook).await;
1273                        let agent_thinking = extract_agent_messages_from_hook(&hook).await;
1274
1275                        // Count actually completed tools (not in-progress)
1276                        let completed_count = completed_tools
1277                            .iter()
1278                            .filter(|t| !t.result_summary.contains("IN PROGRESS"))
1279                            .count();
1280                        let in_progress_count = completed_tools.len() - completed_count;
1281
1282                        if !completed_tools.is_empty() && continuation_count < MAX_CONTINUATIONS {
1283                            // We have partial progress - continue from where we left off
1284                            continuation_count += 1;
1285                            let status_msg = if in_progress_count > 0 {
1286                                format!(
1287                                    "⚠ Response truncated. {} completed, {} in-progress. Auto-continuing ({}/{})...",
1288                                    completed_count,
1289                                    in_progress_count,
1290                                    continuation_count,
1291                                    MAX_CONTINUATIONS
1292                                )
1293                            } else {
1294                                format!(
1295                                    "⚠ Response truncated. {} tool calls completed. Auto-continuing ({}/{})...",
1296                                    completed_count, continuation_count, MAX_CONTINUATIONS
1297                                )
1298                            };
1299                            eprintln!("{}", status_msg.yellow());
1300
1301                            // Add partial progress to conversation history
1302                            // NOTE: We intentionally pass empty tool_calls here because the
1303                            // continuation prompt already contains the detailed file list.
1304                            // Including them in history would duplicate the context and waste tokens.
1305                            conversation_history.add_turn(
1306                                current_input.clone(),
1307                                format!("[Partial response - {} tools completed, {} in-progress before truncation. See continuation prompt for details.]",
1308                                    completed_count, in_progress_count),
1309                                vec![]  // Don't duplicate - continuation prompt has the details
1310                            );
1311
1312                            // Check if we need compaction after adding this heavy turn
1313                            // This is important for long multi-turn sessions with many tool calls
1314                            if conversation_history.needs_compaction() {
1315                                eprintln!(
1316                                    "{}",
1317                                    "  📦 Compacting history before continuation...".dimmed()
1318                                );
1319                                if let Some(summary) = conversation_history.compact() {
1320                                    eprintln!(
1321                                        "{}",
1322                                        format!(
1323                                            "  ✓ Compressed {} turns",
1324                                            summary.matches("Turn").count()
1325                                        )
1326                                        .dimmed()
1327                                    );
1328                                }
1329                            }
1330
1331                            // Build continuation prompt with context
1332                            current_input = build_continuation_prompt(
1333                                &input,
1334                                &completed_tools,
1335                                &agent_thinking,
1336                            );
1337
1338                            // Log continuation details for debugging
1339                            eprintln!("{}", format!(
1340                                "  → Continuing with {} files read, {} written, {} other actions tracked",
1341                                completed_tools.iter().filter(|t| t.tool_name == "read_file").count(),
1342                                completed_tools.iter().filter(|t| t.tool_name == "write_file" || t.tool_name == "write_files").count(),
1343                                completed_tools.iter().filter(|t| t.tool_name != "read_file" && t.tool_name != "write_file" && t.tool_name != "write_files" && t.tool_name != "list_directory").count()
1344                            ).dimmed());
1345
1346                            // Brief delay before continuation
1347                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1348                            // Don't increment retry_attempt - this is progress via continuation
1349                        } else if retry_attempt < MAX_RETRIES {
1350                            // No tool calls completed - simple retry
1351                            retry_attempt += 1;
1352                            eprintln!(
1353                                "{}",
1354                                format!(
1355                                    "⚠ Response error (attempt {}/{}). Retrying...",
1356                                    retry_attempt, MAX_RETRIES
1357                                )
1358                                .yellow()
1359                            );
1360                            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
1361                        } else {
1362                            // Max retries/continuations reached
1363                            eprintln!("{}", format!("Error: {}", e).red());
1364                            if continuation_count >= MAX_CONTINUATIONS {
1365                                eprintln!("{}", format!("Max continuations ({}) reached. The task is too complex for one request.", MAX_CONTINUATIONS).dimmed());
1366                            } else {
1367                                eprintln!(
1368                                    "{}",
1369                                    "Max retries reached. The response may be too complex."
1370                                        .dimmed()
1371                                );
1372                            }
1373                            eprintln!(
1374                                "{}",
1375                                "Try breaking your request into smaller parts.".dimmed()
1376                            );
1377                            break;
1378                        }
1379                    } else if err_str.contains("timeout") || err_str.contains("Timeout") {
1380                        // Timeout - simple retry
1381                        retry_attempt += 1;
1382                        if retry_attempt < MAX_RETRIES {
1383                            eprintln!(
1384                                "{}",
1385                                format!(
1386                                    "⚠ Request timed out (attempt {}/{}). Retrying...",
1387                                    retry_attempt, MAX_RETRIES
1388                                )
1389                                .yellow()
1390                            );
1391                            tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;
1392                        } else {
1393                            eprintln!("{}", "Request timed out. Please try again.".red());
1394                            break;
1395                        }
1396                    } else {
1397                        // Unknown error - show details and break
1398                        eprintln!("{}", format!("Error: {}", e).red());
1399                        if continuation_count > 0 {
1400                            eprintln!(
1401                                "{}",
1402                                format!(
1403                                    "  (occurred during continuation attempt {})",
1404                                    continuation_count
1405                                )
1406                                .dimmed()
1407                            );
1408                        }
1409                        eprintln!("{}", "Error details for debugging:".dimmed());
1410                        eprintln!(
1411                            "{}",
1412                            format!("  - retry_attempt: {}/{}", retry_attempt, MAX_RETRIES)
1413                                .dimmed()
1414                        );
1415                        eprintln!(
1416                            "{}",
1417                            format!(
1418                                "  - continuation_count: {}/{}",
1419                                continuation_count, MAX_CONTINUATIONS
1420                            )
1421                            .dimmed()
1422                        );
1423                        break;
1424                    }
1425                }
1426            }
1427        }
1428        println!();
1429    }
1430
1431    // Clean up terminal layout before exiting (disabled - layout not initialized)
1432    // if let Err(e) = terminal_layout.cleanup() {
1433    //     eprintln!(
1434    //         "{}",
1435    //         format!("Warning: Terminal cleanup failed: {}", e).dimmed()
1436    //     );
1437    // }
1438
1439    Ok(())
1440}
1441
1442// NOTE: wait_for_interrupt function removed - ESC interrupt feature disabled
1443// due to terminal corruption issues with spawn_blocking raw mode handling.
1444// TODO: Re-implement using tool hook callbacks for cleaner interruption.
1445
1446/// Extract tool call records from the hook state for history tracking
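/// Running tools are recorded with a `[IN PROGRESS - may need to be re-run]`
/// marker; finished tools keep a 200-char snippet of their output. Read-only
/// tools (`read_file`, `list_directory`, `analyze_project`) are flagged as
/// droppable since their results can be re-fetched.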
1447async fn extract_tool_calls_from_hook(hook: &ToolDisplayHook) -> Vec<ToolCallRecord> {
1448    let state = hook.state();
1449    let guard = state.lock().await;
1450
1451    guard
1452        .tool_calls
1453        .iter()
1454        .enumerate()
1455        .map(|(i, tc)| {
1456            let result = if tc.is_running {
1457                // Tool was in progress when error occurred
1458                "[IN PROGRESS - may need to be re-run]".to_string()
1459            } else if let Some(output) = &tc.output {
1460                truncate_string(output, 200)
1461            } else {
1462                "completed".to_string()
1463            };
1464
1465            ToolCallRecord {
1466                tool_name: tc.name.clone(),
1467                args_summary: truncate_string(&tc.args, 100),
1468                result_summary: result,
1469                // Generate a unique tool ID for proper message pairing
1470                tool_id: Some(format!("tool_{}_{}", tc.name, i)),
1471                // Mark read-only tools as droppable (their results can be re-fetched)
1472                droppable: matches!(
1473                    tc.name.as_str(),
1474                    "read_file" | "list_directory" | "analyze_project"
1475                ),
1476            }
1477        })
1478        .collect()
1479}
1480
1481/// Extract any agent thinking/messages from the hook for context
1482async fn extract_agent_messages_from_hook(hook: &ToolDisplayHook) -> Vec<String> {
1483    let state = hook.state();
1484    let guard = state.lock().await;
1485    guard.agent_messages.clone()
1486}
1487
1488/// Helper to truncate strings for summaries
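///
/// Illustrative behavior (hypothetical inputs):
/// ```ignore
/// assert_eq!(truncate_string("hello", 10), "hello");
/// assert_eq!(truncate_string("hello world", 8), "hello...");
/// ```
/// Truncation counts `char`s, so multi-byte UTF-8 is never split mid-character.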
1489fn truncate_string(s: &str, max_len: usize) -> String {
1490    if s.len() <= max_len {
1491        s.to_string()
1492    } else {
1493        format!("{}...", s.chars().take(max_len.saturating_sub(3)).collect::<String>())
1494    }
1495}
1496
1497/// Compact large tool outputs by saving them to temp files and replacing with summaries.
1498/// This preserves all data (agent can read the file) while reducing context size.
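/// For example, a 200 KB tool result is written out as
/// `$TMPDIR/syncable-agent-outputs/<tool_result_id>_<unix_millis>.txt` and its
/// in-context text is replaced by a `[COMPACTED OUTPUT]` summary pointing at
/// that file (sizes here are illustrative).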
1499fn compact_large_tool_outputs(messages: &mut [rig::completion::Message], max_chars: usize) {
1500    use rig::completion::message::{Text, ToolResultContent, UserContent};
1501    use std::fs;
1502
1503    // Create temp directory for compacted outputs
1504    let temp_dir = std::env::temp_dir().join("syncable-agent-outputs");
1505    let _ = fs::create_dir_all(&temp_dir);
1506
1507    for msg in messages.iter_mut() {
1508        if let rig::completion::Message::User { content } = msg {
1509            for item in content.iter_mut() {
1510                if let UserContent::ToolResult(tr) = item {
1511                    for trc in tr.content.iter_mut() {
1512                        if let ToolResultContent::Text(text) = trc
1513                            && text.text.len() > max_chars
1514                        {
1515                            // Save full output to temp file
1516                            let file_id = format!(
1517                                "{}_{}.txt",
1518                                tr.id,
1519                                std::time::SystemTime::now()
1520                                    .duration_since(std::time::UNIX_EPOCH)
1521                                    .unwrap()
1522                                    .as_millis()
1523                            );
1524                            let file_path = temp_dir.join(&file_id);
1525
1526                            if let Ok(()) = fs::write(&file_path, &text.text) {
1527                                // Create a smart summary
1528                                let summary = create_output_summary(
1529                                    &text.text,
1530                                    &file_path.display().to_string(),
1531                                    max_chars / 2, // Use half max for summary
1532                                );
1533
1534                                // Replace with summary
1535                                *trc = ToolResultContent::Text(Text { text: summary });
1536                            }
1537                        }
1538                    }
1539                }
1540            }
1541        }
1542    }
1543}
1544
1545/// Create a smart summary of a large output using incremental chunk processing.
1546/// Processes output in logical sections, summarizes each, then combines into actionable summary.
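/// The resulting envelope looks like (path and sizes illustrative):
/// ```ignore
/// [COMPACTED OUTPUT]
/// Full data: /tmp/syncable-agent-outputs/call_0_1700000000000.txt
/// Size: 204800 chars, 4096 lines
///
/// <summary content>
///
/// [Read file with offset/limit for specific sections if needed]
/// ```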
1547fn create_output_summary(full_output: &str, file_path: &str, max_summary_len: usize) -> String {
1548    let total_lines = full_output.lines().count();
1549    let total_chars = full_output.len();
1550
1551    let summary_content =
1552        if full_output.trim_start().starts_with('{') || full_output.trim_start().starts_with('[') {
1553            // JSON output - extract structured summary
1554            summarize_json_incrementally(full_output, max_summary_len)
1555        } else {
1556            // Text output - chunk and summarize
1557            summarize_text_incrementally(full_output, max_summary_len)
1558        };
1559
1560    format!(
1561        "[COMPACTED OUTPUT]\n\
1562        Full data: {}\n\
1563        Size: {} chars, {} lines\n\
1564        \n\
1565        {}\n\
1566        \n\
1567        [Read file with offset/limit for specific sections if needed]",
1568        file_path, total_chars, total_lines, summary_content
1569    )
1570}
1571
1572/// Incrementally summarize JSON output, extracting key fields and prioritizing important items.
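/// As an illustrative sketch, a scan result shaped like
/// `{"summary": {...}, "findings": [...]}` keeps the `summary` block, then
/// buckets `findings` by severity and emits e.g. `[HIGH] 3 items:` followed by
/// a few one-line entries and `... and N more`.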
1573fn summarize_json_incrementally(json_str: &str, max_len: usize) -> String {
1574    let Ok(json) = serde_json::from_str::<serde_json::Value>(json_str) else {
1575        return "Failed to parse JSON".to_string();
1576    };
1577
1578    let mut parts: Vec<String> = Vec::new();
1579    let mut current_len = 0;
1580
1581    match &json {
1582        serde_json::Value::Object(obj) => {
1583            // Priority 1: Summary/stats fields
1584            for key in ["summary", "stats", "metadata", "status"] {
1585                if let Some(v) = obj.get(key) {
1586                    let s = format!("{}:\n{}", key, indent_json(v, 2, 500));
1587                    if current_len + s.len() < max_len {
1588                        parts.push(s.clone());
1589                        current_len += s.len();
1590                    }
1591                }
1592            }
1593
1594            // Priority 2: Error/critical items (summarize each)
1595            for key in [
1596                "errors",
1597                "critical",
1598                "failures",
1599                "issues",
1600                "findings",
1601                "recommendations",
1602            ] {
1603                if let Some(serde_json::Value::Array(arr)) = obj.get(key) {
1604                    if arr.is_empty() {
1605                        continue;
1606                    }
1607                    parts.push(format!("\n{} ({} items):", key, arr.len()));
1608
1609                    // Group by severity/type if present
1610                    let mut by_severity: std::collections::HashMap<
1611                        String,
1612                        Vec<&serde_json::Value>,
1613                    > = std::collections::HashMap::new();
1614
1615                    for item in arr {
1616                        let severity = item
1617                            .get("severity")
1618                            .or_else(|| item.get("level"))
1619                            .or_else(|| item.get("type"))
1620                            .and_then(|v| v.as_str())
1621                            .unwrap_or("other")
1622                            .to_string();
1623                        by_severity.entry(severity).or_default().push(item);
1624                    }
1625
1626                    // Show critical/high first, summarize others
1627                    for sev in [
1628                        "critical", "high", "error", "warning", "medium", "low", "info", "other",
1629                    ] {
1630                        if let Some(items) = by_severity.get(sev) {
1631                            let show_count = match sev {
1632                                "critical" | "high" | "error" => 5.min(items.len()),
1633                                "warning" | "medium" => 3.min(items.len()),
1634                                _ => 2.min(items.len()),
1635                            };
1636
1637                            if !items.is_empty() {
1638                                let s =
1639                                    format!("  [{}] {} items:", sev.to_uppercase(), items.len());
1640                                if current_len + s.len() < max_len {
1641                                    parts.push(s.clone());
1642                                    current_len += s.len();
1643
1644                                    for item in items.iter().take(show_count) {
1645                                        let item_summary = summarize_single_item(item);
1646                                        if current_len + item_summary.len() < max_len {
1647                                            parts.push(format!("    • {}", item_summary));
1648                                            current_len += item_summary.len();
1649                                        }
1650                                    }
1651
1652                                    if items.len() > show_count {
1653                                        parts.push(format!(
1654                                            "    ... and {} more",
1655                                            items.len() - show_count
1656                                        ));
1657                                    }
1658                                }
1659                            }
1660                        }
1661                    }
1662                }
1663            }
1664
1665            // Priority 3: Show remaining top-level keys
1666            let shown_keys: std::collections::HashSet<&str> = [
1667                "summary",
1668                "stats",
1669                "metadata",
1670                "status",
1671                "errors",
1672                "critical",
1673                "failures",
1674                "issues",
1675                "findings",
1676                "recommendations",
1677            ]
1678            .iter()
1679            .cloned()
1680            .collect();
1681
1682            let other_keys: Vec<_> = obj
1683                .keys()
1684                .filter(|k| !shown_keys.contains(k.as_str()))
1685                .collect();
1686            if !other_keys.is_empty() && current_len + 200 < max_len {
1687                parts.push(format!("\nOther fields: {:?}", other_keys));
1688            }
1689        }
1690        serde_json::Value::Array(arr) => {
1691            parts.push(format!("Array with {} items", arr.len()));
1692
1693            // Try to group by type/severity
1694            for (i, item) in arr.iter().take(10).enumerate() {
1695                let s = format!("[{}] {}", i, summarize_single_item(item));
1696                if current_len + s.len() < max_len {
1697                    parts.push(s.clone());
1698                    current_len += s.len();
1699                }
1700            }
1701            if arr.len() > 10 {
1702                parts.push(format!("... and {} more items", arr.len() - 10));
1703            }
1704        }
1705        _ => {
1706            parts.push(truncate_json_value(&json, max_len));
1707        }
1708    }
1709
1710    parts.join("\n")
1711}
1712
1713/// Summarize a single JSON item (issue, error, etc.) into a one-liner.
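///
/// Illustrative input/output (hypothetical finding):
/// ```ignore
/// let item = serde_json::json!({
///     "message": "SQL injection risk",
///     "file": "src/db.rs",
///     "line": 42
/// });
/// assert_eq!(summarize_single_item(&item), "SQL injection risk at src/db.rs:42");
/// ```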
1714fn summarize_single_item(item: &serde_json::Value) -> String {
1715    let mut parts: Vec<String> = Vec::new();
1716
1717    // Extract common fields
1718    for key in [
1719        "message",
1720        "description",
1721        "title",
1722        "name",
1723        "file",
1724        "path",
1725        "code",
1726        "rule",
1727    ] {
1728        if let Some(v) = item.get(key)
1729            && let Some(s) = v.as_str()
1730        {
1731            parts.push(truncate_string(s, 80));
1732            break; // Only take first descriptive field
1733        }
1734    }
1735
1736    // Add location if present
1737    if let Some(file) = item
1738        .get("file")
1739        .or_else(|| item.get("path"))
1740        .and_then(|v| v.as_str())
1741    {
1742        if let Some(line) = item.get("line").and_then(|v| v.as_u64()) {
1743            parts.push(format!("at {}:{}", file, line));
1744        } else {
1745            parts.push(format!("in {}", truncate_string(file, 40)));
1746        }
1747    }
1748
1749    if parts.is_empty() {
1750        truncate_json_value(item, 100)
1751    } else {
1752        parts.join(" ")
1753    }
1754}
1755
1756/// Indent JSON for display.
1757fn indent_json(v: &serde_json::Value, indent: usize, max_len: usize) -> String {
1758    let s = serde_json::to_string_pretty(v).unwrap_or_else(|_| v.to_string());
1759    let prefix = " ".repeat(indent);
1760    let indented: String = s
1761        .lines()
1762        .map(|l| format!("{}{}", prefix, l))
1763        .collect::<Vec<_>>()
1764        .join("\n");
1765    if indented.len() > max_len {
1766        format!("{}...", indented.chars().take(max_len.saturating_sub(3)).collect::<String>())
1767    } else {
1768        indented
1769    }
1770}
1771
1772/// Incrementally summarize text output by processing in chunks.
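/// A report whose headers end in `:` (say `SUMMARY:` and `DETAILS:`) yields a
/// section index such as `  [L1] SUMMARY: (12 lines)`, then a short content
/// preview and, space permitting, the last few lines of output.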
1773fn summarize_text_incrementally(text: &str, max_len: usize) -> String {
1774    let lines: Vec<&str> = text.lines().collect();
1775    let mut parts: Vec<String> = Vec::new();
1776    let mut current_len = 0;
1777
1778    // Look for section headers or key patterns
1779    let mut sections: Vec<(usize, &str)> = Vec::new();
1780    for (i, line) in lines.iter().enumerate() {
1781        // Detect headers (lines that look like titles)
1782        if line.starts_with('#')
1783            || line.starts_with("==")
1784            || line.starts_with("--")
1785            || (line.ends_with(':') && line.len() < 50)
1786            || (!line.trim().is_empty() && line.chars().all(|c| c.is_uppercase() || c.is_whitespace()))
1787        {
1788            sections.push((i, line));
1789        }
1790    }
1791
1792    if !sections.is_empty() {
1793        // Summarize by sections
1794        parts.push(format!("Found {} sections:", sections.len()));
1795        for (i, (line_num, header)) in sections.iter().enumerate() {
1796            let next_section = sections.get(i + 1).map(|(n, _)| *n).unwrap_or(lines.len());
1797            let section_lines = next_section - line_num;
1798
1799            let s = format!(
1800                "  [L{}] {} ({} lines)",
1801                line_num + 1,
1802                header.trim(),
1803                section_lines
1804            );
1805            if current_len + s.len() < max_len / 2 {
1806                parts.push(s.clone());
1807                current_len += s.len();
1808            }
1809        }
1810        parts.push("".to_string());
1811    }
1812
1813    // Show first chunk
1814    let preview_lines = 15.min(lines.len());
1815    parts.push("Content preview:".to_string());
1816    for line in lines.iter().take(preview_lines) {
1817        let s = format!("  {}", truncate_string(line, 120));
1818        if current_len + s.len() < max_len * 3 / 4 {
1819            parts.push(s.clone());
1820            current_len += s.len();
1821        }
1822    }
1823
1824    if lines.len() > preview_lines {
1825        parts.push(format!(
1826            "  ... ({} more lines)",
1827            lines.len() - preview_lines
1828        ));
1829    }
1830
1831    // Show last few lines if space permits
1832    if lines.len() > preview_lines * 2 && current_len + 500 < max_len {
1833        parts.push("\nEnd of output:".to_string());
1834        for line in lines.iter().skip(lines.len() - 5) {
1835            let s = format!("  {}", truncate_string(line, 120));
1836            if current_len + s.len() < max_len {
1837                parts.push(s.clone());
1838                current_len += s.len();
1839            }
1840        }
1841    }
1842
1843    parts.join("\n")
1844}
1845
1846/// Truncate a JSON value for display
1847fn truncate_json_value(v: &serde_json::Value, max_len: usize) -> String {
1848    let s = v.to_string();
1849    if s.len() <= max_len {
1850        s
1851    } else {
1852        format!("{}...", s.chars().take(max_len.saturating_sub(3)).collect::<String>())
1853    }
1854}
1855
1856/// Simplify history for OpenAI Responses API compatibility with reasoning models.
1857///
1858/// OpenAI's Responses API has strict pairing requirements:
1859/// - Reasoning items must be followed by their output (text or function_call)
1860/// - Function_call items must be preceded by their reasoning item
1861///
1862/// When passing history across user turns, these pairings get broken, causing errors like:
1863/// - "Item 'rs_...' of type 'reasoning' was provided without its required following item"
1864/// - "Item 'fc_...' of type 'function_call' was provided without its required 'reasoning' item"
1865///
1866/// Solution: Keep only User messages and final Assistant Text responses.
1867/// This preserves conversation context without the complex internal tool/reasoning structure.
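///
/// Sketch of the effect on one turn (message contents elided):
/// ```ignore
/// // Before: User(text), Assistant(Reasoning + ToolCall), User(ToolResult), Assistant(Text)
/// // After:  User(text), Assistant(Text)
/// ```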
1868fn simplify_history_for_openai_reasoning(history: &mut Vec<rig::completion::Message>) {
1869    use rig::completion::message::{AssistantContent, UserContent};
1870    use rig::one_or_many::OneOrMany;
1871
1872    // Filter to keep only User text messages and Assistant text messages
1873    let simplified: Vec<rig::completion::Message> = history
1874        .iter()
1875        .filter_map(|msg| match msg {
1876            // Keep User messages, but only text content (not tool results)
1877            rig::completion::Message::User { content } => {
1878                let text_only: Vec<UserContent> = content
1879                    .iter()
1880                    .filter(|c| matches!(c, UserContent::Text(_)))
1881                    .cloned()
1882                    .collect();
1883                if text_only.is_empty() {
1884                    None
1885                } else {
1886                    let mut iter = text_only.into_iter();
1887                    let first = iter.next().unwrap();
1888                    let rest: Vec<_> = iter.collect();
1889                    let new_content = if rest.is_empty() {
1890                        OneOrMany::one(first)
1891                    } else {
1892                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
1893                    };
1894                    Some(rig::completion::Message::User {
1895                        content: new_content,
1896                    })
1897                }
1898            }
1899            // Keep Assistant messages, but only text content (not reasoning, tool calls)
1900            rig::completion::Message::Assistant { content, id } => {
1901                let text_only: Vec<AssistantContent> = content
1902                    .iter()
1903                    .filter(|c| matches!(c, AssistantContent::Text(_)))
1904                    .cloned()
1905                    .collect();
1906                if text_only.is_empty() {
1907                    None
1908                } else {
1909                    let mut iter = text_only.into_iter();
1910                    let first = iter.next().unwrap();
1911                    let rest: Vec<_> = iter.collect();
1912                    let new_content = if rest.is_empty() {
1913                        OneOrMany::one(first)
1914                    } else {
1915                        OneOrMany::many(std::iter::once(first).chain(rest)).unwrap()
1916                    };
1917                    Some(rig::completion::Message::Assistant {
1918                        content: new_content,
1919                        id: id.clone(),
1920                    })
1921                }
1922            }
1923        })
1924        .collect();
1925
1926    *history = simplified;
1927}
1928
1929/// Ensure history starts with a User message for OpenAI Responses API compatibility.
1930///
1931/// OpenAI's Responses API requires that reasoning items are properly structured within
1932/// a conversation. When history truncation leaves an Assistant message (containing
1933/// Reasoning blocks) at the start, OpenAI rejects it with:
1934/// "Item 'rs_...' of type 'reasoning' was provided without its required following item."
1935///
1936/// This function inserts a synthetic User message at the beginning if history starts
1937/// with an Assistant message, preserving the context while maintaining valid structure.
1938fn ensure_history_starts_with_user(history: &mut Vec<rig::completion::Message>) {
1939    if !history.is_empty()
1940        && matches!(
1941            history.first(),
1942            Some(rig::completion::Message::Assistant { .. })
1943        )
1944    {
1945        // Insert synthetic User message at the beginning to maintain valid conversation structure
1946        history.insert(
1947            0,
1948            rig::completion::Message::User {
1949                content: rig::one_or_many::OneOrMany::one(
1950                    rig::completion::message::UserContent::text("(Conversation continued)"),
1951                ),
1952            },
1953        );
1954    }
1955}
1956
1957/// Estimate token count from raw rig Messages
1958/// This is used for context length management to prevent "input too long" errors.
1959/// Estimates ~4 characters per token.
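///
/// Minimal sketch of the heuristic (mirrors the message shapes used in this file):
/// ```ignore
/// let msg = rig::completion::Message::User {
///     content: rig::one_or_many::OneOrMany::one(
///         rig::completion::message::UserContent::text("x".repeat(400)),
///     ),
/// };
/// assert_eq!(estimate_raw_history_tokens(&[msg]), 100); // 400 chars / 4
/// ```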
1960fn estimate_raw_history_tokens(messages: &[rig::completion::Message]) -> usize {
1961    use rig::completion::message::{AssistantContent, ToolResultContent, UserContent};
1962
1963    messages
1964        .iter()
1965        .map(|msg| -> usize {
1966            match msg {
1967                rig::completion::Message::User { content } => {
1968                    content
1969                        .iter()
1970                        .map(|c| -> usize {
1971                            match c {
1972                                UserContent::Text(t) => t.text.len() / 4,
1973                                UserContent::ToolResult(tr) => {
1974                                    // Tool results can be HUGE - properly estimate them
1975                                    tr.content
1976                                        .iter()
1977                                        .map(|trc| match trc {
1978                                            ToolResultContent::Text(t) => t.text.len() / 4,
1979                                            _ => 100,
1980                                        })
1981                                        .sum::<usize>()
1982                                }
1983                                _ => 100, // Estimate for images/documents
1984                            }
1985                        })
1986                        .sum::<usize>()
1987                }
1988                rig::completion::Message::Assistant { content, .. } => {
1989                    content
1990                        .iter()
1991                        .map(|c| -> usize {
1992                            match c {
1993                                AssistantContent::Text(t) => t.text.len() / 4,
1994                                AssistantContent::ToolCall(tc) => {
1995                                    // arguments is serde_json::Value, convert to string for length estimate
1996                                    let args_len = tc.function.arguments.to_string().len();
1997                                    (tc.function.name.len() + args_len) / 4
1998                                }
1999                                _ => 100,
2000                            }
2001                        })
2002                        .sum::<usize>()
2003                }
2004            }
2005        })
2006        .sum()
2007}
2008
2009/// Find a plan_create tool call in the list and extract plan info
2010/// Returns (plan_path, task_count) if found
2011fn find_plan_create_call(tool_calls: &[ToolCallRecord]) -> Option<(String, usize)> {
2012    for tc in tool_calls {
2013        if tc.tool_name == "plan_create" {
2014            // Try to parse the result_summary as JSON to extract plan_path
2015            // Note: result_summary may be truncated, so we have multiple fallbacks
2016            let plan_path =
2017                if let Ok(result) = serde_json::from_str::<serde_json::Value>(&tc.result_summary) {
2018                    result
2019                        .get("plan_path")
2020                        .and_then(|v| v.as_str())
2021                        .map(|s| s.to_string())
2022                } else {
2023                    None
2024                };
2025
2026            // If JSON parsing failed, find the most recently created plan file
2027            // This is more reliable than trying to reconstruct the path from truncated args
2028            let plan_path = plan_path.unwrap_or_else(|| {
2029                find_most_recent_plan_file().unwrap_or_else(|| "plans/plan.md".to_string())
2030            });
2031
2032            // Count tasks by reading the plan file directly
2033            let task_count = count_tasks_in_plan_file(&plan_path).unwrap_or(0);
2034
2035            return Some((plan_path, task_count));
2036        }
2037    }
2038    None
2039}
2040
2041/// Find the most recently created plan file in the plans directory
2042fn find_most_recent_plan_file() -> Option<String> {
2043    let plans_dir = std::env::current_dir().ok()?.join("plans");
2044    if !plans_dir.exists() {
2045        return None;
2046    }
2047
2048    let mut newest: Option<(std::path::PathBuf, std::time::SystemTime)> = None;
2049
2050    for entry in std::fs::read_dir(&plans_dir).ok()?.flatten() {
2051        let path = entry.path();
2052        if path.extension().is_some_and(|e| e == "md")
2053            && let Ok(metadata) = entry.metadata()
2054            && let Ok(modified) = metadata.modified()
2055            && newest.as_ref().map(|(_, t)| modified > *t).unwrap_or(true)
2056        {
2057            newest = Some((path, modified));
2058        }
2059    }
2060
2061    newest.map(|(path, _)| {
2062        // Return relative path
2063        path.strip_prefix(std::env::current_dir().unwrap_or_default())
2064            .map(|p| p.display().to_string())
2065            .unwrap_or_else(|_| path.display().to_string())
2066    })
2067}
2068
2069/// Count tasks (checkbox items) in a plan file
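///
/// Given a plan containing (illustrative):
/// ```ignore
/// - [ ] scaffold module
/// - [x] write parser
/// - [~] wire up CLI
/// notes, not a task
/// ```
/// this returns `Some(3)`: only checkbox lines match the regex below.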
2070fn count_tasks_in_plan_file(plan_path: &str) -> Option<usize> {
2071    use regex::Regex;
2072
2073    // Try both relative and absolute paths
2074    let path = std::path::Path::new(plan_path);
2075    let content = if path.exists() {
2076        std::fs::read_to_string(path).ok()?
2077    } else {
2078        // Try with current directory
2079        std::fs::read_to_string(std::env::current_dir().ok()?.join(plan_path)).ok()?
2080    };
2081
2082    // Count task checkboxes: - [ ], - [x], - [~], - [!]
2083    let task_regex = Regex::new(r"^\s*-\s*\[[ x~!]\]").ok()?;
2084    let count = content
2085        .lines()
2086        .filter(|line| task_regex.is_match(line))
2087        .count();
2088
2089    Some(count)
2090}
2091
2092/// Check if an error is a truncation/JSON parsing error that can be recovered via continuation
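///
/// Illustrative classification (hypothetical error strings):
/// ```ignore
/// assert!(is_truncation_error("JsonError: EOF while parsing a string"));
/// assert!(!is_truncation_error("connection refused"));
/// ```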
2093fn is_truncation_error(err_str: &str) -> bool {
2094    err_str.contains("JsonError")
2095        || err_str.contains("EOF while parsing")
2096        || err_str.contains("JSON")
2097        || err_str.contains("unexpected end")
2098}
2099
2100/// Check if error is "input too long" - context exceeds model limit
2101/// This happens when conversation history grows beyond what the model can handle.
2102/// Recovery: compact history and retry with reduced context.
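///
/// Illustrative classification (hypothetical error strings):
/// ```ignore
/// assert!(is_input_too_long_error("Input is too long for requested model"));
/// assert!(!is_input_too_long_error("429 Too Many Requests"));
/// ```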
2103fn is_input_too_long_error(err_str: &str) -> bool {
2104    err_str.contains("too long")
2105        || err_str.contains("Too long")
2106        || err_str.contains("context length")
2107        || err_str.contains("maximum context")
2108        || err_str.contains("exceeds the model")
2109        || err_str.contains("Input is too long")
2110}
2111
2112/// Build a continuation prompt that tells the AI what work was completed
2113/// and asks it to continue from where it left off
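///
/// A minimal sketch (hypothetical record values):
/// ```ignore
/// let done = vec![ToolCallRecord {
///     tool_name: "read_file".into(),
///     args_summary: "src/main.rs".into(),
///     result_summary: "completed".into(),
///     tool_id: Some("tool_read_file_0".into()),
///     droppable: true,
/// }];
/// let prompt = build_continuation_prompt("Audit the project", &done, &[]);
/// assert!(prompt.contains("== FILES ALREADY READ (do NOT read again) =="));
/// ```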
2114fn build_continuation_prompt(
2115    original_task: &str,
2116    completed_tools: &[ToolCallRecord],
2117    agent_thinking: &[String],
2118) -> String {
2119    use std::collections::HashSet;
2120
2121    // Group tools by type and extract unique files read
2122    let mut files_read: HashSet<String> = HashSet::new();
2123    let mut files_written: HashSet<String> = HashSet::new();
2124    let mut dirs_listed: HashSet<String> = HashSet::new();
2125    let mut other_tools: Vec<String> = Vec::new();
2126    let mut in_progress: Vec<String> = Vec::new();
2127
2128    for tool in completed_tools {
2129        let is_in_progress = tool.result_summary.contains("IN PROGRESS");
2130
2131        if is_in_progress {
2132            in_progress.push(format!("{}({})", tool.tool_name, tool.args_summary));
2133            continue;
2134        }
2135
2136        match tool.tool_name.as_str() {
2137            "read_file" => {
2138                // Extract path from args
2139                files_read.insert(tool.args_summary.clone());
2140            }
2141            "write_file" | "write_files" => {
2142                files_written.insert(tool.args_summary.clone());
2143            }
2144            "list_directory" => {
2145                dirs_listed.insert(tool.args_summary.clone());
2146            }
2147            _ => {
2148                other_tools.push(format!(
2149                    "{}({})",
2150                    tool.tool_name,
2151                    truncate_string(&tool.args_summary, 40)
2152                ));
2153            }
2154        }
2155    }
2156
2157    let mut prompt = format!(
2158        "[CONTINUE] Your previous response was interrupted. DO NOT repeat completed work.\n\n\
2159        Original task: {}\n",
2160        truncate_string(original_task, 500)
2161    );
2162
2163    // Show files already read - CRITICAL for preventing re-reads
2164    if !files_read.is_empty() {
2165        prompt.push_str("\n== FILES ALREADY READ (do NOT read again) ==\n");
2166        for file in &files_read {
2167            prompt.push_str(&format!("  - {}\n", file));
2168        }
2169    }
2170
2171    if !dirs_listed.is_empty() {
2172        prompt.push_str("\n== DIRECTORIES ALREADY LISTED ==\n");
2173        for dir in &dirs_listed {
2174            prompt.push_str(&format!("  - {}\n", dir));
2175        }
2176    }
2177
2178    if !files_written.is_empty() {
2179        prompt.push_str("\n== FILES ALREADY WRITTEN ==\n");
2180        for file in &files_written {
2181            prompt.push_str(&format!("  - {}\n", file));
2182        }
2183    }
2184
2185    if !other_tools.is_empty() {
2186        prompt.push_str("\n== OTHER COMPLETED ACTIONS ==\n");
2187        for tool in other_tools.iter().take(20) {
2188            prompt.push_str(&format!("  - {}\n", tool));
2189        }
2190        if other_tools.len() > 20 {
2191            prompt.push_str(&format!("  ... and {} more\n", other_tools.len() - 20));
2192        }
2193    }
2194
2195    if !in_progress.is_empty() {
2196        prompt.push_str("\n== INTERRUPTED (may need re-run) ==\n");
2197        for tool in &in_progress {
2198            prompt.push_str(&format!("  ⚠ {}\n", tool));
2199        }
2200    }
2201
2202    // Include last thinking context if available
2203    if let Some(last_thought) = agent_thinking.last() {
2204        prompt.push_str(&format!(
2205            "\n== YOUR LAST THOUGHTS ==\n\"{}\"\n",
2206            truncate_string(last_thought, 300)
2207        ));
2208    }
2209
2210    prompt.push_str("\n== INSTRUCTIONS ==\n");
2211    prompt.push_str("IMPORTANT: Your previous response was too long and got cut off.\n");
2212    prompt.push_str("1. Do NOT re-read files listed above - they are already in context.\n");
2213    prompt.push_str("2. If writing a document, write it in SECTIONS - complete one section now, then continue.\n");
2214    prompt.push_str("3. Keep your response SHORT and focused. Better to complete small chunks than fail on large ones.\n");
2215    prompt.push_str("4. If the task involves writing a file, START WRITING NOW - don't explain what you'll do.\n");
2216
2217    prompt
2218}
2219
2220/// Run a single query and return the response
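///
/// Example invocation (inside an async context, with the provider's API key set):
/// ```ignore
/// let answer = run_query(
///     std::path::Path::new("."),
///     "Summarize the Dockerfile lint findings",
///     ProviderType::Anthropic,
///     None, // use the provider's default model
/// )
/// .await?;
/// println!("{answer}");
/// ```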
2221pub async fn run_query(
2222    project_path: &Path,
2223    query: &str,
2224    provider: ProviderType,
2225    model: Option<String>,
2226) -> AgentResult<String> {
2227    use tools::*;
2228
2229    let project_path_buf = project_path.to_path_buf();
2230
2231    // Background process manager for Prometheus port-forwards (single query context)
2232    let bg_manager = Arc::new(BackgroundProcessManager::new());
2233    // Select prompt based on query type (analysis vs generation)
2234    // For single queries (non-interactive), always use standard mode
2235    let preamble = get_system_prompt(project_path, Some(query), PlanMode::default());
2236    let is_generation = prompts::is_generation_query(query);
2237
2238    match provider {
2239        ProviderType::OpenAI => {
2240            // Use Responses API (default) for reasoning model support
2241            let client = openai::Client::from_env();
2242            let model_name = model.as_deref().unwrap_or("gpt-5.2");
2243
2244            let mut builder = client
2245                .agent(model_name)
2246                .preamble(&preamble)
2247                .max_tokens(4096)
2248                .tool(AnalyzeTool::new(project_path_buf.clone()))
2249                .tool(SecurityScanTool::new(project_path_buf.clone()))
2250                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
2251                .tool(HadolintTool::new(project_path_buf.clone()))
2252                .tool(DclintTool::new(project_path_buf.clone()))
2253                .tool(KubelintTool::new(project_path_buf.clone()))
2254                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
2255                .tool(K8sCostsTool::new(project_path_buf.clone()))
2256                .tool(K8sDriftTool::new(project_path_buf.clone()))
2257                .tool(HelmlintTool::new(project_path_buf.clone()))
2258                .tool(TerraformFmtTool::new(project_path_buf.clone()))
2259                .tool(TerraformValidateTool::new(project_path_buf.clone()))
2260                .tool(TerraformInstallTool::new())
2261                .tool(ReadFileTool::new(project_path_buf.clone()))
2262                .tool(ListDirectoryTool::new(project_path_buf.clone()))
2263                .tool(WebFetchTool::new())
2264                // Prometheus discovery and connection tools for live K8s analysis
2265                .tool(PrometheusDiscoverTool::new())
2266                .tool(PrometheusConnectTool::new(bg_manager.clone()))
2267                // RAG retrieval tools for compressed tool outputs
2268                .tool(RetrieveOutputTool::new())
2269                .tool(ListOutputsTool::new())
2270                // Platform tools for project management
2271                .tool(ListOrganizationsTool::new())
2272                .tool(ListProjectsTool::new())
2273                .tool(SelectProjectTool::new())
2274                .tool(CurrentContextTool::new())
2275                .tool(OpenProviderSettingsTool::new())
2276                .tool(CheckProviderConnectionTool::new())
2277                .tool(ListDeploymentCapabilitiesTool::new())
2278                // Deployment tools for service management
2279                .tool(CreateDeploymentConfigTool::new())
2280                .tool(DeployServiceTool::new(project_path_buf.clone()))
2281                .tool(ListDeploymentConfigsTool::new())
2282                .tool(TriggerDeploymentTool::new())
2283                .tool(GetDeploymentStatusTool::new())
2284                .tool(ListDeploymentsTool::new())
2285                .tool(GetServiceLogsTool::new());
2286
2287            // Add generation tools if this is a generation query
2288            if is_generation {
2289                builder = builder
2290                    .tool(WriteFileTool::new(project_path_buf.clone()))
2291                    .tool(WriteFilesTool::new(project_path_buf.clone()))
2292                    .tool(ShellTool::new(project_path_buf.clone()));
2293            }
2294
2295            // Enable reasoning for OpenAI reasoning models
2296            let model_lower = model_name.to_lowercase();
2297            let is_reasoning_model = model_lower.starts_with("gpt-5")
2298                || model_lower.starts_with("gpt5")
2299                || model_lower.starts_with("o1")
2300                || model_lower.starts_with("o3")
2301                || model_lower.starts_with("o4");
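
            // The prefixes above match e.g. "gpt-5.2", "o1-preview", "o3-mini",
            // and "o4-mini"; any other model builds without reasoning params.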
            let agent = if is_reasoning_model {
                let reasoning_params = serde_json::json!({
                    "reasoning": {
                        "effort": "medium",
                        "summary": "detailed"
                    }
                });
                builder.additional_params(reasoning_params).build()
            } else {
                builder.build()
            };

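            // multi_turn(50) allows up to 50 tool-call turns before a final answer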
            agent
                .prompt(query)
                .multi_turn(50)
                .await
                .map_err(|e| AgentError::ProviderError(e.to_string()))
        }
        ProviderType::Anthropic => {
            let client = anthropic::Client::from_env();
            let model_name = model.as_deref().unwrap_or("claude-sonnet-4-5-20250929");

            // TODO: Extended thinking for Claude is disabled because rig doesn't properly
            // handle thinking blocks in multi-turn conversations with tool use.
            // See: forge/crates/forge_services/src/provider/bedrock/provider.rs for reference.

            let mut builder = client
                .agent(model_name)
                .preamble(&preamble)
                .max_tokens(4096)
                .tool(AnalyzeTool::new(project_path_buf.clone()))
                .tool(SecurityScanTool::new(project_path_buf.clone()))
                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                .tool(HadolintTool::new(project_path_buf.clone()))
                .tool(DclintTool::new(project_path_buf.clone()))
                .tool(KubelintTool::new(project_path_buf.clone()))
                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                .tool(K8sCostsTool::new(project_path_buf.clone()))
                .tool(K8sDriftTool::new(project_path_buf.clone()))
                .tool(HelmlintTool::new(project_path_buf.clone()))
                .tool(TerraformFmtTool::new(project_path_buf.clone()))
                .tool(TerraformValidateTool::new(project_path_buf.clone()))
                .tool(TerraformInstallTool::new())
                .tool(ReadFileTool::new(project_path_buf.clone()))
                .tool(ListDirectoryTool::new(project_path_buf.clone()))
                .tool(WebFetchTool::new())
                // Prometheus discovery and connection tools for live K8s analysis
                .tool(PrometheusDiscoverTool::new())
                .tool(PrometheusConnectTool::new(bg_manager.clone()))
                // RAG retrieval tools for compressed tool outputs
                .tool(RetrieveOutputTool::new())
                .tool(ListOutputsTool::new())
                // Platform tools for project management
                .tool(ListOrganizationsTool::new())
                .tool(ListProjectsTool::new())
                .tool(SelectProjectTool::new())
                .tool(CurrentContextTool::new())
                .tool(OpenProviderSettingsTool::new())
                .tool(CheckProviderConnectionTool::new())
                .tool(ListDeploymentCapabilitiesTool::new())
                // Deployment tools for service management
                .tool(CreateDeploymentConfigTool::new())
                .tool(DeployServiceTool::new(project_path_buf.clone()))
                .tool(ListDeploymentConfigsTool::new())
                .tool(TriggerDeploymentTool::new())
                .tool(GetDeploymentStatusTool::new())
                .tool(ListDeploymentsTool::new())
                .tool(GetServiceLogsTool::new());

            // Add generation tools if this is a generation query
            if is_generation {
                builder = builder
                    .tool(WriteFileTool::new(project_path_buf.clone()))
                    .tool(WriteFilesTool::new(project_path_buf.clone()))
                    .tool(ShellTool::new(project_path_buf.clone()));
            }

            let agent = builder.build();

            agent
                .prompt(query)
                .multi_turn(50)
                .await
                .map_err(|e| AgentError::ProviderError(e.to_string()))
        }
        ProviderType::Bedrock => {
            // Bedrock provider via rig-bedrock; same pattern as Anthropic
            let client = crate::bedrock::client::Client::from_env();
            let model_name = model
                .as_deref()
                .unwrap_or("global.anthropic.claude-sonnet-4-5-20250929-v1:0");

            // Extended thinking for Claude via Bedrock
            let thinking_params = serde_json::json!({
                "thinking": {
                    "type": "enabled",
                    "budget_tokens": 16000
                }
            });
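            // Anthropic requires budget_tokens to stay below max_tokens; 16000
            // sits well under the 64000 output cap configured below.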

            let mut builder = client
                .agent(model_name)
                .preamble(&preamble)
                .max_tokens(64000) // Max output tokens for Claude Sonnet on Bedrock
                .tool(AnalyzeTool::new(project_path_buf.clone()))
                .tool(SecurityScanTool::new(project_path_buf.clone()))
                .tool(VulnerabilitiesTool::new(project_path_buf.clone()))
                .tool(HadolintTool::new(project_path_buf.clone()))
                .tool(DclintTool::new(project_path_buf.clone()))
                .tool(KubelintTool::new(project_path_buf.clone()))
                .tool(K8sOptimizeTool::new(project_path_buf.clone()))
                .tool(K8sCostsTool::new(project_path_buf.clone()))
                .tool(K8sDriftTool::new(project_path_buf.clone()))
                .tool(HelmlintTool::new(project_path_buf.clone()))
                .tool(TerraformFmtTool::new(project_path_buf.clone()))
                .tool(TerraformValidateTool::new(project_path_buf.clone()))
                .tool(TerraformInstallTool::new())
                .tool(ReadFileTool::new(project_path_buf.clone()))
                .tool(ListDirectoryTool::new(project_path_buf.clone()))
                .tool(WebFetchTool::new())
                // Prometheus discovery and connection tools for live K8s analysis
                .tool(PrometheusDiscoverTool::new())
                .tool(PrometheusConnectTool::new(bg_manager.clone()))
                // RAG retrieval tools for compressed tool outputs
                .tool(RetrieveOutputTool::new())
                .tool(ListOutputsTool::new())
                // Platform tools for project management
                .tool(ListOrganizationsTool::new())
                .tool(ListProjectsTool::new())
                .tool(SelectProjectTool::new())
                .tool(CurrentContextTool::new())
                .tool(OpenProviderSettingsTool::new())
                .tool(CheckProviderConnectionTool::new())
                .tool(ListDeploymentCapabilitiesTool::new())
                // Deployment tools for service management
                .tool(CreateDeploymentConfigTool::new())
                .tool(DeployServiceTool::new(project_path_buf.clone()))
                .tool(ListDeploymentConfigsTool::new())
                .tool(TriggerDeploymentTool::new())
                .tool(GetDeploymentStatusTool::new())
                .tool(ListDeploymentsTool::new())
                .tool(GetServiceLogsTool::new());

            // Add generation tools if this is a generation query
            if is_generation {
                builder = builder
                    .tool(WriteFileTool::new(project_path_buf.clone()))
                    .tool(WriteFilesTool::new(project_path_buf.clone()))
                    .tool(ShellTool::new(project_path_buf.clone()));
            }

            let agent = builder.additional_params(thinking_params).build();

            agent
                .prompt(query)
                .multi_turn(50)
                .await
                .map_err(|e| AgentError::ProviderError(e.to_string()))
        }
    }
}
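
// Sketch (an assumption, not part of the current codebase): all three provider
// branches in `run_query` attach an identical core tool set, so a macro like
// this one, defined above the `match`, could keep the builder chains in sync.
// It relies only on each provider's agent builder exposing the same
// `.tool(...)` method, which the branches above already assume.
#[allow(unused_macros)]
macro_rules! attach_core_tools {
    ($builder:expr, $path:expr, $bg:expr) => {
        $builder
            .tool(AnalyzeTool::new($path.clone()))
            .tool(SecurityScanTool::new($path.clone()))
            .tool(VulnerabilitiesTool::new($path.clone()))
            .tool(ReadFileTool::new($path.clone()))
            .tool(ListDirectoryTool::new($path.clone()))
            .tool(WebFetchTool::new())
            .tool(PrometheusDiscoverTool::new())
            .tool(PrometheusConnectTool::new($bg.clone()))
        // ...and so on for the remaining shared tools listed above.
    };
}

// Illustrative usage at each branch:
//     let mut builder = attach_core_tools!(
//         client.agent(model_name).preamble(&preamble).max_tokens(4096),
//         project_path_buf,
//         bg_manager
//     );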