// agent_code_lib/query/mod.rs
1//! Query engine: the core agent loop.
2//!
3//! Implements the agentic cycle:
4//!
5//! 1. Auto-compact if context nears the window limit
6//! 2. Microcompact stale tool results
7//! 3. Call LLM with streaming
8//! 4. Accumulate response content blocks
9//! 5. Handle errors (prompt-too-long, rate limits, max-output-tokens)
10//! 6. Extract tool_use blocks
11//! 7. Execute tools (concurrent/serial batching)
12//! 8. Inject tool results into history
13//! 9. Repeat from step 1 until no tool_use or max turns
14
15pub mod source;
16
17use std::path::PathBuf;
18use std::sync::Arc;
19
20use tokio_util::sync::CancellationToken;
21use tracing::{debug, info, warn};
22use uuid::Uuid;
23
24use crate::hooks::{HookEvent, HookRegistry};
25use crate::llm::message::*;
26use crate::llm::provider::{Provider, ProviderError, ProviderRequest};
27use crate::llm::stream::StreamEvent;
28use crate::permissions::PermissionChecker;
29use crate::services::compact::{self, CompactTracking, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT};
30use crate::services::tokens;
31use crate::state::AppState;
32use crate::tools::ToolContext;
33use crate::tools::executor::{execute_tool_calls, extract_tool_calls};
34use crate::tools::registry::ToolRegistry;
35
/// Configuration for the query engine.
#[derive(Debug, Clone)]
pub struct QueryEngineConfig {
    /// Maximum number of agent-loop turns per query; `None` falls back to 50.
    pub max_turns: Option<usize>,
    /// Verbose flag, propagated into the tool execution context.
    pub verbose: bool,
    /// Whether this is a non-interactive (one-shot) session.
    pub unattended: bool,
}
43
/// The query engine orchestrates the agent loop.
pub struct QueryEngine {
    /// LLM backend used for streaming completions and compaction summaries.
    llm: Arc<dyn Provider>,
    /// Registry of available tools (schemas, lookup, execution targets).
    tools: ToolRegistry,
    /// Shared file cache handed to tools via `ToolContext`.
    file_cache: Arc<tokio::sync::Mutex<crate::services::file_cache::FileCache>>,
    /// Permission checker shared with tool execution.
    permissions: Arc<PermissionChecker>,
    /// Session state: message history, config, usage/cost totals, plan mode.
    state: AppState,
    /// Engine configuration (max turns, verbosity, unattended mode).
    config: QueryEngineConfig,
    /// Cancellation token for the active turn; replaced at each turn start.
    cancel: CancellationToken,
    /// Pre/post tool-use hooks loaded from configuration.
    hooks: HookRegistry,
    /// Records prompt-cache statistics from per-call usage data.
    cache_tracker: crate::services::cache_tracking::CacheTracker,
    /// Tracks permission denials; shared with tools via `ToolContext`.
    denial_tracker: Arc<tokio::sync::Mutex<crate::permissions::tracking::DenialTracker>>,
    /// State for background memory extraction between turns.
    extraction_state: Arc<tokio::sync::Mutex<crate::memory::extraction::ExtractionState>>,
    /// Permission rules allowed for the duration of this session.
    session_allows: Arc<tokio::sync::Mutex<std::collections::HashSet<String>>>,
    /// Optional interactive prompter for permission requests (None = non-interactive).
    permission_prompter: Option<Arc<dyn crate::tools::PermissionPrompter>>,
    /// Cached system prompt (rebuilt only when inputs change).
    cached_system_prompt: Option<(u64, String)>, // (hash, prompt)
}
62
/// Callback for streaming events to the UI.
///
/// Methods with empty default bodies are optional; implementors only need
/// to provide the four required callbacks.
pub trait StreamSink: Send + Sync {
    /// Called for each streamed text fragment from the model.
    fn on_text(&self, text: &str);
    /// Called when a tool-use block completes streaming, before execution.
    fn on_tool_start(&self, tool_name: &str, input: &serde_json::Value);
    /// Called with the result of an executed tool.
    fn on_tool_result(&self, tool_name: &str, result: &crate::tools::ToolResult);
    /// Called for streamed thinking/reasoning content. Optional.
    fn on_thinking(&self, _text: &str) {}
    /// Called when a turn finishes with no pending tool calls. Optional.
    fn on_turn_complete(&self, _turn: usize) {}
    /// Called on stream or provider errors.
    fn on_error(&self, error: &str);
    /// Called with token usage once a stream completes. Optional.
    fn on_usage(&self, _usage: &Usage) {}
    /// Called when compaction frees context tokens. Optional.
    fn on_compact(&self, _freed_tokens: u64) {}
    /// Called for non-fatal warnings (budget, context pressure). Optional.
    fn on_warning(&self, _msg: &str) {}
}
75
76/// A no-op stream sink for non-interactive mode.
77pub struct NullSink;
78impl StreamSink for NullSink {
79    fn on_text(&self, _: &str) {}
80    fn on_tool_start(&self, _: &str, _: &serde_json::Value) {}
81    fn on_tool_result(&self, _: &str, _: &crate::tools::ToolResult) {}
82    fn on_error(&self, _: &str) {}
83}
84
85impl QueryEngine {
86    pub fn new(
87        llm: Arc<dyn Provider>,
88        tools: ToolRegistry,
89        permissions: PermissionChecker,
90        state: AppState,
91        config: QueryEngineConfig,
92    ) -> Self {
93        Self {
94            llm,
95            tools,
96            file_cache: Arc::new(tokio::sync::Mutex::new(
97                crate::services::file_cache::FileCache::new(),
98            )),
99            permissions: Arc::new(permissions),
100            state,
101            config,
102            cancel: CancellationToken::new(),
103            hooks: HookRegistry::new(),
104            cache_tracker: crate::services::cache_tracking::CacheTracker::new(),
105            denial_tracker: Arc::new(tokio::sync::Mutex::new(
106                crate::permissions::tracking::DenialTracker::new(100),
107            )),
108            extraction_state: Arc::new(tokio::sync::Mutex::new(
109                crate::memory::extraction::ExtractionState::new(),
110            )),
111            session_allows: Arc::new(tokio::sync::Mutex::new(std::collections::HashSet::new())),
112            permission_prompter: None,
113            cached_system_prompt: None,
114        }
115    }
116
117    /// Load hooks from configuration into the registry.
118    pub fn load_hooks(&mut self, hook_defs: &[crate::hooks::HookDefinition]) {
119        for def in hook_defs {
120            self.hooks.register(def.clone());
121        }
122        if !hook_defs.is_empty() {
123            tracing::info!("Loaded {} hooks from config", hook_defs.len());
124        }
125    }
126
    /// Get a reference to the app state.
    ///
    /// Exposes message history, config, and usage totals for read-only access.
    pub fn state(&self) -> &AppState {
        &self.state
    }
131
    /// Get a mutable reference to the app state.
    ///
    /// Allows callers to mutate history/config between turns; must not be
    /// used concurrently with an active query (`&mut self` enforces this).
    pub fn state_mut(&mut self) -> &mut AppState {
        &mut self.state
    }
136
    /// Install a Ctrl+C handler that triggers the cancellation token.
    /// Call this once at startup. Subsequent Ctrl+C signals during a
    /// turn will cancel the active operation instead of killing the process.
    ///
    /// NOTE(review): the spawned task captures a clone of the token held at
    /// install time, but `run_turn_with_sink` replaces `self.cancel` with a
    /// fresh token at the start of every turn — after the first replacement
    /// this handler cancels a stale token and the active turn never observes
    /// it. Confirm whether Ctrl+C is expected to reach turns started after
    /// installation (a shared/replaceable token may be needed).
    ///
    /// NOTE(review): if `ctrl_c()` returns `Err` persistently, this loop
    /// spins without backoff — verify whether that case can occur here.
    pub fn install_signal_handler(&self) {
        let cancel = self.cancel.clone();
        tokio::spawn(async move {
            loop {
                if tokio::signal::ctrl_c().await.is_ok() {
                    if cancel.is_cancelled() {
                        // Second Ctrl+C — hard exit.
                        // 130 = 128 + SIGINT, the conventional shell exit code.
                        std::process::exit(130);
                    }
                    cancel.cancel();
                }
            }
        });
    }
154
    /// Run a single turn: process user input through the full agent loop.
    ///
    /// Convenience wrapper around [`Self::run_turn_with_sink`] using
    /// [`NullSink`], which discards all streaming events (non-interactive use).
    pub async fn run_turn(&mut self, user_input: &str) -> crate::error::Result<()> {
        self.run_turn_with_sink(user_input, &NullSink).await
    }
159
160    /// Run a turn with a stream sink for real-time UI updates.
161    pub async fn run_turn_with_sink(
162        &mut self,
163        user_input: &str,
164        sink: &dyn StreamSink,
165    ) -> crate::error::Result<()> {
166        // Reset cancellation token for this turn.
167        self.cancel = CancellationToken::new();
168
169        // Add the user message to history.
170        let user_msg = user_message(user_input);
171        self.state.push_message(user_msg);
172
173        let max_turns = self.config.max_turns.unwrap_or(50);
174        let mut compact_tracking = CompactTracking::default();
175        let mut retry_state = crate::llm::retry::RetryState::default();
176        let retry_config = crate::llm::retry::RetryConfig::default();
177        let mut max_output_recovery_count = 0u32;
178
179        // Agent loop: budget check → normalize → compact → call LLM → execute tools → repeat.
180        for turn in 0..max_turns {
181            self.state.turn_count = turn + 1;
182            self.state.is_query_active = true;
183
184            // Budget check before each turn.
185            let budget_config = crate::services::budget::BudgetConfig::default();
186            match crate::services::budget::check_budget(
187                self.state.total_cost_usd,
188                self.state.total_usage.total(),
189                &budget_config,
190            ) {
191                crate::services::budget::BudgetDecision::Stop { message } => {
192                    sink.on_warning(&message);
193                    self.state.is_query_active = false;
194                    return Ok(());
195                }
196                crate::services::budget::BudgetDecision::ContinueWithWarning {
197                    message, ..
198                } => {
199                    sink.on_warning(&message);
200                }
201                crate::services::budget::BudgetDecision::Continue => {}
202            }
203
204            // Normalize messages for API compatibility.
205            crate::llm::normalize::ensure_tool_result_pairing(&mut self.state.messages);
206            crate::llm::normalize::strip_empty_blocks(&mut self.state.messages);
207            crate::llm::normalize::remove_empty_messages(&mut self.state.messages);
208            crate::llm::normalize::cap_document_blocks(&mut self.state.messages, 500_000);
209            crate::llm::normalize::merge_consecutive_user_messages(&mut self.state.messages);
210
211            debug!("Agent turn {}/{}", turn + 1, max_turns);
212
213            let mut model = self.state.config.api.model.clone();
214
215            // Step 1: Auto-compact if context is too large.
216            if compact::should_auto_compact(self.state.history(), &model, &compact_tracking) {
217                let token_count = tokens::estimate_context_tokens(self.state.history());
218                let threshold = compact::auto_compact_threshold(&model);
219                info!("Auto-compact triggered: {token_count} tokens >= {threshold} threshold");
220
221                // Microcompact first: clear stale tool results.
222                let freed = compact::microcompact(&mut self.state.messages, 5);
223                if freed > 0 {
224                    sink.on_compact(freed);
225                    info!("Microcompact freed ~{freed} tokens");
226                }
227
228                // Check if microcompact was enough.
229                let post_mc_tokens = tokens::estimate_context_tokens(self.state.history());
230                if post_mc_tokens >= threshold {
231                    // Full LLM-based compaction: summarize older messages.
232                    info!("Microcompact insufficient, attempting LLM compaction");
233                    match compact::compact_with_llm(&mut self.state.messages, &*self.llm, &model)
234                        .await
235                    {
236                        Some(removed) => {
237                            info!("LLM compaction removed {removed} messages");
238                            compact_tracking.was_compacted = true;
239                            compact_tracking.consecutive_failures = 0;
240                        }
241                        None => {
242                            compact_tracking.consecutive_failures += 1;
243                            warn!(
244                                "LLM compaction failed (attempt {})",
245                                compact_tracking.consecutive_failures
246                            );
247                            // Fallback: context collapse (snip middle messages).
248                            let effective = compact::effective_context_window(&model);
249                            if let Some(collapse) =
250                                crate::services::context_collapse::collapse_to_budget(
251                                    self.state.history(),
252                                    effective,
253                                )
254                            {
255                                info!(
256                                    "Context collapse snipped {} messages, freed ~{} tokens",
257                                    collapse.snipped_count, collapse.tokens_freed
258                                );
259                                self.state.messages = collapse.api_messages;
260                                sink.on_compact(collapse.tokens_freed);
261                            } else {
262                                // Last resort: aggressive microcompact.
263                                let freed2 = compact::microcompact(&mut self.state.messages, 2);
264                                if freed2 > 0 {
265                                    sink.on_compact(freed2);
266                                }
267                            }
268                        }
269                    }
270                }
271            }
272
273            // Inject compaction reminder if compacted and feature enabled.
274            if compact_tracking.was_compacted && self.state.config.features.compaction_reminders {
275                let reminder = user_message(
276                    "<system-reminder>Context was automatically compacted. \
277                     Earlier messages were summarized. If you need details from \
278                     before compaction, ask the user or re-read the relevant files.</system-reminder>",
279                );
280                self.state.push_message(reminder);
281                compact_tracking.was_compacted = false; // Only remind once per compaction.
282            }
283
284            // Step 2: Check token warning state.
285            let warning = compact::token_warning_state(self.state.history(), &model);
286            if warning.is_blocking {
287                sink.on_warning("Context window nearly full. Consider starting a new session.");
288            } else if warning.is_above_warning {
289                sink.on_warning(&format!("Context {}% remaining", warning.percent_left));
290            }
291
292            // Step 3: Build and send the API request.
293            // Memoize: only rebuild system prompt when inputs change.
294            let prompt_hash = {
295                use std::hash::{Hash, Hasher};
296                let mut h = std::collections::hash_map::DefaultHasher::new();
297                self.state.config.api.model.hash(&mut h);
298                self.state.cwd.hash(&mut h);
299                self.state.config.mcp_servers.len().hash(&mut h);
300                self.tools.all().len().hash(&mut h);
301                h.finish()
302            };
303            let system_prompt = if let Some((cached_hash, ref cached)) = self.cached_system_prompt
304                && cached_hash == prompt_hash
305            {
306                cached.clone()
307            } else {
308                let prompt = build_system_prompt(&self.tools, &self.state);
309                self.cached_system_prompt = Some((prompt_hash, prompt.clone()));
310                prompt
311            };
312            // Use core schemas (deferred tools loaded on demand via ToolSearch).
313            let tool_schemas = self.tools.core_schemas();
314
315            // Escalate max_tokens after a max_output recovery (8k → 64k).
316            let base_tokens = self.state.config.api.max_output_tokens.unwrap_or(16384);
317            let effective_tokens = if max_output_recovery_count > 0 {
318                base_tokens.max(65536) // Escalate to at least 64k after first recovery
319            } else {
320                base_tokens
321            };
322
323            let request = ProviderRequest {
324                messages: self.state.history().to_vec(),
325                system_prompt: system_prompt.clone(),
326                tools: tool_schemas.clone(),
327                model: model.clone(),
328                max_tokens: effective_tokens,
329                temperature: None,
330                enable_caching: true,
331                tool_choice: Default::default(),
332                metadata: None,
333            };
334
335            let mut rx = match self.llm.stream(&request).await {
336                Ok(rx) => {
337                    retry_state.reset();
338                    rx
339                }
340                Err(e) => {
341                    let retryable = match &e {
342                        ProviderError::RateLimited { retry_after_ms } => {
343                            crate::llm::retry::RetryableError::RateLimited {
344                                retry_after: *retry_after_ms,
345                            }
346                        }
347                        ProviderError::Overloaded => crate::llm::retry::RetryableError::Overloaded,
348                        ProviderError::Network(_) => {
349                            crate::llm::retry::RetryableError::StreamInterrupted
350                        }
351                        other => crate::llm::retry::RetryableError::NonRetryable(other.to_string()),
352                    };
353
354                    match retry_state.next_action(&retryable, &retry_config) {
355                        crate::llm::retry::RetryAction::Retry { after } => {
356                            warn!("Retrying in {}ms", after.as_millis());
357                            tokio::time::sleep(after).await;
358                            continue;
359                        }
360                        crate::llm::retry::RetryAction::FallbackModel => {
361                            // Switch to a smaller/cheaper model for this turn.
362                            let fallback = get_fallback_model(&model);
363                            sink.on_warning(&format!("Falling back from {model} to {fallback}"));
364                            model = fallback;
365                            continue;
366                        }
367                        crate::llm::retry::RetryAction::Abort(reason) => {
368                            // Unattended retry: in non-interactive mode, retry
369                            // capacity errors with longer backoff instead of aborting.
370                            if self.config.unattended
371                                && self.state.config.features.unattended_retry
372                                && matches!(
373                                    &e,
374                                    ProviderError::Overloaded | ProviderError::RateLimited { .. }
375                                )
376                            {
377                                warn!("Unattended retry: waiting 30s for capacity");
378                                tokio::time::sleep(std::time::Duration::from_secs(30)).await;
379                                continue;
380                            }
381                            // Before giving up, try reactive compact for size errors.
382                            // Two-stage recovery: context collapse first, then microcompact.
383                            if let ProviderError::RequestTooLarge(body) = &e {
384                                let gap = compact::parse_prompt_too_long_gap(body);
385
386                                // Stage 1: Context collapse (snip middle messages).
387                                let effective = compact::effective_context_window(&model);
388                                if let Some(collapse) =
389                                    crate::services::context_collapse::collapse_to_budget(
390                                        self.state.history(),
391                                        effective,
392                                    )
393                                {
394                                    info!(
395                                        "Reactive collapse: snipped {} messages, freed ~{} tokens",
396                                        collapse.snipped_count, collapse.tokens_freed
397                                    );
398                                    self.state.messages = collapse.api_messages;
399                                    sink.on_compact(collapse.tokens_freed);
400                                    continue;
401                                }
402
403                                // Stage 2: Aggressive microcompact.
404                                let freed = compact::microcompact(&mut self.state.messages, 1);
405                                if freed > 0 {
406                                    sink.on_compact(freed);
407                                    info!(
408                                        "Reactive microcompact freed ~{freed} tokens (gap: {gap:?})"
409                                    );
410                                    continue;
411                                }
412                            }
413                            sink.on_error(&reason);
414                            self.state.is_query_active = false;
415                            return Err(crate::error::Error::Other(e.to_string()));
416                        }
417                    }
418                }
419            };
420
421            // Step 4: Stream response. Start executing read-only tools
422            // as their input completes (streaming tool execution).
423            let mut content_blocks = Vec::new();
424            let mut usage = Usage::default();
425            let mut stop_reason: Option<StopReason> = None;
426            let mut got_error = false;
427            let mut error_text = String::new();
428
429            // Streaming tool handles: tools kicked off during streaming.
430            let mut streaming_tool_handles: Vec<(
431                String,
432                String,
433                tokio::task::JoinHandle<crate::tools::ToolResult>,
434            )> = Vec::new();
435
436            while let Some(event) = rx.recv().await {
437                match event {
438                    StreamEvent::TextDelta(text) => {
439                        sink.on_text(&text);
440                    }
441                    StreamEvent::ContentBlockComplete(block) => {
442                        if let ContentBlock::ToolUse {
443                            ref id,
444                            ref name,
445                            ref input,
446                        } = block
447                        {
448                            sink.on_tool_start(name, input);
449
450                            // Start read-only tools immediately during streaming.
451                            if let Some(tool) = self.tools.get(name)
452                                && tool.is_read_only()
453                                && tool.is_concurrency_safe()
454                            {
455                                let tool = tool.clone();
456                                let input = input.clone();
457                                let cwd = std::path::PathBuf::from(&self.state.cwd);
458                                let cancel = self.cancel.clone();
459                                let perm = self.permissions.clone();
460                                let tool_id = id.clone();
461                                let tool_name = name.clone();
462
463                                let handle = tokio::spawn(async move {
464                                    match tool
465                                        .call(
466                                            input,
467                                            &ToolContext {
468                                                cwd,
469                                                cancel,
470                                                permission_checker: perm.clone(),
471                                                verbose: false,
472                                                plan_mode: false,
473                                                file_cache: None,
474                                                denial_tracker: None,
475                                                task_manager: None,
476                                                session_allows: None,
477                                                permission_prompter: None,
478                                            },
479                                        )
480                                        .await
481                                    {
482                                        Ok(r) => r,
483                                        Err(e) => crate::tools::ToolResult::error(e.to_string()),
484                                    }
485                                });
486
487                                streaming_tool_handles.push((tool_id, tool_name, handle));
488                            }
489                        }
490                        if let ContentBlock::Thinking { ref thinking, .. } = block {
491                            sink.on_thinking(thinking);
492                        }
493                        content_blocks.push(block);
494                    }
495                    StreamEvent::Done {
496                        usage: u,
497                        stop_reason: sr,
498                    } => {
499                        usage = u;
500                        stop_reason = sr;
501                        sink.on_usage(&usage);
502                    }
503                    StreamEvent::Error(msg) => {
504                        got_error = true;
505                        error_text = msg.clone();
506                        sink.on_error(&msg);
507                    }
508                    _ => {}
509                }
510            }
511
512            // Step 5: Record the assistant message.
513            let assistant_msg = Message::Assistant(AssistantMessage {
514                uuid: Uuid::new_v4(),
515                timestamp: chrono::Utc::now().to_rfc3339(),
516                content: content_blocks.clone(),
517                model: Some(model.clone()),
518                usage: Some(usage.clone()),
519                stop_reason: stop_reason.clone(),
520                request_id: None,
521            });
522            self.state.push_message(assistant_msg);
523            self.state.record_usage(&usage, &model);
524
525            // Token budget tracking per turn.
526            if self.state.config.features.token_budget && usage.total() > 0 {
527                let turn_total = usage.input_tokens + usage.output_tokens;
528                if turn_total > 100_000 {
529                    sink.on_warning(&format!(
530                        "High token usage this turn: {} tokens ({}in + {}out)",
531                        turn_total, usage.input_tokens, usage.output_tokens
532                    ));
533                }
534            }
535
536            // Record cache and telemetry.
537            let _cache_event = self.cache_tracker.record(&usage);
538            {
539                let mut span = crate::services::telemetry::api_call_span(
540                    &model,
541                    turn + 1,
542                    &self.state.session_id,
543                );
544                crate::services::telemetry::record_usage(&mut span, &usage);
545                span.finish();
546                tracing::debug!(
547                    "API call: {}ms, {}in/{}out tokens",
548                    span.duration_ms().unwrap_or(0),
549                    usage.input_tokens,
550                    usage.output_tokens,
551                );
552            }
553
554            // Step 6: Handle stream errors.
555            if got_error {
556                // Check if it's a prompt-too-long error in the stream.
557                if error_text.contains("prompt is too long")
558                    || error_text.contains("Prompt is too long")
559                {
560                    let freed = compact::microcompact(&mut self.state.messages, 1);
561                    if freed > 0 {
562                        sink.on_compact(freed);
563                        continue;
564                    }
565                }
566
567                // Check for max-output-tokens hit (partial response).
568                if content_blocks
569                    .iter()
570                    .any(|b| matches!(b, ContentBlock::Text { .. }))
571                    && error_text.contains("max_tokens")
572                    && max_output_recovery_count < MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
573                {
574                    max_output_recovery_count += 1;
575                    info!(
576                        "Max output tokens recovery attempt {}/{}",
577                        max_output_recovery_count, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
578                    );
579                    let recovery_msg = compact::max_output_recovery_message();
580                    self.state.push_message(recovery_msg);
581                    continue;
582                }
583            }
584
585            // Step 6b: Handle max_tokens stop reason (escalate and continue).
586            if matches!(stop_reason, Some(StopReason::MaxTokens))
587                && !got_error
588                && content_blocks
589                    .iter()
590                    .any(|b| matches!(b, ContentBlock::Text { .. }))
591                && max_output_recovery_count < MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
592            {
593                max_output_recovery_count += 1;
594                info!(
595                    "Max tokens stop reason — recovery attempt {}/{}",
596                    max_output_recovery_count, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
597                );
598                let recovery_msg = compact::max_output_recovery_message();
599                self.state.push_message(recovery_msg);
600                continue;
601            }
602
603            // Step 7: Extract tool calls from the response.
604            let tool_calls = extract_tool_calls(&content_blocks);
605
606            if tool_calls.is_empty() {
607                // No tools requested — turn is complete.
608                info!("Turn complete (no tool calls)");
609                sink.on_turn_complete(turn + 1);
610                self.state.is_query_active = false;
611
612                // Fire background memory extraction (fire-and-forget).
613                // Only runs if feature enabled and memory directory exists.
614                if self.state.config.features.extract_memories
615                    && crate::memory::ensure_memory_dir().is_some()
616                {
617                    let extraction_messages = self.state.messages.clone();
618                    let extraction_state = self.extraction_state.clone();
619                    let extraction_llm = self.llm.clone();
620                    let extraction_model = model.clone();
621                    tokio::spawn(async move {
622                        crate::memory::extraction::extract_memories_background(
623                            extraction_messages,
624                            extraction_state,
625                            extraction_llm,
626                            extraction_model,
627                        )
628                        .await;
629                    });
630                }
631
632                return Ok(());
633            }
634
635            // Step 8: Execute tool calls with pre/post hooks.
636            info!("Executing {} tool call(s)", tool_calls.len());
637            let cwd = PathBuf::from(&self.state.cwd);
638            let tool_ctx = ToolContext {
639                cwd,
640                cancel: self.cancel.clone(),
641                permission_checker: self.permissions.clone(),
642                verbose: self.config.verbose,
643                plan_mode: self.state.plan_mode,
644                file_cache: Some(self.file_cache.clone()),
645                denial_tracker: Some(self.denial_tracker.clone()),
646                task_manager: Some(self.state.task_manager.clone()),
647                session_allows: Some(self.session_allows.clone()),
648                permission_prompter: self.permission_prompter.clone(),
649            };
650
651            // Collect streaming tool results first.
652            let streaming_ids: std::collections::HashSet<String> = streaming_tool_handles
653                .iter()
654                .map(|(id, _, _)| id.clone())
655                .collect();
656
657            let mut streaming_results = Vec::new();
658            for (id, name, handle) in streaming_tool_handles.drain(..) {
659                match handle.await {
660                    Ok(result) => streaming_results.push(crate::tools::executor::ToolCallResult {
661                        tool_use_id: id,
662                        tool_name: name,
663                        result,
664                    }),
665                    Err(e) => streaming_results.push(crate::tools::executor::ToolCallResult {
666                        tool_use_id: id,
667                        tool_name: name,
668                        result: crate::tools::ToolResult::error(format!("Task failed: {e}")),
669                    }),
670                }
671            }
672
673            // Fire pre-tool-use hooks.
674            for call in &tool_calls {
675                self.hooks
676                    .run_hooks(&HookEvent::PreToolUse, Some(&call.name), &call.input)
677                    .await;
678            }
679
680            // Execute remaining tools (ones not started during streaming).
681            let remaining_calls: Vec<_> = tool_calls
682                .iter()
683                .filter(|c| !streaming_ids.contains(&c.id))
684                .cloned()
685                .collect();
686
687            let mut results = streaming_results;
688            if !remaining_calls.is_empty() {
689                let batch_results = execute_tool_calls(
690                    &remaining_calls,
691                    self.tools.all(),
692                    &tool_ctx,
693                    &self.permissions,
694                )
695                .await;
696                results.extend(batch_results);
697            }
698
699            // Step 9: Inject tool results + fire post-tool-use hooks.
700            for result in &results {
701                // Handle plan mode state transitions.
702                if !result.result.is_error {
703                    match result.tool_name.as_str() {
704                        "EnterPlanMode" => {
705                            self.state.plan_mode = true;
706                            info!("Plan mode enabled");
707                        }
708                        "ExitPlanMode" => {
709                            self.state.plan_mode = false;
710                            info!("Plan mode disabled");
711                        }
712                        _ => {}
713                    }
714                }
715
716                sink.on_tool_result(&result.tool_name, &result.result);
717
718                // Fire post-tool-use hooks.
719                self.hooks
720                    .run_hooks(
721                        &HookEvent::PostToolUse,
722                        Some(&result.tool_name),
723                        &serde_json::json!({
724                            "tool": result.tool_name,
725                            "is_error": result.result.is_error,
726                        }),
727                    )
728                    .await;
729
730                let msg = tool_result_message(
731                    &result.tool_use_id,
732                    &result.result.content,
733                    result.result.is_error,
734                );
735                self.state.push_message(msg);
736            }
737
738            // Continue the loop — the model will see the tool results.
739        }
740
741        warn!("Max turns ({max_turns}) reached");
742        sink.on_warning(&format!("Agent stopped after {max_turns} turns"));
743        self.state.is_query_active = false;
744        Ok(())
745    }
746
    /// Cancel the current operation.
    ///
    /// Signals the engine's shared `CancellationToken`. Because tokens are
    /// cloned into tool contexts and handed out via [`Self::cancel_token`],
    /// every in-flight stream, tool execution, and background task holding a
    /// clone observes the cancellation. Calling this more than once is a
    /// harmless no-op.
    pub fn cancel(&self) {
        self.cancel.cancel();
    }
751
752    /// Get a cloneable cancel token for use in background tasks.
753    pub fn cancel_token(&self) -> tokio_util::sync::CancellationToken {
754        self.cancel.clone()
755    }
756}
757
/// Get a fallback model (smaller/cheaper) for retry on overload.
///
/// Mapping, checked in order (matches are ASCII case-insensitive):
/// - `opus` → `sonnet`
/// - `gpt-5.4` / `gpt-4.1` (non-`mini`, non-`nano`) → same id with `-mini` appended
/// - `large` → `small`
/// - anything else is returned unchanged (already a small model).
fn get_fallback_model(current: &str) -> String {
    let lower = current.to_ascii_lowercase();
    if lower.contains("opus") {
        // Opus → Sonnet. Replace case-insensitively so mixed-case ids
        // (e.g. "Claude-Opus-4") still get a real fallback instead of
        // being returned unchanged.
        replace_ignore_ascii_case(current, "opus", "sonnet")
    } else if (lower.contains("gpt-5.4") || lower.contains("gpt-4.1"))
        && !lower.contains("mini")
        && !lower.contains("nano")
    {
        format!("{current}-mini")
    } else if lower.contains("large") {
        replace_ignore_ascii_case(current, "large", "small")
    } else {
        // Already a small model, keep it.
        current.to_string()
    }
}

/// Replace every ASCII case-insensitive occurrence of `pat` (lowercase)
/// in `s` with `repl`, preserving the untouched portions of `s` verbatim.
///
/// Uses `to_ascii_lowercase`, which is byte-length preserving, so indices
/// found in the lowered copy are valid in the original string.
fn replace_ignore_ascii_case(s: &str, pat: &str, repl: &str) -> String {
    let lower = s.to_ascii_lowercase();
    let mut out = String::with_capacity(s.len());
    let mut start = 0;
    while let Some(offset) = lower[start..].find(pat) {
        let hit = start + offset;
        out.push_str(&s[start..hit]);
        out.push_str(repl);
        start = hit + pat.len();
    }
    out.push_str(&s[start..]);
    out
}
776
/// Build the system prompt from tool definitions, app state, and memory.
///
/// Assembles the prompt in a fixed section order:
/// 1. Agent identity preamble
/// 2. Environment (cwd, platform, shell, git status)
/// 3. Memory context (always-loaded memories, plus memories relevant to the
///    most recent user messages)
/// 4. Per-tool documentation for every *enabled* tool
/// 5. User-invocable skills (if any)
/// 6. Static guidelines: tool usage, git safety, commit/PR workflows,
///    action safety, response style, and memory-writing rules
/// 7. Worked tool-usage patterns, error recovery, and common workflows
/// 8. MCP server listing (only when servers are configured)
/// 9. Deferred tools loadable via ToolSearch (only when any exist)
/// 10. Task-management and output-formatting guidance
///
/// Pure function of `tools` and `state` (aside from reading `$SHELL` and
/// probing the filesystem for `.git`), so callers can rebuild it whenever
/// state changes.
pub fn build_system_prompt(tools: &ToolRegistry, state: &AppState) -> String {
    let mut prompt = String::new();

    prompt.push_str(
        "You are an AI coding agent. You help users with software engineering tasks \
         by reading, writing, and searching code. Use the tools available to you to \
         accomplish tasks.\n\n",
    );

    // Environment context.
    // Fall back to "bash" when $SHELL is unset (e.g. non-login environments).
    let shell = std::env::var("SHELL").unwrap_or_else(|_| "bash".to_string());
    // `.exists()` is true for both a `.git` directory and the `.git` file
    // used by worktrees/submodules.
    let is_git = std::path::Path::new(&state.cwd).join(".git").exists();
    prompt.push_str(&format!(
        "# Environment\n\
         - Working directory: {}\n\
         - Platform: {}\n\
         - Shell: {shell}\n\
         - Git repository: {}\n\n",
        state.cwd,
        std::env::consts::OS,
        if is_git { "yes" } else { "no" },
    ));

    // Inject memory context (project + user + on-demand relevant).
    let mut memory = crate::memory::MemoryContext::load(Some(std::path::Path::new(&state.cwd)));

    // On-demand: surface relevant memories based on recent conversation.
    // Scans the last 5 messages (newest first) and keeps only user-authored
    // text blocks; assistant and tool messages are ignored.
    let recent_text: String = state
        .messages
        .iter()
        .rev()
        .take(5)
        .filter_map(|m| match m {
            crate::llm::message::Message::User(u) => Some(
                u.content
                    .iter()
                    .filter_map(|b| b.as_text())
                    .collect::<Vec<_>>()
                    .join(" "),
            ),
            _ => None,
        })
        .collect::<Vec<_>>()
        .join(" ");

    if !recent_text.is_empty() {
        memory.load_relevant(&recent_text);
    }

    // Skip the section entirely when no memories are loaded.
    let memory_section = memory.to_system_prompt_section();
    if !memory_section.is_empty() {
        prompt.push_str(&memory_section);
    }

    // Tool documentation.
    // Disabled tools are omitted entirely rather than listed as unavailable.
    prompt.push_str("# Available Tools\n\n");
    for tool in tools.all() {
        if tool.is_enabled() {
            prompt.push_str(&format!("## {}\n{}\n\n", tool.name(), tool.prompt()));
        }
    }

    // Available skills (only those marked user-invocable), rendered as
    // `/name: description (use when: ...)` bullet lines.
    let skills = crate::skills::SkillRegistry::load_all(Some(std::path::Path::new(&state.cwd)));
    let invocable = skills.user_invocable();
    if !invocable.is_empty() {
        prompt.push_str("# Available Skills\n\n");
        for skill in invocable {
            let desc = skill.metadata.description.as_deref().unwrap_or("");
            let when = skill.metadata.when_to_use.as_deref().unwrap_or("");
            prompt.push_str(&format!("- `/{}`", skill.name));
            if !desc.is_empty() {
                prompt.push_str(&format!(": {desc}"));
            }
            if !when.is_empty() {
                prompt.push_str(&format!(" (use when: {when})"));
            }
            prompt.push('\n');
        }
        prompt.push('\n');
    }

    // Guidelines and safety framework (static text, always included).
    prompt.push_str(
        "# Using tools\n\n\
         Use dedicated tools instead of shell commands when available:\n\
         - File search: Glob (not find or ls)\n\
         - Content search: Grep (not grep or rg)\n\
         - Read files: FileRead (not cat/head/tail)\n\
         - Edit files: FileEdit (not sed/awk)\n\
         - Write files: FileWrite (not echo/cat with redirect)\n\
         - Reserve Bash for system commands and operations that require shell execution.\n\
         - Break complex tasks into steps. Use multiple tool calls in parallel when independent.\n\
         - Use the Agent tool for complex multi-step research or tasks that benefit from isolation.\n\n\
         # Working with code\n\n\
         - Read files before editing them. Understand existing code before suggesting changes.\n\
         - Prefer editing existing files over creating new ones to avoid file bloat.\n\
         - Only make changes that were requested. Don't add features, refactor, add comments, \
           or make \"improvements\" beyond the ask.\n\
         - Don't add error handling for scenarios that can't happen. Don't design for \
           hypothetical future requirements.\n\
         - When referencing code, include file_path:line_number.\n\
         - Be careful not to introduce security vulnerabilities (command injection, XSS, SQL injection, \
           OWASP top 10). If you notice insecure code you wrote, fix it immediately.\n\
         - Don't add docstrings, comments, or type annotations to code you didn't change.\n\
         - Three similar lines of code is better than a premature abstraction.\n\n\
         # Git safety protocol\n\n\
         - NEVER update the git config.\n\
         - NEVER run destructive git commands (push --force, reset --hard, checkout ., restore ., \
           clean -f, branch -D) unless the user explicitly requests them.\n\
         - NEVER skip hooks (--no-verify, --no-gpg-sign) unless the user explicitly requests it.\n\
         - NEVER force push to main/master. Warn the user if they request it.\n\
         - Always create NEW commits rather than amending, unless the user explicitly requests amend. \
           After hook failure, the commit did NOT happen — amend would modify the PREVIOUS commit.\n\
         - When staging files, prefer adding specific files by name rather than git add -A or git add ., \
           which can accidentally include sensitive files.\n\
         - NEVER commit changes unless the user explicitly asks.\n\n\
         # Committing changes\n\n\
         When the user asks to commit:\n\
         1. Run git status and git diff to see all changes.\n\
         2. Run git log --oneline -5 to match the repository's commit message style.\n\
         3. Draft a concise (1-2 sentence) commit message focusing on \"why\" not \"what\".\n\
         4. Do not commit files that likely contain secrets (.env, credentials.json).\n\
         5. Stage specific files, create the commit.\n\
         6. If pre-commit hook fails, fix the issue and create a NEW commit.\n\
         7. When creating commits, include a co-author attribution line at the end of the message.\n\n\
         # Creating pull requests\n\n\
         When the user asks to create a PR:\n\
         1. Run git status, git diff, and git log to understand all changes on the branch.\n\
         2. Analyze ALL commits (not just the latest) that will be in the PR.\n\
         3. Draft a short title (under 70 chars) and detailed body with summary and test plan.\n\
         4. Push to remote with -u flag if needed, then create PR using gh pr create.\n\
         5. Return the PR URL when done.\n\n\
         # Executing actions safely\n\n\
         Consider the reversibility and blast radius of every action:\n\
         - Freely take local, reversible actions (editing files, running tests).\n\
         - For hard-to-reverse or shared-state actions, confirm with the user first:\n\
           - Destructive: deleting files/branches, dropping tables, rm -rf, overwriting uncommitted changes.\n\
           - Hard to reverse: force-pushing, git reset --hard, amending published commits.\n\
           - Visible to others: pushing code, creating/commenting on PRs/issues, sending messages.\n\
         - When you encounter an obstacle, do not use destructive actions as a shortcut. \
           Identify root causes and fix underlying issues.\n\
         - If you discover unexpected state (unfamiliar files, branches, config), investigate \
           before deleting or overwriting — it may be the user's in-progress work.\n\n\
         # Response style\n\n\
         - Be concise. Lead with the answer or action, not the reasoning.\n\
         - Skip filler, preamble, and unnecessary transitions.\n\
         - Don't restate what the user said.\n\
         - If you can say it in one sentence, don't use three.\n\
         - Focus output on: decisions that need input, status updates, and errors that change the plan.\n\
         - When referencing GitHub issues or PRs, use owner/repo#123 format.\n\
         - Only use emojis if the user explicitly requests it.\n\n\
         # Memory\n\n\
         You can save information across sessions by writing memory files.\n\
         - Save to: ~/.config/agent-code/memory/ (one .md file per topic)\n\
         - Each file needs YAML frontmatter: name, description, type (user/feedback/project/reference)\n\
         - After writing a file, update MEMORY.md with a one-line pointer\n\
         - Memory types: user (role, preferences), feedback (corrections, confirmations), \
           project (decisions, deadlines), reference (external resources)\n\
         - Do NOT store: code patterns, git history, debugging solutions, anything derivable from code\n\
         - Memory is a hint — always verify against current state before acting on it\n",
    );

    // Detailed tool usage examples and workflow patterns (static text).
    prompt.push_str(
        "# Tool usage patterns\n\n\
         Common patterns for effective tool use:\n\n\
         **Read before edit**: Always read a file before editing it. This ensures you \
         understand the current state and can make targeted changes.\n\
         ```\n\
         1. FileRead file_path → understand structure\n\
         2. FileEdit old_string, new_string → targeted change\n\
         ```\n\n\
         **Search then act**: Use Glob to find files, Grep to find content, then read/edit.\n\
         ```\n\
         1. Glob **/*.rs → find Rust files\n\
         2. Grep pattern path → find specific code\n\
         3. FileRead → read the match\n\
         4. FileEdit → make the change\n\
         ```\n\n\
         **Parallel tool calls**: When you need to read multiple independent files or run \
         independent searches, make all the tool calls in one response. Don't serialize \
         independent operations.\n\n\
         **Test after change**: After editing code, run tests to verify the change works.\n\
         ```\n\
         1. FileEdit → make change\n\
         2. Bash cargo test / pytest / npm test → verify\n\
         3. If tests fail, read the error, fix, re-test\n\
         ```\n\n\
         # Error recovery\n\n\
         When something goes wrong:\n\
         - **Tool not found**: Use ToolSearch to find the right tool name.\n\
         - **Permission denied**: Explain why the action is needed, ask the user to approve.\n\
         - **File not found**: Use Glob to find the correct path. Check for typos.\n\
         - **Edit failed (not unique)**: Provide more surrounding context in old_string, \
           or use replace_all=true if renaming.\n\
         - **Command failed**: Read the full error message. Don't retry the same command. \
           Diagnose the root cause first.\n\
         - **Context too large**: The system will auto-compact. If you need specific \
           information from before compaction, re-read the relevant files.\n\
         - **Rate limited**: The system will auto-retry with backoff. Just wait.\n\n\
         # Common workflows\n\n\
         **Bug fix**: Read the failing test → read the source code it tests → \
         identify the bug → fix it → run the test → confirm it passes.\n\n\
         **New feature**: Read existing patterns in the codebase → create or edit files → \
         add tests → run tests → update docs if needed.\n\n\
         **Code review**: Read the diff → identify issues (bugs, security, style) → \
         report findings with file:line references.\n\n\
         **Refactor**: Search for all usages of the symbol → plan the changes → \
         edit each file → run tests to verify nothing broke.\n\n",
    );

    // MCP server instructions (dynamic, per-server).
    if !state.config.mcp_servers.is_empty() {
        prompt.push_str("# MCP Servers\n\n");
        prompt.push_str(
            "Connected MCP servers provide additional tools. MCP tools are prefixed \
             with `mcp__{server}__{tool}`. Use them like any other tool.\n\n",
        );
        for (name, entry) in &state.config.mcp_servers {
            // Infer transport from which config field is populated:
            // a command ⇒ stdio subprocess, a url ⇒ SSE endpoint.
            let transport = if entry.command.is_some() {
                "stdio"
            } else if entry.url.is_some() {
                "sse"
            } else {
                "unknown"
            };
            prompt.push_str(&format!("- **{name}** ({transport})\n"));
        }
        prompt.push('\n');
    }

    // Deferred tools listing: registered but not loaded until requested
    // via ToolSearch.
    let deferred = tools.deferred_names();
    if !deferred.is_empty() {
        prompt.push_str("# Deferred Tools\n\n");
        prompt.push_str(
            "These tools are available but not loaded by default. \
             Use ToolSearch to load them when needed:\n",
        );
        for name in &deferred {
            prompt.push_str(&format!("- {name}\n"));
        }
        prompt.push('\n');
    }

    // Task management guidance (static text, always included).
    prompt.push_str(
        "# Task management\n\n\
         - Use TaskCreate to break complex work into trackable steps.\n\
         - Mark tasks as in_progress when starting, completed when done.\n\
         - Use the Agent tool to spawn subagents for parallel independent work.\n\
         - Use EnterPlanMode/ExitPlanMode for read-only exploration before making changes.\n\
         - Use EnterWorktree/ExitWorktree for isolated changes in git worktrees.\n\n\
         # Output formatting\n\n\
         - All text output is displayed to the user. Use GitHub-flavored markdown.\n\
         - Use fenced code blocks with language hints for code: ```rust, ```python, etc.\n\
         - Use inline `code` for file names, function names, and short code references.\n\
         - Use tables for structured comparisons.\n\
         - Use bullet lists for multiple items.\n\
         - Keep paragraphs short (2-3 sentences).\n\
         - Never output raw HTML or complex formatting — stick to standard markdown.\n",
    );

    prompt
}