// agent_code_lib/query/mod.rs
//! Query engine: the core agent loop.
//!
//! Implements the agentic cycle:
//!
//! 1. Auto-compact if context nears the window limit
//! 2. Microcompact stale tool results
//! 3. Call LLM with streaming
//! 4. Accumulate response content blocks
//! 5. Handle errors (prompt-too-long, rate limits, max-output-tokens)
//! 6. Extract tool_use blocks
//! 7. Execute tools (concurrent/serial batching)
//! 8. Inject tool results into history
//! 9. Repeat from step 1 until no tool_use or max turns

pub mod source;

use std::path::PathBuf;
use std::sync::Arc;

use tokio_util::sync::CancellationToken;
use tracing::{debug, info, warn};
use uuid::Uuid;

use crate::hooks::{HookEvent, HookRegistry};
use crate::llm::message::*;
use crate::llm::provider::{Provider, ProviderError, ProviderRequest};
use crate::llm::stream::StreamEvent;
use crate::permissions::PermissionChecker;
use crate::services::compact::{self, CompactTracking, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT};
use crate::services::tokens;
use crate::state::AppState;
use crate::tools::ToolContext;
use crate::tools::executor::{execute_tool_calls, extract_tool_calls};
use crate::tools::registry::ToolRegistry;
35
/// Configuration for the query engine.
///
/// Plain data — derives `Debug` for diagnostics, `Clone` so callers can
/// keep a copy, and `Default` (no turn cap, quiet, interactive).
#[derive(Debug, Clone, Default)]
pub struct QueryEngineConfig {
    /// Maximum agent-loop turns per user input; `None` falls back to the
    /// engine's built-in cap (50 at the call site).
    pub max_turns: Option<usize>,
    /// Enable verbose tool output.
    pub verbose: bool,
    /// Whether this is a non-interactive (one-shot) session.
    pub unattended: bool,
}
43
/// The query engine orchestrates the agent loop.
///
/// Owns the LLM provider, tool registry, permission machinery, and the
/// session's `AppState`; `run_turn_with_sink` drives the full cycle.
pub struct QueryEngine {
    /// LLM provider used for streaming completions (also passed to
    /// compaction, which summarizes history via the same provider).
    llm: Arc<dyn Provider>,
    /// Registry of tools exposed to the model.
    tools: ToolRegistry,
    /// Shared file cache, handed to each tool execution context.
    file_cache: Arc<tokio::sync::Mutex<crate::services::file_cache::FileCache>>,
    /// Permission checker consulted when executing tool calls.
    permissions: Arc<PermissionChecker>,
    /// Conversation history, usage totals, config, and session state.
    state: AppState,
    /// Engine-level settings (max turns, verbosity, unattended mode).
    config: QueryEngineConfig,
    /// Cancellation token for the active turn; recreated at the start of
    /// every `run_turn_with_sink` call.
    cancel: CancellationToken,
    /// Pre/post tool-use hook registry.
    hooks: HookRegistry,
    /// Records per-response usage for cache tracking.
    cache_tracker: crate::services::cache_tracking::CacheTracker,
    /// Tracks permission denials (constructed with capacity 100).
    denial_tracker: Arc<tokio::sync::Mutex<crate::permissions::tracking::DenialTracker>>,
    /// State for fire-and-forget background memory extraction.
    extraction_state: Arc<tokio::sync::Mutex<crate::memory::extraction::ExtractionState>>,
    /// Tool allowances granted for the lifetime of this session,
    /// shared with tool execution contexts.
    session_allows: Arc<tokio::sync::Mutex<std::collections::HashSet<String>>>,
    /// Optional interactive prompter for permission requests; `None` by
    /// default (set elsewhere — not visible in this module).
    permission_prompter: Option<Arc<dyn crate::tools::PermissionPrompter>>,
}
60
/// Callback for streaming events to the UI.
///
/// Methods with empty default bodies are optional; implementors must
/// provide only the required ones (see [`NullSink`] for a no-op example).
pub trait StreamSink: Send + Sync {
    /// A fragment of assistant text as it streams in.
    fn on_text(&self, text: &str);
    /// A tool_use block was received; `input` is the tool's JSON arguments.
    fn on_tool_start(&self, tool_name: &str, input: &serde_json::Value);
    /// A tool finished executing and produced `result`.
    fn on_tool_result(&self, tool_name: &str, result: &crate::tools::ToolResult);
    /// A thinking block streamed from the model. Optional.
    fn on_thinking(&self, _text: &str) {}
    /// Agent turn `_turn` completed with no further tool calls. Optional.
    fn on_turn_complete(&self, _turn: usize) {}
    /// A stream or provider error occurred.
    fn on_error(&self, error: &str);
    /// Token usage reported at the end of an LLM response. Optional.
    fn on_usage(&self, _usage: &Usage) {}
    /// Compaction freed roughly `_freed_tokens` tokens. Optional.
    fn on_compact(&self, _freed_tokens: u64) {}
    /// A non-fatal warning (budget, context pressure, fallback). Optional.
    fn on_warning(&self, _msg: &str) {}
}
73
74/// A no-op stream sink for non-interactive mode.
75pub struct NullSink;
76impl StreamSink for NullSink {
77    fn on_text(&self, _: &str) {}
78    fn on_tool_start(&self, _: &str, _: &serde_json::Value) {}
79    fn on_tool_result(&self, _: &str, _: &crate::tools::ToolResult) {}
80    fn on_error(&self, _: &str) {}
81}
82
83impl QueryEngine {
84    pub fn new(
85        llm: Arc<dyn Provider>,
86        tools: ToolRegistry,
87        permissions: PermissionChecker,
88        state: AppState,
89        config: QueryEngineConfig,
90    ) -> Self {
91        Self {
92            llm,
93            tools,
94            file_cache: Arc::new(tokio::sync::Mutex::new(
95                crate::services::file_cache::FileCache::new(),
96            )),
97            permissions: Arc::new(permissions),
98            state,
99            config,
100            cancel: CancellationToken::new(),
101            hooks: HookRegistry::new(),
102            cache_tracker: crate::services::cache_tracking::CacheTracker::new(),
103            denial_tracker: Arc::new(tokio::sync::Mutex::new(
104                crate::permissions::tracking::DenialTracker::new(100),
105            )),
106            extraction_state: Arc::new(tokio::sync::Mutex::new(
107                crate::memory::extraction::ExtractionState::new(),
108            )),
109            session_allows: Arc::new(tokio::sync::Mutex::new(std::collections::HashSet::new())),
110            permission_prompter: None,
111        }
112    }
113
114    /// Load hooks from configuration into the registry.
115    pub fn load_hooks(&mut self, hook_defs: &[crate::hooks::HookDefinition]) {
116        for def in hook_defs {
117            self.hooks.register(def.clone());
118        }
119        if !hook_defs.is_empty() {
120            tracing::info!("Loaded {} hooks from config", hook_defs.len());
121        }
122    }
123
    /// Get a reference to the app state.
    pub fn state(&self) -> &AppState {
        &self.state
    }

    /// Get a mutable reference to the app state.
    pub fn state_mut(&mut self) -> &mut AppState {
        &mut self.state
    }
133
134    /// Install a Ctrl+C handler that triggers the cancellation token.
135    /// Call this once at startup. Subsequent Ctrl+C signals during a
136    /// turn will cancel the active operation instead of killing the process.
137    pub fn install_signal_handler(&self) {
138        let cancel = self.cancel.clone();
139        tokio::spawn(async move {
140            loop {
141                if tokio::signal::ctrl_c().await.is_ok() {
142                    if cancel.is_cancelled() {
143                        // Second Ctrl+C — hard exit.
144                        std::process::exit(130);
145                    }
146                    cancel.cancel();
147                }
148            }
149        });
150    }
151
    /// Run a single turn: process user input through the full agent loop.
    ///
    /// Convenience wrapper around [`Self::run_turn_with_sink`] that
    /// discards all streaming events via [`NullSink`].
    pub async fn run_turn(&mut self, user_input: &str) -> crate::error::Result<()> {
        self.run_turn_with_sink(user_input, &NullSink).await
    }
156
    /// Run a turn with a stream sink for real-time UI updates.
    ///
    /// Pushes `user_input` onto the history, then drives the agent loop
    /// described in the module docs: budget check → message normalization
    /// → auto-compaction → streaming LLM call → tool execution → result
    /// injection, repeating until the model produces no tool calls, the
    /// budget says stop, or `max_turns` is exhausted. Streaming events are
    /// forwarded to `sink` as they occur.
    ///
    /// # Errors
    ///
    /// Returns an error only when the provider fails and every recovery
    /// path (retry, unattended backoff, reactive compaction) is exhausted.
    pub async fn run_turn_with_sink(
        &mut self,
        user_input: &str,
        sink: &dyn StreamSink,
    ) -> crate::error::Result<()> {
        // Reset cancellation token for this turn. Tokens handed out earlier
        // via `cancel_token()` are detached from the new turn.
        self.cancel = CancellationToken::new();

        // Add the user message to history.
        let user_msg = user_message(user_input);
        self.state.push_message(user_msg);

        let max_turns = self.config.max_turns.unwrap_or(50);
        // Trackers that persist across loop iterations: compaction state,
        // provider retry/backoff state, and max-output-token recovery count.
        let mut compact_tracking = CompactTracking::default();
        let mut retry_state = crate::llm::retry::RetryState::default();
        let retry_config = crate::llm::retry::RetryConfig::default();
        let mut max_output_recovery_count = 0u32;

        // Agent loop: budget check → normalize → compact → call LLM → execute tools → repeat.
        for turn in 0..max_turns {
            self.state.turn_count = turn + 1;
            self.state.is_query_active = true;

            // Budget check before each turn.
            let budget_config = crate::services::budget::BudgetConfig::default();
            match crate::services::budget::check_budget(
                self.state.total_cost_usd,
                self.state.total_usage.total(),
                &budget_config,
            ) {
                crate::services::budget::BudgetDecision::Stop { message } => {
                    // Hard stop: surface the message and end the turn cleanly.
                    sink.on_warning(&message);
                    self.state.is_query_active = false;
                    return Ok(());
                }
                crate::services::budget::BudgetDecision::ContinueWithWarning {
                    message, ..
                } => {
                    sink.on_warning(&message);
                }
                crate::services::budget::BudgetDecision::Continue => {}
            }

            // Normalize messages for API compatibility.
            crate::llm::normalize::ensure_tool_result_pairing(&mut self.state.messages);
            crate::llm::normalize::strip_empty_blocks(&mut self.state.messages);
            crate::llm::normalize::remove_empty_messages(&mut self.state.messages);
            crate::llm::normalize::cap_document_blocks(&mut self.state.messages, 500_000);
            crate::llm::normalize::merge_consecutive_user_messages(&mut self.state.messages);

            debug!("Agent turn {}/{}", turn + 1, max_turns);

            let model = self.state.config.api.model.clone();

            // Step 1: Auto-compact if context is too large.
            if compact::should_auto_compact(self.state.history(), &model, &compact_tracking) {
                let token_count = tokens::estimate_context_tokens(self.state.history());
                let threshold = compact::auto_compact_threshold(&model);
                info!("Auto-compact triggered: {token_count} tokens >= {threshold} threshold");

                // Microcompact first: clear stale tool results.
                let freed = compact::microcompact(&mut self.state.messages, 5);
                if freed > 0 {
                    sink.on_compact(freed);
                    info!("Microcompact freed ~{freed} tokens");
                }

                // Check if microcompact was enough.
                let post_mc_tokens = tokens::estimate_context_tokens(self.state.history());
                if post_mc_tokens >= threshold {
                    // Full LLM-based compaction: summarize older messages.
                    info!("Microcompact insufficient, attempting LLM compaction");
                    match compact::compact_with_llm(&mut self.state.messages, &*self.llm, &model)
                        .await
                    {
                        Some(removed) => {
                            info!("LLM compaction removed {removed} messages");
                            compact_tracking.was_compacted = true;
                            compact_tracking.consecutive_failures = 0;
                        }
                        None => {
                            compact_tracking.consecutive_failures += 1;
                            warn!(
                                "LLM compaction failed (attempt {})",
                                compact_tracking.consecutive_failures
                            );
                            // Fallback: context collapse (snip middle messages).
                            let effective = compact::effective_context_window(&model);
                            if let Some(collapse) =
                                crate::services::context_collapse::collapse_to_budget(
                                    self.state.history(),
                                    effective,
                                )
                            {
                                info!(
                                    "Context collapse snipped {} messages, freed ~{} tokens",
                                    collapse.snipped_count, collapse.tokens_freed
                                );
                                self.state.messages = collapse.api_messages;
                                sink.on_compact(collapse.tokens_freed);
                            } else {
                                // Last resort: aggressive microcompact.
                                let freed2 = compact::microcompact(&mut self.state.messages, 2);
                                if freed2 > 0 {
                                    sink.on_compact(freed2);
                                }
                            }
                        }
                    }
                }
            }

            // Inject compaction reminder if compacted and feature enabled.
            if compact_tracking.was_compacted && self.state.config.features.compaction_reminders {
                let reminder = user_message(
                    "<system-reminder>Context was automatically compacted. \
                     Earlier messages were summarized. If you need details from \
                     before compaction, ask the user or re-read the relevant files.</system-reminder>",
                );
                self.state.push_message(reminder);
                compact_tracking.was_compacted = false; // Only remind once per compaction.
            }

            // Step 2: Check token warning state.
            let warning = compact::token_warning_state(self.state.history(), &model);
            if warning.is_blocking {
                sink.on_warning("Context window nearly full. Consider starting a new session.");
            } else if warning.is_above_warning {
                sink.on_warning(&format!("Context {}% remaining", warning.percent_left));
            }

            // Step 3: Build and send the API request.
            let system_prompt = build_system_prompt(&self.tools, &self.state);
            // Use core schemas (deferred tools loaded on demand via ToolSearch).
            let tool_schemas = self.tools.core_schemas();

            let request = ProviderRequest {
                messages: self.state.history().to_vec(),
                system_prompt: system_prompt.clone(),
                tools: tool_schemas.clone(),
                model: model.clone(),
                max_tokens: self.state.config.api.max_output_tokens.unwrap_or(16384),
                temperature: None,
                enable_caching: true,
                tool_choice: Default::default(),
                metadata: None,
            };

            // Open the stream; on failure, classify the error and decide
            // between retry, model fallback, unattended backoff, reactive
            // compaction, or abort.
            let mut rx = match self.llm.stream(&request).await {
                Ok(rx) => {
                    retry_state.reset();
                    rx
                }
                Err(e) => {
                    // Map provider errors onto the retry policy's categories.
                    let retryable = match &e {
                        ProviderError::RateLimited { retry_after_ms } => {
                            crate::llm::retry::RetryableError::RateLimited {
                                retry_after: *retry_after_ms,
                            }
                        }
                        ProviderError::Overloaded => crate::llm::retry::RetryableError::Overloaded,
                        ProviderError::Network(_) => {
                            crate::llm::retry::RetryableError::StreamInterrupted
                        }
                        other => crate::llm::retry::RetryableError::NonRetryable(other.to_string()),
                    };

                    match retry_state.next_action(&retryable, &retry_config) {
                        crate::llm::retry::RetryAction::Retry { after } => {
                            warn!("Retrying in {}ms", after.as_millis());
                            tokio::time::sleep(after).await;
                            continue;
                        }
                        crate::llm::retry::RetryAction::FallbackModel => {
                            // NOTE(review): warns but retries with the SAME model —
                            // the fallback switch is still a TODO below.
                            sink.on_warning("Falling back to smaller model");
                            // TODO: switch model and retry
                            continue;
                        }
                        crate::llm::retry::RetryAction::Abort(reason) => {
                            // Unattended retry: in non-interactive mode, retry
                            // capacity errors with longer backoff instead of aborting.
                            if self.config.unattended
                                && self.state.config.features.unattended_retry
                                && matches!(
                                    &e,
                                    ProviderError::Overloaded | ProviderError::RateLimited { .. }
                                )
                            {
                                warn!("Unattended retry: waiting 30s for capacity");
                                tokio::time::sleep(std::time::Duration::from_secs(30)).await;
                                continue;
                            }
                            // Before giving up, try reactive compact for size errors.
                            if let ProviderError::RequestTooLarge(body) = &e {
                                let gap = compact::parse_prompt_too_long_gap(body);
                                let freed = compact::microcompact(&mut self.state.messages, 1);
                                if freed > 0 {
                                    sink.on_compact(freed);
                                    info!("Reactive compact freed ~{freed} tokens (gap: {gap:?})");
                                    continue;
                                }
                            }
                            sink.on_error(&reason);
                            self.state.is_query_active = false;
                            return Err(crate::error::Error::Other(e.to_string()));
                        }
                    }
                }
            };

            // Step 4: Stream response, submitting tool_use blocks for
            // overlapped execution as they complete.
            let mut content_blocks = Vec::new();
            let mut usage = Usage::default();
            let mut stop_reason: Option<StopReason> = None;
            let mut got_error = false;
            let mut error_text = String::new();
            // NOTE(review): counted but never read — presumably a remnant of
            // (or placeholder for) the overlapped execution mentioned above.
            let mut _pending_tool_count = 0usize;

            while let Some(event) = rx.recv().await {
                match event {
                    StreamEvent::TextDelta(text) => {
                        sink.on_text(&text);
                    }
                    StreamEvent::ContentBlockComplete(block) => {
                        if let ContentBlock::ToolUse {
                            ref name,
                            ref input,
                            ..
                        } = block
                        {
                            sink.on_tool_start(name, input);
                            _pending_tool_count += 1;
                        }
                        if let ContentBlock::Thinking { ref thinking, .. } = block {
                            sink.on_thinking(thinking);
                        }
                        content_blocks.push(block);
                    }
                    StreamEvent::Done {
                        usage: u,
                        stop_reason: sr,
                    } => {
                        usage = u;
                        stop_reason = sr;
                        sink.on_usage(&usage);
                    }
                    StreamEvent::Error(msg) => {
                        // Remember the error; it is handled after the partial
                        // assistant message has been recorded (Step 6).
                        got_error = true;
                        error_text = msg.clone();
                        sink.on_error(&msg);
                    }
                    _ => {}
                }
            }

            // Step 5: Record the assistant message (even if the stream
            // errored — partial content is kept for recovery below).
            let assistant_msg = Message::Assistant(AssistantMessage {
                uuid: Uuid::new_v4(),
                timestamp: chrono::Utc::now().to_rfc3339(),
                content: content_blocks.clone(),
                model: Some(model.clone()),
                usage: Some(usage.clone()),
                stop_reason: stop_reason.clone(),
                request_id: None,
            });
            self.state.push_message(assistant_msg);
            self.state.record_usage(&usage, &model);

            // Token budget tracking per turn.
            if self.state.config.features.token_budget && usage.total() > 0 {
                let turn_total = usage.input_tokens + usage.output_tokens;
                if turn_total > 100_000 {
                    sink.on_warning(&format!(
                        "High token usage this turn: {} tokens ({}in + {}out)",
                        turn_total, usage.input_tokens, usage.output_tokens
                    ));
                }
            }

            // Record cache and telemetry.
            let _cache_event = self.cache_tracker.record(&usage);
            {
                let mut span = crate::services::telemetry::api_call_span(
                    &model,
                    turn + 1,
                    &self.state.session_id,
                );
                crate::services::telemetry::record_usage(&mut span, &usage);
                span.finish();
                tracing::debug!(
                    "API call: {}ms, {}in/{}out tokens",
                    span.duration_ms().unwrap_or(0),
                    usage.input_tokens,
                    usage.output_tokens,
                );
            }

            // Step 6: Handle stream errors.
            if got_error {
                // Check if it's a prompt-too-long error in the stream.
                if error_text.contains("prompt is too long")
                    || error_text.contains("Prompt is too long")
                {
                    let freed = compact::microcompact(&mut self.state.messages, 1);
                    if freed > 0 {
                        sink.on_compact(freed);
                        continue;
                    }
                }

                // Check for max-output-tokens hit (partial response).
                // Only recover when some text arrived and the bounded retry
                // budget has not been spent.
                if content_blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::Text { .. }))
                    && error_text.contains("max_tokens")
                    && max_output_recovery_count < MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
                {
                    max_output_recovery_count += 1;
                    info!(
                        "Max output tokens recovery attempt {}/{}",
                        max_output_recovery_count, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
                    );
                    let recovery_msg = compact::max_output_recovery_message();
                    self.state.push_message(recovery_msg);
                    continue;
                }
            }

            // Step 6b: Handle max_tokens stop reason (escalate and continue).
            if matches!(stop_reason, Some(StopReason::MaxTokens))
                && !got_error
                && content_blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::Text { .. }))
                && max_output_recovery_count < MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
            {
                max_output_recovery_count += 1;
                info!(
                    "Max tokens stop reason — recovery attempt {}/{}",
                    max_output_recovery_count, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
                );
                let recovery_msg = compact::max_output_recovery_message();
                self.state.push_message(recovery_msg);
                continue;
            }

            // Step 7: Extract tool calls from the response.
            let tool_calls = extract_tool_calls(&content_blocks);

            if tool_calls.is_empty() {
                // No tools requested — turn is complete.
                info!("Turn complete (no tool calls)");
                sink.on_turn_complete(turn + 1);
                self.state.is_query_active = false;

                // Fire background memory extraction (fire-and-forget).
                // Only runs if feature enabled and memory directory exists.
                if self.state.config.features.extract_memories
                    && crate::memory::ensure_memory_dir().is_some()
                {
                    // Clone what the detached task needs; the spawned future
                    // must own its data (it may outlive this call).
                    let extraction_messages = self.state.messages.clone();
                    let extraction_state = self.extraction_state.clone();
                    let extraction_llm = self.llm.clone();
                    let extraction_model = model.clone();
                    tokio::spawn(async move {
                        crate::memory::extraction::extract_memories_background(
                            extraction_messages,
                            extraction_state,
                            extraction_llm,
                            extraction_model,
                        )
                        .await;
                    });
                }

                return Ok(());
            }

            // Step 8: Execute tool calls with pre/post hooks.
            info!("Executing {} tool call(s)", tool_calls.len());
            let cwd = PathBuf::from(&self.state.cwd);
            let tool_ctx = ToolContext {
                cwd,
                cancel: self.cancel.clone(),
                permission_checker: self.permissions.clone(),
                verbose: self.config.verbose,
                plan_mode: self.state.plan_mode,
                file_cache: Some(self.file_cache.clone()),
                denial_tracker: Some(self.denial_tracker.clone()),
                task_manager: Some(self.state.task_manager.clone()),
                session_allows: Some(self.session_allows.clone()),
                permission_prompter: self.permission_prompter.clone(),
            };

            // Fire pre-tool-use hooks.
            for call in &tool_calls {
                self.hooks
                    .run_hooks(&HookEvent::PreToolUse, Some(&call.name), &call.input)
                    .await;
            }

            let results =
                execute_tool_calls(&tool_calls, self.tools.all(), &tool_ctx, &self.permissions)
                    .await;

            // Step 9: Inject tool results + fire post-tool-use hooks.
            for result in &results {
                sink.on_tool_result(&result.tool_name, &result.result);

                // Fire post-tool-use hooks.
                self.hooks
                    .run_hooks(
                        &HookEvent::PostToolUse,
                        Some(&result.tool_name),
                        &serde_json::json!({
                            "tool": result.tool_name,
                            "is_error": result.result.is_error,
                        }),
                    )
                    .await;

                let msg = tool_result_message(
                    &result.tool_use_id,
                    &result.result.content,
                    result.result.is_error,
                );
                self.state.push_message(msg);
            }

            // Continue the loop — the model will see the tool results.
        }

        warn!("Max turns ({max_turns}) reached");
        sink.on_warning(&format!("Agent stopped after {max_turns} turns"));
        self.state.is_query_active = false;
        Ok(())
    }
596
    /// Cancel the current operation.
    ///
    /// Triggers the turn's cancellation token, which is shared with tool
    /// execution contexts and the Ctrl+C handler.
    pub fn cancel(&self) {
        self.cancel.cancel();
    }
601
602    /// Get a cloneable cancel token for use in background tasks.
603    pub fn cancel_token(&self) -> tokio_util::sync::CancellationToken {
604        self.cancel.clone()
605    }
606}
607
608/// Build the system prompt from tool definitions, app state, and memory.
609pub fn build_system_prompt(tools: &ToolRegistry, state: &AppState) -> String {
610    let mut prompt = String::new();
611
612    prompt.push_str(
613        "You are an AI coding agent. You help users with software engineering tasks \
614         by reading, writing, and searching code. Use the tools available to you to \
615         accomplish tasks.\n\n",
616    );
617
618    // Environment context.
619    let shell = std::env::var("SHELL").unwrap_or_else(|_| "bash".to_string());
620    let is_git = std::path::Path::new(&state.cwd).join(".git").exists();
621    prompt.push_str(&format!(
622        "# Environment\n\
623         - Working directory: {}\n\
624         - Platform: {}\n\
625         - Shell: {shell}\n\
626         - Git repository: {}\n\n",
627        state.cwd,
628        std::env::consts::OS,
629        if is_git { "yes" } else { "no" },
630    ));
631
632    // Inject memory context (project + user + on-demand relevant).
633    let mut memory = crate::memory::MemoryContext::load(Some(std::path::Path::new(&state.cwd)));
634
635    // On-demand: surface relevant memories based on recent conversation.
636    let recent_text: String = state
637        .messages
638        .iter()
639        .rev()
640        .take(5)
641        .filter_map(|m| match m {
642            crate::llm::message::Message::User(u) => Some(
643                u.content
644                    .iter()
645                    .filter_map(|b| b.as_text())
646                    .collect::<Vec<_>>()
647                    .join(" "),
648            ),
649            _ => None,
650        })
651        .collect::<Vec<_>>()
652        .join(" ");
653
654    if !recent_text.is_empty() {
655        memory.load_relevant(&recent_text);
656    }
657
658    let memory_section = memory.to_system_prompt_section();
659    if !memory_section.is_empty() {
660        prompt.push_str(&memory_section);
661    }
662
663    // Tool documentation.
664    prompt.push_str("# Available Tools\n\n");
    // Per-tool documentation: one "## <name>" section (tool prompt text) for
    // each tool that reports itself enabled; disabled tools are omitted entirely.
    for tool in tools.all() {
        if tool.is_enabled() {
            prompt.push_str(&format!("## {}\n{}\n\n", tool.name(), tool.prompt()));
        }
    }

    // Available skills: discover skills relative to the session cwd and list
    // only the user-invocable ones, one bullet per skill as `/name`, followed
    // by an optional description and an optional "(use when: ...)" hint.
    // The section is skipped entirely when no invocable skills exist.
    let skills = crate::skills::SkillRegistry::load_all(Some(std::path::Path::new(&state.cwd)));
    let invocable = skills.user_invocable();
    if !invocable.is_empty() {
        prompt.push_str("# Available Skills\n\n");
        for skill in invocable {
            // Missing metadata fields render as empty and are simply not appended.
            let desc = skill.metadata.description.as_deref().unwrap_or("");
            let when = skill.metadata.when_to_use.as_deref().unwrap_or("");
            prompt.push_str(&format!("- `/{}`", skill.name));
            if !desc.is_empty() {
                prompt.push_str(&format!(": {desc}"));
            }
            if !when.is_empty() {
                prompt.push_str(&format!(" (use when: {when})"));
            }
            prompt.push('\n');
        }
        prompt.push('\n');
    }

    // Static guidelines block: tool-selection rules, code-editing discipline,
    // git safety protocol, commit/PR workflows, action-safety (reversibility)
    // framework, response style, and memory-file conventions. This is a single
    // compile-time string literal; edits here change the model's instructions,
    // not program logic.
    prompt.push_str(
        "# Using tools\n\n\
         Use dedicated tools instead of shell commands when available:\n\
         - File search: Glob (not find or ls)\n\
         - Content search: Grep (not grep or rg)\n\
         - Read files: FileRead (not cat/head/tail)\n\
         - Edit files: FileEdit (not sed/awk)\n\
         - Write files: FileWrite (not echo/cat with redirect)\n\
         - Reserve Bash for system commands and operations that require shell execution.\n\
         - Break complex tasks into steps. Use multiple tool calls in parallel when independent.\n\
         - Use the Agent tool for complex multi-step research or tasks that benefit from isolation.\n\n\
         # Working with code\n\n\
         - Read files before editing them. Understand existing code before suggesting changes.\n\
         - Prefer editing existing files over creating new ones to avoid file bloat.\n\
         - Only make changes that were requested. Don't add features, refactor, add comments, \
           or make \"improvements\" beyond the ask.\n\
         - Don't add error handling for scenarios that can't happen. Don't design for \
           hypothetical future requirements.\n\
         - When referencing code, include file_path:line_number.\n\
         - Be careful not to introduce security vulnerabilities (command injection, XSS, SQL injection, \
           OWASP top 10). If you notice insecure code you wrote, fix it immediately.\n\
         - Don't add docstrings, comments, or type annotations to code you didn't change.\n\
         - Three similar lines of code is better than a premature abstraction.\n\n\
         # Git safety protocol\n\n\
         - NEVER update the git config.\n\
         - NEVER run destructive git commands (push --force, reset --hard, checkout ., restore ., \
           clean -f, branch -D) unless the user explicitly requests them.\n\
         - NEVER skip hooks (--no-verify, --no-gpg-sign) unless the user explicitly requests it.\n\
         - NEVER force push to main/master. Warn the user if they request it.\n\
         - Always create NEW commits rather than amending, unless the user explicitly requests amend. \
           After hook failure, the commit did NOT happen — amend would modify the PREVIOUS commit.\n\
         - When staging files, prefer adding specific files by name rather than git add -A or git add ., \
           which can accidentally include sensitive files.\n\
         - NEVER commit changes unless the user explicitly asks.\n\n\
         # Committing changes\n\n\
         When the user asks to commit:\n\
         1. Run git status and git diff to see all changes.\n\
         2. Run git log --oneline -5 to match the repository's commit message style.\n\
         3. Draft a concise (1-2 sentence) commit message focusing on \"why\" not \"what\".\n\
         4. Do not commit files that likely contain secrets (.env, credentials.json).\n\
         5. Stage specific files, create the commit.\n\
         6. If pre-commit hook fails, fix the issue and create a NEW commit.\n\
         7. When creating commits, include a co-author attribution line at the end of the message.\n\n\
         # Creating pull requests\n\n\
         When the user asks to create a PR:\n\
         1. Run git status, git diff, and git log to understand all changes on the branch.\n\
         2. Analyze ALL commits (not just the latest) that will be in the PR.\n\
         3. Draft a short title (under 70 chars) and detailed body with summary and test plan.\n\
         4. Push to remote with -u flag if needed, then create PR using gh pr create.\n\
         5. Return the PR URL when done.\n\n\
         # Executing actions safely\n\n\
         Consider the reversibility and blast radius of every action:\n\
         - Freely take local, reversible actions (editing files, running tests).\n\
         - For hard-to-reverse or shared-state actions, confirm with the user first:\n\
           - Destructive: deleting files/branches, dropping tables, rm -rf, overwriting uncommitted changes.\n\
           - Hard to reverse: force-pushing, git reset --hard, amending published commits.\n\
           - Visible to others: pushing code, creating/commenting on PRs/issues, sending messages.\n\
         - When you encounter an obstacle, do not use destructive actions as a shortcut. \
           Identify root causes and fix underlying issues.\n\
         - If you discover unexpected state (unfamiliar files, branches, config), investigate \
           before deleting or overwriting — it may be the user's in-progress work.\n\n\
         # Response style\n\n\
         - Be concise. Lead with the answer or action, not the reasoning.\n\
         - Skip filler, preamble, and unnecessary transitions.\n\
         - Don't restate what the user said.\n\
         - If you can say it in one sentence, don't use three.\n\
         - Focus output on: decisions that need input, status updates, and errors that change the plan.\n\
         - When referencing GitHub issues or PRs, use owner/repo#123 format.\n\
         - Only use emojis if the user explicitly requests it.\n\n\
         # Memory\n\n\
         You can save information across sessions by writing memory files.\n\
         - Save to: ~/.config/agent-code/memory/ (one .md file per topic)\n\
         - Each file needs YAML frontmatter: name, description, type (user/feedback/project/reference)\n\
         - After writing a file, update MEMORY.md with a one-line pointer\n\
         - Memory types: user (role, preferences), feedback (corrections, confirmations), \
           project (decisions, deadlines), reference (external resources)\n\
         - Do NOT store: code patterns, git history, debugging solutions, anything derivable from code\n\
         - Memory is a hint — always verify against current state before acting on it\n",
    );

    // Worked examples: canonical tool-call sequences (read-before-edit,
    // search-then-act, parallel calls, test-after-change), an error-recovery
    // table, and common end-to-end workflows. Also a static literal.
    prompt.push_str(
        "# Tool usage patterns\n\n\
         Common patterns for effective tool use:\n\n\
         **Read before edit**: Always read a file before editing it. This ensures you \
         understand the current state and can make targeted changes.\n\
         ```\n\
         1. FileRead file_path → understand structure\n\
         2. FileEdit old_string, new_string → targeted change\n\
         ```\n\n\
         **Search then act**: Use Glob to find files, Grep to find content, then read/edit.\n\
         ```\n\
         1. Glob **/*.rs → find Rust files\n\
         2. Grep pattern path → find specific code\n\
         3. FileRead → read the match\n\
         4. FileEdit → make the change\n\
         ```\n\n\
         **Parallel tool calls**: When you need to read multiple independent files or run \
         independent searches, make all the tool calls in one response. Don't serialize \
         independent operations.\n\n\
         **Test after change**: After editing code, run tests to verify the change works.\n\
         ```\n\
         1. FileEdit → make change\n\
         2. Bash cargo test / pytest / npm test → verify\n\
         3. If tests fail, read the error, fix, re-test\n\
         ```\n\n\
         # Error recovery\n\n\
         When something goes wrong:\n\
         - **Tool not found**: Use ToolSearch to find the right tool name.\n\
         - **Permission denied**: Explain why the action is needed, ask the user to approve.\n\
         - **File not found**: Use Glob to find the correct path. Check for typos.\n\
         - **Edit failed (not unique)**: Provide more surrounding context in old_string, \
           or use replace_all=true if renaming.\n\
         - **Command failed**: Read the full error message. Don't retry the same command. \
           Diagnose the root cause first.\n\
         - **Context too large**: The system will auto-compact. If you need specific \
           information from before compaction, re-read the relevant files.\n\
         - **Rate limited**: The system will auto-retry with backoff. Just wait.\n\n\
         # Common workflows\n\n\
         **Bug fix**: Read the failing test → read the source code it tests → \
         identify the bug → fix it → run the test → confirm it passes.\n\n\
         **New feature**: Read existing patterns in the codebase → create or edit files → \
         add tests → run tests → update docs if needed.\n\n\
         **Code review**: Read the diff → identify issues (bugs, security, style) → \
         report findings with file:line references.\n\n\
         **Refactor**: Search for all usages of the symbol → plan the changes → \
         edit each file → run tests to verify nothing broke.\n\n",
    );

    // MCP server instructions (dynamic, per-server). Transport is inferred
    // from the config entry's shape: a `command` means stdio, a `url` means
    // sse, and neither is reported as "unknown". Skipped when no servers are
    // configured. NOTE(review): iteration order follows the map's iterator —
    // listing order is only stable if `mcp_servers` is an ordered map; confirm.
    if !state.config.mcp_servers.is_empty() {
        prompt.push_str("# MCP Servers\n\n");
        prompt.push_str(
            "Connected MCP servers provide additional tools. MCP tools are prefixed \
             with `mcp__{server}__{tool}`. Use them like any other tool.\n\n",
        );
        for (name, entry) in &state.config.mcp_servers {
            let transport = if entry.command.is_some() {
                "stdio"
            } else if entry.url.is_some() {
                "sse"
            } else {
                "unknown"
            };
            prompt.push_str(&format!("- **{name}** ({transport})\n"));
        }
        prompt.push('\n');
    }

    // Deferred tools: registered but not loaded by default. Listed by name so
    // the model knows to load them via ToolSearch; section skipped when empty.
    let deferred = tools.deferred_names();
    if !deferred.is_empty() {
        prompt.push_str("# Deferred Tools\n\n");
        prompt.push_str(
            "These tools are available but not loaded by default. \
             Use ToolSearch to load them when needed:\n",
        );
        for name in &deferred {
            prompt.push_str(&format!("- {name}\n"));
        }
        prompt.push('\n');
    }

    // Final static sections: task-management guidance (TaskCreate, subagents,
    // plan mode, worktrees) and output-formatting rules (GFM markdown only).
    prompt.push_str(
        "# Task management\n\n\
         - Use TaskCreate to break complex work into trackable steps.\n\
         - Mark tasks as in_progress when starting, completed when done.\n\
         - Use the Agent tool to spawn subagents for parallel independent work.\n\
         - Use EnterPlanMode/ExitPlanMode for read-only exploration before making changes.\n\
         - Use EnterWorktree/ExitWorktree for isolated changes in git worktrees.\n\n\
         # Output formatting\n\n\
         - All text output is displayed to the user. Use GitHub-flavored markdown.\n\
         - Use fenced code blocks with language hints for code: ```rust, ```python, etc.\n\
         - Use inline `code` for file names, function names, and short code references.\n\
         - Use tables for structured comparisons.\n\
         - Use bullet lists for multiple items.\n\
         - Keep paragraphs short (2-3 sentences).\n\
         - Never output raw HTML or complex formatting — stick to standard markdown.\n",
    );

    // Return the fully assembled prompt as the tail expression.
    prompt
}