1pub mod source;
16
17use std::path::PathBuf;
18use std::sync::Arc;
19
20use tokio_util::sync::CancellationToken;
21use tracing::{debug, info, warn};
22use uuid::Uuid;
23
24use crate::hooks::{HookEvent, HookRegistry};
25use crate::llm::message::*;
26use crate::llm::provider::{Provider, ProviderError, ProviderRequest};
27use crate::llm::stream::StreamEvent;
28use crate::permissions::PermissionChecker;
29use crate::services::compact::{self, CompactTracking, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT};
30use crate::services::tokens;
31use crate::state::AppState;
32use crate::tools::ToolContext;
33use crate::tools::executor::{execute_tool_calls, extract_tool_calls};
34use crate::tools::registry::ToolRegistry;
35
/// Per-run options for [`QueryEngine`].
pub struct QueryEngineConfig {
    /// Maximum agent turns per query; `None` falls back to 50.
    pub max_turns: Option<usize>,
    /// Verbose flag propagated into the tool execution context.
    pub verbose: bool,
    /// True when running without a human present; combined with the
    /// `unattended_retry` feature flag to wait out provider overloads
    /// instead of aborting.
    pub unattended: bool,
}
43
/// Orchestrates the agent loop: sends conversation state to the LLM,
/// streams the response, executes requested tools, and feeds tool
/// results back into the conversation.
pub struct QueryEngine {
    /// LLM backend used for completions (also reused for compaction summaries).
    llm: Arc<dyn Provider>,
    /// Registry of tools exposed to the model.
    tools: ToolRegistry,
    /// Shared file cache handed to tool executions.
    file_cache: Arc<tokio::sync::Mutex<crate::services::file_cache::FileCache>>,
    /// Permission checker consulted before tool actions.
    permissions: Arc<PermissionChecker>,
    /// Conversation history, config, usage accounting, and session flags.
    state: AppState,
    /// Per-engine run options (max turns, verbosity, unattended mode).
    config: QueryEngineConfig,
    /// Cancellation token for the current turn; replaced at the start of
    /// every `run_turn_with_sink` call.
    cancel: CancellationToken,
    /// Pre/post tool-use hooks loaded from config.
    hooks: HookRegistry,
    /// Fed each turn's token usage via `record`.
    cache_tracker: crate::services::cache_tracking::CacheTracker,
    /// Permission-denial history (constructed with a bound of 100); shared
    /// with tool executions.
    denial_tracker: Arc<tokio::sync::Mutex<crate::permissions::tracking::DenialTracker>>,
    /// Background memory-extraction progress shared with spawned tasks.
    extraction_state: Arc<tokio::sync::Mutex<crate::memory::extraction::ExtractionState>>,
    /// Session-scoped allow entries shared with tool executions.
    session_allows: Arc<tokio::sync::Mutex<std::collections::HashSet<String>>>,
    /// Optional interactive prompter for permission requests.
    permission_prompter: Option<Arc<dyn crate::tools::PermissionPrompter>>,
    /// Cached system prompt keyed by a hash of model, cwd, MCP-server
    /// count, and tool count; rebuilt when the hash changes.
    cached_system_prompt: Option<(u64, String)>,
}
62
/// Observer for events emitted while an agent turn runs.
///
/// Required methods cover the always-relevant events; the remaining
/// callbacks default to no-ops so implementors can opt in selectively.
pub trait StreamSink: Send + Sync {
    /// A chunk of assistant response text as it streams in.
    fn on_text(&self, text: &str);
    /// A tool invocation is starting with the given JSON input payload.
    fn on_tool_start(&self, tool_name: &str, input: &serde_json::Value);
    /// A tool finished; `result` carries the output and error flag.
    fn on_tool_result(&self, tool_name: &str, result: &crate::tools::ToolResult);
    /// A chunk of model "thinking" text (no-op by default).
    fn on_thinking(&self, _text: &str) {}
    /// The given 1-based turn finished without tool calls (no-op by default).
    fn on_turn_complete(&self, _turn: usize) {}
    /// A stream or provider error occurred.
    fn on_error(&self, error: &str);
    /// Token usage reported for the completed request (no-op by default).
    fn on_usage(&self, _usage: &Usage) {}
    /// Compaction freed approximately this many tokens (no-op by default).
    fn on_compact(&self, _freed_tokens: u64) {}
    /// A non-fatal warning: budget, context pressure, model fallback, etc.
    /// (no-op by default).
    fn on_warning(&self, _msg: &str) {}
}
75
/// A [`StreamSink`] that discards every event; used by
/// [`QueryEngine::run_turn`] when the caller needs no streaming output.
pub struct NullSink;

impl StreamSink for NullSink {
    fn on_text(&self, _: &str) {}
    fn on_tool_start(&self, _: &str, _: &serde_json::Value) {}
    fn on_tool_result(&self, _: &str, _: &crate::tools::ToolResult) {}
    fn on_error(&self, _: &str) {}
}
84
85impl QueryEngine {
86 pub fn new(
87 llm: Arc<dyn Provider>,
88 tools: ToolRegistry,
89 permissions: PermissionChecker,
90 state: AppState,
91 config: QueryEngineConfig,
92 ) -> Self {
93 Self {
94 llm,
95 tools,
96 file_cache: Arc::new(tokio::sync::Mutex::new(
97 crate::services::file_cache::FileCache::new(),
98 )),
99 permissions: Arc::new(permissions),
100 state,
101 config,
102 cancel: CancellationToken::new(),
103 hooks: HookRegistry::new(),
104 cache_tracker: crate::services::cache_tracking::CacheTracker::new(),
105 denial_tracker: Arc::new(tokio::sync::Mutex::new(
106 crate::permissions::tracking::DenialTracker::new(100),
107 )),
108 extraction_state: Arc::new(tokio::sync::Mutex::new(
109 crate::memory::extraction::ExtractionState::new(),
110 )),
111 session_allows: Arc::new(tokio::sync::Mutex::new(std::collections::HashSet::new())),
112 permission_prompter: None,
113 cached_system_prompt: None,
114 }
115 }
116
117 pub fn load_hooks(&mut self, hook_defs: &[crate::hooks::HookDefinition]) {
119 for def in hook_defs {
120 self.hooks.register(def.clone());
121 }
122 if !hook_defs.is_empty() {
123 tracing::info!("Loaded {} hooks from config", hook_defs.len());
124 }
125 }
126
    /// Shared read access to the engine's application state.
    pub fn state(&self) -> &AppState {
        &self.state
    }
131
    /// Mutable access to the engine's application state.
    pub fn state_mut(&mut self) -> &mut AppState {
        &mut self.state
    }
136
137 pub fn install_signal_handler(&self) {
141 let cancel = self.cancel.clone();
142 tokio::spawn(async move {
143 loop {
144 if tokio::signal::ctrl_c().await.is_ok() {
145 if cancel.is_cancelled() {
146 std::process::exit(130);
148 }
149 cancel.cancel();
150 }
151 }
152 });
153 }
154
    /// Runs a full agent turn for `user_input`, discarding all stream
    /// events. Convenience wrapper around [`Self::run_turn_with_sink`]
    /// with a [`NullSink`].
    pub async fn run_turn(&mut self, user_input: &str) -> crate::error::Result<()> {
        self.run_turn_with_sink(user_input, &NullSink).await
    }
159
    /// Runs the agent loop for one user query, emitting events to `sink`.
    ///
    /// Each iteration ("turn"): checks the cost/token budget, normalizes
    /// history, compacts context if needed, streams one completion from
    /// the provider, executes any requested tool calls, and appends tool
    /// results to the conversation. Returns `Ok(())` when the model stops
    /// requesting tools, when the budget halts the run, or after
    /// `max_turns` turns.
    ///
    /// # Errors
    /// Returns an error only when the provider fails and every recovery
    /// strategy (retry with backoff, model fallback, unattended wait,
    /// reactive compaction) has been exhausted.
    pub async fn run_turn_with_sink(
        &mut self,
        user_input: &str,
        sink: &dyn StreamSink,
    ) -> crate::error::Result<()> {
        // Fresh token per query so a cancel from a previous query doesn't
        // immediately abort this one.
        self.cancel = CancellationToken::new();

        let user_msg = user_message(user_input);
        self.state.push_message(user_msg);

        let max_turns = self.config.max_turns.unwrap_or(50);
        let mut compact_tracking = CompactTracking::default();
        let mut retry_state = crate::llm::retry::RetryState::default();
        let retry_config = crate::llm::retry::RetryConfig::default();
        // Number of times we retried after hitting the output-token ceiling.
        let mut max_output_recovery_count = 0u32;

        for turn in 0..max_turns {
            self.state.turn_count = turn + 1;
            self.state.is_query_active = true;

            // Budget gate: stop or warn based on accumulated cost/tokens
            // before spending more.
            let budget_config = crate::services::budget::BudgetConfig::default();
            match crate::services::budget::check_budget(
                self.state.total_cost_usd,
                self.state.total_usage.total(),
                &budget_config,
            ) {
                crate::services::budget::BudgetDecision::Stop { message } => {
                    sink.on_warning(&message);
                    self.state.is_query_active = false;
                    return Ok(());
                }
                crate::services::budget::BudgetDecision::ContinueWithWarning {
                    message, ..
                } => {
                    sink.on_warning(&message);
                }
                crate::services::budget::BudgetDecision::Continue => {}
            }

            // Normalize history into a shape the provider accepts:
            // pair up tool results, drop empties, cap oversized document
            // blocks, and merge adjacent user messages.
            crate::llm::normalize::ensure_tool_result_pairing(&mut self.state.messages);
            crate::llm::normalize::strip_empty_blocks(&mut self.state.messages);
            crate::llm::normalize::remove_empty_messages(&mut self.state.messages);
            crate::llm::normalize::cap_document_blocks(&mut self.state.messages, 500_000);
            crate::llm::normalize::merge_consecutive_user_messages(&mut self.state.messages);

            debug!("Agent turn {}/{}", turn + 1, max_turns);

            // Mutable: may be swapped to a fallback model on provider errors.
            let mut model = self.state.config.api.model.clone();

            // Proactive compaction, escalating: microcompact -> LLM
            // summarization -> context collapse -> smaller microcompact.
            if compact::should_auto_compact(self.state.history(), &model, &compact_tracking) {
                let token_count = tokens::estimate_context_tokens(self.state.history());
                let threshold = compact::auto_compact_threshold(&model);
                info!("Auto-compact triggered: {token_count} tokens >= {threshold} threshold");

                let freed = compact::microcompact(&mut self.state.messages, 5);
                if freed > 0 {
                    sink.on_compact(freed);
                    info!("Microcompact freed ~{freed} tokens");
                }

                let post_mc_tokens = tokens::estimate_context_tokens(self.state.history());
                if post_mc_tokens >= threshold {
                    info!("Microcompact insufficient, attempting LLM compaction");
                    match compact::compact_with_llm(&mut self.state.messages, &*self.llm, &model)
                        .await
                    {
                        Some(removed) => {
                            info!("LLM compaction removed {removed} messages");
                            compact_tracking.was_compacted = true;
                            compact_tracking.consecutive_failures = 0;
                        }
                        None => {
                            compact_tracking.consecutive_failures += 1;
                            warn!(
                                "LLM compaction failed (attempt {})",
                                compact_tracking.consecutive_failures
                            );
                            // Fall back to structural collapse toward the
                            // model's effective window.
                            let effective = compact::effective_context_window(&model);
                            if let Some(collapse) =
                                crate::services::context_collapse::collapse_to_budget(
                                    self.state.history(),
                                    effective,
                                )
                            {
                                info!(
                                    "Context collapse snipped {} messages, freed ~{} tokens",
                                    collapse.snipped_count, collapse.tokens_freed
                                );
                                self.state.messages = collapse.api_messages;
                                sink.on_compact(collapse.tokens_freed);
                            } else {
                                // Last resort: a smaller microcompact pass.
                                let freed2 = compact::microcompact(&mut self.state.messages, 2);
                                if freed2 > 0 {
                                    sink.on_compact(freed2);
                                }
                            }
                        }
                    }
                }
            }

            // Tell the model its earlier context was summarized (opt-in
            // via feature flag); flag is reset so the reminder fires once.
            if compact_tracking.was_compacted && self.state.config.features.compaction_reminders {
                let reminder = user_message(
                    "<system-reminder>Context was automatically compacted. \
                     Earlier messages were summarized. If you need details from \
                     before compaction, ask the user or re-read the relevant files.</system-reminder>",
                );
                self.state.push_message(reminder);
                compact_tracking.was_compacted = false;
            }

            // Surface context-pressure warnings to the user.
            let warning = compact::token_warning_state(self.state.history(), &model);
            if warning.is_blocking {
                sink.on_warning("Context window nearly full. Consider starting a new session.");
            } else if warning.is_above_warning {
                sink.on_warning(&format!("Context {}% remaining", warning.percent_left));
            }

            // Cheap fingerprint of the inputs that shape the system
            // prompt; the prompt is rebuilt only when this changes.
            // NOTE(review): hashes *counts* of MCP servers/tools, so
            // swapping entries without changing counts would not
            // invalidate the cache — confirm that's acceptable.
            let prompt_hash = {
                use std::hash::{Hash, Hasher};
                let mut h = std::collections::hash_map::DefaultHasher::new();
                self.state.config.api.model.hash(&mut h);
                self.state.cwd.hash(&mut h);
                self.state.config.mcp_servers.len().hash(&mut h);
                self.tools.all().len().hash(&mut h);
                h.finish()
            };
            let system_prompt = if let Some((cached_hash, ref cached)) = self.cached_system_prompt
                && cached_hash == prompt_hash
            {
                cached.clone()
            } else {
                let prompt = build_system_prompt(&self.tools, &self.state);
                self.cached_system_prompt = Some((prompt_hash, prompt.clone()));
                prompt
            };
            let tool_schemas = self.tools.core_schemas();

            // After a max-output-tokens failure, raise the output ceiling
            // to at least 64Ki for the retry.
            let base_tokens = self.state.config.api.max_output_tokens.unwrap_or(16384);
            let effective_tokens = if max_output_recovery_count > 0 {
                base_tokens.max(65536)
            } else {
                base_tokens
            };

            let request = ProviderRequest {
                messages: self.state.history().to_vec(),
                system_prompt: system_prompt.clone(),
                tools: tool_schemas.clone(),
                model: model.clone(),
                max_tokens: effective_tokens,
                temperature: None,
                enable_caching: true,
                tool_choice: Default::default(),
                metadata: None,
            };

            // Open the stream, translating provider errors into retry
            // actions: backoff retry, model fallback, or abort (with
            // unattended-wait and too-large-request recovery first).
            let mut rx = match self.llm.stream(&request).await {
                Ok(rx) => {
                    retry_state.reset();
                    rx
                }
                Err(e) => {
                    let retryable = match &e {
                        ProviderError::RateLimited { retry_after_ms } => {
                            crate::llm::retry::RetryableError::RateLimited {
                                retry_after: *retry_after_ms,
                            }
                        }
                        ProviderError::Overloaded => crate::llm::retry::RetryableError::Overloaded,
                        ProviderError::Network(_) => {
                            crate::llm::retry::RetryableError::StreamInterrupted
                        }
                        other => crate::llm::retry::RetryableError::NonRetryable(other.to_string()),
                    };

                    match retry_state.next_action(&retryable, &retry_config) {
                        crate::llm::retry::RetryAction::Retry { after } => {
                            warn!("Retrying in {}ms", after.as_millis());
                            tokio::time::sleep(after).await;
                            continue;
                        }
                        crate::llm::retry::RetryAction::FallbackModel => {
                            let fallback = get_fallback_model(&model);
                            sink.on_warning(&format!("Falling back from {model} to {fallback}"));
                            model = fallback;
                            continue;
                        }
                        crate::llm::retry::RetryAction::Abort(reason) => {
                            // Unattended mode: capacity errors get a flat
                            // 30s wait instead of aborting.
                            if self.config.unattended
                                && self.state.config.features.unattended_retry
                                && matches!(
                                    &e,
                                    ProviderError::Overloaded | ProviderError::RateLimited { .. }
                                )
                            {
                                warn!("Unattended retry: waiting 30s for capacity");
                                tokio::time::sleep(std::time::Duration::from_secs(30)).await;
                                continue;
                            }
                            // Oversized request: shrink context reactively
                            // (collapse, then microcompact) and retry.
                            if let ProviderError::RequestTooLarge(body) = &e {
                                let gap = compact::parse_prompt_too_long_gap(body);

                                let effective = compact::effective_context_window(&model);
                                if let Some(collapse) =
                                    crate::services::context_collapse::collapse_to_budget(
                                        self.state.history(),
                                        effective,
                                    )
                                {
                                    info!(
                                        "Reactive collapse: snipped {} messages, freed ~{} tokens",
                                        collapse.snipped_count, collapse.tokens_freed
                                    );
                                    self.state.messages = collapse.api_messages;
                                    sink.on_compact(collapse.tokens_freed);
                                    continue;
                                }

                                let freed = compact::microcompact(&mut self.state.messages, 1);
                                if freed > 0 {
                                    sink.on_compact(freed);
                                    info!(
                                        "Reactive microcompact freed ~{freed} tokens (gap: {gap:?})"
                                    );
                                    continue;
                                }
                            }
                            sink.on_error(&reason);
                            self.state.is_query_active = false;
                            return Err(crate::error::Error::Other(e.to_string()));
                        }
                    }
                }
            };

            // Per-turn stream accumulation state.
            let mut content_blocks = Vec::new();
            let mut usage = Usage::default();
            let mut stop_reason: Option<StopReason> = None;
            let mut got_error = false;
            let mut error_text = String::new();

            // (tool_use_id, tool_name, join handle) for tools started
            // eagerly while the stream is still open.
            let mut streaming_tool_handles: Vec<(
                String,
                String,
                tokio::task::JoinHandle<crate::tools::ToolResult>,
            )> = Vec::new();

            // Drain the stream until the provider closes the channel.
            while let Some(event) = rx.recv().await {
                match event {
                    StreamEvent::TextDelta(text) => {
                        sink.on_text(&text);
                    }
                    StreamEvent::ContentBlockComplete(block) => {
                        if let ContentBlock::ToolUse {
                            ref id,
                            ref name,
                            ref input,
                        } = block
                        {
                            sink.on_tool_start(name, input);

                            // Read-only, concurrency-safe tools start
                            // immediately, overlapping with the rest of
                            // the stream. NOTE(review): these run before
                            // the PreToolUse hooks fire below — confirm
                            // that ordering is intended.
                            if let Some(tool) = self.tools.get(name)
                                && tool.is_read_only()
                                && tool.is_concurrency_safe()
                            {
                                let tool = tool.clone();
                                let input = input.clone();
                                let cwd = std::path::PathBuf::from(&self.state.cwd);
                                let cancel = self.cancel.clone();
                                let perm = self.permissions.clone();
                                let tool_id = id.clone();
                                let tool_name = name.clone();

                                // Minimal context: eager tools get no file
                                // cache, trackers, or prompter.
                                let handle = tokio::spawn(async move {
                                    match tool
                                        .call(
                                            input,
                                            &ToolContext {
                                                cwd,
                                                cancel,
                                                permission_checker: perm.clone(),
                                                verbose: false,
                                                plan_mode: false,
                                                file_cache: None,
                                                denial_tracker: None,
                                                task_manager: None,
                                                session_allows: None,
                                                permission_prompter: None,
                                            },
                                        )
                                        .await
                                    {
                                        Ok(r) => r,
                                        Err(e) => crate::tools::ToolResult::error(e.to_string()),
                                    }
                                });

                                streaming_tool_handles.push((tool_id, tool_name, handle));
                            }
                        }
                        if let ContentBlock::Thinking { ref thinking, .. } = block {
                            sink.on_thinking(thinking);
                        }
                        content_blocks.push(block);
                    }
                    StreamEvent::Done {
                        usage: u,
                        stop_reason: sr,
                    } => {
                        usage = u;
                        stop_reason = sr;
                        sink.on_usage(&usage);
                    }
                    StreamEvent::Error(msg) => {
                        // Recorded, not returned: recovery paths below
                        // decide whether to retry the turn.
                        got_error = true;
                        error_text = msg.clone();
                        sink.on_error(&msg);
                    }
                    _ => {}
                }
            }

            // Persist the assistant message (even on error, so history
            // stays consistent) and account for usage.
            let assistant_msg = Message::Assistant(AssistantMessage {
                uuid: Uuid::new_v4(),
                timestamp: chrono::Utc::now().to_rfc3339(),
                content: content_blocks.clone(),
                model: Some(model.clone()),
                usage: Some(usage.clone()),
                stop_reason: stop_reason.clone(),
                request_id: None,
            });
            self.state.push_message(assistant_msg);
            self.state.record_usage(&usage, &model);

            // Optional per-turn spend warning (feature-gated).
            if self.state.config.features.token_budget && usage.total() > 0 {
                let turn_total = usage.input_tokens + usage.output_tokens;
                if turn_total > 100_000 {
                    sink.on_warning(&format!(
                        "High token usage this turn: {} tokens ({}in + {}out)",
                        turn_total, usage.input_tokens, usage.output_tokens
                    ));
                }
            }

            let _cache_event = self.cache_tracker.record(&usage);
            // Telemetry span for this API call.
            {
                let mut span = crate::services::telemetry::api_call_span(
                    &model,
                    turn + 1,
                    &self.state.session_id,
                );
                crate::services::telemetry::record_usage(&mut span, &usage);
                span.finish();
                tracing::debug!(
                    "API call: {}ms, {}in/{}out tokens",
                    span.duration_ms().unwrap_or(0),
                    usage.input_tokens,
                    usage.output_tokens,
                );
            }

            // Stream-error recovery paths. If neither matches, fall
            // through and process whatever content we did receive.
            if got_error {
                // "Prompt too long": shrink and retry the turn.
                if error_text.contains("prompt is too long")
                    || error_text.contains("Prompt is too long")
                {
                    let freed = compact::microcompact(&mut self.state.messages, 1);
                    if freed > 0 {
                        sink.on_compact(freed);
                        continue;
                    }
                }

                // Truncated mid-answer by the output-token ceiling: push a
                // recovery message and retry with a raised ceiling.
                if content_blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::Text { .. }))
                    && error_text.contains("max_tokens")
                    && max_output_recovery_count < MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
                {
                    max_output_recovery_count += 1;
                    info!(
                        "Max output tokens recovery attempt {}/{}",
                        max_output_recovery_count, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
                    );
                    let recovery_msg = compact::max_output_recovery_message();
                    self.state.push_message(recovery_msg);
                    continue;
                }
            }

            // Clean MaxTokens stop (no stream error): same recovery,
            // bounded by the same counter.
            if matches!(stop_reason, Some(StopReason::MaxTokens))
                && !got_error
                && content_blocks
                    .iter()
                    .any(|b| matches!(b, ContentBlock::Text { .. }))
                && max_output_recovery_count < MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
            {
                max_output_recovery_count += 1;
                info!(
                    "Max tokens stop reason — recovery attempt {}/{}",
                    max_output_recovery_count, MAX_OUTPUT_TOKENS_RECOVERY_LIMIT
                );
                let recovery_msg = compact::max_output_recovery_message();
                self.state.push_message(recovery_msg);
                continue;
            }

            let tool_calls = extract_tool_calls(&content_blocks);

            // No tool calls means the model is done with this query.
            if tool_calls.is_empty() {
                info!("Turn complete (no tool calls)");
                sink.on_turn_complete(turn + 1);
                self.state.is_query_active = false;

                // Fire-and-forget memory extraction (feature-gated); the
                // task is detached and outlives this call.
                if self.state.config.features.extract_memories
                    && crate::memory::ensure_memory_dir().is_some()
                {
                    let extraction_messages = self.state.messages.clone();
                    let extraction_state = self.extraction_state.clone();
                    let extraction_llm = self.llm.clone();
                    let extraction_model = model.clone();
                    tokio::spawn(async move {
                        crate::memory::extraction::extract_memories_background(
                            extraction_messages,
                            extraction_state,
                            extraction_llm,
                            extraction_model,
                        )
                        .await;
                    });
                }

                return Ok(());
            }

            info!("Executing {} tool call(s)", tool_calls.len());
            // Full context for batch tool execution (unlike the minimal
            // context given to eagerly-started streaming tools).
            let cwd = PathBuf::from(&self.state.cwd);
            let tool_ctx = ToolContext {
                cwd,
                cancel: self.cancel.clone(),
                permission_checker: self.permissions.clone(),
                verbose: self.config.verbose,
                plan_mode: self.state.plan_mode,
                file_cache: Some(self.file_cache.clone()),
                denial_tracker: Some(self.denial_tracker.clone()),
                task_manager: Some(self.state.task_manager.clone()),
                session_allows: Some(self.session_allows.clone()),
                permission_prompter: self.permission_prompter.clone(),
            };

            // IDs already handled by eager streaming execution.
            let streaming_ids: std::collections::HashSet<String> = streaming_tool_handles
                .iter()
                .map(|(id, _, _)| id.clone())
                .collect();

            // Join the eager tasks; a panicked/cancelled task becomes an
            // error result rather than aborting the turn.
            let mut streaming_results = Vec::new();
            for (id, name, handle) in streaming_tool_handles.drain(..) {
                match handle.await {
                    Ok(result) => streaming_results.push(crate::tools::executor::ToolCallResult {
                        tool_use_id: id,
                        tool_name: name,
                        result,
                    }),
                    Err(e) => streaming_results.push(crate::tools::executor::ToolCallResult {
                        tool_use_id: id,
                        tool_name: name,
                        result: crate::tools::ToolResult::error(format!("Task failed: {e}")),
                    }),
                }
            }

            // PreToolUse hooks run for every call, including ones already
            // executed eagerly above.
            for call in &tool_calls {
                self.hooks
                    .run_hooks(&HookEvent::PreToolUse, Some(&call.name), &call.input)
                    .await;
            }

            // Execute the calls not covered by eager execution.
            let remaining_calls: Vec<_> = tool_calls
                .iter()
                .filter(|c| !streaming_ids.contains(&c.id))
                .cloned()
                .collect();

            let mut results = streaming_results;
            if !remaining_calls.is_empty() {
                let batch_results = execute_tool_calls(
                    &remaining_calls,
                    self.tools.all(),
                    &tool_ctx,
                    &self.permissions,
                )
                .await;
                results.extend(batch_results);
            }

            // Apply side effects, notify the sink and hooks, and push each
            // tool result into the conversation for the next turn.
            for result in &results {
                // Plan-mode transitions keyed off successful tool names.
                if !result.result.is_error {
                    match result.tool_name.as_str() {
                        "EnterPlanMode" => {
                            self.state.plan_mode = true;
                            info!("Plan mode enabled");
                        }
                        "ExitPlanMode" => {
                            self.state.plan_mode = false;
                            info!("Plan mode disabled");
                        }
                        _ => {}
                    }
                }

                sink.on_tool_result(&result.tool_name, &result.result);

                self.hooks
                    .run_hooks(
                        &HookEvent::PostToolUse,
                        Some(&result.tool_name),
                        &serde_json::json!({
                            "tool": result.tool_name,
                            "is_error": result.result.is_error,
                        }),
                    )
                    .await;

                let msg = tool_result_message(
                    &result.tool_use_id,
                    &result.result.content,
                    result.result.is_error,
                );
                self.state.push_message(msg);
            }
        }

        // Loop exhausted without a tool-free turn.
        warn!("Max turns ({max_turns}) reached");
        sink.on_warning(&format!("Agent stopped after {max_turns} turns"));
        self.state.is_query_active = false;
        Ok(())
    }
746
    /// Cancels the in-flight query (if any) via the shared cancellation token.
    pub fn cancel(&self) {
        self.cancel.cancel();
    }
751
    /// Returns a clone of the current cancellation token.
    ///
    /// Note: `run_turn_with_sink` replaces the token at the start of each
    /// query, so a clone taken earlier will not observe later queries.
    pub fn cancel_token(&self) -> tokio_util::sync::CancellationToken {
        self.cancel.clone()
    }
756}
757
/// Picks a cheaper/smaller model to fall back to when `current` keeps
/// failing (rate limited / overloaded). Matching is ASCII
/// case-insensitive:
/// - `opus` → `sonnet`
/// - `gpt-5.4` / `gpt-4.1` (non-mini, non-nano) → `{current}-mini`
/// - `large` → `small`
/// - anything else is returned unchanged.
///
/// Fix: the original matched on the lowercased string but called
/// `str::replace` on the original case-sensitively, so mixed-case ids
/// (e.g. "Claude-Opus-4") matched a branch yet came back unchanged —
/// making the fallback equal to the failing model.
fn get_fallback_model(current: &str) -> String {
    let lower = current.to_ascii_lowercase();
    if lower.contains("opus") {
        replace_ignore_ascii_case(current, "opus", "sonnet")
    } else if (lower.contains("gpt-5.4") || lower.contains("gpt-4.1"))
        && !lower.contains("mini")
        && !lower.contains("nano")
    {
        format!("{current}-mini")
    } else if lower.contains("large") {
        replace_ignore_ascii_case(current, "large", "small")
    } else {
        current.to_string()
    }
}

/// Replaces every occurrence of lowercase `needle` in `haystack`,
/// matching ASCII case-insensitively.
fn replace_ignore_ascii_case(haystack: &str, needle: &str, replacement: &str) -> String {
    // ASCII lowercasing preserves byte offsets, so indices found in
    // `lower` are valid for slicing `haystack`.
    let lower = haystack.to_ascii_lowercase();
    let mut out = String::with_capacity(haystack.len());
    let mut start = 0;
    while let Some(pos) = lower[start..].find(needle) {
        let abs = start + pos;
        out.push_str(&haystack[start..abs]);
        out.push_str(replacement);
        start = abs + needle.len();
    }
    out.push_str(&haystack[start..]);
    out
}
776
/// Assembles the full system prompt for a turn: identity, environment,
/// relevant memory, tool/skill listings, static policy sections, MCP
/// server and deferred-tool listings, and formatting rules.
///
/// Reads the filesystem and environment (SHELL, `.git` presence, memory
/// and skill files under `state.cwd`), so callers cache the result —
/// see `QueryEngine::cached_system_prompt`.
pub fn build_system_prompt(tools: &ToolRegistry, state: &AppState) -> String {
    let mut prompt = String::new();

    // Identity preamble.
    prompt.push_str(
        "You are an AI coding agent. You help users with software engineering tasks \
         by reading, writing, and searching code. Use the tools available to you to \
         accomplish tasks.\n\n",
    );

    // Environment section: cwd, OS, shell, git-repo flag.
    let shell = std::env::var("SHELL").unwrap_or_else(|_| "bash".to_string());
    let is_git = std::path::Path::new(&state.cwd).join(".git").exists();
    prompt.push_str(&format!(
        "# Environment\n\
         - Working directory: {}\n\
         - Platform: {}\n\
         - Shell: {shell}\n\
         - Git repository: {}\n\n",
        state.cwd,
        std::env::consts::OS,
        if is_git { "yes" } else { "no" },
    ));

    // Memory: load the base context, then pull in entries relevant to the
    // text of the last five user messages.
    let mut memory = crate::memory::MemoryContext::load(Some(std::path::Path::new(&state.cwd)));

    let recent_text: String = state
        .messages
        .iter()
        .rev()
        .take(5)
        .filter_map(|m| match m {
            crate::llm::message::Message::User(u) => Some(
                u.content
                    .iter()
                    .filter_map(|b| b.as_text())
                    .collect::<Vec<_>>()
                    .join(" "),
            ),
            _ => None,
        })
        .collect::<Vec<_>>()
        .join(" ");

    if !recent_text.is_empty() {
        memory.load_relevant(&recent_text);
    }

    let memory_section = memory.to_system_prompt_section();
    if !memory_section.is_empty() {
        prompt.push_str(&memory_section);
    }

    // Enabled tools, each with its own prompt text.
    prompt.push_str("# Available Tools\n\n");
    for tool in tools.all() {
        if tool.is_enabled() {
            prompt.push_str(&format!("## {}\n{}\n\n", tool.name(), tool.prompt()));
        }
    }

    // User-invocable skills discovered under the working directory.
    let skills = crate::skills::SkillRegistry::load_all(Some(std::path::Path::new(&state.cwd)));
    let invocable = skills.user_invocable();
    if !invocable.is_empty() {
        prompt.push_str("# Available Skills\n\n");
        for skill in invocable {
            let desc = skill.metadata.description.as_deref().unwrap_or("");
            let when = skill.metadata.when_to_use.as_deref().unwrap_or("");
            prompt.push_str(&format!("- `/{}`", skill.name));
            if !desc.is_empty() {
                prompt.push_str(&format!(": {desc}"));
            }
            if !when.is_empty() {
                prompt.push_str(&format!(" (use when: {when})"));
            }
            prompt.push('\n');
        }
        prompt.push('\n');
    }

    // Static policy sections: tool usage, code, git safety, commits, PRs,
    // action safety, response style, memory.
    prompt.push_str(
        "# Using tools\n\n\
         Use dedicated tools instead of shell commands when available:\n\
         - File search: Glob (not find or ls)\n\
         - Content search: Grep (not grep or rg)\n\
         - Read files: FileRead (not cat/head/tail)\n\
         - Edit files: FileEdit (not sed/awk)\n\
         - Write files: FileWrite (not echo/cat with redirect)\n\
         - Reserve Bash for system commands and operations that require shell execution.\n\
         - Break complex tasks into steps. Use multiple tool calls in parallel when independent.\n\
         - Use the Agent tool for complex multi-step research or tasks that benefit from isolation.\n\n\
         # Working with code\n\n\
         - Read files before editing them. Understand existing code before suggesting changes.\n\
         - Prefer editing existing files over creating new ones to avoid file bloat.\n\
         - Only make changes that were requested. Don't add features, refactor, add comments, \
         or make \"improvements\" beyond the ask.\n\
         - Don't add error handling for scenarios that can't happen. Don't design for \
         hypothetical future requirements.\n\
         - When referencing code, include file_path:line_number.\n\
         - Be careful not to introduce security vulnerabilities (command injection, XSS, SQL injection, \
         OWASP top 10). If you notice insecure code you wrote, fix it immediately.\n\
         - Don't add docstrings, comments, or type annotations to code you didn't change.\n\
         - Three similar lines of code is better than a premature abstraction.\n\n\
         # Git safety protocol\n\n\
         - NEVER update the git config.\n\
         - NEVER run destructive git commands (push --force, reset --hard, checkout ., restore ., \
         clean -f, branch -D) unless the user explicitly requests them.\n\
         - NEVER skip hooks (--no-verify, --no-gpg-sign) unless the user explicitly requests it.\n\
         - NEVER force push to main/master. Warn the user if they request it.\n\
         - Always create NEW commits rather than amending, unless the user explicitly requests amend. \
         After hook failure, the commit did NOT happen — amend would modify the PREVIOUS commit.\n\
         - When staging files, prefer adding specific files by name rather than git add -A or git add ., \
         which can accidentally include sensitive files.\n\
         - NEVER commit changes unless the user explicitly asks.\n\n\
         # Committing changes\n\n\
         When the user asks to commit:\n\
         1. Run git status and git diff to see all changes.\n\
         2. Run git log --oneline -5 to match the repository's commit message style.\n\
         3. Draft a concise (1-2 sentence) commit message focusing on \"why\" not \"what\".\n\
         4. Do not commit files that likely contain secrets (.env, credentials.json).\n\
         5. Stage specific files, create the commit.\n\
         6. If pre-commit hook fails, fix the issue and create a NEW commit.\n\
         7. When creating commits, include a co-author attribution line at the end of the message.\n\n\
         # Creating pull requests\n\n\
         When the user asks to create a PR:\n\
         1. Run git status, git diff, and git log to understand all changes on the branch.\n\
         2. Analyze ALL commits (not just the latest) that will be in the PR.\n\
         3. Draft a short title (under 70 chars) and detailed body with summary and test plan.\n\
         4. Push to remote with -u flag if needed, then create PR using gh pr create.\n\
         5. Return the PR URL when done.\n\n\
         # Executing actions safely\n\n\
         Consider the reversibility and blast radius of every action:\n\
         - Freely take local, reversible actions (editing files, running tests).\n\
         - For hard-to-reverse or shared-state actions, confirm with the user first:\n\
         - Destructive: deleting files/branches, dropping tables, rm -rf, overwriting uncommitted changes.\n\
         - Hard to reverse: force-pushing, git reset --hard, amending published commits.\n\
         - Visible to others: pushing code, creating/commenting on PRs/issues, sending messages.\n\
         - When you encounter an obstacle, do not use destructive actions as a shortcut. \
         Identify root causes and fix underlying issues.\n\
         - If you discover unexpected state (unfamiliar files, branches, config), investigate \
         before deleting or overwriting — it may be the user's in-progress work.\n\n\
         # Response style\n\n\
         - Be concise. Lead with the answer or action, not the reasoning.\n\
         - Skip filler, preamble, and unnecessary transitions.\n\
         - Don't restate what the user said.\n\
         - If you can say it in one sentence, don't use three.\n\
         - Focus output on: decisions that need input, status updates, and errors that change the plan.\n\
         - When referencing GitHub issues or PRs, use owner/repo#123 format.\n\
         - Only use emojis if the user explicitly requests it.\n\n\
         # Memory\n\n\
         You can save information across sessions by writing memory files.\n\
         - Save to: ~/.config/agent-code/memory/ (one .md file per topic)\n\
         - Each file needs YAML frontmatter: name, description, type (user/feedback/project/reference)\n\
         - After writing a file, update MEMORY.md with a one-line pointer\n\
         - Memory types: user (role, preferences), feedback (corrections, confirmations), \
         project (decisions, deadlines), reference (external resources)\n\
         - Do NOT store: code patterns, git history, debugging solutions, anything derivable from code\n\
         - Memory is a hint — always verify against current state before acting on it\n",
    );

    // Worked patterns, error recovery, and common workflows.
    prompt.push_str(
        "# Tool usage patterns\n\n\
         Common patterns for effective tool use:\n\n\
         **Read before edit**: Always read a file before editing it. This ensures you \
         understand the current state and can make targeted changes.\n\
         ```\n\
         1. FileRead file_path → understand structure\n\
         2. FileEdit old_string, new_string → targeted change\n\
         ```\n\n\
         **Search then act**: Use Glob to find files, Grep to find content, then read/edit.\n\
         ```\n\
         1. Glob **/*.rs → find Rust files\n\
         2. Grep pattern path → find specific code\n\
         3. FileRead → read the match\n\
         4. FileEdit → make the change\n\
         ```\n\n\
         **Parallel tool calls**: When you need to read multiple independent files or run \
         independent searches, make all the tool calls in one response. Don't serialize \
         independent operations.\n\n\
         **Test after change**: After editing code, run tests to verify the change works.\n\
         ```\n\
         1. FileEdit → make change\n\
         2. Bash cargo test / pytest / npm test → verify\n\
         3. If tests fail, read the error, fix, re-test\n\
         ```\n\n\
         # Error recovery\n\n\
         When something goes wrong:\n\
         - **Tool not found**: Use ToolSearch to find the right tool name.\n\
         - **Permission denied**: Explain why the action is needed, ask the user to approve.\n\
         - **File not found**: Use Glob to find the correct path. Check for typos.\n\
         - **Edit failed (not unique)**: Provide more surrounding context in old_string, \
         or use replace_all=true if renaming.\n\
         - **Command failed**: Read the full error message. Don't retry the same command. \
         Diagnose the root cause first.\n\
         - **Context too large**: The system will auto-compact. If you need specific \
         information from before compaction, re-read the relevant files.\n\
         - **Rate limited**: The system will auto-retry with backoff. Just wait.\n\n\
         # Common workflows\n\n\
         **Bug fix**: Read the failing test → read the source code it tests → \
         identify the bug → fix it → run the test → confirm it passes.\n\n\
         **New feature**: Read existing patterns in the codebase → create or edit files → \
         add tests → run tests → update docs if needed.\n\n\
         **Code review**: Read the diff → identify issues (bugs, security, style) → \
         report findings with file:line references.\n\n\
         **Refactor**: Search for all usages of the symbol → plan the changes → \
         edit each file → run tests to verify nothing broke.\n\n",
    );

    // Connected MCP servers, labeled by inferred transport.
    if !state.config.mcp_servers.is_empty() {
        prompt.push_str("# MCP Servers\n\n");
        prompt.push_str(
            "Connected MCP servers provide additional tools. MCP tools are prefixed \
             with `mcp__{server}__{tool}`. Use them like any other tool.\n\n",
        );
        for (name, entry) in &state.config.mcp_servers {
            // Transport inferred from which config field is present.
            let transport = if entry.command.is_some() {
                "stdio"
            } else if entry.url.is_some() {
                "sse"
            } else {
                "unknown"
            };
            prompt.push_str(&format!("- **{name}** ({transport})\n"));
        }
        prompt.push('\n');
    }

    // Tools that exist but are not loaded until requested via ToolSearch.
    let deferred = tools.deferred_names();
    if !deferred.is_empty() {
        prompt.push_str("# Deferred Tools\n\n");
        prompt.push_str(
            "These tools are available but not loaded by default. \
             Use ToolSearch to load them when needed:\n",
        );
        for name in &deferred {
            prompt.push_str(&format!("- {name}\n"));
        }
        prompt.push('\n');
    }

    // Task management and output-formatting rules.
    prompt.push_str(
        "# Task management\n\n\
         - Use TaskCreate to break complex work into trackable steps.\n\
         - Mark tasks as in_progress when starting, completed when done.\n\
         - Use the Agent tool to spawn subagents for parallel independent work.\n\
         - Use EnterPlanMode/ExitPlanMode for read-only exploration before making changes.\n\
         - Use EnterWorktree/ExitWorktree for isolated changes in git worktrees.\n\n\
         # Output formatting\n\n\
         - All text output is displayed to the user. Use GitHub-flavored markdown.\n\
         - Use fenced code blocks with language hints for code: ```rust, ```python, etc.\n\
         - Use inline `code` for file names, function names, and short code references.\n\
         - Use tables for structured comparisons.\n\
         - Use bullet lists for multiple items.\n\
         - Keep paragraphs short (2-3 sentences).\n\
         - Never output raw HTML or complex formatting — stick to standard markdown.\n",
    );

    prompt
}