pawan/agent/
mod.rs

1//! Pawan Agent — core tool-calling loop and session management.
2//!
3//! Houses [`PawanAgent`], all LLM backends, session persistence,
4//! and the event stream. Wire types live in [`types`].
5
6pub mod types;
7pub use types::*;
8
9pub mod backend;
10mod preflight;
11pub mod events;
12pub mod session;
13#[cfg(feature = "git-sessions")]
14pub mod git_session;
15
16// Re-export event types for public API
17pub use events::{
18    AgentEvent, FinishReason, ThinkingDeltaEvent, ToolApprovalEvent,
19    ToolCompleteEvent, ToolStartEvent, TokenUsageInfo, TurnEndEvent,
20    TurnStartEvent, SessionEndEvent,
21};
22
23use crate::config::{LlmProvider, PawanConfig};
24use crate::coordinator::{CoordinatorResult, ToolCallingConfig, ToolCoordinator};
25use crate::credentials;
26use crate::tools::{ToolDefinition, ToolRegistry};
27use crate::{PawanError, Result};
28use backend::openai_compat::{OpenAiCompatBackend, OpenAiCompatConfig};
29use backend::LlmBackend;
30use serde_json::{json, Value};
31use std::path::PathBuf;
32use std::sync::Arc;
33use std::time::Instant;
34
/// The main Pawan agent — handles conversation, tool calling, and self-healing.
///
/// This struct represents the core Pawan agent that handles:
/// - Conversation history management
/// - Tool calling with the LLM via pluggable backends
/// - Streaming responses
/// - Multiple LLM backends (NVIDIA API, OpenAI, MLX, Ollama)
/// - Context management and token counting
/// - Integration with Eruka for 3-tier memory injection
pub struct PawanAgent {
    /// Configuration (model, provider, sampling parameters, feature toggles).
    config: PawanConfig,
    /// Tool registry exposed to the LLM for tool calling.
    tools: ToolRegistry,
    /// Conversation history, oldest message first.
    history: Vec<Message>,
    /// Workspace root the default tools and `.pawan/arch.md` are resolved against.
    workspace_root: PathBuf,
    /// LLM backend, chosen by `create_backend` or injected via `with_backend`.
    backend: Box<dyn LlmBackend>,

    /// Estimated token count for current context
    context_tokens_estimate: usize,

    /// Eruka bridge for 3-tier memory injection; `None` when Eruka is
    /// disabled in the configuration.
    eruka: Option<crate::eruka_bridge::ErukaClient>,

    /// Stable identifier for this agent instance's session — used as the
    /// key for eruka sync_turn / on_pre_compress writes so turns from one
    /// conversation cluster under the same path. Generated fresh in new(),
    /// overwritten by resume_session() when loading an existing session.
    session_id: String,

    /// Per-turn architecture context loaded from `.pawan/arch.md` at init.
    /// When present, prepended to every user message so key architectural
    /// constraints stay visible even as tool-call history grows long.
    arch_context: Option<String>,
    /// Timestamp of last tool call completion for idle timeout tracking.
    /// Reset to `None` at the start of each turn.
    last_tool_call_time: Option<Instant>,
}
75
/// Probe whether a local inference server is reachable at `url`.
///
/// Extracts `host:port` from the URL and attempts a TCP connect with a
/// 100 ms timeout per candidate address. Returns `true` if any address
/// accepts the connection, `false` on any parse, resolution or connect error.
/// No HTTP round-trip is made, so the probe is cheap enough to run at agent
/// startup.
///
/// Handling details:
/// - The scheme (`http://` / `https://`) is optional; when no port is given
///   the default is 443 for `https://` and 80 otherwise.
/// - Path, query string, fragment and `user:pass@` userinfo are ignored.
/// - A host of exactly `localhost` (any case) is mapped to `127.0.0.1` so the
///   probe does not resolve to `::1` when the listener is bound to IPv4 only.
/// - IP literals (including bracketed IPv6) are used directly. Any other
///   hostname is resolved through the system resolver, which may add
///   resolver latency on top of the connect timeout.
fn probe_local_endpoint(url: &str) -> bool {
    use std::net::{SocketAddr, TcpStream, ToSocketAddrs};
    use std::time::Duration;

    const PROBE_TIMEOUT: Duration = Duration::from_millis(100);

    // Strip the scheme exactly once and remember the matching default port.
    let (rest, default_port) = if let Some(stripped) = url.strip_prefix("https://") {
        (stripped, 443u16)
    } else if let Some(stripped) = url.strip_prefix("http://") {
        (stripped, 80)
    } else {
        (url, 80)
    };

    // The authority ends at the first path, query or fragment delimiter.
    let authority = rest
        .split(|c| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or("");
    // Drop any `user:pass@` prefix — only the host matters for a TCP probe.
    let authority = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);
    if authority.is_empty() {
        return false;
    }

    // A port is present only if the last ':' sits outside IPv6 brackets.
    let has_port = authority
        .rfind(':')
        .map_or(false, |idx| !authority[idx..].contains(']'));
    let addr = if has_port {
        authority.to_string()
    } else {
        format!("{authority}:{default_port}")
    };

    // Normalise the exact host "localhost" (not substrings of other names).
    let addr = match addr.rsplit_once(':') {
        Some((host, port)) if host.eq_ignore_ascii_case("localhost") => {
            format!("127.0.0.1:{port}")
        }
        _ => addr,
    };

    // Fast path for IP literals; fall back to name resolution otherwise.
    let candidates: Vec<SocketAddr> = match addr.parse::<SocketAddr>() {
        Ok(parsed) => vec![parsed],
        Err(_) => match addr.as_str().to_socket_addrs() {
            Ok(resolved) => resolved.collect(),
            Err(_) => return false,
        },
    };

    candidates
        .iter()
        .any(|candidate| TcpStream::connect_timeout(candidate, PROBE_TIMEOUT).is_ok())
}
114
115/// Retrieve an API key with fallback chain:
116/// 1. Environment variable
117/// 2. Secure credential store
118/// 3. Return None (caller should prompt user)
119///
120/// If the key is found in the secure store, it's also set as an env var
121/// for subsequent calls.
122fn get_api_key_with_secure_fallback(env_var: &str, key_name: &str) -> Option<String> {
123    // First, check environment variable
124    if let Ok(key) = std::env::var(env_var) {
125        return Some(key);
126    }
127
128    // Second, try secure credential store
129    match credentials::get_api_key(key_name) {
130        Ok(Some(key)) => {
131            // Cache in env var for subsequent calls
132            std::env::set_var(env_var, &key);
133            Some(key)
134        }
135        Ok(None) => None,
136        Err(e) => {
137            tracing::warn!("Failed to retrieve {} from secure store: {}", key_name, e);
138            None
139        }
140    }
141}
142
143/// Prompt user to enter an API key and store it securely.
144///
145/// This function:
146/// 1. Prompts the user to enter the API key
147/// 2. Stores it in the secure credential store
148/// 3. Sets it as an environment variable for the current session
149///
150/// Returns the entered key on success, or None if the user cancels.
151fn prompt_and_store_api_key(env_var: &str, key_name: &str, provider: &str) -> Option<String> {
152    eprintln!("\n🔑 {} API key not found.", provider);
153    eprintln!("You can set it via:");
154    eprintln!("  - Environment variable: export {}=<your-key>", env_var);
155    eprintln!("  - Interactive entry (recommended for security)");
156    eprintln!("\nEnter your {} API key:", provider);
157    eprintln!("  (Your key will be stored securely in the OS credential store)\n");
158
159    // Read input securely (no echo)
160    #[cfg(unix)]
161    let key = {
162        use std::io::{self, Write};
163        
164        // Use termios to disable echo on Unix
165        let mut stdout = io::stdout();
166        stdout.flush().ok();
167        
168        // Read password without echo
169        rpassword::prompt_password("> ").ok()
170    };
171
172    #[cfg(windows)]
173    let key = {
174        use std::io::{self, Write};
175        
176        let mut stdout = io::stdout();
177        stdout.flush().ok();
178        
179        // On Windows, use a simple prompt (rpassword handles this)
180        rpassword::prompt_password("> ").ok()
181    };
182
183    #[cfg(not(any(unix, windows)))]
184    let key = {
185        use std::io::{self, Write, BufRead};
186        
187        let mut stdout = io::stdout();
188        let mut stdin = io::stdin();
189        stdout.flush().ok();
190        print!("> ");
191        stdout.flush().ok();
192        
193        let mut input = String::new();
194        stdin.lock().read_line(&mut input).ok();
195        Some(input.trim().to_string())
196    };
197
198    match key {
199        Some(k) if !k.trim().is_empty() => {
200            let key = k.trim().to_string();
201            
202            // Store in secure credential store
203            match credentials::store_api_key(key_name, &key) {
204                Ok(()) => {
205                    tracing::info!("{} API key stored securely", provider);
206                    std::env::set_var(env_var, &key);
207                    Some(key)
208                }
209                Err(e) => {
210                    tracing::warn!("Failed to store key securely: {}. Using session-only.", e);
211                    std::env::set_var(env_var, &key);
212                    Some(key)
213                }
214            }
215        }
216        _ => {
217            eprintln!("\n⚠️  No key entered. {} will not work until a key is set.", provider);
218            None
219        }
220    }
221}
222
/// Load per-turn architecture context from `<workspace_root>/.pawan/arch.md`.
///
/// Returns `None` if the file is absent, unreadable, or contains only
/// whitespace.
///
/// Content is capped at 2 000 *characters* (not bytes) to avoid context
/// bloat from large files. The `…(truncated)` marker is appended only when
/// characters were actually dropped, so multibyte text that is under the
/// character cap is returned unchanged.
fn load_arch_context(workspace_root: &std::path::Path) -> Option<String> {
    const MAX_CHARS: usize = 2_000;

    let path = workspace_root.join(".pawan").join("arch.md");
    // A missing file is simply a read error here; no separate exists() check
    // is needed (and it would race with the read anyway).
    let content = std::fs::read_to_string(path).ok()?;
    if content.trim().is_empty() {
        return None;
    }

    // The byte offset of the (MAX_CHARS + 1)-th char, if there is one, is
    // both the "needs truncation" signal and a guaranteed char boundary.
    match content.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => Some(format!("{}…(truncated)", &content[..cut])),
        None => Some(content),
    }
}
251
252impl PawanAgent {
253    /// Create a new PawanAgent with auto-selected backend
254    pub fn new(config: PawanConfig, workspace_root: PathBuf) -> Self {
255        let tools = ToolRegistry::with_defaults(workspace_root.clone());
256        let system_prompt = config.get_system_prompt();
257        let backend = Self::create_backend(&config, &system_prompt);
258        let eruka = if config.eruka.enabled {
259            Some(crate::eruka_bridge::ErukaClient::new(config.eruka.clone()))
260        } else {
261            None
262        };
263        let arch_context = load_arch_context(&workspace_root);
264
265        Self {
266            config,
267            tools,
268            history: Vec::new(),
269            workspace_root,
270            backend,
271            context_tokens_estimate: 0,
272            eruka,
273            session_id: uuid::Uuid::new_v4().to_string(),
274            arch_context,
275            last_tool_call_time: None,
276        }
277    }
278
    /// Create the appropriate backend based on config.
    ///
    /// Selection order:
    /// 1. If `local_first` is set and the local endpoint answers the TCP
    ///    probe, an OpenAI-compatible backend pointed at that endpoint is
    ///    returned (no API key, thinking disabled, no fallbacks).
    /// 2. If `use_ares_backend` is true and the `ares` feature is compiled in,
    ///    delegates to ares-server's LLMClient (unified provider abstraction with
    ///    connection pooling).
    /// 3. Otherwise uses pawan's built-in backend for the configured provider:
    ///    the OpenAI-compatible backend for Nvidia/OpenAI/Mlx, or the native
    ///    Ollama backend.
    ///
    /// Note: for Nvidia and OpenAI this may prompt interactively on the
    /// terminal when no API key is found in the environment or secure store.
    fn create_backend(config: &PawanConfig, system_prompt: &str) -> Box<dyn LlmBackend> {
        // Local-inference-first cost guard: if enabled and the local server
        // responds within 100 ms, route all traffic there instead of cloud.
        if config.local_first {
            let local_url = config
                .local_endpoint
                .clone()
                .unwrap_or_else(|| "http://localhost:11434/v1".to_string());
            if probe_local_endpoint(&local_url) {
                tracing::info!(
                    url = %local_url,
                    model = %config.model,
                    "local_first: local server reachable, using local inference"
                );
                return Box::new(OpenAiCompatBackend::new(
                    backend::openai_compat::OpenAiCompatConfig {
                        api_url: local_url,
                        api_key: None,
                        model: config.model.clone(),
                        temperature: config.temperature,
                        top_p: config.top_p,
                        max_tokens: config.max_tokens,
                        system_prompt: system_prompt.to_string(),
                        use_thinking: false,
                        max_retries: config.max_retries,
                        fallback_models: Vec::new(),
                        cloud: None,
                    },
                ));
            }
            tracing::info!(
                url = %local_url,
                "local_first: local server unreachable, falling back to cloud provider"
            );
        }

        // Try ares backend first if requested
        if config.use_ares_backend {
            if let Some(backend) = Self::try_create_ares_backend(config, system_prompt) {
                return backend;
            }
            tracing::warn!(
                "use_ares_backend=true but ares backend creation failed; \
                 falling back to pawan's native backend"
            );
        }

        match config.provider {
        LlmProvider::Nvidia | LlmProvider::OpenAI | LlmProvider::Mlx => {
            // Resolve endpoint URL and (optional) API key per provider.
            let (api_url, api_key) = match config.provider {
                LlmProvider::Nvidia => {
                    let url = std::env::var("NVIDIA_API_URL")
                        .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());

                    // Try to get key from env or secure store
                    let key = get_api_key_with_secure_fallback("NVIDIA_API_KEY", "nvidia_api_key");

                    // If no key found, prompt user
                    let key = if key.is_none() {
                        prompt_and_store_api_key("NVIDIA_API_KEY", "nvidia_api_key", "NVIDIA")
                    } else {
                        key
                    };

                    if key.is_none() {
                        tracing::warn!("NVIDIA_API_KEY not set. Model calls will fail until a key is provided.");
                    }
                    (url, key)
                },
                LlmProvider::OpenAI => {
                    // URL precedence: config.base_url, then OPENAI_API_URL, then the public API.
                    let url = config.base_url.clone()
                        .or_else(|| std::env::var("OPENAI_API_URL").ok())
                        .unwrap_or_else(|| "https://api.openai.com/v1".to_string());

                    let key = get_api_key_with_secure_fallback("OPENAI_API_KEY", "openai_api_key");
                    let key = if key.is_none() {
                        prompt_and_store_api_key("OPENAI_API_KEY", "openai_api_key", "OpenAI")
                    } else {
                        key
                    };

                    (url, key)
                },
                LlmProvider::Mlx => {
                    // MLX LM server — Apple Silicon native, always local
                    let url = config.base_url.clone()
                        .unwrap_or_else(|| "http://localhost:8080/v1".to_string());
                    tracing::info!(url = %url, "Using MLX LM server (Apple Silicon native)");
                    (url, None) // mlx_lm.server requires no API key
                },
                // The enclosing arm only admits Nvidia/OpenAI/Mlx.
                _ => unreachable!(),
            };

                // Build cloud fallback if configured.
                // Unlike the primary path, this never prompts for a key and
                // does not consult config.base_url.
                let cloud = config.cloud.as_ref().map(|c| {
                    let (cloud_url, cloud_key) = match c.provider {
                        LlmProvider::Nvidia => {
                            let url = std::env::var("NVIDIA_API_URL")
                                .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
                            let key = get_api_key_with_secure_fallback("NVIDIA_API_KEY", "nvidia_api_key");
                            (url, key)
                        },
                        LlmProvider::OpenAI => {
                            let url = std::env::var("OPENAI_API_URL")
                                .unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
                            let key = get_api_key_with_secure_fallback("OPENAI_API_KEY", "openai_api_key");
                            (url, key)
                        },
                        LlmProvider::Mlx => {
                            ("http://localhost:8080/v1".to_string(), None)
                        },
                        _ => {
                            // NOTE(review): hard-coded URL rather than
                            // crate::DEFAULT_NVIDIA_API_URL — confirm they match.
                            tracing::warn!("Cloud fallback only supports nvidia/openai/mlx providers");
                            ("https://integrate.api.nvidia.com/v1".to_string(), None)
                        }
                    };
                    backend::openai_compat::CloudFallback {
                        api_url: cloud_url,
                        api_key: cloud_key,
                        model: c.model.clone(),
                        fallback_models: c.fallback_models.clone(),
                    }
                });

                Box::new(OpenAiCompatBackend::new(OpenAiCompatConfig {
                    api_url,
                    api_key,
                    model: config.model.clone(),
                    temperature: config.temperature,
                    top_p: config.top_p,
                    max_tokens: config.max_tokens,
                    system_prompt: system_prompt.to_string(),
                    // Enforce thinking budget: if set, disable thinking entirely
                    // and give all tokens to action output
                    // (thinking stays on only when the budget is 0, i.e. unset).
                    use_thinking: config.thinking_budget == 0 && config.use_thinking_mode(),
                    max_retries: config.max_retries,
                    fallback_models: config.fallback_models.clone(),
                    cloud,
                }))
            }
            LlmProvider::Ollama => {
                // Native Ollama backend; endpoint comes from OLLAMA_URL or the
                // default local port.
                let url = std::env::var("OLLAMA_URL")
                    .unwrap_or_else(|_| "http://localhost:11434".to_string());

                Box::new(backend::ollama::OllamaBackend::new(
                    url,
                    config.model.clone(),
                    config.temperature,
                    system_prompt.to_string(),
                ))
            }
        }
    }
438
439    /// Try to construct an ares-backed LLM backend from pawan config.
440    /// Returns `None` if the provider isn't supported by ares or required
441    /// credentials are missing — the caller should fall back to pawan's
442    /// native backend.
443    fn try_create_ares_backend(
444        config: &PawanConfig,
445        system_prompt: &str,
446    ) -> Option<Box<dyn LlmBackend>> {
447        use ares::llm::client::{ModelParams, Provider};
448
449        // Map pawan LlmProvider → ares Provider variants.
450        // ares supports: OpenAI (with custom base_url), Ollama, LlamaCpp, Anthropic.
451        // Pawan's Nvidia/OpenAI/Mlx all use OpenAI-compatible endpoints, so they
452        // all map to ares Provider::OpenAI with different base URLs.
453        let params = ModelParams {
454            temperature: Some(config.temperature),
455            max_tokens: Some(config.max_tokens as u32),
456            top_p: Some(config.top_p),
457            frequency_penalty: None,
458            presence_penalty: None,
459        };
460
461        let provider = match config.provider {
462            LlmProvider::Nvidia => {
463                let api_base = std::env::var("NVIDIA_API_URL")
464                    .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
465                let api_key = std::env::var("NVIDIA_API_KEY").ok()?;
466                Provider::OpenAI {
467                    api_key,
468                    api_base,
469                    model: config.model.clone(),
470                    params,
471                }
472            }
473            LlmProvider::OpenAI => {
474                let api_base = config
475                    .base_url
476                    .clone()
477                    .or_else(|| std::env::var("OPENAI_API_URL").ok())
478                    .unwrap_or_else(|| "https://api.openai.com/v1".to_string());
479                let api_key = std::env::var("OPENAI_API_KEY").unwrap_or_default();
480                Provider::OpenAI {
481                    api_key,
482                    api_base,
483                    model: config.model.clone(),
484                    params,
485                }
486            }
487            LlmProvider::Mlx => {
488                // MLX LM server is OpenAI-compatible, no API key needed
489                let api_base = config
490                    .base_url
491                    .clone()
492                    .unwrap_or_else(|| "http://localhost:8080/v1".to_string());
493                Provider::OpenAI {
494                    api_key: String::new(),
495                    api_base,
496                    model: config.model.clone(),
497                    params,
498                }
499            }
500            LlmProvider::Ollama => {
501                // Ares Ollama client is async-constructed (async with_params),
502                // which doesn't fit pawan's sync PawanAgent::new path.
503                // Fall back to pawan's native OllamaBackend for now.
504                return None;
505            }
506        };
507
508        // OpenAI variants construct synchronously — we skip the async
509        // Provider::create_client() entirely for sync construction.
510        let client: Box<dyn ares::llm::LLMClient> = match provider {
511            Provider::OpenAI {
512                api_key,
513                api_base,
514                model,
515                params,
516            } => Box::new(ares::llm::openai::OpenAIClient::with_params(
517                api_key, api_base, model, params,
518            )),
519            _ => return None,
520        };
521
522        tracing::info!(
523            provider = ?config.provider,
524            model = %config.model,
525            "Using ares-backed LLM backend"
526        );
527
528        Some(Box::new(backend::ares_backend::AresBackend::new(
529            client,
530            system_prompt.to_string(),
531        )))
532    }
533
    /// Builder-style setter: replace the agent's tool registry with `tools`
    /// and return the agent. The previously installed registry is discarded.
    pub fn with_tools(mut self, tools: ToolRegistry) -> Self {
        self.tools = tools;
        self
    }
539
    /// Get mutable access to the tool registry (for registering MCP tools)
    /// after the agent has been constructed.
    pub fn tools_mut(&mut self) -> &mut ToolRegistry {
        &mut self.tools
    }
544
    /// Builder-style setter: replace the auto-selected LLM backend with a
    /// custom one and return the agent.
    pub fn with_backend(mut self, backend: Box<dyn LlmBackend>) -> Self {
        self.backend = backend;
        self
    }
550
    /// Get the current conversation history as a read-only slice,
    /// oldest message first.
    pub fn history(&self) -> &[Message] {
        &self.history
    }
555
556    /// Save current conversation as a session, returns session ID
557    pub fn save_session(&self) -> Result<String> {
558        let mut session = session::Session::new(&self.config.model);
559        session.messages = self.history.clone();
560        session.total_tokens = self.context_tokens_estimate as u64;
561        session.save()?;
562        Ok(session.id)
563    }
564
565    /// Resume a saved session by ID
566    pub fn resume_session(&mut self, session_id: &str) -> Result<()> {
567        let session = session::Session::load(session_id)?;
568        self.history = session.messages;
569        self.context_tokens_estimate = session.total_tokens as usize;
570        // Adopt the loaded session's id so eruka writes cluster under the
571        // same key as the on-disk session.
572        self.session_id = session_id.to_string();
573        Ok(())
574    }
575
576    /// Archive the current conversation to Eruka's context store. Safe to
577    /// call from any async context; returns Ok even when eruka is disabled
578    /// or unreachable so callers can fire-and-forget after save_session().
579    pub async fn archive_to_eruka(&self) -> Result<()> {
580        let Some(eruka) = &self.eruka else {
581            return Ok(());
582        };
583        let mut session = session::Session::new(&self.config.model);
584        session.id = self.session_id.clone();
585        session.messages = self.history.clone();
586        session.total_tokens = self.context_tokens_estimate as u64;
587        eruka.archive_session(&session).await
588    }
589
590    /// Build a compact snapshot of the current history for on_pre_compress.
591    /// Keeps message role + first 200 chars per entry so the eruka write
592    /// stays bounded even with huge histories.
593    fn history_snapshot_for_eruka(history: &[Message]) -> String {
594        let mut out = String::with_capacity(2048);
595        for msg in history {
596            let prefix = match msg.role {
597                Role::User => "U: ",
598                Role::Assistant => "A: ",
599                Role::Tool => "T: ",
600                Role::System => "S: ",
601            };
602            let body: String = msg.content.chars().take(200).collect();
603            out.push_str(prefix);
604            out.push_str(&body);
605            out.push('\n');
606            if out.len() > 4000 {
607                break;
608            }
609        }
610        out
611    }
612
    /// Get a read-only reference to the agent's configuration.
    pub fn config(&self) -> &PawanConfig {
        &self.config
    }
617
618    /// Clear the conversation history
619    pub fn clear_history(&mut self) {
620        self.history.clear();
621    }
622    /// Prune conversation history to reduce context size.
623    /// Uses importance scoring (inspired by claude-code-rust's consolidation engine):
624    /// - Tool results with errors: high importance (learning from failures)
625    /// - User messages: medium importance (intent context)
626    /// - Successful tool results: low importance (can be re-derived)
627    ///
628    /// Keeps system prompt + last 4 messages, summarizes the rest.
629    fn prune_history(&mut self) {
630        let len = self.history.len();
631        if len <= 5 {
632            return; // Nothing to prune
633        }
634
635        let keep_end = 4;
636        let start = 1; // Skip system prompt at index 0
637        let end = len - keep_end;
638        let pruned_count = end - start;
639
640        // Score messages by importance for summary prioritization
641        let mut scored: Vec<(f32, &Message)> = self.history[start..end]
642            .iter()
643            .map(|msg| {
644                let score = Self::message_importance(msg);
645                (score, msg)
646            })
647            .collect();
648        scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
649
650        // Build summary from highest-importance messages first (UTF-8 safe)
651        let mut summary = String::with_capacity(2048);
652        for (score, msg) in &scored {
653            let prefix = match msg.role {
654                Role::User => "User: ",
655                Role::Assistant => "Assistant: ",
656                Role::Tool => if *score > 0.7 { "Tool error: " } else { "Tool: " },
657                Role::System => "System: ",
658            };
659            let chunk: String = msg.content.chars().take(200).collect();
660            summary.push_str(prefix);
661            summary.push_str(&chunk);
662            summary.push('\n');
663            if summary.len() > 2000 {
664                let safe_end = summary.char_indices()
665                    .take_while(|(i, _)| *i <= 2000)
666                    .last()
667                    .map(|(i, c)| i + c.len_utf8())
668                    .unwrap_or(0);
669                summary.truncate(safe_end);
670                break;
671            }
672        }
673
674        let summary_msg = Message {
675            role: Role::System,
676            content: format!("Previous conversation summary (pruned {} messages, importance-ranked): {}", pruned_count, summary),
677            tool_calls: vec![],
678            tool_result: None,
679        };
680
681        self.history.drain(start..end);
682        self.history.insert(start, summary_msg);
683
684        tracing::info!(pruned = pruned_count, context_estimate = self.context_tokens_estimate, "Pruned messages from history (importance-ranked)");
685    }
686
687    /// Score a message's importance for pruning decisions (0.0-1.0).
688    /// Higher = more important = kept in summary.
689    fn message_importance(msg: &Message) -> f32 {
690        match msg.role {
691            Role::User => 0.6,       // User intent is moderately important
692            Role::System => 0.3,     // System messages are usually ephemeral
693            Role::Assistant => {
694                if msg.content.contains("error") || msg.content.contains("Error") { 0.8 }
695                else { 0.4 }
696            }
697            Role::Tool => {
698                if let Some(ref result) = msg.tool_result {
699                    if !result.success { 0.9 }  // Failed tools are very important (learning)
700                    else { 0.2 }                 // Successful tools can be re-derived
701                } else {
702                    0.3
703                }
704            }
705        }
706    }
707
    /// Append a message to the end of the conversation history.
    /// The context token estimate is not touched here.
    pub fn add_message(&mut self, message: Message) {
        self.history.push(message);
    }
712
    /// Switch the LLM model at runtime. Recreates the backend with the new model.
    ///
    /// The backend is rebuilt through `create_backend`, so the full selection
    /// logic runs again (local-first probe, ares, provider lookup) and any
    /// backend installed via `with_backend` is replaced. Conversation history
    /// is kept.
    pub fn switch_model(&mut self, model: &str) {
        self.config.model = model.to_string();
        // Re-derive the system prompt from the updated config before rebuilding.
        let system_prompt = self.config.get_system_prompt();
        self.backend = Self::create_backend(&self.config, &system_prompt);
        tracing::info!(model = model, "Model switched at runtime");
    }
720
    /// Get the current model name as stored in the configuration.
    pub fn model_name(&self) -> &str {
        &self.config.model
    }
725
    /// Get tool definitions for the LLM, as reported by the tool registry.
    pub fn get_tool_definitions(&self) -> Vec<ToolDefinition> {
        self.tools.get_definitions()
    }
730
    /// Execute a single prompt with tool calling support.
    ///
    /// Convenience wrapper around `execute_with_callbacks` with no
    /// callbacks installed.
    pub async fn execute(&mut self, user_prompt: &str) -> Result<AgentResponse> {
        self.execute_with_callbacks(user_prompt, None, None, None)
            .await
    }
736
    /// Execute with optional callbacks for streaming.
    ///
    /// Forwards to `execute_with_all_callbacks` with the token, tool and
    /// tool-start callbacks as given and no permission callback.
    pub async fn execute_with_callbacks(
        &mut self,
        user_prompt: &str,
        on_token: Option<TokenCallback>,
        on_tool: Option<ToolCallback>,
        on_tool_start: Option<ToolStartCallback>,
    ) -> Result<AgentResponse> {
        self.execute_with_all_callbacks(user_prompt, on_token, on_tool, on_tool_start, None)
            .await
    }
748
749    /// Execute with all callbacks, including permission prompt.
750    pub async fn execute_with_all_callbacks(
751        &mut self,
752        user_prompt: &str,
753        on_token: Option<TokenCallback>,
754        on_tool: Option<ToolCallback>,
755        on_tool_start: Option<ToolStartCallback>,
756        on_permission: Option<PermissionCallback>,
757    ) -> Result<AgentResponse> {
758        // Check if coordinator mode is enabled
759        if self.config.use_coordinator {
760            // Coordinator mode does not support callbacks or permission prompts
761            if on_token.is_some() || on_tool.is_some() || on_tool_start.is_some() || on_permission.is_some() {
762                tracing::warn!(
763                    "Callbacks and permission prompts are not supported in coordinator mode; ignoring them"
764                );
765            }
766            return self.execute_with_coordinator(user_prompt).await;
767        }
768
769        // Reset idle timeout for the new turn
770        self.last_tool_call_time = None;
771
772        // Inject Eruka core memory before first LLM call
773        if let Some(eruka) = &self.eruka {
774            if let Err(e) = eruka.inject_core_memory(&mut self.history).await {
775                tracing::warn!("Eruka memory injection failed (non-fatal): {}", e);
776            }
777
778            // Prefetch task-relevant context: semantic search + compressed
779            // general context. Inject as a system message so the LLM can
780            // draw on prior-session context for the same query. Non-fatal.
781            match eruka.prefetch(user_prompt, 2000).await {
782                Ok(Some(ctx)) => {
783                    self.history.push(Message {
784                        role: Role::System,
785                        content: ctx,
786                        tool_calls: vec![],
787                        tool_result: None,
788                    });
789                }
790                Ok(None) => {}
791                Err(e) => tracing::warn!("Eruka prefetch failed (non-fatal): {}", e),
792            }
793        }
794
795        // Per-turn architecture context injection: prepend .pawan/arch.md content
796        // so key constraints stay visible even as tool-call history grows long.
797        let effective_prompt = match &self.arch_context {
798            Some(ctx) => format!(
799                "[Workspace Architecture]\n{ctx}\n[/Workspace Architecture]\n\n{user_prompt}"
800            ),
801            None => user_prompt.to_string(),
802        };
803
804        self.history.push(Message {
805            role: Role::User,
806            content: effective_prompt,
807            tool_calls: vec![],
808            tool_result: None,
809        });
810
811        let mut all_tool_calls = Vec::new();
812        let mut total_usage = TokenUsage::default();
813        let mut iterations = 0;
814        let max_iterations = self.config.max_tool_iterations;
815
816        loop {
817            // Check idle timeout
818            if let Some(last_time) = self.last_tool_call_time {
819                let elapsed = last_time.elapsed().as_secs();
820                if elapsed > self.config.tool_call_idle_timeout_secs {
821                    return Err(PawanError::Agent(format!(
822                        "Tool idle timeout exceeded ({}s > {}s)",
823                        elapsed, self.config.tool_call_idle_timeout_secs
824                    )));
825                }
826            }
827
828            iterations += 1;
829            if iterations > max_iterations {
830                return Err(PawanError::Agent(format!(
831                    "Max tool iterations ({}) exceeded",
832                    max_iterations
833                )));
834            }
835
836            // Budget awareness: when running low on iterations, nudge the model
837            let remaining = max_iterations.saturating_sub(iterations);
838            if remaining == 3 && iterations > 1 {
839                self.history.push(Message {
840                    role: Role::User,
841                    content: format!(
842                        "[SYSTEM] You have {} tool iterations remaining. \
843                         Stop exploring and write the most important output now. \
844                         If you have code to write, write it immediately.",
845                        remaining
846                    ),
847                    tool_calls: vec![],
848                    tool_result: None,
849                });
850            }
851            // Estimate context tokens
852            self.context_tokens_estimate = self.history.iter().map(|m| m.content.len()).sum::<usize>() / 4;
853            if self.context_tokens_estimate > self.config.max_context_tokens {
854                // Snapshot pre-compression content to Eruka so the facts
855                // being discarded survive the prune. Non-fatal.
856                if let Some(eruka) = &self.eruka {
857                    let snapshot = Self::history_snapshot_for_eruka(&self.history);
858                    if let Err(e) = eruka.on_pre_compress(&snapshot, &self.session_id).await {
859                        tracing::warn!("Eruka on_pre_compress failed (non-fatal): {}", e);
860                    }
861                }
862                self.prune_history();
863            }
864
865            // Dynamic tool selection: pick the most relevant tools for this query
866            // Extract latest user message for keyword matching
867            let latest_query = self.history.iter().rev()
868                .find(|m| m.role == Role::User)
869                .map(|m| m.content.as_str())
870                .unwrap_or("");
871            let tool_defs = self.tools.select_for_query(latest_query, 12);
872            if iterations == 1 {
873                let tool_names: Vec<&str> = tool_defs.iter().map(|t| t.name.as_str()).collect();
874                tracing::info!(tools = ?tool_names, count = tool_defs.len(), "Selected tools for query");
875            }
876
877            // Update idle timeout tracker before LLM call to track time spent in generation
878            self.last_tool_call_time = Some(Instant::now());
879
880            // --- Resilient LLM call: retry on transient failures instead of crashing ---
881            let response = {
882                #[allow(unused_assignments)]
883                let mut last_err = None;
884                let max_llm_retries = 3;
885                let mut attempt = 0;
886                loop {
887                    attempt += 1;
888                    match self.backend.generate(&self.history, &tool_defs, on_token.as_ref()).await {
889                        Ok(resp) => break resp,
890                        Err(e) => {
891                            let err_str = e.to_string();
892                            let is_transient = err_str.contains("timeout")
893                                || err_str.contains("connection")
894                                || err_str.contains("429")
895                                || err_str.contains("500")
896                                || err_str.contains("502")
897                                || err_str.contains("503")
898                                || err_str.contains("504")
899                                || err_str.contains("reset")
900                                || err_str.contains("broken pipe");
901
902                            if is_transient && attempt <= max_llm_retries {
903                                let delay = std::time::Duration::from_secs(2u64.pow(attempt as u32));
904                                tracing::warn!(
905                                    attempt = attempt,
906                                    delay_secs = delay.as_secs(),
907                                    error = err_str.as_str(),
908                                    "LLM call failed (transient) — retrying"
909                                );
910                                tokio::time::sleep(delay).await;
911
912                                // If context is too large, prune before retry
913                                if err_str.contains("context") || err_str.contains("token") {
914                                    tracing::info!("Pruning history before retry (possible context overflow)");
915                                    if let Some(eruka) = &self.eruka {
916                                        let snapshot = Self::history_snapshot_for_eruka(&self.history);
917                                        if let Err(e) = eruka.on_pre_compress(&snapshot, &self.session_id).await {
918                                            tracing::warn!("Eruka on_pre_compress failed (non-fatal): {}", e);
919                                        }
920                                    }
921                                    self.prune_history();
922                                }
923                                continue;
924                            }
925
926                            // Non-transient or max retries exhausted
927                            last_err = Some(e);
928                            break {
929                                // Return a synthetic "give up" response instead of crashing
930                                tracing::error!(
931                                    attempt = attempt,
932                                    error = last_err.as_ref().map(|e| e.to_string()).unwrap_or_default().as_str(),
933                                    "LLM call failed permanently — returning error as content"
934                                );
935                                LLMResponse {
936                                    content: format!(
937                                        "LLM error after {} attempts: {}. The task could not be completed.",
938                                        attempt,
939                                        last_err.as_ref().map(|e| e.to_string()).unwrap_or_default()
940                                    ),
941                                    reasoning: None,
942                                    tool_calls: vec![],
943                                    finish_reason: "error".to_string(),
944                                    usage: None,
945                                }
946                            };
947                        }
948                    }
949                }
950            };
951
952            // Accumulate token usage with thinking/action split
953            if let Some(ref usage) = response.usage {
954                total_usage.prompt_tokens += usage.prompt_tokens;
955                total_usage.completion_tokens += usage.completion_tokens;
956                total_usage.total_tokens += usage.total_tokens;
957                total_usage.reasoning_tokens += usage.reasoning_tokens;
958                total_usage.action_tokens += usage.action_tokens;
959
960                // Log token budget split per iteration
961                if usage.reasoning_tokens > 0 {
962                    tracing::info!(
963                        iteration = iterations,
964                        think = usage.reasoning_tokens,
965                        act = usage.action_tokens,
966                        total = usage.completion_tokens,
967                        "Token budget: think:{} act:{} (total:{})",
968                        usage.reasoning_tokens, usage.action_tokens, usage.completion_tokens
969                    );
970                }
971
972                // Thinking budget enforcement
973                let thinking_budget = self.config.thinking_budget;
974                if thinking_budget > 0 && usage.reasoning_tokens > thinking_budget as u64 {
975                    tracing::warn!(
976                        budget = thinking_budget,
977                        actual = usage.reasoning_tokens,
978                        "Thinking budget exceeded ({}/{} tokens)",
979                        usage.reasoning_tokens, thinking_budget
980                    );
981                }
982            }
983
984            // --- Guardrail: strip thinking blocks from content ---
985            let clean_content = {
986                let mut s = response.content.clone();
987                loop {
988                    let lower = s.to_lowercase();
989                    let open = lower.find("<think>");
990                    let close = lower.find("</think>");
991                    match (open, close) {
992                        (Some(i), Some(j)) if j > i => {
993                            let before = s[..i].trim_end().to_string();
994                            let after = if s.len() > j + 8 { s[j + 8..].trim_start().to_string() } else { String::new() };
995                            s = if before.is_empty() { after } else if after.is_empty() { before } else { format!("{}\n{}", before, after) };
996                        }
997                        _ => break,
998                    }
999                }
1000                s
1001            };
1002
1003            if response.tool_calls.is_empty() {
1004                // --- Guardrail: detect chatty no-op (content but no tools on early iterations) ---
1005                // Only nudge if tools are available AND response looks like planning text (not a real answer)
1006                let has_tools = !tool_defs.is_empty();
1007                let lower = clean_content.to_lowercase();
1008                let planning_prefix = lower.starts_with("let me")
1009                    || lower.starts_with("i'll help")
1010                    || lower.starts_with("i will help")
1011                    || lower.starts_with("sure, i")
1012                    || lower.starts_with("okay, i");
1013                let looks_like_planning = clean_content.len() > 200 || (planning_prefix && clean_content.len() > 50);
1014                if has_tools && looks_like_planning && iterations == 1 && iterations < max_iterations && response.finish_reason != "error" {
1015                    tracing::warn!(
1016                        "No tool calls at iteration {} (content: {}B) — nudging model to use tools",
1017                        iterations, clean_content.len()
1018                    );
1019                    self.history.push(Message {
1020                        role: Role::Assistant,
1021                        content: clean_content.clone(),
1022                        tool_calls: vec![],
1023                        tool_result: None,
1024                    });
1025                    self.history.push(Message {
1026                        role: Role::User,
1027                        content: "You must use tools to complete this task. Do NOT just describe what you would do — actually call the tools. Start with bash or read_file.".to_string(),
1028                        tool_calls: vec![],
1029                        tool_result: None,
1030                    });
1031                    continue;
1032                }
1033
1034                // --- Guardrail: detect repeated responses ---
1035                if iterations > 1 {
1036                    let prev_assistant = self.history.iter().rev()
1037                        .find(|m| m.role == Role::Assistant && !m.content.is_empty());
1038                    if let Some(prev) = prev_assistant {
1039                        if prev.content.trim() == clean_content.trim() && iterations < max_iterations {
1040                            tracing::warn!("Repeated response detected at iteration {} — injecting correction", iterations);
1041                            self.history.push(Message {
1042                                role: Role::Assistant,
1043                                content: clean_content.clone(),
1044                                tool_calls: vec![],
1045                                tool_result: None,
1046                            });
1047                            self.history.push(Message {
1048                                role: Role::User,
1049                                content: "You gave the same response as before. Try a different approach. Use anchor_text in edit_file_lines, or use insert_after, or use bash with sed.".to_string(),
1050                                tool_calls: vec![],
1051                                tool_result: None,
1052                            });
1053                            continue;
1054                        }
1055                    }
1056                }
1057
1058                self.history.push(Message {
1059                    role: Role::Assistant,
1060                    content: clean_content.clone(),
1061                    tool_calls: vec![],
1062                    tool_result: None,
1063                });
1064
1065                // Persist this completed turn to Eruka so future prefetches
1066                // and sessions can pull from it. Non-fatal on any error.
1067                if let Some(eruka) = &self.eruka {
1068                    if let Err(e) = eruka
1069                        .sync_turn(user_prompt, &clean_content, &self.session_id)
1070                        .await
1071                    {
1072                        tracing::warn!("Eruka sync_turn failed (non-fatal): {}", e);
1073                    }
1074                }
1075
1076                return Ok(AgentResponse {
1077                    content: clean_content,
1078                    tool_calls: all_tool_calls,
1079                    iterations,
1080                    usage: total_usage,
1081                });
1082            }
1083
1084            self.history.push(Message {
1085                role: Role::Assistant,
1086                content: response.content.clone(),
1087                tool_calls: response.tool_calls.clone(),
1088                tool_result: None,
1089            });
1090
1091            for tool_call in &response.tool_calls {
1092                // Auto-activate extended tools on first use (makes them visible in next iteration)
1093                self.tools.activate(&tool_call.name);
1094
1095                // Check permission (Deny and Prompt-in-headless both block)
1096                let perm = crate::config::ToolPermission::resolve(
1097                    &tool_call.name, &self.config.permissions
1098                );
1099                let denied = match perm {
1100                    crate::config::ToolPermission::Deny => Some("Tool denied by permission policy"),
1101                    crate::config::ToolPermission::Prompt => {
1102                        // For bash: auto-allow read-only commands even under Prompt
1103                        if tool_call.name == "bash" {
1104                            if let Some(cmd) = tool_call.arguments.get("command").and_then(|v| v.as_str()) {
1105                                if crate::tools::bash::is_read_only(cmd) {
1106                                    tracing::debug!(command = cmd, "Auto-allowing read-only bash command under Prompt permission");
1107                                    None
1108                                } else if let Some(ref perm_cb) = on_permission {
1109                                    // Ask TUI for approval
1110                                    let args_summary = cmd.chars().take(120).collect::<String>();
1111                                    let rx = perm_cb(PermissionRequest {
1112                                        tool_name: tool_call.name.clone(),
1113                                        args_summary,
1114                                    });
1115                                    match rx.await {
1116                                        Ok(true) => None,
1117                                        _ => Some("User denied tool execution"),
1118                                    }
1119                                } else {
1120                                    Some("Bash command requires user approval (read-only commands auto-allowed)")
1121                                }
1122                            } else {
1123                                Some("Tool requires user approval")
1124                            }
1125                        } else if let Some(ref perm_cb) = on_permission {
1126                            // Ask TUI for approval
1127                            let args_summary = tool_call.arguments.to_string().chars().take(120).collect::<String>();
1128                            let rx = perm_cb(PermissionRequest {
1129                                tool_name: tool_call.name.clone(),
1130                                args_summary,
1131                            });
1132                            match rx.await {
1133                                Ok(true) => None,
1134                                _ => Some("User denied tool execution"),
1135                            }
1136                        } else {
1137                            // Headless = deny for safety
1138                            Some("Tool requires user approval (set permission to 'allow' or use TUI mode)")
1139                        }
1140                    }
1141                    crate::config::ToolPermission::Allow => None,
1142                };
1143                if let Some(reason) = denied {
1144                    let record = ToolCallRecord {
1145                        id: tool_call.id.clone(),
1146                        name: tool_call.name.clone(),
1147                        arguments: tool_call.arguments.clone(),
1148                        result: json!({"error": reason}),
1149                        success: false,
1150                        duration_ms: 0,
1151                    };
1152
1153                    if let Some(ref callback) = on_tool {
1154                        callback(&record);
1155                    }
1156                    all_tool_calls.push(record);
1157
1158                    self.history.push(Message {
1159                        role: Role::Tool,
1160                        content: format!("{{\"error\": \"{}\"}}", reason),
1161                        tool_calls: vec![],
1162                        tool_result: Some(ToolResultMessage {
1163                            tool_call_id: tool_call.id.clone(),
1164                            content: json!({"error": reason}),
1165                            success: false,
1166                        }),
1167                    });
1168                    continue;
1169                }
1170
1171                // Notify tool start
1172                if let Some(ref callback) = on_tool_start {
1173                    callback(&tool_call.name);
1174                }
1175
1176                // Debug: log tool call args for diagnosis
1177                tracing::debug!(
1178                    tool = tool_call.name.as_str(),
1179                    args_len = serde_json::to_string(&tool_call.arguments).unwrap_or_default().len(),
1180                    "Tool call: {}({})",
1181                    tool_call.name,
1182                    serde_json::to_string(&tool_call.arguments)
1183                        .unwrap_or_default()
1184                        .chars()
1185                        .take(200)
1186                        .collect::<String>()
1187                );
1188
1189                // Validate tool arguments using thulp-core (DRY: reuse thulp's validation)
1190                if let Some(tool) = self.tools.get(&tool_call.name) {
1191                    let schema = tool.parameters_schema();
1192                    if let Ok(params) = thulp_core::ToolDefinition::parse_mcp_input_schema(&schema) {
1193                        let thulp_def = thulp_core::ToolDefinition {
1194                            name: tool_call.name.clone(),
1195                            description: String::new(),
1196                            parameters: params,
1197                        };
1198                        if let Err(e) = thulp_def.validate_args(&tool_call.arguments) {
1199                            tracing::warn!(
1200                                tool = tool_call.name.as_str(),
1201                                error = %e,
1202                                "Tool argument validation failed (continuing anyway)"
1203                            );
1204                        }
1205                    }
1206                }
1207
1208                let start = std::time::Instant::now();
1209
1210                // Check permission for mutating tools
1211                let tool = self.tools.get(&tool_call.name);
1212                let is_mutating = tool.map(|t| t.mutating()).unwrap_or(false);
1213                if is_mutating {
1214                    if let Some(ref callback) = on_permission {
1215                        let args_summary = summarize_args(&tool_call.arguments);
1216                        let request = PermissionRequest {
1217                            tool_name: tool_call.name.clone(),
1218                            args_summary,
1219                        };
1220                        let permission_rx = (callback)(request);
1221                        match permission_rx.await {
1222                            Ok(true) => {
1223                                // Permission granted, continue with execution
1224                            }
1225                            Ok(false) => {
1226                                // Permission denied, skip this tool call
1227                                tracing::info!(tool = tool_call.name.as_str(), "Tool execution denied by user");
1228                                let record = ToolCallRecord {
1229                                    id: tool_call.id.clone(),
1230                                    name: tool_call.name.clone(),
1231                                    arguments: tool_call.arguments.clone(),
1232                                    result: json!({"error": "Tool execution denied by user", "tool": tool_call.name}),
1233                                    success: false,
1234                                    duration_ms: 0,
1235                                };
1236                                if let Some(ref callback) = on_tool {
1237                                    callback(&record);
1238                                }
1239                                continue;
1240                            }
1241                            Err(_) => {
1242                                let record = ToolCallRecord {
1243                                    id: tool_call.id.clone(),
1244                                    name: tool_call.name.clone(),
1245                                    arguments: tool_call.arguments.clone(),
1246                                    result: json!({"error": "Permission channel closed", "tool": tool_call.name}),
1247                                    success: false,
1248                                    duration_ms: 0,
1249                                };
1250                                if let Some(ref callback) = on_tool {
1251                                    callback(&record);
1252                                }
1253                                continue;
1254                            }
1255                        }
1256                    } else {
1257                        tracing::warn!(tool = tool_call.name.as_str(), "No permission callback, auto-approving mutating tool");
1258                    }
1259                }
1260
1261                // Resilient tool execution: catch panics + errors
1262                let result = {
1263                    let tool_future = self.tools.execute(&tool_call.name, tool_call.arguments.clone());
1264                    // Timeout individual tool calls (prevent hangs)
1265                    let timeout_dur = if tool_call.name == "bash" {
1266                        std::time::Duration::from_secs(self.config.bash_timeout_secs)
1267                    } else {
1268                        std::time::Duration::from_secs(30)
1269                    };
1270                    match tokio::time::timeout(timeout_dur, tool_future).await {
1271                        Ok(inner) => inner,
1272                        Err(_) => Err(PawanError::Tool(format!(
1273                            "Tool '{}' timed out after {}s", tool_call.name, timeout_dur.as_secs()
1274                        ))),
1275                    }
1276                };
1277                let duration_ms = start.elapsed().as_millis() as u64;
1278
1279                let (result_value, success) = match result {
1280                    Ok(v) => (v, true),
1281                    Err(e) => {
1282                        tracing::warn!(tool = tool_call.name.as_str(), error = %e, "Tool execution failed");
1283                        (json!({"error": e.to_string(), "tool": tool_call.name, "hint": "Try a different approach or tool"}), false)
1284                    }
1285                };
1286
1287                // Truncate tool results that exceed max chars to prevent context bloat
1288                let max_result_chars = self.config.max_result_chars;
1289                let result_value = truncate_tool_result(result_value, max_result_chars);
1290
1291
1292                let record = ToolCallRecord {
1293                    id: tool_call.id.clone(),
1294                    name: tool_call.name.clone(),
1295                    arguments: tool_call.arguments.clone(),
1296                    result: result_value.clone(),
1297                    success,
1298                    duration_ms,
1299                };
1300
1301                if let Some(ref callback) = on_tool {
1302                    callback(&record);
1303                }
1304
1305                all_tool_calls.push(record);
1306
1307                self.history.push(Message {
1308                    role: Role::Tool,
1309                    content: serde_json::to_string(&result_value).unwrap_or_default(),
1310                    tool_calls: vec![],
1311                    tool_result: Some(ToolResultMessage {
1312                        tool_call_id: tool_call.id.clone(),
1313                        content: result_value,
1314                        success,
1315                    }),
1316                });
1317
1318                // Compile-gated confidence: after writing a .rs file, auto-run cargo check
1319                // and inject the result so the model can self-correct on the same iteration
1320                if success && tool_call.name == "write_file" {
1321                    let wrote_rs = tool_call.arguments.get("path")
1322                        .and_then(|p| p.as_str())
1323                        .map(|p| p.ends_with(".rs"))
1324                        .unwrap_or(false);
1325                    if wrote_rs {
1326                        let ws = self.workspace_root.clone();
1327                        let check_result = tokio::process::Command::new("cargo")
1328                            .arg("check")
1329                            .arg("--message-format=short")
1330                            .current_dir(&ws)
1331                            .output()
1332                            .await;
1333                        match check_result {
1334                            Ok(output) if !output.status.success() => {
1335                                let stderr = String::from_utf8_lossy(&output.stderr);
1336                                // Only inject first 1500 chars of errors to avoid context bloat
1337                                let err_msg: String = stderr.chars().take(1500).collect();
1338                                tracing::info!("Compile-gate: cargo check failed after write_file, injecting errors");
1339                                self.history.push(Message {
1340                                    role: Role::User,
1341                                    content: format!(
1342                                        "[SYSTEM] cargo check failed after your write_file. Fix the errors:\n```\n{}\n```",
1343                                        err_msg
1344                                    ),
1345                                    tool_calls: vec![],
1346                                    tool_result: None,
1347                                });
1348                            }
1349                            Ok(_) => {
1350                                tracing::debug!("Compile-gate: cargo check passed");
1351                            }
1352                            Err(e) => {
1353                                tracing::warn!("Compile-gate: cargo check failed to run: {}", e);
1354                            }
1355                        }
1356                    }
1357                }
1358            }
1359        }
1360    }
1361
1362    /// Execute using the ToolCoordinator instead of the built-in loop.
1363    ///
1364    /// This method provides an alternative implementation that uses the
1365    /// ToolCoordinator for tool-calling loops, which offers:
1366    /// - Parallel tool execution
1367    /// - Per-tool timeout handling
1368    /// - Consistent error handling
1369    /// - Max iteration limits
1370    ///
1371    /// Note: This method does not support streaming callbacks or permission
1372    /// prompts - those are only available in the built-in loop.
1373    async fn execute_with_coordinator(&mut self, user_prompt: &str) -> Result<AgentResponse> {
1374        // Reset idle timeout for the new turn
1375        self.last_tool_call_time = None;
1376
1377        // Inject Eruka core memory before first LLM call
1378        if let Some(eruka) = &self.eruka {
1379            if let Err(e) = eruka.inject_core_memory(&mut self.history).await {
1380                tracing::warn!("Eruka memory injection failed (non-fatal): {}", e);
1381            }
1382
1383            // Prefetch task-relevant context
1384            match eruka.prefetch(user_prompt, 2000).await {
1385                Ok(Some(ctx)) => {
1386                    self.history.push(Message {
1387                        role: Role::System,
1388                        content: ctx,
1389                        tool_calls: vec![],
1390                        tool_result: None,
1391                    });
1392                }
1393                Ok(None) => {}
1394                Err(e) => tracing::warn!("Eruka prefetch failed (non-fatal): {}", e),
1395            }
1396        }
1397
1398        // Per-turn architecture context injection
1399        let effective_prompt = match &self.arch_context {
1400            Some(ctx) => format!(
1401                "[Workspace Architecture]\n{ctx}\n[/Workspace Architecture]\n\n{user_prompt}"
1402            ),
1403            None => user_prompt.to_string(),
1404        };
1405
1406        // Build coordinator config from agent config
1407        let coordinator_config = ToolCallingConfig {
1408            max_iterations: self.config.max_tool_iterations,
1409            parallel_execution: true,
1410            tool_timeout: std::time::Duration::from_secs(self.config.bash_timeout_secs),
1411            stop_on_error: false,
1412        };
1413
1414        // Create a fresh backend for coordinator execution
1415        let system_prompt = self.config.get_system_prompt();
1416        let backend = Self::create_backend(&self.config, &system_prompt);
1417        let backend = Arc::from(backend);
1418
1419        // Create a fresh tool registry for coordinator execution
1420        // Note: This will not include any MCP tools registered at runtime
1421        let registry = Arc::new(ToolRegistry::with_defaults(self.workspace_root.clone()));
1422
1423        // Create coordinator with backend and tool registry
1424        let coordinator = ToolCoordinator::new(backend, registry, coordinator_config);
1425
1426        // Execute with coordinator
1427        let result: CoordinatorResult = coordinator
1428            .execute(Some(&system_prompt), &effective_prompt)
1429            .await
1430            .map_err(|e| PawanError::Agent(format!("Coordinator execution failed: {}", e)))?;
1431
1432        // Convert CoordinatorResult to AgentResponse
1433        let content = result.content.clone();
1434        let agent_response = AgentResponse {
1435            content: result.content,
1436            tool_calls: result.tool_calls,
1437            iterations: result.iterations,
1438            usage: result.total_usage,
1439        };
1440
1441        // Sync turn to Eruka if enabled
1442        if let Some(eruka) = &self.eruka {
1443            if let Err(e) = eruka
1444                .sync_turn(user_prompt, &content, &self.session_id)
1445                .await
1446            {
1447                tracing::warn!("Eruka sync_turn failed (non-fatal): {}", e);
1448            }
1449        }
1450
1451        Ok(agent_response)
1452    }
1453
1454    /// Execute a healing task with real diagnostics
1455    pub async fn heal(&mut self) -> Result<AgentResponse> {
1456        let healer = crate::healing::Healer::new(
1457            self.workspace_root.clone(),
1458            self.config.healing.clone(),
1459        );
1460
1461        let diagnostics = healer.get_diagnostics().await?;
1462        let failed_tests = healer.get_failed_tests().await?;
1463
1464        let mut prompt = format!(
1465            "I need you to heal this Rust project at: {}
1466
1467",
1468            self.workspace_root.display()
1469        );
1470
1471        if !diagnostics.is_empty() {
1472            prompt.push_str(&format!(
1473                "## Compilation Issues ({} found)
1474{}
1475",
1476                diagnostics.len(),
1477                healer.format_diagnostics_for_prompt(&diagnostics)
1478            ));
1479        }
1480
1481        if !failed_tests.is_empty() {
1482            prompt.push_str(&format!(
1483                "## Failed Tests ({} found)
1484{}
1485",
1486                failed_tests.len(),
1487                healer.format_tests_for_prompt(&failed_tests)
1488            ));
1489        }
1490
1491        if diagnostics.is_empty() && failed_tests.is_empty() {
1492            prompt.push_str("No issues found! Run cargo check and cargo test to verify.
1493");
1494        }
1495
1496        prompt.push_str("
1497Fix each issue one at a time. Verify with cargo check after each fix.");
1498
1499        self.execute(&prompt).await
1500    }
1501    /// Execute healing with retries — calls heal(), checks for remaining errors, retries if needed.
1502    ///
1503    /// Two-stage gate:
1504    ///   Stage 1 — `cargo check`: must produce zero errors before proceeding.
1505    ///   Stage 2 — `healing.verify_cmd` (optional): a user-supplied shell command
1506    ///             (e.g. `cargo test --workspace`).  If it exits non-zero the loop
1507    ///             continues so the LLM can address the reported failures.
1508    ///
1509    /// Anti-thrash guard: each Stage-1 error is fingerprinted (kind + code +
1510    /// message prefix).  If the same fingerprint survives `max_attempts`
1511    /// consecutive rounds unchanged the loop halts rather than spinning
1512    /// indefinitely on an error the LLM cannot fix.
1513    pub async fn heal_with_retries(&mut self, max_attempts: usize) -> Result<AgentResponse> {
1514        use std::collections::{HashMap, HashSet};
1515
1516        let mut last_response = self.heal().await?;
1517        // fingerprint → consecutive rounds this error has survived unchanged
1518        let mut stuck_counts: HashMap<u64, usize> = HashMap::new();
1519
1520        for attempt in 1..max_attempts {
1521            // Stage 1: cargo check must be error-free
1522            let fixer = crate::healing::CompilerFixer::new(self.workspace_root.clone());
1523            let remaining = fixer.check().await?;
1524            let errors: Vec<_> = remaining
1525                .iter()
1526                .filter(|d| d.kind == crate::healing::DiagnosticKind::Error)
1527                .collect();
1528
1529            if !errors.is_empty() {
1530                // Update fingerprint counts.
1531                // Drop entries for errors that were fixed; increment survivors.
1532                let current_fps: HashSet<u64> = errors.iter().map(|d| d.fingerprint()).collect();
1533                stuck_counts.retain(|fp, _| current_fps.contains(fp));
1534                for fp in &current_fps {
1535                    *stuck_counts.entry(*fp).or_insert(0) += 1;
1536                }
1537
1538                // Anti-thrash: halt if any error fingerprint has not budged
1539                // after max_attempts consecutive rounds.
1540                let thrashing: Vec<u64> = stuck_counts
1541                    .iter()
1542                    .filter_map(|(&fp, &count)| if count >= max_attempts { Some(fp) } else { None })
1543                    .collect();
1544                if !thrashing.is_empty() {
1545                    tracing::warn!(
1546                        stuck_fingerprints = thrashing.len(),
1547                        attempt,
1548                        "Anti-thrash: {} error(s) unchanged after {} attempts, halting heal loop",
1549                        thrashing.len(),
1550                        max_attempts
1551                    );
1552                    return Ok(last_response);
1553                }
1554
1555                tracing::warn!(
1556                    errors = errors.len(),
1557                    attempt,
1558                    "Stage 1 (cargo check): errors remain, retrying"
1559                );
1560                last_response = self.heal().await?;
1561                continue;
1562            }
1563
1564            // All Stage-1 errors cleared — reset thrash counters.
1565            stuck_counts.clear();
1566
1567            // Stage 2: optional verify_cmd
1568            let verify_cmd = self.config.healing.verify_cmd.clone();
1569            if let Some(ref cmd) = verify_cmd {
1570                match crate::healing::run_verify_cmd(&self.workspace_root, cmd).await {
1571                    Ok(None) => {
1572                        tracing::info!(attempts = attempt, "Stage 2 (verify_cmd) passed, healing complete");
1573                        return Ok(last_response);
1574                    }
1575                    Ok(Some(diag)) => {
1576                        tracing::warn!(
1577                            attempt,
1578                            cmd,
1579                            output = diag.raw,
1580                            "Stage 2 (verify_cmd) failed, retrying"
1581                        );
1582                        last_response = self.heal().await?;
1583                        continue;
1584                    }
1585                    Err(e) => {
1586                        // Cannot spawn the command — don't block healing on this
1587                        tracing::warn!(cmd, error = %e, "verify_cmd could not be run, skipping stage 2");
1588                        return Ok(last_response);
1589                    }
1590                }
1591            } else {
1592                tracing::info!(attempts = attempt, "Stage 1 (cargo check) passed, healing complete");
1593                return Ok(last_response);
1594            }
1595        }
1596
1597        tracing::info!(attempts = max_attempts, "Healing finished (may still have errors)");
1598        Ok(last_response)
1599    }
1600    /// Execute a task with a specific prompt
1601    pub async fn task(&mut self, task_description: &str) -> Result<AgentResponse> {
1602        let prompt = format!(
1603            r#"I need you to complete the following coding task:
1604
1605{}
1606
1607The workspace is at: {}
1608
1609Please:
16101. First explore the codebase to understand the relevant code
16112. Make the necessary changes
16123. Verify the changes compile with `cargo check`
16134. Run relevant tests if applicable
1614
1615Explain your changes as you go."#,
1616            task_description,
1617            self.workspace_root.display()
1618        );
1619
1620        self.execute(&prompt).await
1621    }
1622
1623    /// Generate a commit message for current changes
1624    pub async fn generate_commit_message(&mut self) -> Result<String> {
1625        let prompt = r#"Please:
16261. Run `git status` to see what files are changed
16272. Run `git diff --cached` to see staged changes (or `git diff` for unstaged)
16283. Generate a concise, descriptive commit message following conventional commits format
1629
1630Only output the suggested commit message, nothing else."#;
1631
1632        let response = self.execute(prompt).await?;
1633        Ok(response.content)
1634    }
1635}
1636
1637/// Truncate a tool result JSON value to fit within max_chars.
1638/// Unlike naive string truncation (which breaks JSON), this truncates string
1639/// *values* within the JSON structure, preserving valid JSON output.
1640fn truncate_tool_result(value: Value, max_chars: usize) -> Value {
1641    let serialized = serde_json::to_string(&value).unwrap_or_default();
1642    if serialized.len() <= max_chars {
1643        return value;
1644    }
1645
1646    // Strategy: find the largest string values and truncate them
1647    match value {
1648        Value::Object(map) => {
1649            let mut result = serde_json::Map::new();
1650            let total = serialized.len();
1651            for (k, v) in map {
1652                if let Value::String(s) = &v {
1653                    if s.len() > 500 {
1654                        // Proportional truncation: shrink large strings
1655                        let target = s.len() * max_chars / total;
1656                        let target = target.max(200); // Keep at least 200 chars
1657                        let truncated: String = s.chars().take(target).collect();
1658                        result.insert(k, json!(format!("{}...[truncated from {} chars]", truncated, s.len())));
1659                        continue;
1660                    }
1661                }
1662                // Recursively truncate nested structures
1663                result.insert(k, truncate_tool_result(v, max_chars));
1664            }
1665            Value::Object(result)
1666        }
1667        Value::String(s) if s.len() > max_chars => {
1668            let truncated: String = s.chars().take(max_chars).collect();
1669            json!(format!("{}...[truncated from {} chars]", truncated, s.len()))
1670        }
1671        Value::Array(arr) if serialized.len() > max_chars => {
1672            // Truncate array: keep first N items that fit
1673            let mut result = Vec::new();
1674            let mut running_len = 2; // "[]"
1675            for item in arr {
1676                let item_str = serde_json::to_string(&item).unwrap_or_default();
1677                running_len += item_str.len() + 1; // +1 for comma
1678                if running_len > max_chars {
1679                    result.push(json!(format!("...[{} more items truncated]", 0)));
1680                    break;
1681                }
1682                result.push(item);
1683            }
1684            Value::Array(result)
1685        }
1686        other => other,
1687    }
1688}
1689
1690#[cfg(test)]
1691mod tests {
1692    use super::*;
1693    use std::sync::Arc;
1694    use crate::agent::backend::mock::{MockBackend, MockResponse};
1695    use serial_test::serial;
1696
1697
#[test]
fn test_message_serialization() {
    // A plain user message must serialize with its role tag and its content.
    let encoded = serde_json::to_string(&Message {
        role: Role::User,
        content: "Hello".to_string(),
        tool_calls: vec![],
        tool_result: None,
    })
    .expect("Serialization failed");

    assert!(encoded.contains("user"));
    assert!(encoded.contains("Hello"));
}
1711
#[test]
fn test_tool_call_request() {
    // Tool name and arguments must both appear in the serialized request.
    let request = ToolCallRequest {
        id: "123".to_string(),
        name: "read_file".to_string(),
        arguments: json!({"path": "test.txt"}),
    };
    let encoded = serde_json::to_string(&request).expect("Serialization failed");

    assert!(encoded.contains("read_file"));
    assert!(encoded.contains("test.txt"));
}
1724
1725    /// Helper to build an agent with N messages for prune testing.
1726    /// History starts empty; we add a system prompt + (n-1) user/assistant messages = n total.
1727    fn agent_with_messages(n: usize) -> PawanAgent {
1728        let config = PawanConfig::default();
1729        let mut agent = PawanAgent::new(config, PathBuf::from("."));
1730        // Add system prompt as message 0
1731        agent.add_message(Message {
1732            role: Role::System,
1733            content: "System prompt".to_string(),
1734            tool_calls: vec![],
1735            tool_result: None,
1736        });
1737        for i in 1..n {
1738            agent.add_message(Message {
1739                role: if i % 2 == 1 { Role::User } else { Role::Assistant },
1740                content: format!("Message {}", i),
1741                tool_calls: vec![],
1742                tool_result: None,
1743            });
1744        }
1745        assert_eq!(agent.history().len(), n);
1746        agent
1747    }
1748
#[test]
fn test_prune_history_no_op_when_small() {
    // Five messages is at the no-op threshold: nothing may be removed.
    let mut small = agent_with_messages(5);
    small.prune_history();
    let remaining = small.history().len();
    assert_eq!(remaining, 5, "Should not prune <= 5 messages");
}
1755
#[test]
fn test_prune_history_reduces_messages() {
    let mut agent = agent_with_messages(12);
    let before = agent.history().len();
    assert_eq!(before, 12);

    agent.prune_history();

    // Expected layout: system prompt (1) + summary (1) + last 4 = 6
    let after = agent.history().len();
    assert_eq!(after, 6);
}
1764
#[test]
fn test_prune_history_preserves_system_prompt() {
    // The message at index 0 must be the same before and after pruning.
    let mut agent = agent_with_messages(10);
    let system_before = agent.history()[0].content.clone();

    agent.prune_history();

    let system_after = &agent.history()[0].content;
    assert_eq!(*system_after, system_before, "System prompt must survive pruning");
}
1772
#[test]
fn test_prune_history_preserves_last_messages() {
    // Collects the content of each message in a slice of history.
    let contents = |msgs: &[Message]| -> Vec<String> {
        msgs.iter().map(|m| m.content.clone()).collect()
    };

    let mut agent = agent_with_messages(10);
    // The last four messages sit at indices 6..10 ("Message 6".."Message 9").
    let last4 = contents(&agent.history()[6..10]);

    agent.prune_history();

    // Expected layout afterwards: [system, summary, msg6, msg7, msg8, msg9]
    let after_last4 = contents(&agent.history()[2..6]);
    assert_eq!(last4, after_last4, "Last 4 messages must be preserved after pruning");
}
1783
#[test]
fn test_prune_history_inserts_summary() {
    let mut agent = agent_with_messages(10);
    agent.prune_history();

    // Index 1 must hold a system-role message that mentions "summary".
    let inserted = &agent.history()[1];
    assert_eq!(inserted.role, Role::System);
    assert!(inserted.content.contains("summary"), "Summary message should contain 'summary'");
}
1791
#[test]
fn test_prune_history_utf8_safe() {
    // History: a system prompt followed by ten long user messages made of
    // multi-byte UTF-8 characters.
    let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
    agent.add_message(Message {
        role: Role::System,
        content: "sys".into(),
        tool_calls: vec![],
        tool_result: None,
    });
    let wide_text = "こんにちは世界 🌍 ".repeat(50);
    for _ in 0..10 {
        agent.add_message(Message {
            role: Role::User,
            content: wide_text.clone(),
            tool_calls: vec![],
            tool_result: None,
        });
    }

    // Pruning must not panic on a char boundary.
    agent.prune_history();
    assert!(agent.history().len() < 11, "Should have pruned");

    // Smoke check on the summary text. Offset 0 is a char boundary for any
    // string, so this mainly proves the summary slot exists.
    let summary = &agent.history()[1].content;
    assert!(summary.is_char_boundary(0));
}
1815
#[test]
fn test_prune_history_exactly_6_messages() {
    // Six messages is one above the no-op threshold of five.
    let mut agent = agent_with_messages(6);
    agent.prune_history();

    // One middle message is swapped for a summary, so the count is unchanged:
    // system(1) + summary(1) + last 4 = 6
    let count = agent.history().len();
    assert_eq!(count, 6);
}
1824
#[test]
fn test_message_role_roundtrip() {
    // Every role must survive serialize → deserialize unchanged.
    let all_roles = [Role::User, Role::Assistant, Role::System, Role::Tool];
    for role in all_roles {
        let encoded = serde_json::to_string(&role).unwrap();
        let decoded: Role = serde_json::from_str(&encoded).unwrap();
        assert_eq!(role, decoded);
    }
}
1833
#[test]
fn test_agent_response_construction() {
    // An empty response with a given iteration count reads back as built.
    let response = AgentResponse {
        content: String::new(),
        tool_calls: vec![],
        iterations: 3,
        usage: TokenUsage::default(),
    };

    assert!(response.content.is_empty());
    assert!(response.tool_calls.is_empty());
    assert_eq!(response.iterations, 3);
}
1846
1847    // --- truncate_tool_result tests ---
1848
#[test]
fn test_truncate_small_result_unchanged() {
    // A payload well under the limit must come back identical.
    let original = json!({"success": true, "output": "hello"});
    let processed = truncate_tool_result(original.clone(), 8000);
    assert_eq!(processed, original);
}
1855
#[test]
fn test_truncate_large_string_value() {
    // A 10 000-character string inside an object must be cut and marked.
    let payload = json!({"stdout": "x".repeat(10000), "success": true});
    let processed = truncate_tool_result(payload, 2000);

    let stdout = processed["stdout"].as_str().unwrap();
    assert!(stdout.len() < 10000, "Should be truncated");
    assert!(stdout.contains("truncated"), "Should indicate truncation");
}
1865
#[test]
fn test_truncate_preserves_valid_json() {
    let payload = json!({"data": "x".repeat(20000), "meta": "keep"});
    let processed = truncate_tool_result(payload, 5000);

    // The output must serialize and parse again (no broken strings).
    let encoded = serde_json::to_string(&processed).unwrap();
    let _roundtrip: Value = serde_json::from_str(&encoded).unwrap();

    // The small field is left untouched.
    assert_eq!(processed["meta"], "keep");
}
1877
#[test]
fn test_truncate_bare_string() {
    // A top-level string is cut to the limit plus a short truncation notice.
    let processed = truncate_tool_result(json!("x".repeat(10000)), 500);
    let text = processed.as_str().unwrap();

    assert!(text.len() <= 600); // 500 + truncation notice
    assert!(text.contains("truncated"));
}
1886
#[test]
fn test_truncate_array() {
    // 1000 short strings serialize far beyond the 500-character limit.
    let items: Vec<Value> = (0..1000).map(|i| json!(format!("item_{}", i))).collect();
    let result = truncate_tool_result(Value::Array(items), 500);
    let arr = result.as_array().unwrap();

    assert!(arr.len() < 1000, "Array should be truncated");

    // The kept prefix must be the original leading items, in order.
    assert_eq!(arr[0], "item_0", "Leading items must be preserved");

    // The final element is the marker that announces dropped items.
    let marker = arr
        .last()
        .and_then(|v| v.as_str())
        .expect("truncated array should end with a string marker");
    assert!(
        marker.contains("more items truncated"),
        "Should indicate truncation"
    );
}
1895
1896    // --- message_importance tests ---
1897
#[test]
fn test_importance_failed_tool_highest() {
    // A tool message whose result is marked unsuccessful.
    let failed = Message {
        role: Role::Tool,
        content: "error".into(),
        tool_calls: vec![],
        tool_result: Some(ToolResultMessage {
            tool_call_id: "1".into(),
            content: json!({"error": "failed"}),
            success: false,
        }),
    };

    let score = PawanAgent::message_importance(&failed);
    assert!(score > 0.8, "Failed tools should be high importance");
}
1912
#[test]
fn test_importance_successful_tool_lowest() {
    // A tool message whose result is marked successful.
    let succeeded = Message {
        role: Role::Tool,
        content: "ok".into(),
        tool_calls: vec![],
        tool_result: Some(ToolResultMessage {
            tool_call_id: "1".into(),
            content: json!({"success": true}),
            success: true,
        }),
    };

    let score = PawanAgent::message_importance(&succeeded);
    assert!(score < 0.3, "Successful tools should be low importance");
}
1927
#[test]
fn test_importance_user_medium() {
    // A plain user message must score strictly between 0.4 and 0.8.
    let msg = Message {
        role: Role::User,
        content: "hello".into(),
        tool_calls: vec![],
        tool_result: None,
    };
    let score = PawanAgent::message_importance(&msg);
    assert!(score > 0.4 && score < 0.8, "User messages should be medium: {}", score);
}
1934
#[test]
fn test_importance_error_assistant_high() {
    // An assistant message whose text reports an error.
    let msg = Message {
        role: Role::Assistant,
        content: "Error: something failed".into(),
        tool_calls: vec![],
        tool_result: None,
    };
    let score = PawanAgent::message_importance(&msg);
    assert!(score > 0.7, "Error assistant messages should be high importance");
}
1940
#[test]
fn test_importance_ordering() {
    // Builds a tool-role message carrying an empty JSON result.
    let tool_message = |text: &str, call_id: &str, success: bool| Message {
        role: Role::Tool,
        content: text.into(),
        tool_calls: vec![],
        tool_result: Some(ToolResultMessage {
            tool_call_id: call_id.into(),
            content: json!({}),
            success,
        }),
    };

    let failed_tool = tool_message("err", "1", false);
    let user = Message {
        role: Role::User,
        content: "hi".into(),
        tool_calls: vec![],
        tool_result: None,
    };
    let ok_tool = tool_message("ok", "2", true);

    let f = PawanAgent::message_importance(&failed_tool);
    let u = PawanAgent::message_importance(&user);
    let s = PawanAgent::message_importance(&ok_tool);
    assert!(f > u && u > s, "Ordering should be: failed({}) > user({}) > success({})", f, u, s);
}
1952
1953    // --- State management tests ---
1954
#[test]
fn test_agent_clear_history_removes_all() {
    let mut agent = agent_with_messages(8);
    let before = agent.history().len();
    assert_eq!(before, 8);

    agent.clear_history();

    let after = agent.history().len();
    assert_eq!(after, 0, "clear_history should drop every message");
}
1962
#[test]
fn test_agent_add_message_appends_in_order() {
    let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
    assert_eq!(agent.history().len(), 0);

    // Add a user message followed by an assistant message.
    agent.add_message(Message {
        role: Role::User,
        content: "first".into(),
        tool_calls: vec![],
        tool_result: None,
    });
    agent.add_message(Message {
        role: Role::Assistant,
        content: "second".into(),
        tool_calls: vec![],
        tool_result: None,
    });

    // Both must be present, in insertion order, with their roles intact.
    let history = agent.history();
    assert_eq!(history.len(), 2);
    assert_eq!(history[0].content, "first");
    assert_eq!(history[1].content, "second");
    assert_eq!(history[0].role, Role::User);
    assert_eq!(history[1].role, Role::Assistant);
}
1990
#[test]
fn test_agent_switch_model_updates_name() {
    let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
    let previous = agent.model_name().to_string();

    agent.switch_model("gpt-oss-120b");

    // The reported name is the new one and differs from the starting name.
    assert_eq!(agent.model_name(), "gpt-oss-120b");
    assert_ne!(
        agent.model_name(),
        previous,
        "switch_model should change model_name"
    );
}
2005
#[test]
fn test_agent_with_tools_replaces_registry() {
    let default_agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
    let original_tool_count = default_agent.get_tool_definitions().len();

    // Swap in a freshly created, empty registry.
    let stripped = default_agent.with_tools(ToolRegistry::new());
    let remaining = stripped.get_tool_definitions().len();

    assert_eq!(
        remaining,
        0,
        "with_tools(empty) should drop default registry (had {} tools)",
        original_tool_count
    );
}
2022
#[test]
fn test_agent_get_tool_definitions_returns_deterministic_set() {
    // Two agents built from the same default config must expose the same
    // number of tools, and that set must not be empty.
    let tool_names = |agent: &PawanAgent| -> Vec<String> {
        agent
            .get_tool_definitions()
            .iter()
            .map(|d| d.name.clone())
            .collect()
    };

    let config = PawanConfig::default();
    let agent_a = PawanAgent::new(config.clone(), PathBuf::from("."));
    let agent_b = PawanAgent::new(config, PathBuf::from("."));
    let defs_a = tool_names(&agent_a);
    let defs_b = tool_names(&agent_b);

    assert!(!defs_a.is_empty(), "default agent should have tools");
    assert_eq!(defs_a.len(), defs_b.len(), "two default agents must have same tool count");

    // Spot-check a couple of core tools.
    let has = |wanted: &str| defs_a.iter().any(|name| name.as_str() == wanted);
    assert!(has("read_file"), "should have read_file in defaults");
    assert!(has("bash"), "should have bash in defaults");
}
2039
2040    // ─── Edge cases for truncate_tool_result ─────────────────────────────
2041
#[test]
fn test_truncate_empty_object_unchanged() {
    // Regression: "{}" serializes to 2 chars, so the early return applies.
    let empty = json!({});
    let processed = truncate_tool_result(empty.clone(), 10);
    assert_eq!(processed, empty);
}
2049
#[test]
fn test_truncate_null_value_unchanged() {
    // Null must come back as null.
    let null = Value::Null;
    let processed = truncate_tool_result(null.clone(), 10);
    assert_eq!(processed, null);
}
2057
#[test]
fn test_truncate_numeric_values_pass_through() {
    // Numbers and booleans cannot be shortened; they must be left intact.
    let scalars = json!({"count": 42, "ratio": 2.5, "enabled": true});
    let processed = truncate_tool_result(scalars.clone(), 8000);
    assert_eq!(processed, scalars);
}
2065
#[test]
fn test_truncate_large_string_is_utf8_safe() {
    // Regression: cutting must happen on character boundaries, not at raw
    // byte offsets (3000 crabs = ~12000 bytes), or multi-byte text panics.
    let payload = json!({"crabs": "🦀".repeat(3000)});
    let processed = truncate_tool_result(payload, 1000);

    let crabs = processed["crabs"].as_str().unwrap();
    assert!(crabs.contains("truncated"), "truncation marker must be present");
    assert!(crabs.starts_with('🦀'), "must preserve char boundary");
}
2077
#[test]
fn test_truncate_nested_object_remains_valid_json() {
    // A large string inside a sub-object is handled through recursion, and
    // the overall output must still be parseable JSON.
    let payload = json!({
        "meta": "small",
        "nested": { "inner": "y".repeat(5000) }
    });
    let processed = truncate_tool_result(payload, 1500);

    assert_eq!(processed["meta"], "small");

    let encoded = serde_json::to_string(&processed).unwrap();
    let _roundtrip: Value = serde_json::from_str(&encoded)
        .expect("truncated result must be valid JSON");
}
2093
#[test]
fn test_truncate_short_bare_string_unchanged() {
    // A top-level string below the limit is returned by the early check.
    let short = json!("short string");
    let processed = truncate_tool_result(short.clone(), 1000);
    assert_eq!(processed, short);
}
2101
#[test]
fn test_session_id_is_unique_per_agent() {
    // Fresh agents need distinct session ids so that their eruka writes do
    // not collide under the same operations/turns/ key.
    let new_agent = || PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
    let a1 = new_agent();
    let a2 = new_agent();

    assert_ne!(a1.session_id, a2.session_id);
    assert!(!a1.session_id.is_empty());
    // A UUID v4 written with dashes is 36 characters long.
    assert_eq!(a1.session_id.len(), 36);
}
2113
2114    #[serial(pawan_session_tests)]
2115    #[test]
2116    fn test_resume_session_adopts_loaded_id() {
2117        // resume_session must overwrite self.session_id with the loaded
2118        // session's id so subsequent eruka writes cluster under that id
2119        // rather than the ephemeral one from new().
2120        use std::io::Write;
2121        let tmp = tempfile::TempDir::new().unwrap();
2122        // Minimal valid session file
2123        let sess_dir = tmp.path().join(".pawan").join("sessions");
2124        std::fs::create_dir_all(&sess_dir).unwrap();
2125        let sess_id = "resume-test-xyz";
2126        let sess_path = sess_dir.join(format!("{}.json", sess_id));
2127        let sess_json = serde_json::json!({
2128            "id": sess_id,
2129            "model": "test-model",
2130            "created_at": "2026-04-11T00:00:00Z",
2131            "updated_at": "2026-04-11T00:00:00Z",
2132            "messages": [],
2133            "total_tokens": 0,
2134            "iteration_count": 0
2135        });
2136        let mut f = std::fs::File::create(&sess_path).unwrap();
2137        f.write_all(sess_json.to_string().as_bytes()).unwrap();
2138
2139        // Point HOME at the tmp dir so Session::sessions_dir resolves here
2140        let prev_home = std::env::var("HOME").ok();
2141        std::env::set_var("HOME", tmp.path());
2142
2143        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2144        let orig_id = agent.session_id.clone();
2145        agent.resume_session(sess_id).expect("resume should succeed");
2146        assert_eq!(agent.session_id, sess_id);
2147        assert_ne!(agent.session_id, orig_id);
2148
2149        // Restore HOME to avoid polluting other tests
2150        if let Some(h) = prev_home {
2151            std::env::set_var("HOME", h);
2152        } else {
2153            std::env::remove_var("HOME");
2154        }
2155    }
2156
2157    #[test]
2158    fn test_history_snapshot_for_eruka_bounded() {
2159        // 100 messages of 500 chars each = 50k raw content. Snapshot must
2160        // cap at ~4000 chars so eruka writes never balloon.
2161        let mut history = Vec::new();
2162        for i in 0..100 {
2163            history.push(Message {
2164                role: if i % 2 == 0 { Role::User } else { Role::Assistant },
2165                content: "x".repeat(500),
2166                tool_calls: vec![],
2167                tool_result: None,
2168            });
2169        }
2170        let snapshot = PawanAgent::history_snapshot_for_eruka(&history);
2171        // After the break at >4000, one more line (up to 203 chars) gets
2172        // appended, so total is bounded by ~4200.
2173        assert!(snapshot.len() <= 4400, "snapshot too long: {} chars", snapshot.len());
2174        assert!(snapshot.len() > 200, "snapshot too short: {} chars", snapshot.len());
2175    }
2176
2177    #[test]
2178    fn test_history_snapshot_for_eruka_includes_role_prefixes() {
2179        // Each message must be tagged with its role so the eruka consumer
2180        // can distinguish user questions from assistant answers.
2181        let history = vec![
2182            Message { role: Role::User, content: "hi".into(), tool_calls: vec![], tool_result: None },
2183            Message { role: Role::Assistant, content: "hello".into(), tool_calls: vec![], tool_result: None },
2184            Message { role: Role::Tool, content: "ok".into(), tool_calls: vec![], tool_result: None },
2185            Message { role: Role::System, content: "sys".into(), tool_calls: vec![], tool_result: None },
2186        ];
2187        let snapshot = PawanAgent::history_snapshot_for_eruka(&history);
2188        assert!(snapshot.contains("U: hi"));
2189        assert!(snapshot.contains("A: hello"));
2190        assert!(snapshot.contains("T: ok"));
2191        assert!(snapshot.contains("S: sys"));
2192    }
2193
2194    async fn test_archive_to_eruka_ok_when_disabled() {
2195        // When eruka is disabled (the default), archive_to_eruka must
2196        // return Ok without touching the network — this is the
2197        // fire-and-forget contract the CLI relies on.
2198        let agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2199        assert!(agent.eruka.is_none(), "default config should disable eruka");
2200        let result = agent.archive_to_eruka().await;
2201        assert!(result.is_ok(), "archive_to_eruka should be non-fatal when disabled");
2202    }
2203
2204    // ─── probe_local_endpoint tests ──────────────────────────────────────
2205
2206    #[test]
2207    fn test_probe_local_endpoint_closed_port_returns_false() {
2208        // Port 1999 is almost never in use by Netdata (which uses 19999) 
2209        // or other common services.
2210        assert!(
2211            !probe_local_endpoint("http://localhost:1999/v1"),
2212            "closed port should return false"
2213        );
2214    }
2215
2216    #[test]
2217    fn test_probe_local_endpoint_open_port_returns_true() {
2218        // Bind a real listener on a free OS-assigned port, then probe it.
2219        use std::net::TcpListener;
2220        let listener = TcpListener::bind("127.0.0.1:0").expect("bind failed");
2221        let port = listener.local_addr().unwrap().port();
2222        let url = format!("http://localhost:{port}/v1");
2223        assert!(probe_local_endpoint(&url), "open port should return true");
2224    }
2225
2226    #[test]
2227    fn test_probe_local_endpoint_url_without_explicit_port() {
2228        // Port is absent — probe_local_endpoint must default to 80
2229        // which on CI is normally closed, so this just must not panic.
2230        let _ = probe_local_endpoint("http://localhost/v1");
2231    }
2232
2233    // ─── load_arch_context tests ──────────────────────────────────────────
2234
2235    #[test]
2236    fn test_load_arch_context_absent_returns_none() {
2237        let dir = tempfile::TempDir::new().unwrap();
2238        assert!(load_arch_context(dir.path()).is_none());
2239    }
2240
2241    #[test]
2242    fn test_load_arch_context_reads_file_content() {
2243        let dir = tempfile::TempDir::new().unwrap();
2244        let pawan_dir = dir.path().join(".pawan");
2245        std::fs::create_dir_all(&pawan_dir).unwrap();
2246        std::fs::write(pawan_dir.join("arch.md"), "## Architecture\nUse tokio.\n").unwrap();
2247        let result = load_arch_context(dir.path());
2248        assert!(result.is_some());
2249        assert!(result.unwrap().contains("Use tokio"));
2250    }
2251
2252    #[test]
2253    fn test_load_arch_context_empty_file_returns_none() {
2254        let dir = tempfile::TempDir::new().unwrap();
2255        let pawan_dir = dir.path().join(".pawan");
2256        std::fs::create_dir_all(&pawan_dir).unwrap();
2257        std::fs::write(pawan_dir.join("arch.md"), "   \n").unwrap();
2258        assert!(load_arch_context(dir.path()).is_none(), "whitespace-only file should be None");
2259    }
2260
2261    #[test]
2262    fn test_load_arch_context_truncates_at_2000_chars() {
2263        let dir = tempfile::TempDir::new().unwrap();
2264        let pawan_dir = dir.path().join(".pawan");
2265        std::fs::create_dir_all(&pawan_dir).unwrap();
2266        // Write a file that is exactly 2500 ASCII chars (safe char boundary)
2267        let content = "x".repeat(2_500);
2268        std::fs::write(pawan_dir.join("arch.md"), &content).unwrap();
2269        let result = load_arch_context(dir.path()).unwrap();
2270        assert!(
2271            result.len() < 2_100,
2272            "truncated result should be close to 2000 chars, got {}",
2273            result.len()
2274        );
2275        assert!(result.ends_with("(truncated)"), "truncated output must end with marker");
2276    }
2277
2278    async fn test_tool_idle_timeout_triggered() {
2279        use std::time::Duration;
2280        use tokio::time::sleep;
2281
2282        let mut config = PawanConfig::default();
2283        config.tool_call_idle_timeout_secs = 0; // Trigger on any non-zero elapsed seconds
2284
2285        // Custom backend that is slow on the second call.
2286        // With our fix (moving update before LLM call), this will trigger
2287        // at the start of the THIRD iteration if the second iteration takes time.
2288        struct SlowBackend {
2289            index: Arc<std::sync::atomic::AtomicUsize>,
2290        }
2291
2292        #[async_trait::async_trait]
2293        impl LlmBackend for SlowBackend {
2294            async fn generate(&self, _m: &[Message], _t: &[ToolDefinition], _o: Option<&TokenCallback>) -> Result<LLMResponse> {
2295                let idx = self.index.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
2296                if idx == 0 {
2297                    // First call: return a tool call to ensure we loop again
2298                    Ok(LLMResponse {
2299                        content: String::new(),
2300                        reasoning: None,
2301                        tool_calls: vec![ToolCallRequest {
2302                            id: "1".to_string(),
2303                            name: "read_file".to_string(),
2304                            arguments: json!({"path": "foo"}),
2305                        }],
2306                        finish_reason: "tool_calls".to_string(),
2307                        usage: None,
2308                    })
2309                } else if idx == 1 {
2310                    // Second call: delay then return ANOTHER tool call
2311                    // The delay happens AFTER last_tool_call_time is updated for Iteration 2.
2312                    // So Iteration 3's check will see this 1.1s delay.
2313                    sleep(Duration::from_millis(1100)).await;
2314                    Ok(LLMResponse {
2315                        content: String::new(),
2316                        reasoning: None,
2317                        tool_calls: vec![ToolCallRequest {
2318                            id: "2".to_string(),
2319                            name: "read_file".to_string(),
2320                            arguments: json!({"path": "bar"}),
2321                        }],
2322                        finish_reason: "tool_calls".to_string(),
2323                        usage: None,
2324                    })
2325                } else {
2326                    Ok(LLMResponse {
2327                        content: "Done".to_string(),
2328                        reasoning: None,
2329                        tool_calls: vec![],
2330                        finish_reason: "stop".to_string(),
2331                        usage: None,
2332                    })
2333                }
2334            }
2335        }
2336
2337        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2338        agent.backend = Box::new(SlowBackend { index: Arc::new(std::sync::atomic::AtomicUsize::new(0)) });
2339
2340        let result = agent.execute_with_all_callbacks("test", None, None, None, None).await;
2341        
2342        match result {
2343            Err(PawanError::Agent(msg)) => {
2344                assert!(msg.contains("Tool idle timeout exceeded"), "Error message should contain timeout: {}", msg);
2345            }
2346            Ok(_) => panic!("Expected timeout error, but it succeeded. This means the timeout check didn't catch the delay."),
2347            Err(e) => panic!("Unexpected error: {:?}", e),
2348        }
2349    }
2350
2351    async fn test_tool_idle_timeout_not_triggered() {
2352        let mut config = PawanConfig::default();
2353        config.tool_call_idle_timeout_secs = 10;
2354
2355        let backend = MockBackend::new(vec![
2356            MockResponse::text("Done"),
2357        ]);
2358
2359        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2360        agent.backend = Box::new(backend);
2361
2362        let result = agent.execute_with_all_callbacks("test", None, None, None, None).await;
2363        assert!(result.is_ok());
2364    }
2365
2366    // ─── Backend creation tests ─────────────────────────────────────────────
2367
2368    #[test]
2369    fn test_probe_local_endpoint_with_localhost_replacement() {
2370        // Verify localhost is replaced with 127.0.0.1
2371        let listener = std::net::TcpListener::bind("127.0.0.1:0").expect("bind failed");
2372        let port = listener.local_addr().unwrap().port();
2373        let url = format!("http://localhost:{}/v1", port);
2374        assert!(probe_local_endpoint(&url), "localhost should be resolved to 127.0.0.1");
2375    }
2376
2377    #[test]
2378    fn test_probe_local_endpoint_with_https_defaults_to_443() {
2379        // HTTPS without explicit port should default to 443
2380        let _ = probe_local_endpoint("https://example.com/v1");
2381        // Just verify it doesn't panic
2382    }
2383
2384    #[test]
2385    fn test_probe_local_endpoint_with_http_defaults_to_80() {
2386        // HTTP without explicit port should default to 80
2387        let _ = probe_local_endpoint("http://example.com/v1");
2388        // Just verify it doesn't panic
2389    }
2390
2391    #[test]
2392    fn test_probe_local_endpoint_invalid_address_returns_false() {
2393        // Invalid address should return false without panicking
2394        assert!(!probe_local_endpoint("http://invalid-host-name-that-does-not-exist-12345.com:9999/v1"));
2395    }
2396
2397    // ─── Session management tests ───────────────────────────────────────────
2398
2399    #[serial(pawan_session_tests)]
2400    #[test]
2401    fn test_save_session_creates_valid_session() {
2402        let tmp = tempfile::TempDir::new().unwrap();
2403        let prev_home = std::env::var("HOME").ok();
2404        std::env::set_var("HOME", tmp.path());
2405
2406        let config = PawanConfig::default();
2407        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2408        agent.add_message(Message {
2409            role: Role::User,
2410            content: "test message".to_string(),
2411            tool_calls: vec![],
2412            tool_result: None,
2413        });
2414
2415        let session_id = agent.save_session().expect("save should succeed");
2416        assert!(!session_id.is_empty());
2417
2418        // Verify session file exists
2419        let sess_dir = tmp.path().join(".pawan").join("sessions");
2420        let sess_path = sess_dir.join(format!("{}.json", session_id));
2421        assert!(sess_path.exists(), "session file should be created");
2422
2423        if let Some(h) = prev_home {
2424            std::env::set_var("HOME", h);
2425        } else {
2426            std::env::remove_var("HOME");
2427        }
2428    }
2429
2430    #[serial(pawan_session_tests)]
2431    #[test]
2432    fn test_resume_session_loads_messages() {
2433        let tmp = tempfile::TempDir::new().unwrap();
2434        let prev_home = std::env::var("HOME").ok();
2435        std::env::set_var("HOME", tmp.path());
2436
2437        let sess_dir = tmp.path().join(".pawan").join("sessions");
2438        std::fs::create_dir_all(&sess_dir).unwrap();
2439        let sess_id = "resume-load-test";
2440        let sess_path = sess_dir.join(format!("{}.json", sess_id));
2441
2442        let sess_json = serde_json::json!({
2443            "id": sess_id,
2444            "model": "test-model",
2445            "created_at": "2026-04-11T00:00:00Z",
2446            "updated_at": "2026-04-11T00:00:00Z",
2447            "messages": [
2448                {"role": "user", "content": "test", "tool_calls": [], "tool_result": null}
2449            ],
2450            "total_tokens": 100,
2451            "iteration_count": 1
2452        });
2453        std::fs::write(&sess_path, sess_json.to_string()).unwrap();
2454
2455        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2456        agent.resume_session(sess_id).expect("resume should succeed");
2457
2458        assert_eq!(agent.history().len(), 1);
2459        assert_eq!(agent.history()[0].content, "test");
2460        assert_eq!(agent.context_tokens_estimate, 100);
2461
2462        if let Some(h) = prev_home {
2463            std::env::set_var("HOME", h);
2464        } else {
2465            std::env::remove_var("HOME");
2466        }
2467    }
2468
2469    #[serial(pawan_session_tests)]
2470    #[test]
2471    fn test_resume_session_nonexistent_returns_error() {
2472        let tmp = tempfile::TempDir::new().unwrap();
2473        let prev_home = std::env::var("HOME").ok();
2474        std::env::set_var("HOME", tmp.path());
2475
2476        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2477        let result = agent.resume_session("nonexistent-session");
2478        assert!(result.is_err(), "resuming nonexistent session should fail");
2479
2480        if let Some(h) = prev_home {
2481            std::env::set_var("HOME", h);
2482        } else {
2483            std::env::remove_var("HOME");
2484        }
2485    }
2486
2487    // ─── Execution logic tests ───────────────────────────────────────────────
2488
2489    async fn test_execute_with_callbacks_returns_response() {
2490        let backend = MockBackend::new(vec![
2491            MockResponse::text("Hello world"),
2492        ]);
2493
2494        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2495        agent.backend = Box::new(backend);
2496
2497        let result = agent.execute_with_callbacks("test", None, None, None).await;
2498        assert!(result.is_ok());
2499        let response = result.unwrap();
2500        assert_eq!(response.content, "Hello world");
2501    }
2502
2503    async fn test_execute_with_token_callback() {
2504        let backend = MockBackend::new(vec![
2505            MockResponse::text("Response"),
2506        ]);
2507
2508        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2509        agent.backend = Box::new(backend);
2510
2511        let tokens_received = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
2512        let tokens_clone = tokens_received.clone();
2513
2514        let on_token = Box::new(move |token: &str| {
2515            tokens_received.lock().unwrap().push(token.to_string());
2516        });
2517
2518        let result = agent.execute_with_callbacks("test", Some(on_token), None, None).await;
2519        assert!(result.is_ok());
2520        // Note: MockBackend doesn't actually call token callbacks, but we verify the path works
2521    }
2522
2523    async fn test_execute_with_tool_callback() {
2524        let backend = MockBackend::new(vec![
2525            MockResponse::text("Done"),
2526        ]);
2527
2528        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2529        agent.backend = Box::new(backend);
2530
2531        let tools_called = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
2532        let tools_clone = tools_called.clone();
2533
2534        let on_tool = Box::new(move |record: &ToolCallRecord| {
2535            tools_called.lock().unwrap().push(record.name.clone());
2536        });
2537
2538        let result = agent.execute_with_callbacks("test", None, Some(on_tool), None).await;
2539        assert!(result.is_ok());
2540    }
2541
2542    async fn test_execute_max_iterations_exceeded() {
2543        let mut config = PawanConfig::default();
2544        config.max_tool_iterations = 2;
2545
2546        let backend = MockBackend::with_repeated_tool_call("bash");
2547
2548        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2549        agent.backend = Box::new(backend);
2550
2551        let result = agent.execute("test").await;
2552        assert!(result.is_err());
2553        match result {
2554            Err(PawanError::Agent(msg)) => {
2555                assert!(msg.contains("Max tool iterations"));
2556            }
2557            _ => panic!("Expected max iterations error"),
2558        }
2559    }
2560
2561    async fn test_execute_with_arch_context_injection() {
2562        let tmp = tempfile::TempDir::new().unwrap();
2563        let pawan_dir = tmp.path().join(".pawan");
2564        std::fs::create_dir_all(&pawan_dir).unwrap();
2565        std::fs::write(pawan_dir.join("arch.md"), "## Architecture\nUse Rust.\n").unwrap();
2566
2567        let backend = MockBackend::new(vec![
2568            MockResponse::text("Response"),
2569        ]);
2570
2571        let mut agent = PawanAgent::new(PawanConfig::default(), tmp.path().to_path_buf());
2572        agent.backend = Box::new(backend);
2573
2574        let result = agent.execute("test").await;
2575        assert!(result.is_ok());
2576        // Verify arch context was injected (check history)
2577        let user_msg = agent.history().iter().find(|m| m.role == Role::User);
2578        assert!(user_msg.is_some());
2579        assert!(user_msg.unwrap().content.contains("Workspace Architecture"));
2580    }
2581
2582    async fn test_execute_context_pruning_triggered() {
2583        let mut config = PawanConfig::default();
2584        config.max_context_tokens = 100; // Very low to trigger pruning
2585
2586        let backend = MockBackend::new(vec![
2587            MockResponse::text("Response"),
2588        ]);
2589
2590        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2591        agent.backend = Box::new(backend);
2592
2593        // Add many messages to exceed context limit
2594        for i in 0..50 {
2595            agent.add_message(Message {
2596                role: Role::User,
2597                content: "x".repeat(1000),
2598                tool_calls: vec![],
2599                tool_result: None,
2600            });
2601        }
2602
2603        let result = agent.execute("test").await;
2604        assert!(result.is_ok());
2605        // Verify pruning occurred
2606        assert!(agent.history().len() < 50, "history should be pruned");
2607    }
2608
2609    async fn test_execute_iteration_budget_warning() {
2610        let mut config = PawanConfig::default();
2611        config.max_tool_iterations = 5;
2612
2613        let backend = MockBackend::with_repeated_tool_call("bash");
2614
2615        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2616        agent.backend = Box::new(backend);
2617
2618        let result = agent.execute("test").await;
2619        assert!(result.is_err());
2620        // Check that budget warning was added to history
2621        let budget_warnings = agent.history().iter()
2622            .filter(|m| m.content.contains("tool iterations remaining"))
2623            .count();
2624        assert!(budget_warnings > 0, "should have budget warning in history");
2625    }
2626
2627    // ─── Tool execution tests ───────────────────────────────────────────────
2628
2629    async fn test_execute_tool_timeout() {
2630        let mut config = PawanConfig::default();
2631        config.bash_timeout_secs = 1; // Very short timeout
2632
2633        let backend = MockBackend::with_tool_call(
2634            "call_1",
2635            "bash",
2636            json!({"command": "sleep 10"}),
2637            "Run slow command",
2638        );
2639
2640        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2641        agent.backend = Box::new(backend);
2642
2643        let result = agent.execute("test").await;
2644        // Should complete with error in tool result
2645        assert!(result.is_ok());
2646        let response = result.unwrap();
2647        assert!(!response.tool_calls.is_empty());
2648        let first_tool = &response.tool_calls[0];
2649        assert!(!first_tool.success);
2650        assert!(first_tool.result.get("error").is_some());
2651    }
2652
2653    async fn test_execute_tool_error_handling() {
2654        let backend = MockBackend::with_tool_call(
2655            "call_1",
2656            "read_file",
2657            json!({"path": "/nonexistent/file.txt"}),
2658            "Read file",
2659        );
2660
2661        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2662        agent.backend = Box::new(backend);
2663
2664        let result = agent.execute("test").await;
2665        assert!(result.is_ok());
2666        let response = result.unwrap();
2667        assert!(!response.tool_calls.is_empty());
2668        // Tool should have error result
2669        let first_tool = &response.tool_calls[0];
2670        assert!(!first_tool.success);
2671    }
2672
2673    async fn test_execute_multiple_tool_calls() {
2674        let backend = MockBackend::with_multiple_tool_calls(vec![
2675            ("call_1", "bash", json!({"command": "echo 1"})),
2676            ("call_2", "bash", json!({"command": "echo 2"})),
2677        ]);
2678
2679        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2680        agent.backend = Box::new(backend);
2681
2682        let result = agent.execute("test").await;
2683        assert!(result.is_ok());
2684        let response = result.unwrap();
2685        assert!(response.tool_calls.len() >= 2);
2686    }
2687
2688    async fn test_execute_token_usage_accumulation() {
2689        let backend = MockBackend::with_text_and_usage("Response", 100, 50);
2690
2691        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2692        agent.backend = Box::new(backend);
2693
2694        let result = agent.execute("test").await;
2695        assert!(result.is_ok());
2696        let response = result.unwrap();
2697        assert_eq!(response.usage.prompt_tokens, 100);
2698        assert_eq!(response.usage.completion_tokens, 50);
2699        assert_eq!(response.usage.total_tokens, 150);
2700    }
2701
2702    // ─── Error handling tests ───────────────────────────────────────────────
2703
2704
2705
2706    async fn test_execute_with_permission_callback_denied() {
2707        let backend = MockBackend::with_tool_call(
2708            "call_1",
2709            "bash",
2710            json!({"command": "echo test"}),
2711            "Run command",
2712        );
2713
2714		let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2715		agent.backend = Box::new(backend);
2716
2717        let result = agent.execute("test").await;
2718        assert!(result.is_ok());
2719    }
2720    // ─── Error handling tests ───────────────────────────────────────────────
2721
2722    #[tokio::test]
2723    async fn test_execute_with_empty_history() {
2724        let backend = MockBackend::new(vec![
2725            MockResponse::text("Response"),
2726        ]);
2727
2728        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2729        agent.backend = Box::new(backend);
2730
2731        let result = agent.execute("test").await;
2732        assert!(result.is_ok());
2733    }
2734    async fn test_execute_with_coordinator_basic() {
2735        let mut config = PawanConfig::default();
2736        config.use_coordinator = true;
2737        config.max_tool_iterations = 1;
2738
2739        let agent = PawanAgent::new(config, PathBuf::from("."));
2740        // Verify coordinator flag is set
2741        assert!(agent.config().use_coordinator);
2742    }
2743
2744    async fn test_execute_with_coordinator_ignores_callbacks() {
2745        let mut config = PawanConfig::default();
2746        config.use_coordinator = true;
2747
2748        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2749
2750        let callback_called = std::sync::Arc::new(std::sync::atomic::AtomicBool::new(false));
2751        let called_clone = callback_called.clone();
2752
2753        let on_token = Box::new(move |_token: &str| {
2754            called_clone.store(true, std::sync::atomic::Ordering::SeqCst);
2755        });
2756
2757        // Callbacks should be ignored in coordinator mode
2758        let _ = agent.execute_with_all_callbacks("test", Some(on_token), None, None, None).await;
2759        // Note: This will fail because coordinator needs a real backend, but we verify the path
2760    }
2761
2762    // ─── Agent state tests ───────────────────────────────────────────────────
2763
2764    #[test]
2765    fn test_agent_tools_mut_returns_mutable_registry() {
2766        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2767        let original_count = agent.get_tool_definitions().len();
2768
2769        // tools_mut should allow modification
2770        let _ = agent.tools_mut();
2771        // Just verify we can get mutable access
2772    }
2773
2774    #[test]
2775    fn test_agent_config_returns_reference() {
2776        let config = PawanConfig::default();
2777        let agent = PawanAgent::new(config.clone(), PathBuf::from("."));
2778
2779        let agent_config = agent.config();
2780        assert_eq!(agent_config.model, config.model);
2781    }
2782
2783    #[test]
2784    fn test_agent_clear_history() {
2785        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2786
2787        agent.add_message(Message {
2788            role: Role::User,
2789            content: "test".to_string(),
2790            tool_calls: vec![],
2791            tool_result: None,
2792        });
2793
2794        assert_eq!(agent.history().len(), 1);
2795        agent.clear_history();
2796        assert_eq!(agent.history().len(), 0);
2797    }
2798
2799    #[test]
2800    fn test_agent_with_backend_replaces_backend() {
2801        let agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2802        let original_model = agent.model_name().to_string();
2803
2804        let new_backend = MockBackend::new(vec![MockResponse::text("test")]);
2805        let agent = agent.with_backend(Box::new(new_backend));
2806
2807        // Backend should be replaced
2808        assert_eq!(agent.model_name(), original_model);
2809    }
2810
2811    // ─── Edge case tests ─────────────────────────────────────────────────────
2812
2813    async fn test_execute_empty_prompt() {
2814        let backend = MockBackend::new(vec![
2815            MockResponse::text("Response"),
2816        ]);
2817
2818        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2819        agent.backend = Box::new(backend);
2820
2821        let result = agent.execute("").await;
2822        assert!(result.is_ok());
2823    }
2824
2825    async fn test_execute_very_long_prompt() {
2826        let backend = MockBackend::new(vec![
2827            MockResponse::text("Response"),
2828        ]);
2829
2830        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2831        agent.backend = Box::new(backend);
2832
2833        let long_prompt = "x".repeat(100_000);
2834        let result = agent.execute(&long_prompt).await;
2835        assert!(result.is_ok());
2836    }
2837
2838    async fn test_execute_with_special_characters() {
2839        let backend = MockBackend::new(vec![
2840            MockResponse::text("Response"),
2841        ]);
2842
2843        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2844        agent.backend = Box::new(backend);
2845
2846        let special_prompt = "Test with 🦀 emojis and \n newlines and \t tabs";
2847        let result = agent.execute(special_prompt).await;
2848        assert!(result.is_ok());
2849    }
2850}
2851/// Summarize tool arguments for permission requests
2852fn summarize_args(args: &serde_json::Value) -> String {
2853    match args {
2854        serde_json::Value::Object(map) => {
2855            let mut parts = Vec::new();
2856            for (key, value) in map {
2857                let value_str = match value {
2858                    serde_json::Value::String(s) if s.len() > 50 => {
2859                        format!("\"{}...\"", &s[..47])
2860                    }
2861                    serde_json::Value::String(s) => format!("\"{}\"", s),
2862                    serde_json::Value::Array(arr) if arr.len() > 3 => {
2863                        format!("[... {} items]", arr.len())
2864                    }
2865                    serde_json::Value::Array(arr) => {
2866                        let items: Vec<String> = arr.iter().take(3).map(|v| {
2867                            match v {
2868                                serde_json::Value::String(s) => {
2869                                    if s.len() > 20 {
2870                                        format!("\"{}...\"", &s[..17])
2871                                    } else {
2872                                        format!("\"{}\"", s)
2873                                    }
2874                                }
2875                                _ => v.to_string(),
2876                            }
2877                        }).collect();
2878                        format!("[{}]", items.join(", "))
2879                    }
2880                    _ => value.to_string(),
2881                };
2882                parts.push(format!("{}: {}", key, value_str));
2883            }
2884            parts.join(", ")
2885        }
2886        serde_json::Value::String(s) => {
2887            if s.len() > 100 {
2888                format!("\"{}...\"", &s[..97])
2889            } else {
2890                format!("\"{}\"", s)
2891            }
2892        }
2893        serde_json::Value::Array(arr) => {
2894            format!("[{} items]", arr.len())
2895        }
2896        _ => args.to_string(),
2897    }
2898}
2899
// ---------------------------------------------------------------------------
// Tests for coordinator integration
// ---------------------------------------------------------------------------
2902
/// Tests for the coordinator integration: the `use_coordinator` config flag,
/// `ToolCallingConfig` values, and `ToolCoordinator` behavior against mock
/// backends.
#[cfg(test)]
mod coordinator_tests {
    use super::*;
    use crate::agent::backend::mock::MockBackend;
    use crate::coordinator::{FinishReason, ToolCallingConfig, ToolCoordinator};
    use std::sync::Arc;

    /// Test that config default has use_coordinator = false
    #[test]
    fn test_config_default_use_coordinator_false() {
        let config = PawanConfig::default();
        assert!(!config.use_coordinator);
    }

    /// Test that config can set use_coordinator = true
    #[test]
    fn test_config_use_coordinator_true() {
        let config = PawanConfig {
            use_coordinator: true,
            ..Default::default()
        };
        assert!(config.use_coordinator);
    }

    /// Test that an agent built from a config with `use_coordinator = true`
    /// reports the flag through `config()`.
    #[tokio::test]
    async fn test_execute_with_coordinator_flag_enabled() {
        let config = PawanConfig {
            use_coordinator: true,
            model: "test-model".to_string(),
            ..Default::default()
        };
        let agent = PawanAgent::new(config, PathBuf::from("."));
        // Verify the flag is set
        assert!(agent.config().use_coordinator);
    }

    /// Test that the `use_coordinator` flag survives swapping in a mock
    /// backend via `with_backend`.
    ///
    /// Execution through the coordinator is deliberately not exercised here:
    /// per the original author's note, the coordinator creates its own
    /// backend, so running it would fail with the injected mock.
    #[tokio::test]
    async fn test_execute_with_coordinator_produces_response() {
        let config = PawanConfig {
            use_coordinator: true,
            max_tool_iterations: 1,
            model: "test-model".to_string(),
            ..Default::default()
        };
        let agent = PawanAgent::new(config, PathBuf::from("."));
        let backend = MockBackend::with_text("Hello from coordinator!");
        let agent = agent.with_backend(Box::new(backend));

        // Only the flag is checked; see the doc comment for why.
        assert!(agent.config().use_coordinator);
    }

    /// Test ToolCallingConfig default values
    #[test]
    fn test_tool_calling_config_defaults() {
        let cfg = ToolCallingConfig::default();
        assert_eq!(cfg.max_iterations, 10);
        assert!(cfg.parallel_execution);
        assert_eq!(cfg.tool_timeout.as_secs(), 30);
        assert!(!cfg.stop_on_error);
    }

    /// Test custom ToolCallingConfig
    #[test]
    fn test_tool_calling_config_custom() {
        let cfg = ToolCallingConfig {
            max_iterations: 5,
            parallel_execution: false,
            tool_timeout: std::time::Duration::from_secs(60),
            stop_on_error: true,
        };
        assert_eq!(cfg.max_iterations, 5);
        assert!(!cfg.parallel_execution);
        assert_eq!(cfg.tool_timeout.as_secs(), 60);
        assert!(cfg.stop_on_error);
    }

    /// Test that coordinator dispatch check works correctly
    #[tokio::test]
    async fn test_coordinator_dispatch_when_flag_is_false() {
        let config = PawanConfig::default();
        assert!(!config.use_coordinator);
        // When flag is false, execute_with_all_callbacks should use built-in loop
    }

    /// Test error handling when coordinator encounters unknown tool
    #[tokio::test]
    async fn test_coordinator_error_handling_unknown_tool() {
        let mock_backend = Arc::new(MockBackend::with_tool_call(
            "call_1",
            "nonexistent_tool",
            json!({}),
            "Trying to call unknown tool",
        ));
        // Empty registry, so the requested tool cannot be resolved.
        let registry = Arc::new(ToolRegistry::new());
        let config = ToolCallingConfig::default();
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        let result = coordinator.execute(None, "Use a tool").await.unwrap();
        assert!(matches!(result.finish_reason, FinishReason::UnknownTool(_)));
    }

    /// Test max iterations limit in coordinator
    #[tokio::test]
    async fn test_coordinator_max_iterations_limit() {
        use crate::tools::Tool;
        use async_trait::async_trait;

        // Dummy tool that always succeeds
        struct DummyTool;
        #[async_trait]
        impl Tool for DummyTool {
            fn name(&self) -> &str {
                "test_tool"
            }
            fn description(&self) -> &str {
                "Dummy tool for testing"
            }
            fn parameters_schema(&self) -> serde_json::Value {
                json!({})
            }
            async fn execute(&self, _args: serde_json::Value) -> crate::Result<serde_json::Value> {
                Ok(json!({ "status": "ok" }))
            }
        }

        let mock_backend = Arc::new(MockBackend::with_repeated_tool_call("test_tool"));
        let mut registry = ToolRegistry::new();
        registry.register(Arc::new(DummyTool));
        let registry = Arc::new(registry);
        let config = ToolCallingConfig {
            max_iterations: 3,
            ..Default::default()
        };
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        let result = coordinator.execute(None, "Use tools").await.unwrap();
        assert_eq!(result.iterations, 3);
        assert!(matches!(result.finish_reason, FinishReason::MaxIterations));
    }

    /// Test timeout handling in coordinator
    #[tokio::test]
    async fn test_coordinator_timeout_handling() {
        // Create a mock that returns a tool call
        let mock_backend = Arc::new(MockBackend::with_tool_call(
            "call_1",
            "bash",
            json!({"command": "sleep 10"}),
            "Run slow command",
        ));
        let registry = Arc::new(ToolRegistry::with_defaults(PathBuf::from(".")));
        // Very short timeout
        let config = ToolCallingConfig {
            tool_timeout: std::time::Duration::from_millis(1),
            ..Default::default()
        };
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        // This will timeout - coordinator should handle it gracefully
        let result = coordinator.execute(None, "Run a command").await.unwrap();
        // The tool should have failed with timeout error
        assert!(!result.tool_calls.is_empty());
        let first_call = &result.tool_calls[0];
        assert!(!first_call.success);
        assert!(first_call.result.get("error").is_some());
    }

    /// Test that coordinator accumulates token usage
    #[tokio::test]
    async fn test_coordinator_token_usage_accumulation() {
        let mock_backend = Arc::new(MockBackend::with_text_and_usage("Response", 100, 50));
        let registry = Arc::new(ToolRegistry::new());
        let config = ToolCallingConfig::default();
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        let result = coordinator.execute(None, "Hello").await.unwrap();
        assert_eq!(result.total_usage.prompt_tokens, 100);
        assert_eq!(result.total_usage.completion_tokens, 50);
        assert_eq!(result.total_usage.total_tokens, 150);
    }

    /// Test parallel execution in coordinator
    #[tokio::test]
    async fn test_coordinator_parallel_execution() {
        // Mock that returns multiple tool calls
        let mock_backend = Arc::new(MockBackend::with_multiple_tool_calls(vec![
            ("call_1", "bash", json!({"command": "echo 1"})),
            ("call_2", "bash", json!({"command": "echo 2"})),
            ("call_3", "read_file", json!({"path": "test.txt"})),
        ]));
        let registry = Arc::new(ToolRegistry::with_defaults(PathBuf::from(".")));
        let config = ToolCallingConfig {
            parallel_execution: true,
            ..Default::default()
        };
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        let result = coordinator.execute(None, "Run multiple commands").await.unwrap();
        // Should have executed multiple tool calls
        assert!(result.tool_calls.len() >= 3);
    }
}
pawan/agent/mod.rs

pawan/agent/
mod.rs