// pawan/agent/mod.rs

//! Pawan Agent - The core agent that handles tool-calling loops
//!
//! This module provides the main `PawanAgent` which:
//! - Manages conversation history
//! - Coordinates tool calling with the LLM via pluggable backends
//! - Provides streaming responses
//! - Supports multiple LLM backends (NVIDIA API, Ollama, OpenAI)
9pub mod backend;
10mod preflight;
11pub mod events;
12pub mod session;
13pub mod git_session;
14
15// Re-export event types for public API
16pub use events::{
17    AgentEvent, FinishReason, ThinkingDeltaEvent, ToolApprovalEvent,
18    ToolCompleteEvent, ToolStartEvent, TokenUsageInfo, TurnEndEvent,
19    TurnStartEvent, SessionEndEvent,
20};
21
22use crate::config::{LlmProvider, PawanConfig};
23use crate::coordinator::{CoordinatorResult, ToolCallingConfig, ToolCoordinator};
24use crate::credentials;
25use crate::tools::{ToolDefinition, ToolRegistry};
26use crate::{PawanError, Result};
27use backend::openai_compat::{OpenAiCompatBackend, OpenAiCompatConfig};
28use backend::LlmBackend;
29use serde::{Deserialize, Serialize};
30use serde_json::{json, Value};
31use std::path::PathBuf;
32use std::sync::Arc;
33use std::time::Instant;
34
35/// A message in the conversation
36#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
37pub struct Message {
38    /// Role of the message sender
39    pub role: Role,
40    /// Content of the message
41    pub content: String,
42    /// Tool calls (if any)
43    #[serde(default)]
44    pub tool_calls: Vec<ToolCallRequest>,
45    /// Tool results (if this is a tool result message)
46    #[serde(skip_serializing_if = "Option::is_none")]
47    pub tool_result: Option<ToolResultMessage>,
48}
49
50/// Role of a message sender
51#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
52#[serde(rename_all = "lowercase")]
53pub enum Role {
54    System,
55    User,
56    Assistant,
57    Tool,
58}
59
60/// A request to call a tool
61#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
62pub struct ToolCallRequest {
63    /// Unique ID for this tool call
64    pub id: String,
65    /// Name of the tool to call
66    pub name: String,
67    /// Arguments for the tool
68    pub arguments: Value,
69}
70
71/// Result from a tool execution
72#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
73pub struct ToolResultMessage {
74    /// ID of the tool call this result is for
75    pub tool_call_id: String,
76    /// The result content
77    pub content: Value,
78    /// Whether the tool executed successfully
79    pub success: bool,
80}
81
82/// Record of a tool call execution
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct ToolCallRecord {
85    /// Unique ID for this tool call
86    pub id: String,
87    /// Name of the tool
88    pub name: String,
89    /// Arguments passed to the tool
90    pub arguments: Value,
91    /// Result from the tool
92    pub result: Value,
93    /// Whether execution was successful
94    pub success: bool,
95    /// Duration in milliseconds
96    pub duration_ms: u64,
97}
98
99/// Token usage from an LLM response
100#[derive(Debug, Clone, Default, Serialize, Deserialize)]
101pub struct TokenUsage {
102    pub prompt_tokens: u64,
103    pub completion_tokens: u64,
104    pub total_tokens: u64,
105    /// Tokens spent on reasoning/thinking (subset of completion_tokens)
106    pub reasoning_tokens: u64,
107    /// Tokens spent on actual content/tool output (completion - reasoning)
108    pub action_tokens: u64,
109}
110
111/// LLM response from a generation request
112#[derive(Debug, Clone)]
113pub struct LLMResponse {
114    /// Text content of the response
115    pub content: String,
116    /// Reasoning/thinking content (separate from visible content)
117    pub reasoning: Option<String>,
118    /// Tool calls requested by the model
119    pub tool_calls: Vec<ToolCallRequest>,
120    /// Reason the response finished
121    pub finish_reason: String,
122    /// Token usage (if available)
123    pub usage: Option<TokenUsage>,
124}
125
126/// Result from a complete agent execution
127#[derive(Debug)]
128pub struct AgentResponse {
129    /// Final text response
130    pub content: String,
131    /// All tool calls made during execution
132    pub tool_calls: Vec<ToolCallRecord>,
133    /// Number of iterations taken
134    pub iterations: usize,
135    /// Cumulative token usage across all iterations
136    pub usage: TokenUsage,
137}
138
139/// Callback for receiving streaming tokens
140pub type TokenCallback = Box<dyn Fn(&str) + Send + Sync>;
141
142/// Callback for receiving tool call updates
143pub type ToolCallback = Box<dyn Fn(&ToolCallRecord) + Send + Sync>;
144
145/// Callback for tool call start notifications
146pub type ToolStartCallback = Box<dyn Fn(&str) + Send + Sync>;
147
/// A permission request sent from the agent to the UI for approval.
#[derive(Debug, Clone)]
pub struct PermissionRequest {
    /// Tool name requesting permission
    pub tool_name: String,
    /// Summary of arguments (e.g. bash command or file path)
    pub args_summary: String,
}
156
157/// Callback for requesting tool permission from the user.
158/// Returns true if the tool should be allowed, false to deny.
159pub type PermissionCallback =
160    Box<dyn Fn(PermissionRequest) -> tokio::sync::oneshot::Receiver<bool> + Send + Sync>;
161
162/// The main Pawan agent — handles conversation, tool calling, and self-healing.
163///
164/// This struct represents the core Pawan agent that handles:
165/// - Conversation history management
166/// - Tool calling with the LLM via pluggable backends
167/// - Streaming responses
168/// - Multiple LLM backends (NVIDIA API, Ollama, OpenAI)
169/// - Context management and token counting
170/// - Integration with Eruka for 3-tier memory injection
171pub struct PawanAgent {
172    /// Configuration
173    config: PawanConfig,
174    /// Tool registry
175    tools: ToolRegistry,
176    /// Conversation history
177    history: Vec<Message>,
178    /// Workspace root
179    workspace_root: PathBuf,
180    /// LLM backend
181    backend: Box<dyn LlmBackend>,
182
183    /// Estimated token count for current context
184    context_tokens_estimate: usize,
185
186    /// Eruka bridge for 3-tier memory injection
187    eruka: Option<crate::eruka_bridge::ErukaClient>,
188
189    /// Stable identifier for this agent instance's session — used as the
190    /// key for eruka sync_turn / on_pre_compress writes so turns from one
191    /// conversation cluster under the same path. Generated fresh in new(),
192    /// overwritten by resume_session() when loading an existing session.
193    session_id: String,
194
195    /// Per-turn architecture context loaded from `.pawan/arch.md` at init.
196    /// When present, prepended to every user message so key architectural
197    /// constraints stay visible even as tool-call history grows long.
198    arch_context: Option<String>,
199    /// Timestamp of last tool call completion for idle timeout tracking
200    last_tool_call_time: Option<Instant>,
201}
202
/// Probe whether a local inference server is reachable at `url`.
///
/// Parses `host:port` from the URL and attempts a TCP connect with a 100 ms
/// timeout. Returns `true` if the port is open, `false` on any error
/// (including an address that fails to parse). This is intentionally cheap
/// (no HTTP round-trip) so it can run at agent startup without perceptible
/// latency.
fn probe_local_endpoint(url: &str) -> bool {
    use std::net::TcpStream;
    use std::time::Duration;

    // Strip scheme and path — we only need host:port.
    let hostport = url
        .trim_start_matches("http://")
        .trim_start_matches("https://")
        .split('/')
        .next()
        .unwrap_or("");

    // Ensure port is present; default http → 80, https → 443.
    let addr = if hostport.contains(':') {
        hostport.to_string()
    } else if url.starts_with("https://") {
        format!("{hostport}:443")
    } else {
        format!("{hostport}:80")
    };

    // Normalise an exact "localhost" host → "127.0.0.1" so we don't
    // accidentally resolve to ::1 (IPv6) when the listener is bound only to
    // IPv4. Match the exact host prefix — a blanket substring replace would
    // mangle hostnames that merely contain "localhost".
    let addr = match addr.strip_prefix("localhost:") {
        Some(port) => format!("127.0.0.1:{port}"),
        None => addr,
    };

    // `SocketAddr` parsing only accepts literal IPs; hostnames fail here and
    // are reported as unreachable, matching the original behavior.
    let socket_addr = match addr.parse() {
        Ok(a) => a,
        Err(_) => return false,
    };

    TcpStream::connect_timeout(&socket_addr, Duration::from_millis(100)).is_ok()
}
241
242/// Retrieve an API key with fallback chain:
243/// 1. Environment variable
244/// 2. Secure credential store
245/// 3. Return None (caller should prompt user)
246///
247/// If the key is found in the secure store, it's also set as an env var
248/// for subsequent calls.
249fn get_api_key_with_secure_fallback(env_var: &str, key_name: &str) -> Option<String> {
250    // First, check environment variable
251    if let Ok(key) = std::env::var(env_var) {
252        return Some(key);
253    }
254
255    // Second, try secure credential store
256    match credentials::get_api_key(key_name) {
257        Ok(Some(key)) => {
258            // Cache in env var for subsequent calls
259            std::env::set_var(env_var, &key);
260            Some(key)
261        }
262        Ok(None) => None,
263        Err(e) => {
264            tracing::warn!("Failed to retrieve {} from secure store: {}", key_name, e);
265            None
266        }
267    }
268}
269
270/// Prompt user to enter an API key and store it securely.
271///
272/// This function:
273/// 1. Prompts the user to enter the API key
274/// 2. Stores it in the secure credential store
275/// 3. Sets it as an environment variable for the current session
276///
277/// Returns the entered key on success, or None if the user cancels.
278fn prompt_and_store_api_key(env_var: &str, key_name: &str, provider: &str) -> Option<String> {
279    eprintln!("\n🔑 {} API key not found.", provider);
280    eprintln!("You can set it via:");
281    eprintln!("  - Environment variable: export {}=<your-key>", env_var);
282    eprintln!("  - Interactive entry (recommended for security)");
283    eprintln!("\nEnter your {} API key:", provider);
284    eprintln!("  (Your key will be stored securely in the OS credential store)\n");
285
286    // Read input securely (no echo)
287    #[cfg(unix)]
288    let key = {
289        use std::io::{self, Write};
290        
291        // Use termios to disable echo on Unix
292        let mut stdout = io::stdout();
293        stdout.flush().ok();
294        
295        // Read password without echo
296        rpassword::prompt_password("> ").ok()
297    };
298
299    #[cfg(windows)]
300    let key = {
301        use std::io::{self, Write};
302        
303        let mut stdout = io::stdout();
304        stdout.flush().ok();
305        
306        // On Windows, use a simple prompt (rpassword handles this)
307        rpassword::prompt_password("> ").ok()
308    };
309
310    #[cfg(not(any(unix, windows)))]
311    let key = {
312        use std::io::{self, Write, BufRead};
313        
314        let mut stdout = io::stdout();
315        let mut stdin = io::stdin();
316        stdout.flush().ok();
317        print!("> ");
318        stdout.flush().ok();
319        
320        let mut input = String::new();
321        stdin.lock().read_line(&mut input).ok();
322        Some(input.trim().to_string())
323    };
324
325    match key {
326        Some(k) if !k.trim().is_empty() => {
327            let key = k.trim().to_string();
328            
329            // Store in secure credential store
330            match credentials::store_api_key(key_name, &key) {
331                Ok(()) => {
332                    tracing::info!("{} API key stored securely", provider);
333                    std::env::set_var(env_var, &key);
334                    Some(key)
335                }
336                Err(e) => {
337                    tracing::warn!("Failed to store key securely: {}. Using session-only.", e);
338                    std::env::set_var(env_var, &key);
339                    Some(key)
340                }
341            }
342        }
343        _ => {
344            eprintln!("\n⚠️  No key entered. {} will not work until a key is set.", provider);
345            None
346        }
347    }
348}
349
/// Load per-turn architecture context from `<workspace_root>/.pawan/arch.md`.
///
/// Returns `None` if the file is absent, unreadable, or blank.
/// Caps content at 2 000 chars to avoid context bloat from large files;
/// an ellipsis marker is appended when truncation occurs.
fn load_arch_context(workspace_root: &std::path::Path) -> Option<String> {
    let path = workspace_root.join(".pawan").join("arch.md");
    if !path.exists() {
        return None;
    }
    match std::fs::read_to_string(&path) {
        Ok(content) if !content.trim().is_empty() => {
            const MAX_CHARS: usize = 2_000;
            if content.len() > MAX_CHARS {
                // Cut at the byte offset of the MAX_CHARS-th character so the
                // slice always lands on a char boundary (UTF-8 safe).
                let boundary = content
                    .char_indices()
                    .nth(MAX_CHARS)
                    .map(|(i, _)| i)
                    .unwrap_or(content.len());
                Some(format!("{}…(truncated)", &content[..boundary]))
            } else {
                Some(content)
            }
        }
        // Read errors and whitespace-only files both mean "no context".
        _ => None,
    }
}
378
379impl PawanAgent {
380    /// Create a new PawanAgent with auto-selected backend
381    pub fn new(config: PawanConfig, workspace_root: PathBuf) -> Self {
382        let tools = ToolRegistry::with_defaults(workspace_root.clone());
383        let system_prompt = config.get_system_prompt();
384        let backend = Self::create_backend(&config, &system_prompt);
385        let eruka = if config.eruka.enabled {
386            Some(crate::eruka_bridge::ErukaClient::new(config.eruka.clone()))
387        } else {
388            None
389        };
390        let arch_context = load_arch_context(&workspace_root);
391
392        Self {
393            config,
394            tools,
395            history: Vec::new(),
396            workspace_root,
397            backend,
398            context_tokens_estimate: 0,
399            eruka,
400            session_id: uuid::Uuid::new_v4().to_string(),
401            arch_context,
402            last_tool_call_time: None,
403        }
404    }
405
406    /// Create the appropriate backend based on config.
407    ///
408    /// If `use_ares_backend` is true and the `ares` feature is compiled in,
409    /// delegates to ares-server's LLMClient (unified provider abstraction with
410    /// connection pooling). Otherwise uses pawan's built-in OpenAI-compatible
411    /// backend (the original path).
412    fn create_backend(config: &PawanConfig, system_prompt: &str) -> Box<dyn LlmBackend> {
413        // Local-inference-first cost guard: if enabled and the local server
414        // responds within 100 ms, route all traffic there instead of cloud.
415        if config.local_first {
416            let local_url = config
417                .local_endpoint
418                .clone()
419                .unwrap_or_else(|| "http://localhost:11434/v1".to_string());
420            if probe_local_endpoint(&local_url) {
421                tracing::info!(
422                    url = %local_url,
423                    model = %config.model,
424                    "local_first: local server reachable, using local inference"
425                );
426                return Box::new(OpenAiCompatBackend::new(
427                    backend::openai_compat::OpenAiCompatConfig {
428                        api_url: local_url,
429                        api_key: None,
430                        model: config.model.clone(),
431                        temperature: config.temperature,
432                        top_p: config.top_p,
433                        max_tokens: config.max_tokens,
434                        system_prompt: system_prompt.to_string(),
435                        use_thinking: false,
436                        max_retries: config.max_retries,
437                        fallback_models: Vec::new(),
438                        cloud: None,
439                    },
440                ));
441            }
442            tracing::info!(
443                url = %local_url,
444                "local_first: local server unreachable, falling back to cloud provider"
445            );
446        }
447
448        // Try ares backend first if requested
449        if config.use_ares_backend {
450            if let Some(backend) = Self::try_create_ares_backend(config, system_prompt) {
451                return backend;
452            }
453            tracing::warn!(
454                "use_ares_backend=true but ares backend creation failed; \
455                 falling back to pawan's native backend"
456            );
457        }
458
459        match config.provider {
460        LlmProvider::Nvidia | LlmProvider::OpenAI | LlmProvider::Mlx => {
461            let (api_url, api_key) = match config.provider {
462                LlmProvider::Nvidia => {
463                    let url = std::env::var("NVIDIA_API_URL")
464                        .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
465                    
466                    // Try to get key from env or secure store
467                    let key = get_api_key_with_secure_fallback("NVIDIA_API_KEY", "nvidia_api_key");
468                    
469                    // If no key found, prompt user
470                    let key = if key.is_none() {
471                        prompt_and_store_api_key("NVIDIA_API_KEY", "nvidia_api_key", "NVIDIA")
472                    } else {
473                        key
474                    };
475                    
476                    if key.is_none() {
477                        tracing::warn!("NVIDIA_API_KEY not set. Model calls will fail until a key is provided.");
478                    }
479                    (url, key)
480                },
481                LlmProvider::OpenAI => {
482                    let url = config.base_url.clone()
483                        .or_else(|| std::env::var("OPENAI_API_URL").ok())
484                        .unwrap_or_else(|| "https://api.openai.com/v1".to_string());
485                    
486                    let key = get_api_key_with_secure_fallback("OPENAI_API_KEY", "openai_api_key");
487                    let key = if key.is_none() {
488                        prompt_and_store_api_key("OPENAI_API_KEY", "openai_api_key", "OpenAI")
489                    } else {
490                        key
491                    };
492                    
493                    (url, key)
494                },
495                LlmProvider::Mlx => {
496                    // MLX LM server — Apple Silicon native, always local
497                    let url = config.base_url.clone()
498                        .unwrap_or_else(|| "http://localhost:8080/v1".to_string());
499                    tracing::info!(url = %url, "Using MLX LM server (Apple Silicon native)");
500                    (url, None) // mlx_lm.server requires no API key
501                },
502                _ => unreachable!(),
503            };
504                
505                // Build cloud fallback if configured
506                let cloud = config.cloud.as_ref().map(|c| {
507                    let (cloud_url, cloud_key) = match c.provider {
508                        LlmProvider::Nvidia => {
509                            let url = std::env::var("NVIDIA_API_URL")
510                                .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
511                            let key = get_api_key_with_secure_fallback("NVIDIA_API_KEY", "nvidia_api_key");
512                            (url, key)
513                        },
514                        LlmProvider::OpenAI => {
515                            let url = std::env::var("OPENAI_API_URL")
516                                .unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
517                            let key = get_api_key_with_secure_fallback("OPENAI_API_KEY", "openai_api_key");
518                            (url, key)
519                        },
520                        LlmProvider::Mlx => {
521                            ("http://localhost:8080/v1".to_string(), None)
522                        },
523                        _ => {
524                            tracing::warn!("Cloud fallback only supports nvidia/openai/mlx providers");
525                            ("https://integrate.api.nvidia.com/v1".to_string(), None)
526                        }
527                    };
528                    backend::openai_compat::CloudFallback {
529                        api_url: cloud_url,
530                        api_key: cloud_key,
531                        model: c.model.clone(),
532                        fallback_models: c.fallback_models.clone(),
533                    }
534                });
535
536                Box::new(OpenAiCompatBackend::new(OpenAiCompatConfig {
537                    api_url,
538                    api_key,
539                    model: config.model.clone(),
540                    temperature: config.temperature,
541                    top_p: config.top_p,
542                    max_tokens: config.max_tokens,
543                    system_prompt: system_prompt.to_string(),
544                    // Enforce thinking budget: if set, disable thinking entirely
545                    // and give all tokens to action output
546                    use_thinking: config.thinking_budget == 0 && config.use_thinking_mode(),
547                    max_retries: config.max_retries,
548                    fallback_models: config.fallback_models.clone(),
549                    cloud,
550                }))
551            }
552            LlmProvider::Ollama => {
553                let url = std::env::var("OLLAMA_URL")
554                    .unwrap_or_else(|_| "http://localhost:11434".to_string());
555
556                Box::new(backend::ollama::OllamaBackend::new(
557                    url,
558                    config.model.clone(),
559                    config.temperature,
560                    system_prompt.to_string(),
561                ))
562            }
563        }
564    }
565
566    /// Try to construct an ares-backed LLM backend from pawan config.
567    /// Returns `None` if the provider isn't supported by ares or required
568    /// credentials are missing — the caller should fall back to pawan's
569    /// native backend.
570    fn try_create_ares_backend(
571        config: &PawanConfig,
572        system_prompt: &str,
573    ) -> Option<Box<dyn LlmBackend>> {
574        use ares::llm::client::{ModelParams, Provider};
575
576        // Map pawan LlmProvider → ares Provider variants.
577        // ares supports: OpenAI (with custom base_url), Ollama, LlamaCpp, Anthropic.
578        // Pawan's Nvidia/OpenAI/Mlx all use OpenAI-compatible endpoints, so they
579        // all map to ares Provider::OpenAI with different base URLs.
580        let params = ModelParams {
581            temperature: Some(config.temperature),
582            max_tokens: Some(config.max_tokens as u32),
583            top_p: Some(config.top_p),
584            frequency_penalty: None,
585            presence_penalty: None,
586        };
587
588        let provider = match config.provider {
589            LlmProvider::Nvidia => {
590                let api_base = std::env::var("NVIDIA_API_URL")
591                    .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
592                let api_key = std::env::var("NVIDIA_API_KEY").ok()?;
593                Provider::OpenAI {
594                    api_key,
595                    api_base,
596                    model: config.model.clone(),
597                    params,
598                }
599            }
600            LlmProvider::OpenAI => {
601                let api_base = config
602                    .base_url
603                    .clone()
604                    .or_else(|| std::env::var("OPENAI_API_URL").ok())
605                    .unwrap_or_else(|| "https://api.openai.com/v1".to_string());
606                let api_key = std::env::var("OPENAI_API_KEY").unwrap_or_default();
607                Provider::OpenAI {
608                    api_key,
609                    api_base,
610                    model: config.model.clone(),
611                    params,
612                }
613            }
614            LlmProvider::Mlx => {
615                // MLX LM server is OpenAI-compatible, no API key needed
616                let api_base = config
617                    .base_url
618                    .clone()
619                    .unwrap_or_else(|| "http://localhost:8080/v1".to_string());
620                Provider::OpenAI {
621                    api_key: String::new(),
622                    api_base,
623                    model: config.model.clone(),
624                    params,
625                }
626            }
627            LlmProvider::Ollama => {
628                // Ares Ollama client is async-constructed (async with_params),
629                // which doesn't fit pawan's sync PawanAgent::new path.
630                // Fall back to pawan's native OllamaBackend for now.
631                return None;
632            }
633        };
634
635        // OpenAI variants construct synchronously — we skip the async
636        // Provider::create_client() entirely for sync construction.
637        let client: Box<dyn ares::llm::LLMClient> = match provider {
638            Provider::OpenAI {
639                api_key,
640                api_base,
641                model,
642                params,
643            } => Box::new(ares::llm::openai::OpenAIClient::with_params(
644                api_key, api_base, model, params,
645            )),
646            _ => return None,
647        };
648
649        tracing::info!(
650            provider = ?config.provider,
651            model = %config.model,
652            "Using ares-backed LLM backend"
653        );
654
655        Some(Box::new(backend::ares_backend::AresBackend::new(
656            client,
657            system_prompt.to_string(),
658        )))
659    }
660
661    /// Create with a specific tool registry
662    pub fn with_tools(mut self, tools: ToolRegistry) -> Self {
663        self.tools = tools;
664        self
665    }
666
667    /// Get mutable access to the tool registry (for registering MCP tools)
668    pub fn tools_mut(&mut self) -> &mut ToolRegistry {
669        &mut self.tools
670    }
671
672    /// Create with a custom backend
673    pub fn with_backend(mut self, backend: Box<dyn LlmBackend>) -> Self {
674        self.backend = backend;
675        self
676    }
677
678    /// Get the current conversation history
679    pub fn history(&self) -> &[Message] {
680        &self.history
681    }
682
683    /// Save current conversation as a session, returns session ID
684    pub fn save_session(&self) -> Result<String> {
685        let mut session = session::Session::new(&self.config.model);
686        session.messages = self.history.clone();
687        session.total_tokens = self.context_tokens_estimate as u64;
688        session.save()?;
689        Ok(session.id)
690    }
691
692    /// Resume a saved session by ID
693    pub fn resume_session(&mut self, session_id: &str) -> Result<()> {
694        let session = session::Session::load(session_id)?;
695        self.history = session.messages;
696        self.context_tokens_estimate = session.total_tokens as usize;
697        // Adopt the loaded session's id so eruka writes cluster under the
698        // same key as the on-disk session.
699        self.session_id = session_id.to_string();
700        Ok(())
701    }
702
703    /// Archive the current conversation to Eruka's context store. Safe to
704    /// call from any async context; returns Ok even when eruka is disabled
705    /// or unreachable so callers can fire-and-forget after save_session().
706    pub async fn archive_to_eruka(&self) -> Result<()> {
707        let Some(eruka) = &self.eruka else {
708            return Ok(());
709        };
710        let mut session = session::Session::new(&self.config.model);
711        session.id = self.session_id.clone();
712        session.messages = self.history.clone();
713        session.total_tokens = self.context_tokens_estimate as u64;
714        eruka.archive_session(&session).await
715    }
716
717    /// Build a compact snapshot of the current history for on_pre_compress.
718    /// Keeps message role + first 200 chars per entry so the eruka write
719    /// stays bounded even with huge histories.
720    fn history_snapshot_for_eruka(history: &[Message]) -> String {
721        let mut out = String::with_capacity(2048);
722        for msg in history {
723            let prefix = match msg.role {
724                Role::User => "U: ",
725                Role::Assistant => "A: ",
726                Role::Tool => "T: ",
727                Role::System => "S: ",
728            };
729            let body: String = msg.content.chars().take(200).collect();
730            out.push_str(prefix);
731            out.push_str(&body);
732            out.push('\n');
733            if out.len() > 4000 {
734                break;
735            }
736        }
737        out
738    }
739
740    /// Get the configuration
741    pub fn config(&self) -> &PawanConfig {
742        &self.config
743    }
744
745    /// Clear the conversation history
746    pub fn clear_history(&mut self) {
747        self.history.clear();
748    }
749    /// Prune conversation history to reduce context size.
750    /// Uses importance scoring (inspired by claude-code-rust's consolidation engine):
751    /// - Tool results with errors: high importance (learning from failures)
752    /// - User messages: medium importance (intent context)
753    /// - Successful tool results: low importance (can be re-derived)
754    ///
755    /// Keeps system prompt + last 4 messages, summarizes the rest.
756    fn prune_history(&mut self) {
757        let len = self.history.len();
758        if len <= 5 {
759            return; // Nothing to prune
760        }
761
762        let keep_end = 4;
763        let start = 1; // Skip system prompt at index 0
764        let end = len - keep_end;
765        let pruned_count = end - start;
766
767        // Score messages by importance for summary prioritization
768        let mut scored: Vec<(f32, &Message)> = self.history[start..end]
769            .iter()
770            .map(|msg| {
771                let score = Self::message_importance(msg);
772                (score, msg)
773            })
774            .collect();
775        scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
776
777        // Build summary from highest-importance messages first (UTF-8 safe)
778        let mut summary = String::with_capacity(2048);
779        for (score, msg) in &scored {
780            let prefix = match msg.role {
781                Role::User => "User: ",
782                Role::Assistant => "Assistant: ",
783                Role::Tool => if *score > 0.7 { "Tool error: " } else { "Tool: " },
784                Role::System => "System: ",
785            };
786            let chunk: String = msg.content.chars().take(200).collect();
787            summary.push_str(prefix);
788            summary.push_str(&chunk);
789            summary.push('\n');
790            if summary.len() > 2000 {
791                let safe_end = summary.char_indices()
792                    .take_while(|(i, _)| *i <= 2000)
793                    .last()
794                    .map(|(i, c)| i + c.len_utf8())
795                    .unwrap_or(0);
796                summary.truncate(safe_end);
797                break;
798            }
799        }
800
801        let summary_msg = Message {
802            role: Role::System,
803            content: format!("Previous conversation summary (pruned {} messages, importance-ranked): {}", pruned_count, summary),
804            tool_calls: vec![],
805            tool_result: None,
806        };
807
808        self.history.drain(start..end);
809        self.history.insert(start, summary_msg);
810
811        tracing::info!(pruned = pruned_count, context_estimate = self.context_tokens_estimate, "Pruned messages from history (importance-ranked)");
812    }
813
814    /// Score a message's importance for pruning decisions (0.0-1.0).
815    /// Higher = more important = kept in summary.
816    fn message_importance(msg: &Message) -> f32 {
817        match msg.role {
818            Role::User => 0.6,       // User intent is moderately important
819            Role::System => 0.3,     // System messages are usually ephemeral
820            Role::Assistant => {
821                if msg.content.contains("error") || msg.content.contains("Error") { 0.8 }
822                else { 0.4 }
823            }
824            Role::Tool => {
825                if let Some(ref result) = msg.tool_result {
826                    if !result.success { 0.9 }  // Failed tools are very important (learning)
827                    else { 0.2 }                 // Successful tools can be re-derived
828                } else {
829                    0.3
830                }
831            }
832        }
833    }
834
    /// Add a message to history.
    ///
    /// Appends unconditionally; no pruning or validation happens here —
    /// context-size management is handled by the execution loop.
    pub fn add_message(&mut self, message: Message) {
        self.history.push(message);
    }
839
    /// Switch the LLM model at runtime. Recreates the backend with the new model.
    ///
    /// Updates `config.model`, re-fetches the system prompt from config, and
    /// rebuilds the backend from the updated config. Conversation history is
    /// not touched, so the new model continues the existing session.
    pub fn switch_model(&mut self, model: &str) {
        self.config.model = model.to_string();
        let system_prompt = self.config.get_system_prompt();
        self.backend = Self::create_backend(&self.config, &system_prompt);
        tracing::info!(model = model, "Model switched at runtime");
    }
847
848    /// Get the current model name
849    pub fn model_name(&self) -> &str {
850        &self.config.model
851    }
852
    /// Get tool definitions for the LLM.
    ///
    /// Returns the full definition set from the registry; per-query filtering
    /// (`select_for_query`) happens inside the execution loop, not here.
    pub fn get_tool_definitions(&self) -> Vec<ToolDefinition> {
        self.tools.get_definitions()
    }
857
    /// Execute a single prompt with tool calling support.
    ///
    /// Convenience wrapper around [`Self::execute_with_callbacks`] with no
    /// token, tool, or tool-start callbacks.
    pub async fn execute(&mut self, user_prompt: &str) -> Result<AgentResponse> {
        self.execute_with_callbacks(user_prompt, None, None, None)
            .await
    }
863
    /// Execute with optional callbacks for streaming.
    ///
    /// # Parameters
    /// - `on_token`: invoked with streamed output tokens during generation
    /// - `on_tool`: invoked with each completed tool-call record
    /// - `on_tool_start`: invoked with the tool name as it begins executing
    ///
    /// Forwards to [`Self::execute_with_all_callbacks`] with no permission
    /// callback (`on_permission = None`).
    pub async fn execute_with_callbacks(
        &mut self,
        user_prompt: &str,
        on_token: Option<TokenCallback>,
        on_tool: Option<ToolCallback>,
        on_tool_start: Option<ToolStartCallback>,
    ) -> Result<AgentResponse> {
        self.execute_with_all_callbacks(user_prompt, on_token, on_tool, on_tool_start, None)
            .await
    }
875
876    /// Execute with all callbacks, including permission prompt.
877    pub async fn execute_with_all_callbacks(
878        &mut self,
879        user_prompt: &str,
880        on_token: Option<TokenCallback>,
881        on_tool: Option<ToolCallback>,
882        on_tool_start: Option<ToolStartCallback>,
883        on_permission: Option<PermissionCallback>,
884    ) -> Result<AgentResponse> {
885        // Check if coordinator mode is enabled
886        if self.config.use_coordinator {
887            // Coordinator mode does not support callbacks or permission prompts
888            if on_token.is_some() || on_tool.is_some() || on_tool_start.is_some() || on_permission.is_some() {
889                tracing::warn!(
890                    "Callbacks and permission prompts are not supported in coordinator mode; ignoring them"
891                );
892            }
893            return self.execute_with_coordinator(user_prompt).await;
894        }
895
896        // Reset idle timeout for the new turn
897        self.last_tool_call_time = None;
898
899        // Inject Eruka core memory before first LLM call
900        if let Some(eruka) = &self.eruka {
901            if let Err(e) = eruka.inject_core_memory(&mut self.history).await {
902                tracing::warn!("Eruka memory injection failed (non-fatal): {}", e);
903            }
904
905            // Prefetch task-relevant context: semantic search + compressed
906            // general context. Inject as a system message so the LLM can
907            // draw on prior-session context for the same query. Non-fatal.
908            match eruka.prefetch(user_prompt, 2000).await {
909                Ok(Some(ctx)) => {
910                    self.history.push(Message {
911                        role: Role::System,
912                        content: ctx,
913                        tool_calls: vec![],
914                        tool_result: None,
915                    });
916                }
917                Ok(None) => {}
918                Err(e) => tracing::warn!("Eruka prefetch failed (non-fatal): {}", e),
919            }
920        }
921
922        // Per-turn architecture context injection: prepend .pawan/arch.md content
923        // so key constraints stay visible even as tool-call history grows long.
924        let effective_prompt = match &self.arch_context {
925            Some(ctx) => format!(
926                "[Workspace Architecture]\n{ctx}\n[/Workspace Architecture]\n\n{user_prompt}"
927            ),
928            None => user_prompt.to_string(),
929        };
930
931        self.history.push(Message {
932            role: Role::User,
933            content: effective_prompt,
934            tool_calls: vec![],
935            tool_result: None,
936        });
937
938        let mut all_tool_calls = Vec::new();
939        let mut total_usage = TokenUsage::default();
940        let mut iterations = 0;
941        let max_iterations = self.config.max_tool_iterations;
942
943        loop {
944            // Check idle timeout
945            if let Some(last_time) = self.last_tool_call_time {
946                let elapsed = last_time.elapsed().as_secs();
947                if elapsed > self.config.tool_call_idle_timeout_secs {
948                    return Err(PawanError::Agent(format!(
949                        "Tool idle timeout exceeded ({}s > {}s)",
950                        elapsed, self.config.tool_call_idle_timeout_secs
951                    )));
952                }
953            }
954
955            iterations += 1;
956            if iterations > max_iterations {
957                return Err(PawanError::Agent(format!(
958                    "Max tool iterations ({}) exceeded",
959                    max_iterations
960                )));
961            }
962
963            // Budget awareness: when running low on iterations, nudge the model
964            let remaining = max_iterations.saturating_sub(iterations);
965            if remaining == 3 && iterations > 1 {
966                self.history.push(Message {
967                    role: Role::User,
968                    content: format!(
969                        "[SYSTEM] You have {} tool iterations remaining. \
970                         Stop exploring and write the most important output now. \
971                         If you have code to write, write it immediately.",
972                        remaining
973                    ),
974                    tool_calls: vec![],
975                    tool_result: None,
976                });
977            }
978            // Estimate context tokens
979            self.context_tokens_estimate = self.history.iter().map(|m| m.content.len()).sum::<usize>() / 4;
980            if self.context_tokens_estimate > self.config.max_context_tokens {
981                // Snapshot pre-compression content to Eruka so the facts
982                // being discarded survive the prune. Non-fatal.
983                if let Some(eruka) = &self.eruka {
984                    let snapshot = Self::history_snapshot_for_eruka(&self.history);
985                    if let Err(e) = eruka.on_pre_compress(&snapshot, &self.session_id).await {
986                        tracing::warn!("Eruka on_pre_compress failed (non-fatal): {}", e);
987                    }
988                }
989                self.prune_history();
990            }
991
992            // Dynamic tool selection: pick the most relevant tools for this query
993            // Extract latest user message for keyword matching
994            let latest_query = self.history.iter().rev()
995                .find(|m| m.role == Role::User)
996                .map(|m| m.content.as_str())
997                .unwrap_or("");
998            let tool_defs = self.tools.select_for_query(latest_query, 12);
999            if iterations == 1 {
1000                let tool_names: Vec<&str> = tool_defs.iter().map(|t| t.name.as_str()).collect();
1001                tracing::info!(tools = ?tool_names, count = tool_defs.len(), "Selected tools for query");
1002            }
1003
1004            // Update idle timeout tracker before LLM call to track time spent in generation
1005            self.last_tool_call_time = Some(Instant::now());
1006
1007            // --- Resilient LLM call: retry on transient failures instead of crashing ---
1008            let response = {
1009                #[allow(unused_assignments)]
1010                let mut last_err = None;
1011                let max_llm_retries = 3;
1012                let mut attempt = 0;
1013                loop {
1014                    attempt += 1;
1015                    match self.backend.generate(&self.history, &tool_defs, on_token.as_ref()).await {
1016                        Ok(resp) => break resp,
1017                        Err(e) => {
1018                            let err_str = e.to_string();
1019                            let is_transient = err_str.contains("timeout")
1020                                || err_str.contains("connection")
1021                                || err_str.contains("429")
1022                                || err_str.contains("500")
1023                                || err_str.contains("502")
1024                                || err_str.contains("503")
1025                                || err_str.contains("504")
1026                                || err_str.contains("reset")
1027                                || err_str.contains("broken pipe");
1028
1029                            if is_transient && attempt <= max_llm_retries {
1030                                let delay = std::time::Duration::from_secs(2u64.pow(attempt as u32));
1031                                tracing::warn!(
1032                                    attempt = attempt,
1033                                    delay_secs = delay.as_secs(),
1034                                    error = err_str.as_str(),
1035                                    "LLM call failed (transient) — retrying"
1036                                );
1037                                tokio::time::sleep(delay).await;
1038
1039                                // If context is too large, prune before retry
1040                                if err_str.contains("context") || err_str.contains("token") {
1041                                    tracing::info!("Pruning history before retry (possible context overflow)");
1042                                    if let Some(eruka) = &self.eruka {
1043                                        let snapshot = Self::history_snapshot_for_eruka(&self.history);
1044                                        if let Err(e) = eruka.on_pre_compress(&snapshot, &self.session_id).await {
1045                                            tracing::warn!("Eruka on_pre_compress failed (non-fatal): {}", e);
1046                                        }
1047                                    }
1048                                    self.prune_history();
1049                                }
1050                                continue;
1051                            }
1052
1053                            // Non-transient or max retries exhausted
1054                            last_err = Some(e);
1055                            break {
1056                                // Return a synthetic "give up" response instead of crashing
1057                                tracing::error!(
1058                                    attempt = attempt,
1059                                    error = last_err.as_ref().map(|e| e.to_string()).unwrap_or_default().as_str(),
1060                                    "LLM call failed permanently — returning error as content"
1061                                );
1062                                LLMResponse {
1063                                    content: format!(
1064                                        "LLM error after {} attempts: {}. The task could not be completed.",
1065                                        attempt,
1066                                        last_err.as_ref().map(|e| e.to_string()).unwrap_or_default()
1067                                    ),
1068                                    reasoning: None,
1069                                    tool_calls: vec![],
1070                                    finish_reason: "error".to_string(),
1071                                    usage: None,
1072                                }
1073                            };
1074                        }
1075                    }
1076                }
1077            };
1078
1079            // Accumulate token usage with thinking/action split
1080            if let Some(ref usage) = response.usage {
1081                total_usage.prompt_tokens += usage.prompt_tokens;
1082                total_usage.completion_tokens += usage.completion_tokens;
1083                total_usage.total_tokens += usage.total_tokens;
1084                total_usage.reasoning_tokens += usage.reasoning_tokens;
1085                total_usage.action_tokens += usage.action_tokens;
1086
1087                // Log token budget split per iteration
1088                if usage.reasoning_tokens > 0 {
1089                    tracing::info!(
1090                        iteration = iterations,
1091                        think = usage.reasoning_tokens,
1092                        act = usage.action_tokens,
1093                        total = usage.completion_tokens,
1094                        "Token budget: think:{} act:{} (total:{})",
1095                        usage.reasoning_tokens, usage.action_tokens, usage.completion_tokens
1096                    );
1097                }
1098
1099                // Thinking budget enforcement
1100                let thinking_budget = self.config.thinking_budget;
1101                if thinking_budget > 0 && usage.reasoning_tokens > thinking_budget as u64 {
1102                    tracing::warn!(
1103                        budget = thinking_budget,
1104                        actual = usage.reasoning_tokens,
1105                        "Thinking budget exceeded ({}/{} tokens)",
1106                        usage.reasoning_tokens, thinking_budget
1107                    );
1108                }
1109            }
1110
1111            // --- Guardrail: strip thinking blocks from content ---
1112            let clean_content = {
1113                let mut s = response.content.clone();
1114                loop {
1115                    let lower = s.to_lowercase();
1116                    let open = lower.find("<think>");
1117                    let close = lower.find("</think>");
1118                    match (open, close) {
1119                        (Some(i), Some(j)) if j > i => {
1120                            let before = s[..i].trim_end().to_string();
1121                            let after = if s.len() > j + 8 { s[j + 8..].trim_start().to_string() } else { String::new() };
1122                            s = if before.is_empty() { after } else if after.is_empty() { before } else { format!("{}\n{}", before, after) };
1123                        }
1124                        _ => break,
1125                    }
1126                }
1127                s
1128            };
1129
1130            if response.tool_calls.is_empty() {
1131                // --- Guardrail: detect chatty no-op (content but no tools on early iterations) ---
1132                // Only nudge if tools are available AND response looks like planning text (not a real answer)
1133                let has_tools = !tool_defs.is_empty();
1134                let lower = clean_content.to_lowercase();
1135                let planning_prefix = lower.starts_with("let me")
1136                    || lower.starts_with("i'll help")
1137                    || lower.starts_with("i will help")
1138                    || lower.starts_with("sure, i")
1139                    || lower.starts_with("okay, i");
1140                let looks_like_planning = clean_content.len() > 200 || (planning_prefix && clean_content.len() > 50);
1141                if has_tools && looks_like_planning && iterations == 1 && iterations < max_iterations && response.finish_reason != "error" {
1142                    tracing::warn!(
1143                        "No tool calls at iteration {} (content: {}B) — nudging model to use tools",
1144                        iterations, clean_content.len()
1145                    );
1146                    self.history.push(Message {
1147                        role: Role::Assistant,
1148                        content: clean_content.clone(),
1149                        tool_calls: vec![],
1150                        tool_result: None,
1151                    });
1152                    self.history.push(Message {
1153                        role: Role::User,
1154                        content: "You must use tools to complete this task. Do NOT just describe what you would do — actually call the tools. Start with bash or read_file.".to_string(),
1155                        tool_calls: vec![],
1156                        tool_result: None,
1157                    });
1158                    continue;
1159                }
1160
1161                // --- Guardrail: detect repeated responses ---
1162                if iterations > 1 {
1163                    let prev_assistant = self.history.iter().rev()
1164                        .find(|m| m.role == Role::Assistant && !m.content.is_empty());
1165                    if let Some(prev) = prev_assistant {
1166                        if prev.content.trim() == clean_content.trim() && iterations < max_iterations {
1167                            tracing::warn!("Repeated response detected at iteration {} — injecting correction", iterations);
1168                            self.history.push(Message {
1169                                role: Role::Assistant,
1170                                content: clean_content.clone(),
1171                                tool_calls: vec![],
1172                                tool_result: None,
1173                            });
1174                            self.history.push(Message {
1175                                role: Role::User,
1176                                content: "You gave the same response as before. Try a different approach. Use anchor_text in edit_file_lines, or use insert_after, or use bash with sed.".to_string(),
1177                                tool_calls: vec![],
1178                                tool_result: None,
1179                            });
1180                            continue;
1181                        }
1182                    }
1183                }
1184
1185                self.history.push(Message {
1186                    role: Role::Assistant,
1187                    content: clean_content.clone(),
1188                    tool_calls: vec![],
1189                    tool_result: None,
1190                });
1191
1192                // Persist this completed turn to Eruka so future prefetches
1193                // and sessions can pull from it. Non-fatal on any error.
1194                if let Some(eruka) = &self.eruka {
1195                    if let Err(e) = eruka
1196                        .sync_turn(user_prompt, &clean_content, &self.session_id)
1197                        .await
1198                    {
1199                        tracing::warn!("Eruka sync_turn failed (non-fatal): {}", e);
1200                    }
1201                }
1202
1203                return Ok(AgentResponse {
1204                    content: clean_content,
1205                    tool_calls: all_tool_calls,
1206                    iterations,
1207                    usage: total_usage,
1208                });
1209            }
1210
1211            self.history.push(Message {
1212                role: Role::Assistant,
1213                content: response.content.clone(),
1214                tool_calls: response.tool_calls.clone(),
1215                tool_result: None,
1216            });
1217
1218            for tool_call in &response.tool_calls {
1219                // Auto-activate extended tools on first use (makes them visible in next iteration)
1220                self.tools.activate(&tool_call.name);
1221
1222                // Check permission (Deny and Prompt-in-headless both block)
1223                let perm = crate::config::ToolPermission::resolve(
1224                    &tool_call.name, &self.config.permissions
1225                );
1226                let denied = match perm {
1227                    crate::config::ToolPermission::Deny => Some("Tool denied by permission policy"),
1228                    crate::config::ToolPermission::Prompt => {
1229                        // For bash: auto-allow read-only commands even under Prompt
1230                        if tool_call.name == "bash" {
1231                            if let Some(cmd) = tool_call.arguments.get("command").and_then(|v| v.as_str()) {
1232                                if crate::tools::bash::is_read_only(cmd) {
1233                                    tracing::debug!(command = cmd, "Auto-allowing read-only bash command under Prompt permission");
1234                                    None
1235                                } else if let Some(ref perm_cb) = on_permission {
1236                                    // Ask TUI for approval
1237                                    let args_summary = cmd.chars().take(120).collect::<String>();
1238                                    let rx = perm_cb(PermissionRequest {
1239                                        tool_name: tool_call.name.clone(),
1240                                        args_summary,
1241                                    });
1242                                    match rx.await {
1243                                        Ok(true) => None,
1244                                        _ => Some("User denied tool execution"),
1245                                    }
1246                                } else {
1247                                    Some("Bash command requires user approval (read-only commands auto-allowed)")
1248                                }
1249                            } else {
1250                                Some("Tool requires user approval")
1251                            }
1252                        } else if let Some(ref perm_cb) = on_permission {
1253                            // Ask TUI for approval
1254                            let args_summary = tool_call.arguments.to_string().chars().take(120).collect::<String>();
1255                            let rx = perm_cb(PermissionRequest {
1256                                tool_name: tool_call.name.clone(),
1257                                args_summary,
1258                            });
1259                            match rx.await {
1260                                Ok(true) => None,
1261                                _ => Some("User denied tool execution"),
1262                            }
1263                        } else {
1264                            // Headless = deny for safety
1265                            Some("Tool requires user approval (set permission to 'allow' or use TUI mode)")
1266                        }
1267                    }
1268                    crate::config::ToolPermission::Allow => None,
1269                };
1270                if let Some(reason) = denied {
1271                    let record = ToolCallRecord {
1272                        id: tool_call.id.clone(),
1273                        name: tool_call.name.clone(),
1274                        arguments: tool_call.arguments.clone(),
1275                        result: json!({"error": reason}),
1276                        success: false,
1277                        duration_ms: 0,
1278                    };
1279
1280                    if let Some(ref callback) = on_tool {
1281                        callback(&record);
1282                    }
1283                    all_tool_calls.push(record);
1284
1285                    self.history.push(Message {
1286                        role: Role::Tool,
1287                        content: format!("{{\"error\": \"{}\"}}", reason),
1288                        tool_calls: vec![],
1289                        tool_result: Some(ToolResultMessage {
1290                            tool_call_id: tool_call.id.clone(),
1291                            content: json!({"error": reason}),
1292                            success: false,
1293                        }),
1294                    });
1295                    continue;
1296                }
1297
1298                // Notify tool start
1299                if let Some(ref callback) = on_tool_start {
1300                    callback(&tool_call.name);
1301                }
1302
1303                // Debug: log tool call args for diagnosis
1304                tracing::debug!(
1305                    tool = tool_call.name.as_str(),
1306                    args_len = serde_json::to_string(&tool_call.arguments).unwrap_or_default().len(),
1307                    "Tool call: {}({})",
1308                    tool_call.name,
1309                    serde_json::to_string(&tool_call.arguments)
1310                        .unwrap_or_default()
1311                        .chars()
1312                        .take(200)
1313                        .collect::<String>()
1314                );
1315
1316                // Validate tool arguments using thulp-core (DRY: reuse thulp's validation)
1317                if let Some(tool) = self.tools.get(&tool_call.name) {
1318                    let schema = tool.parameters_schema();
1319                    if let Ok(params) = thulp_core::ToolDefinition::parse_mcp_input_schema(&schema) {
1320                        let thulp_def = thulp_core::ToolDefinition {
1321                            name: tool_call.name.clone(),
1322                            description: String::new(),
1323                            parameters: params,
1324                        };
1325                        if let Err(e) = thulp_def.validate_args(&tool_call.arguments) {
1326                            tracing::warn!(
1327                                tool = tool_call.name.as_str(),
1328                                error = %e,
1329                                "Tool argument validation failed (continuing anyway)"
1330                            );
1331                        }
1332                    }
1333                }
1334
1335                let start = std::time::Instant::now();
1336
1337                // Check permission for mutating tools
1338                let tool = self.tools.get(&tool_call.name);
1339                let is_mutating = tool.map(|t| t.mutating()).unwrap_or(false);
1340                if is_mutating {
1341                    if let Some(ref callback) = on_permission {
1342                        let args_summary = summarize_args(&tool_call.arguments);
1343                        let request = PermissionRequest {
1344                            tool_name: tool_call.name.clone(),
1345                            args_summary,
1346                        };
1347                        let permission_rx = (callback)(request);
1348                        match permission_rx.await {
1349                            Ok(true) => {
1350                                // Permission granted, continue with execution
1351                            }
1352                            Ok(false) => {
1353                                // Permission denied, skip this tool call
1354                                tracing::info!(tool = tool_call.name.as_str(), "Tool execution denied by user");
1355                                let record = ToolCallRecord {
1356                                    id: tool_call.id.clone(),
1357                                    name: tool_call.name.clone(),
1358                                    arguments: tool_call.arguments.clone(),
1359                                    result: json!({"error": "Tool execution denied by user", "tool": tool_call.name}),
1360                                    success: false,
1361                                    duration_ms: 0,
1362                                };
1363                                if let Some(ref callback) = on_tool {
1364                                    callback(&record);
1365                                }
1366                                continue;
1367                            }
1368                            Err(_) => {
1369                                let record = ToolCallRecord {
1370                                    id: tool_call.id.clone(),
1371                                    name: tool_call.name.clone(),
1372                                    arguments: tool_call.arguments.clone(),
1373                                    result: json!({"error": "Permission channel closed", "tool": tool_call.name}),
1374                                    success: false,
1375                                    duration_ms: 0,
1376                                };
1377                                if let Some(ref callback) = on_tool {
1378                                    callback(&record);
1379                                }
1380                                continue;
1381                            }
1382                        }
1383                    } else {
1384                        tracing::warn!(tool = tool_call.name.as_str(), "No permission callback, auto-approving mutating tool");
1385                    }
1386                }
1387
1388                // Resilient tool execution: catch panics + errors
1389                let result = {
1390                    let tool_future = self.tools.execute(&tool_call.name, tool_call.arguments.clone());
1391                    // Timeout individual tool calls (prevent hangs)
1392                    let timeout_dur = if tool_call.name == "bash" {
1393                        std::time::Duration::from_secs(self.config.bash_timeout_secs)
1394                    } else {
1395                        std::time::Duration::from_secs(30)
1396                    };
1397                    match tokio::time::timeout(timeout_dur, tool_future).await {
1398                        Ok(inner) => inner,
1399                        Err(_) => Err(PawanError::Tool(format!(
1400                            "Tool '{}' timed out after {}s", tool_call.name, timeout_dur.as_secs()
1401                        ))),
1402                    }
1403                };
1404                let duration_ms = start.elapsed().as_millis() as u64;
1405
1406                let (result_value, success) = match result {
1407                    Ok(v) => (v, true),
1408                    Err(e) => {
1409                        tracing::warn!(tool = tool_call.name.as_str(), error = %e, "Tool execution failed");
1410                        (json!({"error": e.to_string(), "tool": tool_call.name, "hint": "Try a different approach or tool"}), false)
1411                    }
1412                };
1413
1414                // Truncate tool results that exceed max chars to prevent context bloat
1415                let max_result_chars = self.config.max_result_chars;
1416                let result_value = truncate_tool_result(result_value, max_result_chars);
1417
1418
1419                let record = ToolCallRecord {
1420                    id: tool_call.id.clone(),
1421                    name: tool_call.name.clone(),
1422                    arguments: tool_call.arguments.clone(),
1423                    result: result_value.clone(),
1424                    success,
1425                    duration_ms,
1426                };
1427
1428                if let Some(ref callback) = on_tool {
1429                    callback(&record);
1430                }
1431
1432                all_tool_calls.push(record);
1433
1434                self.history.push(Message {
1435                    role: Role::Tool,
1436                    content: serde_json::to_string(&result_value).unwrap_or_default(),
1437                    tool_calls: vec![],
1438                    tool_result: Some(ToolResultMessage {
1439                        tool_call_id: tool_call.id.clone(),
1440                        content: result_value,
1441                        success,
1442                    }),
1443                });
1444
1445                // Compile-gated confidence: after writing a .rs file, auto-run cargo check
1446                // and inject the result so the model can self-correct on the same iteration
1447                if success && tool_call.name == "write_file" {
1448                    let wrote_rs = tool_call.arguments.get("path")
1449                        .and_then(|p| p.as_str())
1450                        .map(|p| p.ends_with(".rs"))
1451                        .unwrap_or(false);
1452                    if wrote_rs {
1453                        let ws = self.workspace_root.clone();
1454                        let check_result = tokio::process::Command::new("cargo")
1455                            .arg("check")
1456                            .arg("--message-format=short")
1457                            .current_dir(&ws)
1458                            .output()
1459                            .await;
1460                        match check_result {
1461                            Ok(output) if !output.status.success() => {
1462                                let stderr = String::from_utf8_lossy(&output.stderr);
1463                                // Only inject first 1500 chars of errors to avoid context bloat
1464                                let err_msg: String = stderr.chars().take(1500).collect();
1465                                tracing::info!("Compile-gate: cargo check failed after write_file, injecting errors");
1466                                self.history.push(Message {
1467                                    role: Role::User,
1468                                    content: format!(
1469                                        "[SYSTEM] cargo check failed after your write_file. Fix the errors:\n```\n{}\n```",
1470                                        err_msg
1471                                    ),
1472                                    tool_calls: vec![],
1473                                    tool_result: None,
1474                                });
1475                            }
1476                            Ok(_) => {
1477                                tracing::debug!("Compile-gate: cargo check passed");
1478                            }
1479                            Err(e) => {
1480                                tracing::warn!("Compile-gate: cargo check failed to run: {}", e);
1481                            }
1482                        }
1483                    }
1484                }
1485            }
1486        }
1487    }
1488
1489    /// Execute using the ToolCoordinator instead of the built-in loop.
1490    ///
1491    /// This method provides an alternative implementation that uses the
1492    /// ToolCoordinator for tool-calling loops, which offers:
1493    /// - Parallel tool execution
1494    /// - Per-tool timeout handling
1495    /// - Consistent error handling
1496    /// - Max iteration limits
1497    ///
1498    /// Note: This method does not support streaming callbacks or permission
1499    /// prompts - those are only available in the built-in loop.
1500    async fn execute_with_coordinator(&mut self, user_prompt: &str) -> Result<AgentResponse> {
1501        // Reset idle timeout for the new turn
1502        self.last_tool_call_time = None;
1503
1504        // Inject Eruka core memory before first LLM call
1505        if let Some(eruka) = &self.eruka {
1506            if let Err(e) = eruka.inject_core_memory(&mut self.history).await {
1507                tracing::warn!("Eruka memory injection failed (non-fatal): {}", e);
1508            }
1509
1510            // Prefetch task-relevant context
1511            match eruka.prefetch(user_prompt, 2000).await {
1512                Ok(Some(ctx)) => {
1513                    self.history.push(Message {
1514                        role: Role::System,
1515                        content: ctx,
1516                        tool_calls: vec![],
1517                        tool_result: None,
1518                    });
1519                }
1520                Ok(None) => {}
1521                Err(e) => tracing::warn!("Eruka prefetch failed (non-fatal): {}", e),
1522            }
1523        }
1524
1525        // Per-turn architecture context injection
1526        let effective_prompt = match &self.arch_context {
1527            Some(ctx) => format!(
1528                "[Workspace Architecture]\n{ctx}\n[/Workspace Architecture]\n\n{user_prompt}"
1529            ),
1530            None => user_prompt.to_string(),
1531        };
1532
1533        // Build coordinator config from agent config
1534        let coordinator_config = ToolCallingConfig {
1535            max_iterations: self.config.max_tool_iterations,
1536            parallel_execution: true,
1537            tool_timeout: std::time::Duration::from_secs(self.config.bash_timeout_secs),
1538            stop_on_error: false,
1539        };
1540
1541        // Create a fresh backend for coordinator execution
1542        let system_prompt = self.config.get_system_prompt();
1543        let backend = Self::create_backend(&self.config, &system_prompt);
1544        let backend = Arc::from(backend);
1545
1546        // Create a fresh tool registry for coordinator execution
1547        // Note: This will not include any MCP tools registered at runtime
1548        let registry = Arc::new(ToolRegistry::with_defaults(self.workspace_root.clone()));
1549
1550        // Create coordinator with backend and tool registry
1551        let coordinator = ToolCoordinator::new(backend, registry, coordinator_config);
1552
1553        // Execute with coordinator
1554        let result: CoordinatorResult = coordinator
1555            .execute(Some(&system_prompt), &effective_prompt)
1556            .await
1557            .map_err(|e| PawanError::Agent(format!("Coordinator execution failed: {}", e)))?;
1558
1559        // Convert CoordinatorResult to AgentResponse
1560        let content = result.content.clone();
1561        let agent_response = AgentResponse {
1562            content: result.content,
1563            tool_calls: result.tool_calls,
1564            iterations: result.iterations,
1565            usage: result.total_usage,
1566        };
1567
1568        // Sync turn to Eruka if enabled
1569        if let Some(eruka) = &self.eruka {
1570            if let Err(e) = eruka
1571                .sync_turn(user_prompt, &content, &self.session_id)
1572                .await
1573            {
1574                tracing::warn!("Eruka sync_turn failed (non-fatal): {}", e);
1575            }
1576        }
1577
1578        Ok(agent_response)
1579    }
1580
1581    /// Execute a healing task with real diagnostics
1582    pub async fn heal(&mut self) -> Result<AgentResponse> {
1583        let healer = crate::healing::Healer::new(
1584            self.workspace_root.clone(),
1585            self.config.healing.clone(),
1586        );
1587
1588        let diagnostics = healer.get_diagnostics().await?;
1589        let failed_tests = healer.get_failed_tests().await?;
1590
1591        let mut prompt = format!(
1592            "I need you to heal this Rust project at: {}
1593
1594",
1595            self.workspace_root.display()
1596        );
1597
1598        if !diagnostics.is_empty() {
1599            prompt.push_str(&format!(
1600                "## Compilation Issues ({} found)
1601{}
1602",
1603                diagnostics.len(),
1604                healer.format_diagnostics_for_prompt(&diagnostics)
1605            ));
1606        }
1607
1608        if !failed_tests.is_empty() {
1609            prompt.push_str(&format!(
1610                "## Failed Tests ({} found)
1611{}
1612",
1613                failed_tests.len(),
1614                healer.format_tests_for_prompt(&failed_tests)
1615            ));
1616        }
1617
1618        if diagnostics.is_empty() && failed_tests.is_empty() {
1619            prompt.push_str("No issues found! Run cargo check and cargo test to verify.
1620");
1621        }
1622
1623        prompt.push_str("
1624Fix each issue one at a time. Verify with cargo check after each fix.");
1625
1626        self.execute(&prompt).await
1627    }
1628    /// Execute healing with retries — calls heal(), checks for remaining errors, retries if needed.
1629    ///
1630    /// Two-stage gate:
1631    ///   Stage 1 — `cargo check`: must produce zero errors before proceeding.
1632    ///   Stage 2 — `healing.verify_cmd` (optional): a user-supplied shell command
1633    ///             (e.g. `cargo test --workspace`).  If it exits non-zero the loop
1634    ///             continues so the LLM can address the reported failures.
1635    ///
1636    /// Anti-thrash guard: each Stage-1 error is fingerprinted (kind + code +
1637    /// message prefix).  If the same fingerprint survives `max_attempts`
1638    /// consecutive rounds unchanged the loop halts rather than spinning
1639    /// indefinitely on an error the LLM cannot fix.
1640    pub async fn heal_with_retries(&mut self, max_attempts: usize) -> Result<AgentResponse> {
1641        use std::collections::{HashMap, HashSet};
1642
1643        let mut last_response = self.heal().await?;
1644        // fingerprint → consecutive rounds this error has survived unchanged
1645        let mut stuck_counts: HashMap<u64, usize> = HashMap::new();
1646
1647        for attempt in 1..max_attempts {
1648            // Stage 1: cargo check must be error-free
1649            let fixer = crate::healing::CompilerFixer::new(self.workspace_root.clone());
1650            let remaining = fixer.check().await?;
1651            let errors: Vec<_> = remaining
1652                .iter()
1653                .filter(|d| d.kind == crate::healing::DiagnosticKind::Error)
1654                .collect();
1655
1656            if !errors.is_empty() {
1657                // Update fingerprint counts.
1658                // Drop entries for errors that were fixed; increment survivors.
1659                let current_fps: HashSet<u64> = errors.iter().map(|d| d.fingerprint()).collect();
1660                stuck_counts.retain(|fp, _| current_fps.contains(fp));
1661                for fp in &current_fps {
1662                    *stuck_counts.entry(*fp).or_insert(0) += 1;
1663                }
1664
1665                // Anti-thrash: halt if any error fingerprint has not budged
1666                // after max_attempts consecutive rounds.
1667                let thrashing: Vec<u64> = stuck_counts
1668                    .iter()
1669                    .filter_map(|(&fp, &count)| if count >= max_attempts { Some(fp) } else { None })
1670                    .collect();
1671                if !thrashing.is_empty() {
1672                    tracing::warn!(
1673                        stuck_fingerprints = thrashing.len(),
1674                        attempt,
1675                        "Anti-thrash: {} error(s) unchanged after {} attempts, halting heal loop",
1676                        thrashing.len(),
1677                        max_attempts
1678                    );
1679                    return Ok(last_response);
1680                }
1681
1682                tracing::warn!(
1683                    errors = errors.len(),
1684                    attempt,
1685                    "Stage 1 (cargo check): errors remain, retrying"
1686                );
1687                last_response = self.heal().await?;
1688                continue;
1689            }
1690
1691            // All Stage-1 errors cleared — reset thrash counters.
1692            stuck_counts.clear();
1693
1694            // Stage 2: optional verify_cmd
1695            let verify_cmd = self.config.healing.verify_cmd.clone();
1696            if let Some(ref cmd) = verify_cmd {
1697                match crate::healing::run_verify_cmd(&self.workspace_root, cmd).await {
1698                    Ok(None) => {
1699                        tracing::info!(attempts = attempt, "Stage 2 (verify_cmd) passed, healing complete");
1700                        return Ok(last_response);
1701                    }
1702                    Ok(Some(diag)) => {
1703                        tracing::warn!(
1704                            attempt,
1705                            cmd,
1706                            output = diag.raw,
1707                            "Stage 2 (verify_cmd) failed, retrying"
1708                        );
1709                        last_response = self.heal().await?;
1710                        continue;
1711                    }
1712                    Err(e) => {
1713                        // Cannot spawn the command — don't block healing on this
1714                        tracing::warn!(cmd, error = %e, "verify_cmd could not be run, skipping stage 2");
1715                        return Ok(last_response);
1716                    }
1717                }
1718            } else {
1719                tracing::info!(attempts = attempt, "Stage 1 (cargo check) passed, healing complete");
1720                return Ok(last_response);
1721            }
1722        }
1723
1724        tracing::info!(attempts = max_attempts, "Healing finished (may still have errors)");
1725        Ok(last_response)
1726    }
1727    /// Execute a task with a specific prompt
1728    pub async fn task(&mut self, task_description: &str) -> Result<AgentResponse> {
1729        let prompt = format!(
1730            r#"I need you to complete the following coding task:
1731
1732{}
1733
1734The workspace is at: {}
1735
1736Please:
17371. First explore the codebase to understand the relevant code
17382. Make the necessary changes
17393. Verify the changes compile with `cargo check`
17404. Run relevant tests if applicable
1741
1742Explain your changes as you go."#,
1743            task_description,
1744            self.workspace_root.display()
1745        );
1746
1747        self.execute(&prompt).await
1748    }
1749
1750    /// Generate a commit message for current changes
1751    pub async fn generate_commit_message(&mut self) -> Result<String> {
1752        let prompt = r#"Please:
17531. Run `git status` to see what files are changed
17542. Run `git diff --cached` to see staged changes (or `git diff` for unstaged)
17553. Generate a concise, descriptive commit message following conventional commits format
1756
1757Only output the suggested commit message, nothing else."#;
1758
1759        let response = self.execute(prompt).await?;
1760        Ok(response.content)
1761    }
1762}
1763
1764/// Truncate a tool result JSON value to fit within max_chars.
1765/// Unlike naive string truncation (which breaks JSON), this truncates string
1766/// *values* within the JSON structure, preserving valid JSON output.
1767fn truncate_tool_result(value: Value, max_chars: usize) -> Value {
1768    let serialized = serde_json::to_string(&value).unwrap_or_default();
1769    if serialized.len() <= max_chars {
1770        return value;
1771    }
1772
1773    // Strategy: find the largest string values and truncate them
1774    match value {
1775        Value::Object(map) => {
1776            let mut result = serde_json::Map::new();
1777            let total = serialized.len();
1778            for (k, v) in map {
1779                if let Value::String(s) = &v {
1780                    if s.len() > 500 {
1781                        // Proportional truncation: shrink large strings
1782                        let target = s.len() * max_chars / total;
1783                        let target = target.max(200); // Keep at least 200 chars
1784                        let truncated: String = s.chars().take(target).collect();
1785                        result.insert(k, json!(format!("{}...[truncated from {} chars]", truncated, s.len())));
1786                        continue;
1787                    }
1788                }
1789                // Recursively truncate nested structures
1790                result.insert(k, truncate_tool_result(v, max_chars));
1791            }
1792            Value::Object(result)
1793        }
1794        Value::String(s) if s.len() > max_chars => {
1795            let truncated: String = s.chars().take(max_chars).collect();
1796            json!(format!("{}...[truncated from {} chars]", truncated, s.len()))
1797        }
1798        Value::Array(arr) if serialized.len() > max_chars => {
1799            // Truncate array: keep first N items that fit
1800            let mut result = Vec::new();
1801            let mut running_len = 2; // "[]"
1802            for item in arr {
1803                let item_str = serde_json::to_string(&item).unwrap_or_default();
1804                running_len += item_str.len() + 1; // +1 for comma
1805                if running_len > max_chars {
1806                    result.push(json!(format!("...[{} more items truncated]", 0)));
1807                    break;
1808                }
1809                result.push(item);
1810            }
1811            Value::Array(result)
1812        }
1813        other => other,
1814    }
1815}
1816
1817#[cfg(test)]
1818mod tests {
1819    use super::*;
1820    use std::sync::Arc;
1821    use crate::agent::backend::mock::{MockBackend, MockResponse};
1822
1823    #[test]
1824    fn test_message_serialization() {
1825        let msg = Message {
1826            role: Role::User,
1827            content: "Hello".to_string(),
1828            tool_calls: vec![],
1829            tool_result: None,
1830        };
1831
1832        let json = serde_json::to_string(&msg).expect("Serialization failed");
1833        assert!(json.contains("user"));
1834        assert!(json.contains("Hello"));
1835    }
1836
1837    #[test]
1838    fn test_tool_call_request() {
1839        let tc = ToolCallRequest {
1840            id: "123".to_string(),
1841            name: "read_file".to_string(),
1842            arguments: json!({"path": "test.txt"}),
1843        };
1844
1845        let json = serde_json::to_string(&tc).expect("Serialization failed");
1846        assert!(json.contains("read_file"));
1847        assert!(json.contains("test.txt"));
1848    }
1849
1850    /// Helper to build an agent with N messages for prune testing.
1851    /// History starts empty; we add a system prompt + (n-1) user/assistant messages = n total.
1852    fn agent_with_messages(n: usize) -> PawanAgent {
1853        let config = PawanConfig::default();
1854        let mut agent = PawanAgent::new(config, PathBuf::from("."));
1855        // Add system prompt as message 0
1856        agent.add_message(Message {
1857            role: Role::System,
1858            content: "System prompt".to_string(),
1859            tool_calls: vec![],
1860            tool_result: None,
1861        });
1862        for i in 1..n {
1863            agent.add_message(Message {
1864                role: if i % 2 == 1 { Role::User } else { Role::Assistant },
1865                content: format!("Message {}", i),
1866                tool_calls: vec![],
1867                tool_result: None,
1868            });
1869        }
1870        assert_eq!(agent.history().len(), n);
1871        agent
1872    }
1873
    // Below a small history (<= 5 messages) pruning must be a no-op.
    #[test]
    fn test_prune_history_no_op_when_small() {
        let mut agent = agent_with_messages(5);
        agent.prune_history();
        assert_eq!(agent.history().len(), 5, "Should not prune <= 5 messages");
    }

    // Pins the pruned shape: system prompt + 1 summary + last 4 messages.
    // (prune_history is defined elsewhere in this file; these counts encode
    // its contract.)
    #[test]
    fn test_prune_history_reduces_messages() {
        let mut agent = agent_with_messages(12);
        assert_eq!(agent.history().len(), 12);
        agent.prune_history();
        // Should keep: system prompt (1) + summary (1) + last 4 = 6
        assert_eq!(agent.history().len(), 6);
    }

    // The system prompt (index 0) must never be dropped or rewritten.
    #[test]
    fn test_prune_history_preserves_system_prompt() {
        let mut agent = agent_with_messages(10);
        let original_system = agent.history()[0].content.clone();
        agent.prune_history();
        assert_eq!(agent.history()[0].content, original_system, "System prompt must survive pruning");
    }

    // The most recent 4 messages must survive verbatim, shifted to follow
    // [system, summary] at the front.
    #[test]
    fn test_prune_history_preserves_last_messages() {
        let mut agent = agent_with_messages(10);
        // Last 4 messages are at indices 6..10 with content "Message 6".."Message 9"
        let last4: Vec<String> = agent.history()[6..10].iter().map(|m| m.content.clone()).collect();
        agent.prune_history();
        // After pruning: [system, summary, msg6, msg7, msg8, msg9]
        let after_last4: Vec<String> = agent.history()[2..6].iter().map(|m| m.content.clone()).collect();
        assert_eq!(last4, after_last4, "Last 4 messages must be preserved after pruning");
    }

    // The summary inserted at index 1 is a System message mentioning "summary".
    #[test]
    fn test_prune_history_inserts_summary() {
        let mut agent = agent_with_messages(10);
        agent.prune_history();
        assert_eq!(agent.history()[1].role, Role::System);
        assert!(agent.history()[1].content.contains("summary"), "Summary message should contain 'summary'");
    }
1916
1917    #[test]
1918    fn test_prune_history_utf8_safe() {
1919        let config = PawanConfig::default();
1920        let mut agent = PawanAgent::new(config, PathBuf::from("."));
1921        // Add system prompt + 10 messages with multi-byte UTF-8 characters
1922        agent.add_message(Message {
1923            role: Role::System, content: "sys".into(), tool_calls: vec![], tool_result: None,
1924        });
1925        for _ in 0..10 {
1926            agent.add_message(Message {
1927                role: Role::User,
1928                content: "こんにちは世界 🌍 ".repeat(50),
1929                tool_calls: vec![],
1930                tool_result: None,
1931            });
1932        }
1933        // This should not panic on char boundary issues
1934        agent.prune_history();
1935        assert!(agent.history().len() < 11, "Should have pruned");
1936        // Verify summary is valid UTF-8
1937        let summary = &agent.history()[1].content;
1938        assert!(summary.is_char_boundary(0));
1939    }
1940
    // Boundary case: one message past the no-op threshold still prunes,
    // landing on the same 6-message shape as larger histories.
    #[test]
    fn test_prune_history_exactly_6_messages() {
        // 6 messages = 1 more than the no-op threshold of 5
        let mut agent = agent_with_messages(6);
        agent.prune_history();
        // Prunes 1 middle message, replaced by summary: system(1) + summary(1) + last 4 = 6
        assert_eq!(agent.history().len(), 6);
    }

    // Every Role variant must survive a serde round-trip (lowercase rename).
    #[test]
    fn test_message_role_roundtrip() {
        for role in [Role::User, Role::Assistant, Role::System, Role::Tool] {
            let json = serde_json::to_string(&role).unwrap();
            let back: Role = serde_json::from_str(&json).unwrap();
            assert_eq!(role, back);
        }
    }

    // AgentResponse is a plain data carrier; fields read back as constructed.
    #[test]
    fn test_agent_response_construction() {
        let resp = AgentResponse {
            content: String::new(),
            tool_calls: vec![],
            iterations: 3,
            usage: TokenUsage::default(),
        };
        assert!(resp.content.is_empty());
        assert!(resp.tool_calls.is_empty());
        assert_eq!(resp.iterations, 3);
    }
1971
1972    // --- truncate_tool_result tests ---
1973
1974    #[test]
1975    fn test_truncate_small_result_unchanged() {
1976        let val = json!({"success": true, "output": "hello"});
1977        let result = truncate_tool_result(val.clone(), 8000);
1978        assert_eq!(result, val);
1979    }
1980
1981    #[test]
1982    fn test_truncate_large_string_value() {
1983        let big = "x".repeat(10000);
1984        let val = json!({"stdout": big, "success": true});
1985        let result = truncate_tool_result(val, 2000);
1986        let stdout = result["stdout"].as_str().unwrap();
1987        assert!(stdout.len() < 10000, "Should be truncated");
1988        assert!(stdout.contains("truncated"), "Should indicate truncation");
1989    }
1990
1991    #[test]
1992    fn test_truncate_preserves_valid_json() {
1993        let big = "x".repeat(20000);
1994        let val = json!({"data": big, "meta": "keep"});
1995        let result = truncate_tool_result(val, 5000);
1996        // Result should be valid JSON (no broken strings)
1997        let serialized = serde_json::to_string(&result).unwrap();
1998        let _reparsed: Value = serde_json::from_str(&serialized).unwrap();
1999        // meta should be preserved (it's small)
2000        assert_eq!(result["meta"], "keep");
2001    }
2002
2003    #[test]
2004    fn test_truncate_bare_string() {
2005        let big = json!("x".repeat(10000));
2006        let result = truncate_tool_result(big, 500);
2007        let s = result.as_str().unwrap();
2008        assert!(s.len() <= 600); // 500 + truncation notice
2009        assert!(s.contains("truncated"));
2010    }
2011
2012    #[test]
2013    fn test_truncate_array() {
2014        let items: Vec<Value> = (0..1000).map(|i| json!(format!("item_{}", i))).collect();
2015        let val = Value::Array(items);
2016        let result = truncate_tool_result(val, 500);
2017        let arr = result.as_array().unwrap();
2018        assert!(arr.len() < 1000, "Array should be truncated");
2019    }
2020
    // --- message_importance tests ---
    // message_importance is defined elsewhere in this file; these tests pin
    // its score bands and relative ordering rather than exact values.

    // A tool result with success == false must score above 0.8.
    #[test]
    fn test_importance_failed_tool_highest() {
        let msg = Message {
            role: Role::Tool,
            content: "error".into(),
            tool_calls: vec![],
            tool_result: Some(ToolResultMessage {
                tool_call_id: "1".into(),
                content: json!({"error": "failed"}),
                success: false,
            }),
        };
        assert!(PawanAgent::message_importance(&msg) > 0.8, "Failed tools should be high importance");
    }

    // A successful tool result must score below 0.3 (safe to prune first).
    #[test]
    fn test_importance_successful_tool_lowest() {
        let msg = Message {
            role: Role::Tool,
            content: "ok".into(),
            tool_calls: vec![],
            tool_result: Some(ToolResultMessage {
                tool_call_id: "1".into(),
                content: json!({"success": true}),
                success: true,
            }),
        };
        assert!(PawanAgent::message_importance(&msg) < 0.3, "Successful tools should be low importance");
    }

    // Plain user messages land in the middle band (0.4, 0.8).
    #[test]
    fn test_importance_user_medium() {
        let msg = Message { role: Role::User, content: "hello".into(), tool_calls: vec![], tool_result: None };
        let score = PawanAgent::message_importance(&msg);
        assert!(score > 0.4 && score < 0.8, "User messages should be medium: {}", score);
    }

    // Assistant messages whose text looks like an error score above 0.7 —
    // presumably detected via content inspection; pinned here, not shown.
    #[test]
    fn test_importance_error_assistant_high() {
        let msg = Message { role: Role::Assistant, content: "Error: something failed".into(), tool_calls: vec![], tool_result: None };
        assert!(PawanAgent::message_importance(&msg) > 0.7, "Error assistant messages should be high importance");
    }

    // Relative ordering across categories: failed tool > user > successful tool.
    #[test]
    fn test_importance_ordering() {
        let failed_tool = Message { role: Role::Tool, content: "err".into(), tool_calls: vec![], tool_result: Some(ToolResultMessage { tool_call_id: "1".into(), content: json!({}), success: false }) };
        let user = Message { role: Role::User, content: "hi".into(), tool_calls: vec![], tool_result: None };
        let ok_tool = Message { role: Role::Tool, content: "ok".into(), tool_calls: vec![], tool_result: Some(ToolResultMessage { tool_call_id: "2".into(), content: json!({}), success: true }) };

        let f = PawanAgent::message_importance(&failed_tool);
        let u = PawanAgent::message_importance(&user);
        let s = PawanAgent::message_importance(&ok_tool);
        assert!(f > u && u > s, "Ordering should be: failed({}) > user({}) > success({})", f, u, s);
    }
2077
    // --- State management tests ---

    // clear_history must empty the conversation entirely (including the
    // system prompt added by the helper).
    #[test]
    fn test_agent_clear_history_removes_all() {
        let mut agent = agent_with_messages(8);
        assert_eq!(agent.history().len(), 8);
        agent.clear_history();
        assert_eq!(agent.history().len(), 0, "clear_history should drop every message");
    }

    // add_message appends strictly in call order; a fresh agent starts empty.
    #[test]
    fn test_agent_add_message_appends_in_order() {
        let config = PawanConfig::default();
        let mut agent = PawanAgent::new(config, PathBuf::from("."));
        assert_eq!(agent.history().len(), 0);

        let first = Message {
            role: Role::User,
            content: "first".into(),
            tool_calls: vec![],
            tool_result: None,
        };
        let second = Message {
            role: Role::Assistant,
            content: "second".into(),
            tool_calls: vec![],
            tool_result: None,
        };
        agent.add_message(first);
        agent.add_message(second);

        assert_eq!(agent.history().len(), 2);
        assert_eq!(agent.history()[0].content, "first");
        assert_eq!(agent.history()[1].content, "second");
        assert_eq!(agent.history()[0].role, Role::User);
        assert_eq!(agent.history()[1].role, Role::Assistant);
    }

    // switch_model must update the reported model name away from the default.
    #[test]
    fn test_agent_switch_model_updates_name() {
        let config = PawanConfig::default();
        let mut agent = PawanAgent::new(config, PathBuf::from("."));
        let original = agent.model_name().to_string();

        agent.switch_model("gpt-oss-120b");
        assert_eq!(agent.model_name(), "gpt-oss-120b");
        assert_ne!(
            agent.model_name(),
            original,
            "switch_model should change model_name"
        );
    }

    // with_tools replaces (not merges) the tool registry — an empty registry
    // leaves the agent with zero tools.
    #[test]
    fn test_agent_with_tools_replaces_registry() {
        let config = PawanConfig::default();
        let agent = PawanAgent::new(config, PathBuf::from("."));
        let original_tool_count = agent.get_tool_definitions().len();

        // Build a fresh empty registry
        let empty = ToolRegistry::new();
        let agent = agent.with_tools(empty);
        assert_eq!(
            agent.get_tool_definitions().len(),
            0,
            "with_tools(empty) should drop default registry (had {} tools)",
            original_tool_count
        );
    }

    // Two agents built from identical config must expose the same default
    // tool set, including the core read_file and bash tools.
    #[test]
    fn test_agent_get_tool_definitions_returns_deterministic_set() {
        // Fresh agent should expose a stable, non-empty default tool set
        let config = PawanConfig::default();
        let agent_a = PawanAgent::new(config.clone(), PathBuf::from("."));
        let agent_b = PawanAgent::new(config, PathBuf::from("."));
        let defs_a: Vec<String> = agent_a.get_tool_definitions().iter().map(|d| d.name.clone()).collect();
        let defs_b: Vec<String> = agent_b.get_tool_definitions().iter().map(|d| d.name.clone()).collect();

        assert!(!defs_a.is_empty(), "default agent should have tools");
        assert_eq!(defs_a.len(), defs_b.len(), "two default agents must have same tool count");
        // Spot-check a few core tools we know exist
        let names: Vec<&str> = defs_a.iter().map(|s| s.as_str()).collect();
        assert!(names.contains(&"read_file"), "should have read_file in defaults");
        assert!(names.contains(&"bash"), "should have bash in defaults");
    }
2164
2165    // ─── Edge cases for truncate_tool_result ─────────────────────────────
2166
2167    #[test]
2168    fn test_truncate_empty_object_unchanged() {
2169        // Regression: empty object passes through early-return (serialized "{}" = 2 chars)
2170        let val = json!({});
2171        let result = truncate_tool_result(val.clone(), 10);
2172        assert_eq!(result, val);
2173    }
2174
2175    #[test]
2176    fn test_truncate_null_value_unchanged() {
2177        // Null values pass through the `other => other` arm
2178        let val = Value::Null;
2179        let result = truncate_tool_result(val.clone(), 10);
2180        assert_eq!(result, val);
2181    }
2182
2183    #[test]
2184    fn test_truncate_numeric_values_pass_through() {
2185        // Numbers and booleans can't be truncated — the fn must leave them intact
2186        let val = json!({"count": 42, "ratio": 2.5, "enabled": true});
2187        let result = truncate_tool_result(val.clone(), 8000);
2188        assert_eq!(result, val);
2189    }
2190
2191    #[test]
2192    fn test_truncate_large_string_is_utf8_safe() {
2193        // Regression: must use chars().take() not byte slicing so multi-byte
2194        // UTF-8 doesn't panic on char boundary (3000 crabs = ~12000 bytes)
2195        let emoji_heavy = "🦀".repeat(3000);
2196        let val = json!({"crabs": emoji_heavy});
2197        let result = truncate_tool_result(val, 1000);
2198        let out = result["crabs"].as_str().unwrap();
2199        assert!(out.contains("truncated"), "truncation marker must be present");
2200        assert!(out.starts_with('🦀'), "must preserve char boundary");
2201    }
2202
2203    #[test]
2204    fn test_truncate_nested_object_remains_valid_json() {
2205        // Recursive case: large string nested inside a sub-object still truncates,
2206        // and the output stays valid parseable JSON.
2207        let inner_big = "y".repeat(5000);
2208        let val = json!({
2209            "meta": "small",
2210            "nested": { "inner": inner_big }
2211        });
2212        let result = truncate_tool_result(val, 1500);
2213        assert_eq!(result["meta"], "small");
2214        let serialized = serde_json::to_string(&result).unwrap();
2215        let _reparsed: Value = serde_json::from_str(&serialized)
2216            .expect("truncated result must be valid JSON");
2217    }
2218
2219    #[test]
2220    fn test_truncate_short_bare_string_unchanged() {
2221        // A bare string under max_chars hits the early-return check
2222        let val = json!("short string");
2223        let result = truncate_tool_result(val.clone(), 1000);
2224        assert_eq!(result, val);
2225    }
2226
2227    #[test]
2228    fn test_session_id_is_unique_per_agent() {
2229        // Two fresh agents must get distinct session_ids so their eruka
2230        // writes don't collide under the same operations/turns/ key.
2231        let a1 = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2232        let a2 = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2233        assert_ne!(a1.session_id, a2.session_id);
2234        assert!(!a1.session_id.is_empty());
2235        // UUID v4 with dashes is 36 chars
2236        assert_eq!(a1.session_id.len(), 36);
2237    }
2238
2239    #[test]
2240    fn test_resume_session_adopts_loaded_id() {
2241        // resume_session must overwrite self.session_id with the loaded
2242        // session's id so subsequent eruka writes cluster under that id
2243        // rather than the ephemeral one from new().
2244        use std::io::Write;
2245        let tmp = tempfile::TempDir::new().unwrap();
2246        // Minimal valid session file
2247        let sess_dir = tmp.path().join(".pawan").join("sessions");
2248        std::fs::create_dir_all(&sess_dir).unwrap();
2249        let sess_id = "resume-test-xyz";
2250        let sess_path = sess_dir.join(format!("{}.json", sess_id));
2251        let sess_json = serde_json::json!({
2252            "id": sess_id,
2253            "model": "test-model",
2254            "created_at": "2026-04-11T00:00:00Z",
2255            "updated_at": "2026-04-11T00:00:00Z",
2256            "messages": [],
2257            "total_tokens": 0,
2258            "iteration_count": 0
2259        });
2260        let mut f = std::fs::File::create(&sess_path).unwrap();
2261        f.write_all(sess_json.to_string().as_bytes()).unwrap();
2262
2263        // Point HOME at the tmp dir so Session::sessions_dir resolves here
2264        let prev_home = std::env::var("HOME").ok();
2265        std::env::set_var("HOME", tmp.path());
2266
2267        let mut agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2268        let orig_id = agent.session_id.clone();
2269        agent.resume_session(sess_id).expect("resume should succeed");
2270        assert_eq!(agent.session_id, sess_id);
2271        assert_ne!(agent.session_id, orig_id);
2272
2273        // Restore HOME to avoid polluting other tests
2274        if let Some(h) = prev_home {
2275            std::env::set_var("HOME", h);
2276        } else {
2277            std::env::remove_var("HOME");
2278        }
2279    }
2280
2281    #[test]
2282    fn test_history_snapshot_for_eruka_bounded() {
2283        // 100 messages of 500 chars each = 50k raw content. Snapshot must
2284        // cap at ~4000 chars so eruka writes never balloon.
2285        let mut history = Vec::new();
2286        for i in 0..100 {
2287            history.push(Message {
2288                role: if i % 2 == 0 { Role::User } else { Role::Assistant },
2289                content: "x".repeat(500),
2290                tool_calls: vec![],
2291                tool_result: None,
2292            });
2293        }
2294        let snapshot = PawanAgent::history_snapshot_for_eruka(&history);
2295        // After the break at >4000, one more line (up to 203 chars) gets
2296        // appended, so total is bounded by ~4200.
2297        assert!(snapshot.len() <= 4400, "snapshot too long: {} chars", snapshot.len());
2298        assert!(snapshot.len() > 200, "snapshot too short: {} chars", snapshot.len());
2299    }
2300
2301    #[test]
2302    fn test_history_snapshot_for_eruka_includes_role_prefixes() {
2303        // Each message must be tagged with its role so the eruka consumer
2304        // can distinguish user questions from assistant answers.
2305        let history = vec![
2306            Message { role: Role::User, content: "hi".into(), tool_calls: vec![], tool_result: None },
2307            Message { role: Role::Assistant, content: "hello".into(), tool_calls: vec![], tool_result: None },
2308            Message { role: Role::Tool, content: "ok".into(), tool_calls: vec![], tool_result: None },
2309            Message { role: Role::System, content: "sys".into(), tool_calls: vec![], tool_result: None },
2310        ];
2311        let snapshot = PawanAgent::history_snapshot_for_eruka(&history);
2312        assert!(snapshot.contains("U: hi"));
2313        assert!(snapshot.contains("A: hello"));
2314        assert!(snapshot.contains("T: ok"));
2315        assert!(snapshot.contains("S: sys"));
2316    }
2317
2318    #[tokio::test]
2319    async fn test_archive_to_eruka_ok_when_disabled() {
2320        // When eruka is disabled (the default), archive_to_eruka must
2321        // return Ok without touching the network — this is the
2322        // fire-and-forget contract the CLI relies on.
2323        let agent = PawanAgent::new(PawanConfig::default(), PathBuf::from("."));
2324        assert!(agent.eruka.is_none(), "default config should disable eruka");
2325        let result = agent.archive_to_eruka().await;
2326        assert!(result.is_ok(), "archive_to_eruka should be non-fatal when disabled");
2327    }
2328
2329    // ─── probe_local_endpoint tests ──────────────────────────────────────
2330
2331    #[test]
2332    fn test_probe_local_endpoint_closed_port_returns_false() {
2333        // Port 1999 is almost never in use by Netdata (which uses 19999) 
2334        // or other common services.
2335        assert!(
2336            !probe_local_endpoint("http://localhost:1999/v1"),
2337            "closed port should return false"
2338        );
2339    }
2340
2341    #[test]
2342    fn test_probe_local_endpoint_open_port_returns_true() {
2343        // Bind a real listener on a free OS-assigned port, then probe it.
2344        use std::net::TcpListener;
2345        let listener = TcpListener::bind("127.0.0.1:0").expect("bind failed");
2346        let port = listener.local_addr().unwrap().port();
2347        let url = format!("http://localhost:{port}/v1");
2348        assert!(probe_local_endpoint(&url), "open port should return true");
2349    }
2350
2351    #[test]
2352    fn test_probe_local_endpoint_url_without_explicit_port() {
2353        // Port is absent — probe_local_endpoint must default to 80
2354        // which on CI is normally closed, so this just must not panic.
2355        let _ = probe_local_endpoint("http://localhost/v1");
2356    }
2357
2358    // ─── load_arch_context tests ──────────────────────────────────────────
2359
2360    #[test]
2361    fn test_load_arch_context_absent_returns_none() {
2362        let dir = tempfile::TempDir::new().unwrap();
2363        assert!(load_arch_context(dir.path()).is_none());
2364    }
2365
2366    #[test]
2367    fn test_load_arch_context_reads_file_content() {
2368        let dir = tempfile::TempDir::new().unwrap();
2369        let pawan_dir = dir.path().join(".pawan");
2370        std::fs::create_dir_all(&pawan_dir).unwrap();
2371        std::fs::write(pawan_dir.join("arch.md"), "## Architecture\nUse tokio.\n").unwrap();
2372        let result = load_arch_context(dir.path());
2373        assert!(result.is_some());
2374        assert!(result.unwrap().contains("Use tokio"));
2375    }
2376
2377    #[test]
2378    fn test_load_arch_context_empty_file_returns_none() {
2379        let dir = tempfile::TempDir::new().unwrap();
2380        let pawan_dir = dir.path().join(".pawan");
2381        std::fs::create_dir_all(&pawan_dir).unwrap();
2382        std::fs::write(pawan_dir.join("arch.md"), "   \n").unwrap();
2383        assert!(load_arch_context(dir.path()).is_none(), "whitespace-only file should be None");
2384    }
2385
2386    #[test]
2387    fn test_load_arch_context_truncates_at_2000_chars() {
2388        let dir = tempfile::TempDir::new().unwrap();
2389        let pawan_dir = dir.path().join(".pawan");
2390        std::fs::create_dir_all(&pawan_dir).unwrap();
2391        // Write a file that is exactly 2500 ASCII chars (safe char boundary)
2392        let content = "x".repeat(2_500);
2393        std::fs::write(pawan_dir.join("arch.md"), &content).unwrap();
2394        let result = load_arch_context(dir.path()).unwrap();
2395        assert!(
2396            result.len() < 2_100,
2397            "truncated result should be close to 2000 chars, got {}",
2398            result.len()
2399        );
2400        assert!(result.ends_with("(truncated)"), "truncated output must end with marker");
2401    }
2402
    #[tokio::test]
    async fn test_tool_idle_timeout_triggered() {
        // Verifies that a stall between tool-calling iterations is detected
        // and surfaced as PawanError::Agent("Tool idle timeout exceeded...").
        use std::time::Duration;
        use tokio::time::sleep;

        let mut config = PawanConfig::default();
        config.tool_call_idle_timeout_secs = 0; // Trigger on any non-zero elapsed seconds

        // Custom backend that is slow on the second call.
        // With our fix (moving update before LLM call), this will trigger
        // at the start of the THIRD iteration if the second iteration takes time.
        struct SlowBackend {
            // Shared call counter: tells generate() which scripted response to emit.
            index: Arc<std::sync::atomic::AtomicUsize>,
        }

        #[async_trait::async_trait]
        impl LlmBackend for SlowBackend {
            async fn generate(&self, _m: &[Message], _t: &[ToolDefinition], _o: Option<&TokenCallback>) -> Result<LLMResponse> {
                let idx = self.index.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
                if idx == 0 {
                    // First call: return a tool call to ensure we loop again
                    Ok(LLMResponse {
                        content: String::new(),
                        reasoning: None,
                        tool_calls: vec![ToolCallRequest {
                            id: "1".to_string(),
                            name: "read_file".to_string(),
                            arguments: json!({"path": "foo"}),
                        }],
                        finish_reason: "tool_calls".to_string(),
                        usage: None,
                    })
                } else if idx == 1 {
                    // Second call: delay then return ANOTHER tool call
                    // The delay happens AFTER last_tool_call_time is updated for Iteration 2.
                    // So Iteration 3's check will see this 1.1s delay.
                    sleep(Duration::from_millis(1100)).await;
                    Ok(LLMResponse {
                        content: String::new(),
                        reasoning: None,
                        tool_calls: vec![ToolCallRequest {
                            id: "2".to_string(),
                            name: "read_file".to_string(),
                            arguments: json!({"path": "bar"}),
                        }],
                        finish_reason: "tool_calls".to_string(),
                        usage: None,
                    })
                } else {
                    // Third call onward: plain completion. Only reached if the
                    // timeout check failed to fire — the test then panics below.
                    Ok(LLMResponse {
                        content: "Done".to_string(),
                        reasoning: None,
                        tool_calls: vec![],
                        finish_reason: "stop".to_string(),
                        usage: None,
                    })
                }
            }
        }

        let mut agent = PawanAgent::new(config, PathBuf::from("."));
        agent.backend = Box::new(SlowBackend { index: Arc::new(std::sync::atomic::AtomicUsize::new(0)) });

        let result = agent.execute_with_all_callbacks("test", None, None, None, None).await;

        // The stall must surface as an Agent error naming the idle timeout.
        match result {
            Err(PawanError::Agent(msg)) => {
                assert!(msg.contains("Tool idle timeout exceeded"), "Error message should contain timeout: {}", msg);
            }
            Ok(_) => panic!("Expected timeout error, but it succeeded. This means the timeout check didn't catch the delay."),
            Err(e) => panic!("Unexpected error: {:?}", e),
        }
    }
2476
2477    #[tokio::test]
2478    async fn test_tool_idle_timeout_not_triggered() {
2479        let mut config = PawanConfig::default();
2480        config.tool_call_idle_timeout_secs = 10;
2481
2482        let backend = MockBackend::new(vec![
2483            MockResponse::text("Done"),
2484        ]);
2485
2486        let mut agent = PawanAgent::new(config, PathBuf::from("."));
2487        agent.backend = Box::new(backend);
2488
2489        let result = agent.execute_with_all_callbacks("test", None, None, None, None).await;
2490        assert!(result.is_ok());
2491    }
2492}
2493/// Summarize tool arguments for permission requests
2494fn summarize_args(args: &serde_json::Value) -> String {
2495    match args {
2496        serde_json::Value::Object(map) => {
2497            let mut parts = Vec::new();
2498            for (key, value) in map {
2499                let value_str = match value {
2500                    serde_json::Value::String(s) if s.len() > 50 => {
2501                        format!("\"{}...\"", &s[..47])
2502                    }
2503                    serde_json::Value::String(s) => format!("\"{}\"", s),
2504                    serde_json::Value::Array(arr) if arr.len() > 3 => {
2505                        format!("[... {} items]", arr.len())
2506                    }
2507                    serde_json::Value::Array(arr) => {
2508                        let items: Vec<String> = arr.iter().take(3).map(|v| {
2509                            match v {
2510                                serde_json::Value::String(s) => {
2511                                    if s.len() > 20 {
2512                                        format!("\"{}...\"", &s[..17])
2513                                    } else {
2514                                        format!("\"{}\"", s)
2515                                    }
2516                                }
2517                                _ => v.to_string(),
2518                            }
2519                        }).collect();
2520                        format!("[{}]", items.join(", "))
2521                    }
2522                    _ => value.to_string(),
2523                };
2524                parts.push(format!("{}: {}", key, value_str));
2525            }
2526            parts.join(", ")
2527        }
2528        serde_json::Value::String(s) => {
2529            if s.len() > 100 {
2530                format!("\"{}...\"", &s[..97])
2531            } else {
2532                format!("\"{}\"", s)
2533            }
2534        }
2535        serde_json::Value::Array(arr) => {
2536            format!("[{} items]", arr.len())
2537        }
2538        _ => args.to_string(),
2539    }
2540}
2541
2542// --------------------------------------------------------------------------- Tests for coordinator integration
2543// ----------------------------------------------------------------------------
2544
#[cfg(test)]
mod coordinator_tests {
    use super::*;
    // Fixes vs. previous revision: dropped the unused `MockResponse` import,
    // hoisted `ToolCoordinator` to the module level (was re-`use`d per test),
    // repaired broken indentation, and removed a needless `mut` binding.
    use crate::agent::backend::mock::MockBackend;
    use crate::coordinator::{FinishReason, ToolCallingConfig, ToolCoordinator};
    use std::sync::Arc;

    /// Test that config default has use_coordinator = false
    #[test]
    fn test_config_default_use_coordinator_false() {
        let config = PawanConfig::default();
        assert!(!config.use_coordinator);
    }

    /// Test that config can set use_coordinator = true
    #[test]
    fn test_config_use_coordinator_true() {
        let config = PawanConfig {
            use_coordinator: true,
            ..Default::default()
        };
        assert!(config.use_coordinator);
    }

    /// Test coordinator execution dispatches correctly when flag is set
    #[tokio::test]
    async fn test_execute_with_coordinator_flag_enabled() {
        let config = PawanConfig {
            use_coordinator: true,
            model: "test-model".to_string(),
            ..Default::default()
        };
        let agent = PawanAgent::new(config, PathBuf::from("."));
        // Verify the flag is set
        assert!(agent.config().use_coordinator);
    }

    /// Test that execute_with_coordinator produces valid response
    #[tokio::test]
    async fn test_execute_with_coordinator_produces_response() {
        let config = PawanConfig {
            use_coordinator: true,
            max_tool_iterations: 1,
            model: "test-model".to_string(),
            ..Default::default()
        };
        let agent = PawanAgent::new(config, PathBuf::from("."));
        let backend = MockBackend::with_text("Hello from coordinator!");
        let agent = agent.with_backend(Box::new(backend));

        // This will fail because the coordinator creates its own backend
        // but we can at least verify the flag works
        assert!(agent.config().use_coordinator);
    }

    /// Test ToolCallingConfig default values
    #[test]
    fn test_tool_calling_config_defaults() {
        let cfg = ToolCallingConfig::default();
        assert_eq!(cfg.max_iterations, 10);
        assert!(cfg.parallel_execution);
        assert_eq!(cfg.tool_timeout.as_secs(), 30);
        assert!(!cfg.stop_on_error);
    }

    /// Test custom ToolCallingConfig
    #[test]
    fn test_tool_calling_config_custom() {
        let cfg = ToolCallingConfig {
            max_iterations: 5,
            parallel_execution: false,
            tool_timeout: std::time::Duration::from_secs(60),
            stop_on_error: true,
        };
        assert_eq!(cfg.max_iterations, 5);
        assert!(!cfg.parallel_execution);
        assert_eq!(cfg.tool_timeout.as_secs(), 60);
        assert!(cfg.stop_on_error);
    }

    /// Test that coordinator dispatch check works correctly
    #[tokio::test]
    async fn test_coordinator_dispatch_when_flag_is_false() {
        let config = PawanConfig::default();
        assert!(!config.use_coordinator);
        // When flag is false, execute_with_all_callbacks should use built-in loop
    }

    /// Test error handling when coordinator encounters unknown tool
    #[tokio::test]
    async fn test_coordinator_error_handling_unknown_tool() {
        let mock_backend = Arc::new(MockBackend::with_tool_call(
            "call_1",
            "nonexistent_tool",
            json!({}),
            "Trying to call unknown tool",
        ));
        // Empty registry guarantees the requested tool is unknown.
        let registry = Arc::new(ToolRegistry::new());
        let config = ToolCallingConfig::default();
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        let result = coordinator.execute(None, "Use a tool").await.unwrap();
        assert!(matches!(result.finish_reason, FinishReason::UnknownTool(_)));
    }

    /// Test max iterations limit in coordinator
    #[tokio::test]
    async fn test_coordinator_max_iterations_limit() {
        use crate::tools::Tool;
        use async_trait::async_trait;

        // Dummy tool that always succeeds
        struct DummyTool;
        #[async_trait]
        impl Tool for DummyTool {
            fn name(&self) -> &str { "test_tool" }
            fn description(&self) -> &str { "Dummy tool for testing" }
            fn parameters_schema(&self) -> serde_json::Value { json!({}) }
            async fn execute(&self, _args: serde_json::Value) -> crate::Result<serde_json::Value> {
                Ok(json!({ "status": "ok" }))
            }
        }

        // The mock keeps requesting test_tool forever; the iteration cap
        // is the only thing that can stop the loop.
        let mock_backend = Arc::new(MockBackend::with_repeated_tool_call("test_tool"));
        let mut registry = ToolRegistry::new();
        registry.register(Arc::new(DummyTool));
        let registry = Arc::new(registry);
        let config = ToolCallingConfig {
            max_iterations: 3,
            ..Default::default()
        };
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        let result = coordinator.execute(None, "Use tools").await.unwrap();
        assert_eq!(result.iterations, 3);
        assert!(matches!(result.finish_reason, FinishReason::MaxIterations));
    }

    /// Test timeout handling in coordinator
    #[tokio::test]
    async fn test_coordinator_timeout_handling() {
        // Create a mock that returns a tool call
        let mock_backend = Arc::new(MockBackend::with_tool_call(
            "call_1",
            "bash",
            json!({"command": "sleep 10"}),
            "Run slow command",
        ));
        let registry = Arc::new(ToolRegistry::with_defaults(PathBuf::from(".")));
        // Very short timeout
        let config = ToolCallingConfig {
            tool_timeout: std::time::Duration::from_millis(1),
            ..Default::default()
        };
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        // This will timeout - coordinator should handle it gracefully
        let result = coordinator.execute(None, "Run a command").await.unwrap();
        // The tool should have failed with timeout error
        assert!(!result.tool_calls.is_empty());
        let first_call = &result.tool_calls[0];
        assert!(!first_call.success);
        assert!(first_call.result.get("error").is_some());
    }

    /// Test that coordinator accumulates token usage
    #[tokio::test]
    async fn test_coordinator_token_usage_accumulation() {
        let mock_backend = Arc::new(MockBackend::with_text_and_usage(
            "Response",
            100,
            50,
        ));
        let registry = Arc::new(ToolRegistry::new());
        let config = ToolCallingConfig::default();
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        let result = coordinator.execute(None, "Hello").await.unwrap();
        assert_eq!(result.total_usage.prompt_tokens, 100);
        assert_eq!(result.total_usage.completion_tokens, 50);
        assert_eq!(result.total_usage.total_tokens, 150);
    }

    /// Test parallel execution in coordinator
    #[tokio::test]
    async fn test_coordinator_parallel_execution() {
        // Mock that returns multiple tool calls
        let mock_backend = Arc::new(MockBackend::with_multiple_tool_calls(vec![
            ("call_1", "bash", json!({"command": "echo 1"})),
            ("call_2", "bash", json!({"command": "echo 2"})),
            ("call_3", "read_file", json!({"path": "test.txt"})),
        ]));
        let registry = Arc::new(ToolRegistry::with_defaults(PathBuf::from(".")));
        let config = ToolCallingConfig {
            parallel_execution: true,
            ..Default::default()
        };
        let coordinator = ToolCoordinator::new(mock_backend, registry, config);

        let result = coordinator.execute(None, "Run multiple commands").await.unwrap();
        // Should have executed multiple tool calls
        assert!(result.tool_calls.len() >= 3);
    }
}
2759}