// pawan/agent/mod.rs
1//! Pawan Agent - The core agent that handles tool-calling loops
2//!
3//! This module provides the main `PawanAgent` which:
4//! - Manages conversation history
5//! - Coordinates tool calling with the LLM via pluggable backends
6//! - Provides streaming responses
7//! - Supports multiple LLM backends (NVIDIA API, Ollama, OpenAI)
8
9pub mod backend;
10mod preflight;
11pub mod session;
12pub mod git_session;
13
14use crate::config::{LlmProvider, PawanConfig};
15use crate::tools::{ToolDefinition, ToolRegistry};
16use crate::{PawanError, Result};
17use backend::openai_compat::{OpenAiCompatBackend, OpenAiCompatConfig};
18use backend::LlmBackend;
19use serde::{Deserialize, Serialize};
20use serde_json::{json, Value};
21use std::path::PathBuf;
22
/// A message in the conversation
///
/// Serialized by the active `LlmBackend` into its wire format. `tool_calls`
/// defaults to empty when absent during deserialization, and `tool_result`
/// is omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
    /// Role of the message sender (system / user / assistant / tool)
    pub role: Role,
    /// Content of the message
    pub content: String,
    /// Tool calls requested by the assistant (empty for other roles in this file)
    #[serde(default)]
    pub tool_calls: Vec<ToolCallRequest>,
    /// Tool results; in this module only set on `Role::Tool` messages
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_result: Option<ToolResultMessage>,
}
37
38/// Role of a message sender
39#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
40#[serde(rename_all = "lowercase")]
41pub enum Role {
42    System,
43    User,
44    Assistant,
45    Tool,
46}
47
/// A request to call a tool
///
/// Produced by the LLM backend when the model asks for a tool invocation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallRequest {
    /// Unique ID for this tool call (echoed back via `ToolResultMessage::tool_call_id`)
    pub id: String,
    /// Name of the tool to call
    pub name: String,
    /// Arguments for the tool, as arbitrary JSON
    pub arguments: Value,
}
58
/// Result from a tool execution
///
/// Attached to a `Role::Tool` message so the backend can pair the result
/// with the originating `ToolCallRequest`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolResultMessage {
    /// ID of the tool call this result is for (matches `ToolCallRequest::id`)
    pub tool_call_id: String,
    /// The result content (JSON; an `{"error": ...}` object on failure)
    pub content: Value,
    /// Whether the tool executed successfully
    pub success: bool,
}
69
/// Record of a tool call execution
///
/// Captures one full round-trip (request, result, timing) for reporting via
/// `AgentResponse::tool_calls` and the `ToolCallback` hook.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallRecord {
    /// Unique ID for this tool call
    pub id: String,
    /// Name of the tool
    pub name: String,
    /// Arguments passed to the tool
    pub arguments: Value,
    /// Result from the tool (may be truncated to `max_result_chars` by the agent loop)
    pub result: Value,
    /// Whether execution was successful
    pub success: bool,
    /// Wall-clock duration of the tool execution in milliseconds
    pub duration_ms: u64,
}
86
/// Token usage from an LLM response
///
/// Field-wise summed across loop iterations into `AgentResponse::usage`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TokenUsage {
    /// Tokens consumed by the prompt/context
    pub prompt_tokens: u64,
    /// Tokens generated by the model
    pub completion_tokens: u64,
    /// Total tokens as reported by the backend
    pub total_tokens: u64,
    /// Tokens spent on reasoning/thinking (subset of completion_tokens)
    pub reasoning_tokens: u64,
    /// Tokens spent on actual content/tool output (completion - reasoning)
    pub action_tokens: u64,
}
98
/// LLM response from a single generation request
#[derive(Debug, Clone)]
pub struct LLMResponse {
    /// Text content of the response
    pub content: String,
    /// Reasoning/thinking content (separate from visible content)
    pub reasoning: Option<String>,
    /// Tool calls requested by the model (empty when the model produced a final answer)
    pub tool_calls: Vec<ToolCallRequest>,
    /// Reason the response finished (the agent loop synthesizes "error" on permanent failure)
    pub finish_reason: String,
    /// Token usage (`None` when the backend does not report it)
    pub usage: Option<TokenUsage>,
}
113
/// Result from a complete agent execution
#[derive(Debug)]
pub struct AgentResponse {
    /// Final text response (with `<think>` blocks stripped)
    pub content: String,
    /// All tool calls made during execution, in order
    pub tool_calls: Vec<ToolCallRecord>,
    /// Number of LLM iterations taken
    pub iterations: usize,
    /// Cumulative token usage across all iterations
    pub usage: TokenUsage,
}
126
/// Callback for receiving streaming tokens (invoked with each token chunk)
pub type TokenCallback = Box<dyn Fn(&str) + Send + Sync>;

/// Callback invoked with the full record after each tool call completes
pub type ToolCallback = Box<dyn Fn(&ToolCallRecord) + Send + Sync>;

/// Callback invoked with the tool name just before a tool call executes
pub type ToolStartCallback = Box<dyn Fn(&str) + Send + Sync>;
135
/// The main Pawan agent
///
/// This struct represents the core Pawan agent that handles:
/// - Conversation history management
/// - Tool calling with the LLM via pluggable backends
/// - Streaming responses
/// - Multiple LLM backends (NVIDIA API, Ollama, OpenAI)
/// - Context management and token counting
/// - Integration with Eruka for 3-tier memory injection
pub struct PawanAgent {
    /// Configuration
    config: PawanConfig,
    /// Tool registry (defaults installed in `new`; replaceable via `with_tools`)
    tools: ToolRegistry,
    /// Conversation history
    history: Vec<Message>,
    /// Workspace root
    workspace_root: PathBuf,
    /// LLM backend
    backend: Box<dyn LlmBackend>,

    /// Rough token count for the current context (~content bytes / 4)
    context_tokens_estimate: usize,

    /// Eruka bridge for 3-tier memory injection (`None` when disabled in config)
    eruka: Option<crate::eruka_bridge::ErukaClient>,
}
164
165impl PawanAgent {
166    /// Create a new PawanAgent with auto-selected backend
167    pub fn new(config: PawanConfig, workspace_root: PathBuf) -> Self {
168        let tools = ToolRegistry::with_defaults(workspace_root.clone());
169        let system_prompt = config.get_system_prompt();
170        let backend = Self::create_backend(&config, &system_prompt);
171        let eruka = if config.eruka.enabled {
172            Some(crate::eruka_bridge::ErukaClient::new(config.eruka.clone()))
173        } else {
174            None
175        };
176
177        Self {
178            config,
179            tools,
180            history: Vec::new(),
181            workspace_root,
182            backend,
183            context_tokens_estimate: 0,
184            eruka,
185        }
186    }
187
    /// Create the appropriate backend based on config.
    ///
    /// `Nvidia` / `OpenAI` / `Mlx` all speak the OpenAI-compatible chat API
    /// and share `OpenAiCompatBackend`; only URL/key resolution differs.
    /// `Ollama` gets its own native backend.
    ///
    /// NOTE(review): cloud-fallback resolution below ignores `config.base_url`,
    /// unlike the primary OpenAI/Mlx paths — confirm this asymmetry is intended.
    fn create_backend(config: &PawanConfig, system_prompt: &str) -> Box<dyn LlmBackend> {
        match config.provider {
            LlmProvider::Nvidia | LlmProvider::OpenAI | LlmProvider::Mlx => {
                // Resolve endpoint + credentials for the primary provider.
                let (api_url, api_key) = match config.provider {
                    LlmProvider::Nvidia => {
                        // Env var overrides the compiled-in default URL.
                        let url = std::env::var("NVIDIA_API_URL")
                            .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
                        let key = std::env::var("NVIDIA_API_KEY").ok();
                        if key.is_none() {
                            // Non-fatal: requests will be sent without a key.
                            tracing::warn!("NVIDIA_API_KEY not set. Add it to .env or export it.");
                        }
                        (url, key)
                    },
                    LlmProvider::OpenAI => {
                        // Priority: config.base_url > OPENAI_API_URL env > default.
                        let url = config.base_url.clone()
                            .or_else(|| std::env::var("OPENAI_API_URL").ok())
                            .unwrap_or_else(|| "https://api.openai.com/v1".to_string());
                        let key = std::env::var("OPENAI_API_KEY").ok();
                        (url, key)
                    },
                    LlmProvider::Mlx => {
                        // MLX LM server — Apple Silicon native, always local
                        let url = config.base_url.clone()
                            .unwrap_or_else(|| "http://localhost:8080/v1".to_string());
                        tracing::info!(url = %url, "Using MLX LM server (Apple Silicon native)");
                        (url, None) // mlx_lm.server requires no API key
                    },
                    // The outer match arm already restricts the provider set.
                    _ => unreachable!(),
                };
                
                // Build cloud fallback if configured
                let cloud = config.cloud.as_ref().map(|c| {
                    // Same env-var resolution as above, but for the fallback
                    // provider and without consulting `config.base_url`.
                    let (cloud_url, cloud_key) = match c.provider {
                        LlmProvider::Nvidia => {
                            let url = std::env::var("NVIDIA_API_URL")
                                .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
                            let key = std::env::var("NVIDIA_API_KEY").ok();
                            (url, key)
                        },
                        LlmProvider::OpenAI => {
                            let url = std::env::var("OPENAI_API_URL")
                                .unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
                            let key = std::env::var("OPENAI_API_KEY").ok();
                            (url, key)
                        },
                        LlmProvider::Mlx => {
                            ("http://localhost:8080/v1".to_string(), None)
                        },
                        _ => {
                            // Unsupported fallback provider: warn and fall back
                            // to the NVIDIA endpoint with no key.
                            tracing::warn!("Cloud fallback only supports nvidia/openai/mlx providers");
                            ("https://integrate.api.nvidia.com/v1".to_string(), None)
                        }
                    };
                    backend::openai_compat::CloudFallback {
                        api_url: cloud_url,
                        api_key: cloud_key,
                        model: c.model.clone(),
                        fallback_models: c.fallback_models.clone(),
                    }
                });

                Box::new(OpenAiCompatBackend::new(OpenAiCompatConfig {
                    api_url,
                    api_key,
                    model: config.model.clone(),
                    temperature: config.temperature,
                    top_p: config.top_p,
                    max_tokens: config.max_tokens,
                    system_prompt: system_prompt.to_string(),
                    // Enforce thinking budget: if set, disable thinking entirely
                    // and give all tokens to action output
                    use_thinking: config.thinking_budget == 0 && config.use_thinking_mode(),
                    max_retries: config.max_retries,
                    fallback_models: config.fallback_models.clone(),
                    cloud,
                }))
            }
            LlmProvider::Ollama => {
                // Ollama: native (non-OpenAI) backend; env var overrides URL.
                let url = std::env::var("OLLAMA_URL")
                    .unwrap_or_else(|_| "http://localhost:11434".to_string());

                Box::new(backend::ollama::OllamaBackend::new(
                    url,
                    config.model.clone(),
                    config.temperature,
                    system_prompt.to_string(),
                ))
            }
        }
    }
279
280    /// Create with a specific tool registry
281    pub fn with_tools(mut self, tools: ToolRegistry) -> Self {
282        self.tools = tools;
283        self
284    }
285
    /// Get mutable access to the tool registry (e.g. for registering MCP tools
    /// after construction)
    pub fn tools_mut(&mut self) -> &mut ToolRegistry {
        &mut self.tools
    }
290
291    /// Create with a custom backend
292    pub fn with_backend(mut self, backend: Box<dyn LlmBackend>) -> Self {
293        self.backend = backend;
294        self
295    }
296
297    /// Get the current conversation history
298    pub fn history(&self) -> &[Message] {
299        &self.history
300    }
301
302    /// Save current conversation as a session, returns session ID
303    pub fn save_session(&self) -> Result<String> {
304        let mut session = session::Session::new(&self.config.model);
305        session.messages = self.history.clone();
306        session.total_tokens = self.context_tokens_estimate as u64;
307        session.save()?;
308        Ok(session.id)
309    }
310
311    /// Resume a saved session by ID
312    pub fn resume_session(&mut self, session_id: &str) -> Result<()> {
313        let session = session::Session::load(session_id)?;
314        self.history = session.messages;
315        self.context_tokens_estimate = session.total_tokens as usize;
316        Ok(())
317    }
318
    /// Get a shared reference to the agent's configuration
    pub fn config(&self) -> &PawanConfig {
        &self.config
    }
323
    /// Clear the conversation history (the token estimate is left as-is; it
    /// is recomputed on the next execution loop iteration)
    pub fn clear_history(&mut self) {
        self.history.clear();
    }
328    /// Prune conversation history to reduce context size.
329    /// Keeps the first message (system prompt) and last 4 messages,
330    /// replaces everything in between with a summary message.
331    fn prune_history(&mut self) {
332        let len = self.history.len();
333        if len <= 5 {
334            return; // Nothing to prune
335        }
336
337        let keep_end = 4;
338        let start = 1; // Skip system prompt at index 0
339        let end = len - keep_end;
340        let pruned_count = end - start;
341
342        // Build summary from middle messages
343        let mut summary = String::new();
344        for msg in &self.history[start..end] {
345            let chunk = if msg.content.len() > 200 {
346                &msg.content[..200]
347            } else {
348                &msg.content
349            };
350            summary.push_str(chunk);
351            summary.push('\n');
352            if summary.len() > 2000 {
353                summary.truncate(2000);
354                break;
355            }
356        }
357
358        let summary_msg = Message {
359            role: Role::System,
360            content: format!("Previous conversation summary (pruned): {}", summary),
361            tool_calls: vec![],
362            tool_result: None,
363        };
364
365        // Keep first message, insert summary, then last 4
366        let first = self.history[0].clone();
367        let tail: Vec<Message> = self.history[len - keep_end..].to_vec();
368
369        self.history.clear();
370        self.history.push(first);
371        self.history.push(summary_msg);
372        self.history.extend(tail);
373
374        tracing::info!(pruned = pruned_count, context_estimate = self.context_tokens_estimate, "Pruned messages from history");
375    }
376
    /// Append a message to the conversation history
    pub fn add_message(&mut self, message: Message) {
        self.history.push(message);
    }
381
    /// Get definitions for all registered tools, in the form handed to the LLM
    pub fn get_tool_definitions(&self) -> Vec<ToolDefinition> {
        self.tools.get_definitions()
    }
386
    /// Execute a single prompt with tool calling support.
    ///
    /// Convenience wrapper over `execute_with_callbacks` with no streaming
    /// or tool-notification hooks attached.
    pub async fn execute(&mut self, user_prompt: &str) -> Result<AgentResponse> {
        self.execute_with_callbacks(user_prompt, None, None, None)
            .await
    }
392
393    /// Execute with optional callbacks for streaming
394    pub async fn execute_with_callbacks(
395        &mut self,
396        user_prompt: &str,
397        on_token: Option<TokenCallback>,
398        on_tool: Option<ToolCallback>,
399        on_tool_start: Option<ToolStartCallback>,
400    ) -> Result<AgentResponse> {
401        // Inject Eruka core memory before first LLM call
402        if let Some(eruka) = &self.eruka {
403            if let Err(e) = eruka.inject_core_memory(&mut self.history).await {
404                tracing::warn!("Eruka memory injection failed (non-fatal): {}", e);
405            }
406        }
407
408        self.history.push(Message {
409            role: Role::User,
410            content: user_prompt.to_string(),
411            tool_calls: vec![],
412            tool_result: None,
413        });
414
415        let mut all_tool_calls = Vec::new();
416        let mut total_usage = TokenUsage::default();
417        let mut iterations = 0;
418        let max_iterations = self.config.max_tool_iterations;
419
420        loop {
421            iterations += 1;
422            if iterations > max_iterations {
423                return Err(PawanError::Agent(format!(
424                    "Max tool iterations ({}) exceeded",
425                    max_iterations
426                )));
427            }
428
429            // Budget awareness: when running low on iterations, nudge the model
430            let remaining = max_iterations.saturating_sub(iterations);
431            if remaining == 3 && iterations > 1 {
432                self.history.push(Message {
433                    role: Role::User,
434                    content: format!(
435                        "[SYSTEM] You have {} tool iterations remaining. \
436                         Stop exploring and write the most important output now. \
437                         If you have code to write, write it immediately.",
438                        remaining
439                    ),
440                    tool_calls: vec![],
441                    tool_result: None,
442                });
443            }
444            // Estimate context tokens
445            self.context_tokens_estimate = self.history.iter().map(|m| m.content.len()).sum::<usize>() / 4;
446            if self.context_tokens_estimate > self.config.max_context_tokens {
447                self.prune_history();
448            }
449
450            // Dynamic tool selection: pick the most relevant tools for this query
451            // Extract latest user message for keyword matching
452            let latest_query = self.history.iter().rev()
453                .find(|m| m.role == Role::User)
454                .map(|m| m.content.as_str())
455                .unwrap_or("");
456            let tool_defs = self.tools.select_for_query(latest_query, 12);
457            if iterations == 1 {
458                let tool_names: Vec<&str> = tool_defs.iter().map(|t| t.name.as_str()).collect();
459                tracing::info!(tools = ?tool_names, count = tool_defs.len(), "Selected tools for query");
460            }
461
462            // --- Resilient LLM call: retry on transient failures instead of crashing ---
463            let response = {
464                #[allow(unused_assignments)]
465                let mut last_err = None;
466                let max_llm_retries = 3;
467                let mut attempt = 0;
468                loop {
469                    attempt += 1;
470                    match self.backend.generate(&self.history, &tool_defs, on_token.as_ref()).await {
471                        Ok(resp) => break resp,
472                        Err(e) => {
473                            let err_str = e.to_string();
474                            let is_transient = err_str.contains("timeout")
475                                || err_str.contains("connection")
476                                || err_str.contains("429")
477                                || err_str.contains("500")
478                                || err_str.contains("502")
479                                || err_str.contains("503")
480                                || err_str.contains("504")
481                                || err_str.contains("reset")
482                                || err_str.contains("broken pipe");
483
484                            if is_transient && attempt <= max_llm_retries {
485                                let delay = std::time::Duration::from_secs(2u64.pow(attempt as u32));
486                                tracing::warn!(
487                                    attempt = attempt,
488                                    delay_secs = delay.as_secs(),
489                                    error = err_str.as_str(),
490                                    "LLM call failed (transient) — retrying"
491                                );
492                                tokio::time::sleep(delay).await;
493
494                                // If context is too large, prune before retry
495                                if err_str.contains("context") || err_str.contains("token") {
496                                    tracing::info!("Pruning history before retry (possible context overflow)");
497                                    self.prune_history();
498                                }
499                                continue;
500                            }
501
502                            // Non-transient or max retries exhausted
503                            last_err = Some(e);
504                            break {
505                                // Return a synthetic "give up" response instead of crashing
506                                tracing::error!(
507                                    attempt = attempt,
508                                    error = last_err.as_ref().map(|e| e.to_string()).unwrap_or_default().as_str(),
509                                    "LLM call failed permanently — returning error as content"
510                                );
511                                LLMResponse {
512                                    content: format!(
513                                        "LLM error after {} attempts: {}. The task could not be completed.",
514                                        attempt,
515                                        last_err.as_ref().map(|e| e.to_string()).unwrap_or_default()
516                                    ),
517                                    reasoning: None,
518                                    tool_calls: vec![],
519                                    finish_reason: "error".to_string(),
520                                    usage: None,
521                                }
522                            };
523                        }
524                    }
525                }
526            };
527
528            // Accumulate token usage with thinking/action split
529            if let Some(ref usage) = response.usage {
530                total_usage.prompt_tokens += usage.prompt_tokens;
531                total_usage.completion_tokens += usage.completion_tokens;
532                total_usage.total_tokens += usage.total_tokens;
533                total_usage.reasoning_tokens += usage.reasoning_tokens;
534                total_usage.action_tokens += usage.action_tokens;
535
536                // Log token budget split per iteration
537                if usage.reasoning_tokens > 0 {
538                    tracing::info!(
539                        iteration = iterations,
540                        think = usage.reasoning_tokens,
541                        act = usage.action_tokens,
542                        total = usage.completion_tokens,
543                        "Token budget: think:{} act:{} (total:{})",
544                        usage.reasoning_tokens, usage.action_tokens, usage.completion_tokens
545                    );
546                }
547
548                // Thinking budget enforcement
549                let thinking_budget = self.config.thinking_budget;
550                if thinking_budget > 0 && usage.reasoning_tokens > thinking_budget as u64 {
551                    tracing::warn!(
552                        budget = thinking_budget,
553                        actual = usage.reasoning_tokens,
554                        "Thinking budget exceeded ({}/{} tokens)",
555                        usage.reasoning_tokens, thinking_budget
556                    );
557                }
558            }
559
560            // --- Guardrail: strip thinking blocks from content ---
561            let clean_content = {
562                let mut s = response.content.clone();
563                loop {
564                    let lower = s.to_lowercase();
565                    let open = lower.find("<think>");
566                    let close = lower.find("</think>");
567                    match (open, close) {
568                        (Some(i), Some(j)) if j > i => {
569                            let before = s[..i].trim_end().to_string();
570                            let after = if s.len() > j + 8 { s[j + 8..].trim_start().to_string() } else { String::new() };
571                            s = if before.is_empty() { after } else if after.is_empty() { before } else { format!("{}\n{}", before, after) };
572                        }
573                        _ => break,
574                    }
575                }
576                s
577            };
578
579            if response.tool_calls.is_empty() {
580                // --- Guardrail: detect chatty no-op (content but no tools on early iterations) ---
581                // Only nudge if tools are available AND response looks like planning text (not a real answer)
582                let has_tools = !tool_defs.is_empty();
583                let lower = clean_content.to_lowercase();
584                let planning_prefix = lower.starts_with("let me")
585                    || lower.starts_with("i'll help")
586                    || lower.starts_with("i will help")
587                    || lower.starts_with("sure, i")
588                    || lower.starts_with("okay, i");
589                let looks_like_planning = clean_content.len() > 200 || (planning_prefix && clean_content.len() > 50);
590                if has_tools && looks_like_planning && iterations == 1 && iterations < max_iterations && response.finish_reason != "error" {
591                    tracing::warn!(
592                        "No tool calls at iteration {} (content: {}B) — nudging model to use tools",
593                        iterations, clean_content.len()
594                    );
595                    self.history.push(Message {
596                        role: Role::Assistant,
597                        content: clean_content.clone(),
598                        tool_calls: vec![],
599                        tool_result: None,
600                    });
601                    self.history.push(Message {
602                        role: Role::User,
603                        content: "You must use tools to complete this task. Do NOT just describe what you would do — actually call the tools. Start with bash or read_file.".to_string(),
604                        tool_calls: vec![],
605                        tool_result: None,
606                    });
607                    continue;
608                }
609
610                // --- Guardrail: detect repeated responses ---
611                if iterations > 1 {
612                    let prev_assistant = self.history.iter().rev()
613                        .find(|m| m.role == Role::Assistant && !m.content.is_empty());
614                    if let Some(prev) = prev_assistant {
615                        if prev.content.trim() == clean_content.trim() && iterations < max_iterations {
616                            tracing::warn!("Repeated response detected at iteration {} — injecting correction", iterations);
617                            self.history.push(Message {
618                                role: Role::Assistant,
619                                content: clean_content.clone(),
620                                tool_calls: vec![],
621                                tool_result: None,
622                            });
623                            self.history.push(Message {
624                                role: Role::User,
625                                content: "You gave the same response as before. Try a different approach. Use anchor_text in edit_file_lines, or use insert_after, or use bash with sed.".to_string(),
626                                tool_calls: vec![],
627                                tool_result: None,
628                            });
629                            continue;
630                        }
631                    }
632                }
633
634                self.history.push(Message {
635                    role: Role::Assistant,
636                    content: clean_content.clone(),
637                    tool_calls: vec![],
638                    tool_result: None,
639                });
640
641                return Ok(AgentResponse {
642                    content: clean_content,
643                    tool_calls: all_tool_calls,
644                    iterations,
645                    usage: total_usage,
646                });
647            }
648
649            self.history.push(Message {
650                role: Role::Assistant,
651                content: response.content.clone(),
652                tool_calls: response.tool_calls.clone(),
653                tool_result: None,
654            });
655
656            for tool_call in &response.tool_calls {
657                // Auto-activate extended tools on first use (makes them visible in next iteration)
658                self.tools.activate(&tool_call.name);
659
660                // Check permission
661                if let Some(crate::config::ToolPermission::Deny) =
662                    self.config.permissions.get(&tool_call.name)
663                {
664                    let record = ToolCallRecord {
665                        id: tool_call.id.clone(),
666                        name: tool_call.name.clone(),
667                        arguments: tool_call.arguments.clone(),
668                        result: json!({"error": "Tool denied by permission policy"}),
669                        success: false,
670                        duration_ms: 0,
671                    };
672
673                    if let Some(ref callback) = on_tool {
674                        callback(&record);
675                    }
676                    all_tool_calls.push(record);
677
678                    self.history.push(Message {
679                        role: Role::Tool,
680                        content: "{\"error\": \"Tool denied by permission policy\"}".to_string(),
681                        tool_calls: vec![],
682                        tool_result: Some(ToolResultMessage {
683                            tool_call_id: tool_call.id.clone(),
684                            content: json!({"error": "Tool denied by permission policy"}),
685                            success: false,
686                        }),
687                    });
688                    continue;
689                }
690
691                // Notify tool start
692                if let Some(ref callback) = on_tool_start {
693                    callback(&tool_call.name);
694                }
695
696                // Debug: log tool call args for diagnosis
697                tracing::debug!(
698                    tool = tool_call.name.as_str(),
699                    args_len = serde_json::to_string(&tool_call.arguments).unwrap_or_default().len(),
700                    "Tool call: {}({})",
701                    tool_call.name,
702                    serde_json::to_string(&tool_call.arguments)
703                        .unwrap_or_default()
704                        .chars()
705                        .take(200)
706                        .collect::<String>()
707                );
708
709                let start = std::time::Instant::now();
710
711                // Resilient tool execution: catch panics + errors
712                let result = {
713                    let tool_future = self.tools.execute(&tool_call.name, tool_call.arguments.clone());
714                    // Timeout individual tool calls (prevent hangs)
715                    let timeout_dur = if tool_call.name == "bash" {
716                        std::time::Duration::from_secs(self.config.bash_timeout_secs)
717                    } else {
718                        std::time::Duration::from_secs(30)
719                    };
720                    match tokio::time::timeout(timeout_dur, tool_future).await {
721                        Ok(inner) => inner,
722                        Err(_) => Err(PawanError::Tool(format!(
723                            "Tool '{}' timed out after {}s", tool_call.name, timeout_dur.as_secs()
724                        ))),
725                    }
726                };
727                let duration_ms = start.elapsed().as_millis() as u64;
728
729                let (result_value, success) = match result {
730                    Ok(v) => (v, true),
731                    Err(e) => {
732                        tracing::warn!(tool = tool_call.name.as_str(), error = %e, "Tool execution failed");
733                        (json!({"error": e.to_string(), "tool": tool_call.name, "hint": "Try a different approach or tool"}), false)
734                    }
735                };
736
737                // Truncate tool results that exceed max chars to prevent context bloat
738                let max_result_chars = self.config.max_result_chars;
739                let result_value = {
740                    let result_str = serde_json::to_string(&result_value).unwrap_or_default();
741                    if result_str.len() > max_result_chars {
742                        // UTF-8 safe truncation
743                        let truncated: String = result_str.chars().take(max_result_chars).collect();
744                        let truncated = truncated.as_str();
745                        serde_json::from_str(truncated).unwrap_or_else(|_| {
746                            json!({"content": format!("{}...[truncated from {} chars]", truncated, result_str.len())})
747                        })
748                    } else {
749                        result_value
750                    }
751                };
752
753
754                let record = ToolCallRecord {
755                    id: tool_call.id.clone(),
756                    name: tool_call.name.clone(),
757                    arguments: tool_call.arguments.clone(),
758                    result: result_value.clone(),
759                    success,
760                    duration_ms,
761                };
762
763                if let Some(ref callback) = on_tool {
764                    callback(&record);
765                }
766
767                all_tool_calls.push(record);
768
769                self.history.push(Message {
770                    role: Role::Tool,
771                    content: serde_json::to_string(&result_value).unwrap_or_default(),
772                    tool_calls: vec![],
773                    tool_result: Some(ToolResultMessage {
774                        tool_call_id: tool_call.id.clone(),
775                        content: result_value,
776                        success,
777                    }),
778                });
779
780                // Compile-gated confidence: after writing a .rs file, auto-run cargo check
781                // and inject the result so the model can self-correct on the same iteration
782                if success && tool_call.name == "write_file" {
783                    let wrote_rs = tool_call.arguments.get("path")
784                        .and_then(|p| p.as_str())
785                        .map(|p| p.ends_with(".rs"))
786                        .unwrap_or(false);
787                    if wrote_rs {
788                        let ws = self.workspace_root.clone();
789                        let check_result = tokio::process::Command::new("cargo")
790                            .arg("check")
791                            .arg("--message-format=short")
792                            .current_dir(&ws)
793                            .output()
794                            .await;
795                        match check_result {
796                            Ok(output) if !output.status.success() => {
797                                let stderr = String::from_utf8_lossy(&output.stderr);
798                                // Only inject first 1500 chars of errors to avoid context bloat
799                                let err_msg: String = stderr.chars().take(1500).collect();
800                                tracing::info!("Compile-gate: cargo check failed after write_file, injecting errors");
801                                self.history.push(Message {
802                                    role: Role::User,
803                                    content: format!(
804                                        "[SYSTEM] cargo check failed after your write_file. Fix the errors:\n```\n{}\n```",
805                                        err_msg
806                                    ),
807                                    tool_calls: vec![],
808                                    tool_result: None,
809                                });
810                            }
811                            Ok(_) => {
812                                tracing::debug!("Compile-gate: cargo check passed");
813                            }
814                            Err(e) => {
815                                tracing::warn!("Compile-gate: cargo check failed to run: {}", e);
816                            }
817                        }
818                    }
819                }
820            }
821        }
822    }
823
824    /// Execute a healing task with real diagnostics
825    pub async fn heal(&mut self) -> Result<AgentResponse> {
826        let healer = crate::healing::Healer::new(
827            self.workspace_root.clone(),
828            self.config.healing.clone(),
829        );
830
831        let diagnostics = healer.get_diagnostics().await?;
832        let failed_tests = healer.get_failed_tests().await?;
833
834        let mut prompt = format!(
835            "I need you to heal this Rust project at: {}
836
837",
838            self.workspace_root.display()
839        );
840
841        if !diagnostics.is_empty() {
842            prompt.push_str(&format!(
843                "## Compilation Issues ({} found)
844{}
845",
846                diagnostics.len(),
847                healer.format_diagnostics_for_prompt(&diagnostics)
848            ));
849        }
850
851        if !failed_tests.is_empty() {
852            prompt.push_str(&format!(
853                "## Failed Tests ({} found)
854{}
855",
856                failed_tests.len(),
857                healer.format_tests_for_prompt(&failed_tests)
858            ));
859        }
860
861        if diagnostics.is_empty() && failed_tests.is_empty() {
862            prompt.push_str("No issues found! Run cargo check and cargo test to verify.
863");
864        }
865
866        prompt.push_str("
867Fix each issue one at a time. Verify with cargo check after each fix.");
868
869        self.execute(&prompt).await
870    }
871    /// Execute healing with retries — calls heal(), checks for remaining errors, retries if needed
872    pub async fn heal_with_retries(&mut self, max_attempts: usize) -> Result<AgentResponse> {
873        let mut last_response = self.heal().await?;
874
875        for attempt in 1..max_attempts {
876            let fixer = crate::healing::CompilerFixer::new(self.workspace_root.clone());
877            let remaining = fixer.check().await?;
878            let errors: Vec<_> = remaining.iter().filter(|d| d.kind == crate::healing::DiagnosticKind::Error).collect();
879
880            if errors.is_empty() {
881                tracing::info!(attempts = attempt, "Healing complete");
882                return Ok(last_response);
883            }
884
885            tracing::warn!(errors = errors.len(), attempt = attempt, "Errors remain after heal attempt, retrying");
886            last_response = self.heal().await?;
887        }
888
889        tracing::info!(attempts = max_attempts, "Healing finished (may still have errors)");
890        Ok(last_response)
891    }
892    /// Execute a task with a specific prompt
893    pub async fn task(&mut self, task_description: &str) -> Result<AgentResponse> {
894        let prompt = format!(
895            r#"I need you to complete the following coding task:
896
897{}
898
899The workspace is at: {}
900
901Please:
9021. First explore the codebase to understand the relevant code
9032. Make the necessary changes
9043. Verify the changes compile with `cargo check`
9054. Run relevant tests if applicable
906
907Explain your changes as you go."#,
908            task_description,
909            self.workspace_root.display()
910        );
911
912        self.execute(&prompt).await
913    }
914
915    /// Generate a commit message for current changes
916    pub async fn generate_commit_message(&mut self) -> Result<String> {
917        let prompt = r#"Please:
9181. Run `git status` to see what files are changed
9192. Run `git diff --cached` to see staged changes (or `git diff` for unstaged)
9203. Generate a concise, descriptive commit message following conventional commits format
921
922Only output the suggested commit message, nothing else."#;
923
924        let response = self.execute(prompt).await?;
925        Ok(response.content)
926    }
927}
928
#[cfg(test)]
mod tests {
    use super::*;

    /// A user message must serialize with a lowercase role tag and survive
    /// a round-trip through JSON with every field intact.
    #[test]
    fn test_message_serialization() {
        let msg = Message {
            role: Role::User,
            content: "Hello".to_string(),
            tool_calls: vec![],
            tool_result: None,
        };

        let json = serde_json::to_string(&msg).expect("Serialization failed");
        // `Role` uses `rename_all = "lowercase"`, so the tag must be quoted lowercase.
        assert!(json.contains("\"user\""));
        assert!(json.contains("Hello"));

        // Round-trip: `tool_calls` has #[serde(default)] and `tool_result`
        // is an Option, so deserializing the serialized form must reproduce
        // the original field values exactly.
        let back: Message = serde_json::from_str(&json).expect("Deserialization failed");
        assert_eq!(back.role, Role::User);
        assert_eq!(back.content, "Hello");
        assert!(back.tool_calls.is_empty());
        assert!(back.tool_result.is_none());
    }

    /// A tool-call request round-trips through JSON preserving id, name,
    /// and the full argument payload.
    #[test]
    fn test_tool_call_request() {
        let tc = ToolCallRequest {
            id: "123".to_string(),
            name: "read_file".to_string(),
            arguments: json!({"path": "test.txt"}),
        };

        let json = serde_json::to_string(&tc).expect("Serialization failed");
        assert!(json.contains("read_file"));
        assert!(json.contains("test.txt"));

        let back: ToolCallRequest = serde_json::from_str(&json).expect("Deserialization failed");
        assert_eq!(back.id, "123");
        assert_eq!(back.name, "read_file");
        assert_eq!(back.arguments, json!({"path": "test.txt"}));
    }
}
959}