// pawan/agent/mod.rs
1//! Pawan Agent - The core agent that handles tool-calling loops
2//!
3//! This module provides the main `PawanAgent` which:
4//! - Manages conversation history
5//! - Coordinates tool calling with the LLM via pluggable backends
6//! - Provides streaming responses
7//! - Supports multiple LLM backends (NVIDIA API, Ollama, OpenAI)
8
9pub mod backend;
10mod preflight;
11pub mod session;
12pub mod git_session;
13
14use crate::config::{LlmProvider, PawanConfig};
15use crate::tools::{ToolDefinition, ToolRegistry};
16use crate::{PawanError, Result};
17use backend::openai_compat::{OpenAiCompatBackend, OpenAiCompatConfig};
18use backend::LlmBackend;
19use serde::{Deserialize, Serialize};
20use serde_json::{json, Value};
21use std::path::PathBuf;
22
/// A message in the conversation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
    /// Role of the message sender
    pub role: Role,
    /// Content of the message
    pub content: String,
    /// Tool calls (if any); defaults to empty when the field is absent
    /// during deserialization
    #[serde(default)]
    pub tool_calls: Vec<ToolCallRequest>,
    /// Tool results (if this is a tool result message); omitted from the
    /// serialized form when `None`
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_result: Option<ToolResultMessage>,
}
37
/// Role of a message sender
///
/// Serialized in lowercase ("system", "user", "assistant", "tool") to match
/// the OpenAI-style chat message format.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    System,
    User,
    Assistant,
    Tool,
}
47
/// A request to call a tool, as emitted by the model
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallRequest {
    /// Unique ID for this tool call (echoed back in the matching result)
    pub id: String,
    /// Name of the tool to call
    pub name: String,
    /// Arguments for the tool, as arbitrary JSON
    pub arguments: Value,
}
58
/// Result from a tool execution, fed back to the model
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolResultMessage {
    /// ID of the tool call this result is for (matches `ToolCallRequest::id`)
    pub tool_call_id: String,
    /// The result content as JSON
    pub content: Value,
    /// Whether the tool executed successfully
    pub success: bool,
}
69
/// Record of a tool call execution, kept for reporting in [`AgentResponse`]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallRecord {
    /// Unique ID for this tool call
    pub id: String,
    /// Name of the tool
    pub name: String,
    /// Arguments passed to the tool
    pub arguments: Value,
    /// Result from the tool
    pub result: Value,
    /// Whether execution was successful
    pub success: bool,
    /// Duration in milliseconds
    pub duration_ms: u64,
}
86
/// Token usage from an LLM response
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TokenUsage {
    /// Tokens in the prompt/context sent to the model
    pub prompt_tokens: u64,
    /// Tokens generated by the model
    pub completion_tokens: u64,
    /// Total tokens reported by the backend for the request
    pub total_tokens: u64,
    /// Tokens spent on reasoning/thinking (subset of completion_tokens)
    pub reasoning_tokens: u64,
    /// Tokens spent on actual content/tool output (completion - reasoning)
    pub action_tokens: u64,
}
98
/// LLM response from a single generation request
#[derive(Debug, Clone)]
pub struct LLMResponse {
    /// Text content of the response
    pub content: String,
    /// Reasoning/thinking content (separate from visible content)
    pub reasoning: Option<String>,
    /// Tool calls requested by the model; empty when the model answered directly
    pub tool_calls: Vec<ToolCallRequest>,
    /// Reason the response finished (backend-specific string; "error" is used
    /// internally for synthesized failure responses)
    pub finish_reason: String,
    /// Token usage (if available from the backend)
    pub usage: Option<TokenUsage>,
}
113
/// Result from a complete agent execution (returned by [`PawanAgent::execute`])
#[derive(Debug)]
pub struct AgentResponse {
    /// Final text response
    pub content: String,
    /// All tool calls made during execution
    pub tool_calls: Vec<ToolCallRecord>,
    /// Number of LLM iterations taken
    pub iterations: usize,
    /// Cumulative token usage across all iterations
    pub usage: TokenUsage,
}
126
/// Callback for receiving streaming tokens (invoked per streamed chunk)
pub type TokenCallback = Box<dyn Fn(&str) + Send + Sync>;

/// Callback for receiving tool call updates (invoked with the completed record)
pub type ToolCallback = Box<dyn Fn(&ToolCallRecord) + Send + Sync>;

/// Callback for tool call start notifications (invoked with the tool name)
pub type ToolStartCallback = Box<dyn Fn(&str) + Send + Sync>;
135
/// The main Pawan agent
///
/// This struct represents the core Pawan agent that handles:
/// - Conversation history management
/// - Tool calling with the LLM via pluggable backends
/// - Streaming responses
/// - Multiple LLM backends (NVIDIA API, Ollama, OpenAI)
/// - Context management and token counting
/// - Integration with Eruka for 3-tier memory injection
pub struct PawanAgent {
    /// Configuration
    config: PawanConfig,
    /// Tool registry
    tools: ToolRegistry,
    /// Conversation history
    history: Vec<Message>,
    /// Workspace root
    workspace_root: PathBuf,
    /// LLM backend
    backend: Box<dyn LlmBackend>,

    /// Estimated token count for current context (rough ~4 bytes/token heuristic)
    context_tokens_estimate: usize,

    /// Eruka bridge for 3-tier memory injection; `None` when disabled in config
    eruka: Option<crate::eruka_bridge::ErukaClient>,
}
164
165impl PawanAgent {
166    /// Create a new PawanAgent with auto-selected backend
167    pub fn new(config: PawanConfig, workspace_root: PathBuf) -> Self {
168        let tools = ToolRegistry::with_defaults(workspace_root.clone());
169        let system_prompt = config.get_system_prompt();
170        let backend = Self::create_backend(&config, &system_prompt);
171        let eruka = if config.eruka.enabled {
172            Some(crate::eruka_bridge::ErukaClient::new(config.eruka.clone()))
173        } else {
174            None
175        };
176
177        Self {
178            config,
179            tools,
180            history: Vec::new(),
181            workspace_root,
182            backend,
183            context_tokens_estimate: 0,
184            eruka,
185        }
186    }
187
    /// Create the appropriate backend based on config.
    ///
    /// `Nvidia`, `OpenAI`, and `Mlx` all speak the OpenAI-compatible chat API
    /// and share [`OpenAiCompatBackend`]; `Ollama` gets its own backend.
    /// Endpoint URLs and API keys are resolved from `config.base_url` and/or
    /// environment variables (`NVIDIA_API_URL`/`NVIDIA_API_KEY`,
    /// `OPENAI_API_URL`/`OPENAI_API_KEY`, `OLLAMA_URL`).
    fn create_backend(config: &PawanConfig, system_prompt: &str) -> Box<dyn LlmBackend> {
        match config.provider {
            LlmProvider::Nvidia | LlmProvider::OpenAI | LlmProvider::Mlx => {
                // Resolve (endpoint URL, optional API key) for the primary provider.
                let (api_url, api_key) = match config.provider {
                    LlmProvider::Nvidia => {
                        let url = std::env::var("NVIDIA_API_URL")
                            .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
                        let key = std::env::var("NVIDIA_API_KEY").ok();
                        // A missing key is non-fatal at construction time; only warn.
                        if key.is_none() {
                            tracing::warn!("NVIDIA_API_KEY not set. Add it to .env or export it.");
                        }
                        (url, key)
                    },
                    LlmProvider::OpenAI => {
                        // Precedence: explicit config.base_url > OPENAI_API_URL env > default.
                        let url = config.base_url.clone()
                            .or_else(|| std::env::var("OPENAI_API_URL").ok())
                            .unwrap_or_else(|| "https://api.openai.com/v1".to_string());
                        let key = std::env::var("OPENAI_API_KEY").ok();
                        (url, key)
                    },
                    LlmProvider::Mlx => {
                        // MLX LM server — Apple Silicon native, always local
                        let url = config.base_url.clone()
                            .unwrap_or_else(|| "http://localhost:8080/v1".to_string());
                        tracing::info!(url = %url, "Using MLX LM server (Apple Silicon native)");
                        (url, None) // mlx_lm.server requires no API key
                    },
                    // The outer match arm guarantees provider is one of the three above.
                    _ => unreachable!(),
                };

                // Build cloud fallback if configured
                let cloud = config.cloud.as_ref().map(|c| {
                    // Same URL/key resolution as above, but for the fallback provider.
                    let (cloud_url, cloud_key) = match c.provider {
                        LlmProvider::Nvidia => {
                            let url = std::env::var("NVIDIA_API_URL")
                                .unwrap_or_else(|_| crate::DEFAULT_NVIDIA_API_URL.to_string());
                            let key = std::env::var("NVIDIA_API_KEY").ok();
                            (url, key)
                        },
                        LlmProvider::OpenAI => {
                            let url = std::env::var("OPENAI_API_URL")
                                .unwrap_or_else(|_| "https://api.openai.com/v1".to_string());
                            let key = std::env::var("OPENAI_API_KEY").ok();
                            (url, key)
                        },
                        LlmProvider::Mlx => {
                            ("http://localhost:8080/v1".to_string(), None)
                        },
                        _ => {
                            // Unsupported fallback provider: warn and default to the
                            // NVIDIA endpoint with no API key.
                            tracing::warn!("Cloud fallback only supports nvidia/openai/mlx providers");
                            ("https://integrate.api.nvidia.com/v1".to_string(), None)
                        }
                    };
                    backend::openai_compat::CloudFallback {
                        api_url: cloud_url,
                        api_key: cloud_key,
                        model: c.model.clone(),
                        fallback_models: c.fallback_models.clone(),
                    }
                });

                Box::new(OpenAiCompatBackend::new(OpenAiCompatConfig {
                    api_url,
                    api_key,
                    model: config.model.clone(),
                    temperature: config.temperature,
                    top_p: config.top_p,
                    max_tokens: config.max_tokens,
                    system_prompt: system_prompt.to_string(),
                    // Enforce thinking budget: if set, disable thinking entirely
                    // and give all tokens to action output
                    use_thinking: config.thinking_budget == 0 && config.use_thinking_mode(),
                    max_retries: config.max_retries,
                    fallback_models: config.fallback_models.clone(),
                    cloud,
                }))
            }
            LlmProvider::Ollama => {
                // Ollama speaks its own API; URL comes from env with a local default.
                let url = std::env::var("OLLAMA_URL")
                    .unwrap_or_else(|_| "http://localhost:11434".to_string());

                Box::new(backend::ollama::OllamaBackend::new(
                    url,
                    config.model.clone(),
                    config.temperature,
                    system_prompt.to_string(),
                ))
            }
        }
    }
279
280    /// Create with a specific tool registry
281    pub fn with_tools(mut self, tools: ToolRegistry) -> Self {
282        self.tools = tools;
283        self
284    }
285
    /// Get mutable access to the tool registry (for registering MCP tools).
    pub fn tools_mut(&mut self) -> &mut ToolRegistry {
        &mut self.tools
    }
290
291    /// Create with a custom backend
292    pub fn with_backend(mut self, backend: Box<dyn LlmBackend>) -> Self {
293        self.backend = backend;
294        self
295    }
296
297    /// Get the current conversation history
298    pub fn history(&self) -> &[Message] {
299        &self.history
300    }
301
302    /// Save current conversation as a session, returns session ID
303    pub fn save_session(&self) -> Result<String> {
304        let mut session = session::Session::new(&self.config.model);
305        session.messages = self.history.clone();
306        session.total_tokens = self.context_tokens_estimate as u64;
307        session.save()?;
308        Ok(session.id)
309    }
310
311    /// Resume a saved session by ID
312    pub fn resume_session(&mut self, session_id: &str) -> Result<()> {
313        let session = session::Session::load(session_id)?;
314        self.history = session.messages;
315        self.context_tokens_estimate = session.total_tokens as usize;
316        Ok(())
317    }
318
    /// Get the configuration.
    pub fn config(&self) -> &PawanConfig {
        &self.config
    }
323
    /// Clear the conversation history.
    ///
    /// NOTE(review): `context_tokens_estimate` is not reset here; it is
    /// recomputed from the history on each iteration of the execute loop.
    pub fn clear_history(&mut self) {
        self.history.clear();
    }
328    /// Prune conversation history to reduce context size.
329    /// Keeps the first message (system prompt) and last 4 messages,
330    /// replaces everything in between with a summary message.
331    fn prune_history(&mut self) {
332        let len = self.history.len();
333        if len <= 5 {
334            return; // Nothing to prune
335        }
336
337        let keep_end = 4;
338        let start = 1; // Skip system prompt at index 0
339        let end = len - keep_end;
340        let pruned_count = end - start;
341
342        // Build summary from middle messages
343        let mut summary = String::new();
344        for msg in &self.history[start..end] {
345            let chunk = if msg.content.len() > 200 {
346                &msg.content[..200]
347            } else {
348                &msg.content
349            };
350            summary.push_str(chunk);
351            summary.push('\n');
352            if summary.len() > 2000 {
353                summary.truncate(2000);
354                break;
355            }
356        }
357
358        let summary_msg = Message {
359            role: Role::System,
360            content: format!("Previous conversation summary (pruned): {}", summary),
361            tool_calls: vec![],
362            tool_result: None,
363        };
364
365        // Keep first message, insert summary, then last 4
366        let first = self.history[0].clone();
367        let tail: Vec<Message> = self.history[len - keep_end..].to_vec();
368
369        self.history.clear();
370        self.history.push(first);
371        self.history.push(summary_msg);
372        self.history.extend(tail);
373
374        tracing::info!(pruned = pruned_count, context_estimate = self.context_tokens_estimate, "Pruned messages from history");
375    }
376
    /// Add a message to the end of the conversation history.
    pub fn add_message(&mut self, message: Message) {
        self.history.push(message);
    }
381
382    /// Switch the LLM model at runtime. Recreates the backend with the new model.
383    pub fn switch_model(&mut self, model: &str) {
384        self.config.model = model.to_string();
385        let system_prompt = self.config.get_system_prompt();
386        self.backend = Self::create_backend(&self.config, &system_prompt);
387        tracing::info!(model = model, "Model switched at runtime");
388    }
389
    /// Get the current model name.
    pub fn model_name(&self) -> &str {
        &self.config.model
    }
394
    /// Get tool definitions for the LLM (all registered tools).
    pub fn get_tool_definitions(&self) -> Vec<ToolDefinition> {
        self.tools.get_definitions()
    }
399
    /// Execute a single prompt with tool calling support.
    ///
    /// Convenience wrapper around [`Self::execute_with_callbacks`] with no
    /// streaming or tool callbacks.
    pub async fn execute(&mut self, user_prompt: &str) -> Result<AgentResponse> {
        self.execute_with_callbacks(user_prompt, None, None, None)
            .await
    }
405
406    /// Execute with optional callbacks for streaming
407    pub async fn execute_with_callbacks(
408        &mut self,
409        user_prompt: &str,
410        on_token: Option<TokenCallback>,
411        on_tool: Option<ToolCallback>,
412        on_tool_start: Option<ToolStartCallback>,
413    ) -> Result<AgentResponse> {
414        // Inject Eruka core memory before first LLM call
415        if let Some(eruka) = &self.eruka {
416            if let Err(e) = eruka.inject_core_memory(&mut self.history).await {
417                tracing::warn!("Eruka memory injection failed (non-fatal): {}", e);
418            }
419        }
420
421        self.history.push(Message {
422            role: Role::User,
423            content: user_prompt.to_string(),
424            tool_calls: vec![],
425            tool_result: None,
426        });
427
428        let mut all_tool_calls = Vec::new();
429        let mut total_usage = TokenUsage::default();
430        let mut iterations = 0;
431        let max_iterations = self.config.max_tool_iterations;
432
433        loop {
434            iterations += 1;
435            if iterations > max_iterations {
436                return Err(PawanError::Agent(format!(
437                    "Max tool iterations ({}) exceeded",
438                    max_iterations
439                )));
440            }
441
442            // Budget awareness: when running low on iterations, nudge the model
443            let remaining = max_iterations.saturating_sub(iterations);
444            if remaining == 3 && iterations > 1 {
445                self.history.push(Message {
446                    role: Role::User,
447                    content: format!(
448                        "[SYSTEM] You have {} tool iterations remaining. \
449                         Stop exploring and write the most important output now. \
450                         If you have code to write, write it immediately.",
451                        remaining
452                    ),
453                    tool_calls: vec![],
454                    tool_result: None,
455                });
456            }
457            // Estimate context tokens
458            self.context_tokens_estimate = self.history.iter().map(|m| m.content.len()).sum::<usize>() / 4;
459            if self.context_tokens_estimate > self.config.max_context_tokens {
460                self.prune_history();
461            }
462
463            // Dynamic tool selection: pick the most relevant tools for this query
464            // Extract latest user message for keyword matching
465            let latest_query = self.history.iter().rev()
466                .find(|m| m.role == Role::User)
467                .map(|m| m.content.as_str())
468                .unwrap_or("");
469            let tool_defs = self.tools.select_for_query(latest_query, 12);
470            if iterations == 1 {
471                let tool_names: Vec<&str> = tool_defs.iter().map(|t| t.name.as_str()).collect();
472                tracing::info!(tools = ?tool_names, count = tool_defs.len(), "Selected tools for query");
473            }
474
475            // --- Resilient LLM call: retry on transient failures instead of crashing ---
476            let response = {
477                #[allow(unused_assignments)]
478                let mut last_err = None;
479                let max_llm_retries = 3;
480                let mut attempt = 0;
481                loop {
482                    attempt += 1;
483                    match self.backend.generate(&self.history, &tool_defs, on_token.as_ref()).await {
484                        Ok(resp) => break resp,
485                        Err(e) => {
486                            let err_str = e.to_string();
487                            let is_transient = err_str.contains("timeout")
488                                || err_str.contains("connection")
489                                || err_str.contains("429")
490                                || err_str.contains("500")
491                                || err_str.contains("502")
492                                || err_str.contains("503")
493                                || err_str.contains("504")
494                                || err_str.contains("reset")
495                                || err_str.contains("broken pipe");
496
497                            if is_transient && attempt <= max_llm_retries {
498                                let delay = std::time::Duration::from_secs(2u64.pow(attempt as u32));
499                                tracing::warn!(
500                                    attempt = attempt,
501                                    delay_secs = delay.as_secs(),
502                                    error = err_str.as_str(),
503                                    "LLM call failed (transient) — retrying"
504                                );
505                                tokio::time::sleep(delay).await;
506
507                                // If context is too large, prune before retry
508                                if err_str.contains("context") || err_str.contains("token") {
509                                    tracing::info!("Pruning history before retry (possible context overflow)");
510                                    self.prune_history();
511                                }
512                                continue;
513                            }
514
515                            // Non-transient or max retries exhausted
516                            last_err = Some(e);
517                            break {
518                                // Return a synthetic "give up" response instead of crashing
519                                tracing::error!(
520                                    attempt = attempt,
521                                    error = last_err.as_ref().map(|e| e.to_string()).unwrap_or_default().as_str(),
522                                    "LLM call failed permanently — returning error as content"
523                                );
524                                LLMResponse {
525                                    content: format!(
526                                        "LLM error after {} attempts: {}. The task could not be completed.",
527                                        attempt,
528                                        last_err.as_ref().map(|e| e.to_string()).unwrap_or_default()
529                                    ),
530                                    reasoning: None,
531                                    tool_calls: vec![],
532                                    finish_reason: "error".to_string(),
533                                    usage: None,
534                                }
535                            };
536                        }
537                    }
538                }
539            };
540
541            // Accumulate token usage with thinking/action split
542            if let Some(ref usage) = response.usage {
543                total_usage.prompt_tokens += usage.prompt_tokens;
544                total_usage.completion_tokens += usage.completion_tokens;
545                total_usage.total_tokens += usage.total_tokens;
546                total_usage.reasoning_tokens += usage.reasoning_tokens;
547                total_usage.action_tokens += usage.action_tokens;
548
549                // Log token budget split per iteration
550                if usage.reasoning_tokens > 0 {
551                    tracing::info!(
552                        iteration = iterations,
553                        think = usage.reasoning_tokens,
554                        act = usage.action_tokens,
555                        total = usage.completion_tokens,
556                        "Token budget: think:{} act:{} (total:{})",
557                        usage.reasoning_tokens, usage.action_tokens, usage.completion_tokens
558                    );
559                }
560
561                // Thinking budget enforcement
562                let thinking_budget = self.config.thinking_budget;
563                if thinking_budget > 0 && usage.reasoning_tokens > thinking_budget as u64 {
564                    tracing::warn!(
565                        budget = thinking_budget,
566                        actual = usage.reasoning_tokens,
567                        "Thinking budget exceeded ({}/{} tokens)",
568                        usage.reasoning_tokens, thinking_budget
569                    );
570                }
571            }
572
573            // --- Guardrail: strip thinking blocks from content ---
574            let clean_content = {
575                let mut s = response.content.clone();
576                loop {
577                    let lower = s.to_lowercase();
578                    let open = lower.find("<think>");
579                    let close = lower.find("</think>");
580                    match (open, close) {
581                        (Some(i), Some(j)) if j > i => {
582                            let before = s[..i].trim_end().to_string();
583                            let after = if s.len() > j + 8 { s[j + 8..].trim_start().to_string() } else { String::new() };
584                            s = if before.is_empty() { after } else if after.is_empty() { before } else { format!("{}\n{}", before, after) };
585                        }
586                        _ => break,
587                    }
588                }
589                s
590            };
591
592            if response.tool_calls.is_empty() {
593                // --- Guardrail: detect chatty no-op (content but no tools on early iterations) ---
594                // Only nudge if tools are available AND response looks like planning text (not a real answer)
595                let has_tools = !tool_defs.is_empty();
596                let lower = clean_content.to_lowercase();
597                let planning_prefix = lower.starts_with("let me")
598                    || lower.starts_with("i'll help")
599                    || lower.starts_with("i will help")
600                    || lower.starts_with("sure, i")
601                    || lower.starts_with("okay, i");
602                let looks_like_planning = clean_content.len() > 200 || (planning_prefix && clean_content.len() > 50);
603                if has_tools && looks_like_planning && iterations == 1 && iterations < max_iterations && response.finish_reason != "error" {
604                    tracing::warn!(
605                        "No tool calls at iteration {} (content: {}B) — nudging model to use tools",
606                        iterations, clean_content.len()
607                    );
608                    self.history.push(Message {
609                        role: Role::Assistant,
610                        content: clean_content.clone(),
611                        tool_calls: vec![],
612                        tool_result: None,
613                    });
614                    self.history.push(Message {
615                        role: Role::User,
616                        content: "You must use tools to complete this task. Do NOT just describe what you would do — actually call the tools. Start with bash or read_file.".to_string(),
617                        tool_calls: vec![],
618                        tool_result: None,
619                    });
620                    continue;
621                }
622
623                // --- Guardrail: detect repeated responses ---
624                if iterations > 1 {
625                    let prev_assistant = self.history.iter().rev()
626                        .find(|m| m.role == Role::Assistant && !m.content.is_empty());
627                    if let Some(prev) = prev_assistant {
628                        if prev.content.trim() == clean_content.trim() && iterations < max_iterations {
629                            tracing::warn!("Repeated response detected at iteration {} — injecting correction", iterations);
630                            self.history.push(Message {
631                                role: Role::Assistant,
632                                content: clean_content.clone(),
633                                tool_calls: vec![],
634                                tool_result: None,
635                            });
636                            self.history.push(Message {
637                                role: Role::User,
638                                content: "You gave the same response as before. Try a different approach. Use anchor_text in edit_file_lines, or use insert_after, or use bash with sed.".to_string(),
639                                tool_calls: vec![],
640                                tool_result: None,
641                            });
642                            continue;
643                        }
644                    }
645                }
646
647                self.history.push(Message {
648                    role: Role::Assistant,
649                    content: clean_content.clone(),
650                    tool_calls: vec![],
651                    tool_result: None,
652                });
653
654                return Ok(AgentResponse {
655                    content: clean_content,
656                    tool_calls: all_tool_calls,
657                    iterations,
658                    usage: total_usage,
659                });
660            }
661
662            self.history.push(Message {
663                role: Role::Assistant,
664                content: response.content.clone(),
665                tool_calls: response.tool_calls.clone(),
666                tool_result: None,
667            });
668
669            for tool_call in &response.tool_calls {
670                // Auto-activate extended tools on first use (makes them visible in next iteration)
671                self.tools.activate(&tool_call.name);
672
673                // Check permission
674                if let Some(crate::config::ToolPermission::Deny) =
675                    self.config.permissions.get(&tool_call.name)
676                {
677                    let record = ToolCallRecord {
678                        id: tool_call.id.clone(),
679                        name: tool_call.name.clone(),
680                        arguments: tool_call.arguments.clone(),
681                        result: json!({"error": "Tool denied by permission policy"}),
682                        success: false,
683                        duration_ms: 0,
684                    };
685
686                    if let Some(ref callback) = on_tool {
687                        callback(&record);
688                    }
689                    all_tool_calls.push(record);
690
691                    self.history.push(Message {
692                        role: Role::Tool,
693                        content: "{\"error\": \"Tool denied by permission policy\"}".to_string(),
694                        tool_calls: vec![],
695                        tool_result: Some(ToolResultMessage {
696                            tool_call_id: tool_call.id.clone(),
697                            content: json!({"error": "Tool denied by permission policy"}),
698                            success: false,
699                        }),
700                    });
701                    continue;
702                }
703
704                // Notify tool start
705                if let Some(ref callback) = on_tool_start {
706                    callback(&tool_call.name);
707                }
708
709                // Debug: log tool call args for diagnosis
710                tracing::debug!(
711                    tool = tool_call.name.as_str(),
712                    args_len = serde_json::to_string(&tool_call.arguments).unwrap_or_default().len(),
713                    "Tool call: {}({})",
714                    tool_call.name,
715                    serde_json::to_string(&tool_call.arguments)
716                        .unwrap_or_default()
717                        .chars()
718                        .take(200)
719                        .collect::<String>()
720                );
721
722                let start = std::time::Instant::now();
723
724                // Resilient tool execution: catch panics + errors
725                let result = {
726                    let tool_future = self.tools.execute(&tool_call.name, tool_call.arguments.clone());
727                    // Timeout individual tool calls (prevent hangs)
728                    let timeout_dur = if tool_call.name == "bash" {
729                        std::time::Duration::from_secs(self.config.bash_timeout_secs)
730                    } else {
731                        std::time::Duration::from_secs(30)
732                    };
733                    match tokio::time::timeout(timeout_dur, tool_future).await {
734                        Ok(inner) => inner,
735                        Err(_) => Err(PawanError::Tool(format!(
736                            "Tool '{}' timed out after {}s", tool_call.name, timeout_dur.as_secs()
737                        ))),
738                    }
739                };
740                let duration_ms = start.elapsed().as_millis() as u64;
741
742                let (result_value, success) = match result {
743                    Ok(v) => (v, true),
744                    Err(e) => {
745                        tracing::warn!(tool = tool_call.name.as_str(), error = %e, "Tool execution failed");
746                        (json!({"error": e.to_string(), "tool": tool_call.name, "hint": "Try a different approach or tool"}), false)
747                    }
748                };
749
750                // Truncate tool results that exceed max chars to prevent context bloat
751                let max_result_chars = self.config.max_result_chars;
752                let result_value = {
753                    let result_str = serde_json::to_string(&result_value).unwrap_or_default();
754                    if result_str.len() > max_result_chars {
755                        // UTF-8 safe truncation
756                        let truncated: String = result_str.chars().take(max_result_chars).collect();
757                        let truncated = truncated.as_str();
758                        serde_json::from_str(truncated).unwrap_or_else(|_| {
759                            json!({"content": format!("{}...[truncated from {} chars]", truncated, result_str.len())})
760                        })
761                    } else {
762                        result_value
763                    }
764                };
765
766
767                let record = ToolCallRecord {
768                    id: tool_call.id.clone(),
769                    name: tool_call.name.clone(),
770                    arguments: tool_call.arguments.clone(),
771                    result: result_value.clone(),
772                    success,
773                    duration_ms,
774                };
775
776                if let Some(ref callback) = on_tool {
777                    callback(&record);
778                }
779
780                all_tool_calls.push(record);
781
782                self.history.push(Message {
783                    role: Role::Tool,
784                    content: serde_json::to_string(&result_value).unwrap_or_default(),
785                    tool_calls: vec![],
786                    tool_result: Some(ToolResultMessage {
787                        tool_call_id: tool_call.id.clone(),
788                        content: result_value,
789                        success,
790                    }),
791                });
792
793                // Compile-gated confidence: after writing a .rs file, auto-run cargo check
794                // and inject the result so the model can self-correct on the same iteration
795                if success && tool_call.name == "write_file" {
796                    let wrote_rs = tool_call.arguments.get("path")
797                        .and_then(|p| p.as_str())
798                        .map(|p| p.ends_with(".rs"))
799                        .unwrap_or(false);
800                    if wrote_rs {
801                        let ws = self.workspace_root.clone();
802                        let check_result = tokio::process::Command::new("cargo")
803                            .arg("check")
804                            .arg("--message-format=short")
805                            .current_dir(&ws)
806                            .output()
807                            .await;
808                        match check_result {
809                            Ok(output) if !output.status.success() => {
810                                let stderr = String::from_utf8_lossy(&output.stderr);
811                                // Only inject first 1500 chars of errors to avoid context bloat
812                                let err_msg: String = stderr.chars().take(1500).collect();
813                                tracing::info!("Compile-gate: cargo check failed after write_file, injecting errors");
814                                self.history.push(Message {
815                                    role: Role::User,
816                                    content: format!(
817                                        "[SYSTEM] cargo check failed after your write_file. Fix the errors:\n```\n{}\n```",
818                                        err_msg
819                                    ),
820                                    tool_calls: vec![],
821                                    tool_result: None,
822                                });
823                            }
824                            Ok(_) => {
825                                tracing::debug!("Compile-gate: cargo check passed");
826                            }
827                            Err(e) => {
828                                tracing::warn!("Compile-gate: cargo check failed to run: {}", e);
829                            }
830                        }
831                    }
832                }
833            }
834        }
835    }
836
837    /// Execute a healing task with real diagnostics
838    pub async fn heal(&mut self) -> Result<AgentResponse> {
839        let healer = crate::healing::Healer::new(
840            self.workspace_root.clone(),
841            self.config.healing.clone(),
842        );
843
844        let diagnostics = healer.get_diagnostics().await?;
845        let failed_tests = healer.get_failed_tests().await?;
846
847        let mut prompt = format!(
848            "I need you to heal this Rust project at: {}
849
850",
851            self.workspace_root.display()
852        );
853
854        if !diagnostics.is_empty() {
855            prompt.push_str(&format!(
856                "## Compilation Issues ({} found)
857{}
858",
859                diagnostics.len(),
860                healer.format_diagnostics_for_prompt(&diagnostics)
861            ));
862        }
863
864        if !failed_tests.is_empty() {
865            prompt.push_str(&format!(
866                "## Failed Tests ({} found)
867{}
868",
869                failed_tests.len(),
870                healer.format_tests_for_prompt(&failed_tests)
871            ));
872        }
873
874        if diagnostics.is_empty() && failed_tests.is_empty() {
875            prompt.push_str("No issues found! Run cargo check and cargo test to verify.
876");
877        }
878
879        prompt.push_str("
880Fix each issue one at a time. Verify with cargo check after each fix.");
881
882        self.execute(&prompt).await
883    }
884    /// Execute healing with retries — calls heal(), checks for remaining errors, retries if needed
885    pub async fn heal_with_retries(&mut self, max_attempts: usize) -> Result<AgentResponse> {
886        let mut last_response = self.heal().await?;
887
888        for attempt in 1..max_attempts {
889            let fixer = crate::healing::CompilerFixer::new(self.workspace_root.clone());
890            let remaining = fixer.check().await?;
891            let errors: Vec<_> = remaining.iter().filter(|d| d.kind == crate::healing::DiagnosticKind::Error).collect();
892
893            if errors.is_empty() {
894                tracing::info!(attempts = attempt, "Healing complete");
895                return Ok(last_response);
896            }
897
898            tracing::warn!(errors = errors.len(), attempt = attempt, "Errors remain after heal attempt, retrying");
899            last_response = self.heal().await?;
900        }
901
902        tracing::info!(attempts = max_attempts, "Healing finished (may still have errors)");
903        Ok(last_response)
904    }
905    /// Execute a task with a specific prompt
906    pub async fn task(&mut self, task_description: &str) -> Result<AgentResponse> {
907        let prompt = format!(
908            r#"I need you to complete the following coding task:
909
910{}
911
912The workspace is at: {}
913
914Please:
9151. First explore the codebase to understand the relevant code
9162. Make the necessary changes
9173. Verify the changes compile with `cargo check`
9184. Run relevant tests if applicable
919
920Explain your changes as you go."#,
921            task_description,
922            self.workspace_root.display()
923        );
924
925        self.execute(&prompt).await
926    }
927
928    /// Generate a commit message for current changes
929    pub async fn generate_commit_message(&mut self) -> Result<String> {
930        let prompt = r#"Please:
9311. Run `git status` to see what files are changed
9322. Run `git diff --cached` to see staged changes (or `git diff` for unstaged)
9333. Generate a concise, descriptive commit message following conventional commits format
934
935Only output the suggested commit message, nothing else."#;
936
937        let response = self.execute(prompt).await?;
938        Ok(response.content)
939    }
940}
941
#[cfg(test)]
mod tests {
    use super::*;

    /// A user message serializes with its lowercase role and its content.
    #[test]
    fn test_message_serialization() {
        let msg = Message {
            role: Role::User,
            content: String::from("Hello"),
            tool_calls: Vec::new(),
            tool_result: None,
        };

        let serialized = serde_json::to_string(&msg).expect("Serialization failed");
        assert!(serialized.contains("user"));
        assert!(serialized.contains("Hello"));
    }

    /// A tool-call request carries its name and arguments into JSON.
    #[test]
    fn test_tool_call_request() {
        let request = ToolCallRequest {
            id: String::from("123"),
            name: String::from("read_file"),
            arguments: json!({"path": "test.txt"}),
        };

        let serialized = serde_json::to_string(&request).expect("Serialization failed");
        assert!(serialized.contains("read_file"));
        assert!(serialized.contains("test.txt"));
    }
}