rustant_core/
agent.rs

1//! Agent orchestrator implementing the Think → Act → Observe event loop.
2//!
3//! The `Agent` struct ties together the Brain, ToolRegistry, Memory, and Safety
4//! Guardian to autonomously execute tasks through LLM-powered reasoning.
5
6use crate::brain::{Brain, LlmProvider};
7use crate::config::{AgentConfig, MessagePriority};
8use crate::error::{AgentError, LlmError, RustantError, ToolError};
9use crate::explanation::{DecisionExplanation, DecisionType, ExplanationBuilder, FactorInfluence};
10use crate::memory::MemorySystem;
11use crate::safety::{
12    ActionDetails, ActionRequest, ApprovalContext, ApprovalDecision, ContractCheckResult,
13    PermissionResult, ReversibilityInfo, SafetyGuardian,
14};
15use crate::scheduler::{CronScheduler, HeartbeatManager, JobManager};
16use crate::summarizer::ContextSummarizer;
17use crate::types::{
18    AgentState, AgentStatus, CompletionResponse, Content, CostEstimate, Message, ProgressUpdate,
19    RiskLevel, Role, StreamEvent, TaskClassification, TokenUsage, ToolDefinition, ToolOutput,
20};
21use std::collections::HashMap;
22use std::sync::Arc;
23use std::time::Instant;
24use tokio::sync::{mpsc, oneshot};
25use tokio_util::sync::CancellationToken;
26use tracing::{debug, info, warn};
27use uuid::Uuid;
28
/// Returns the longest prefix of `s` holding at most `max_chars` characters.
///
/// The cut always lands on a `char` boundary, so a multi-byte UTF-8 sequence is
/// never split. A string with `max_chars` characters or fewer comes back unchanged.
fn truncate_str(s: &str, max_chars: usize) -> &str {
    // `nth(max_chars)` is the first character past the limit; its byte offset is
    // exactly where the prefix must end. No such character means nothing to trim.
    s.char_indices()
        .nth(max_chars)
        .map_or(s, |(byte_end, _)| &s[..byte_end])
}
36
37/// Messages sent to the agent loop via the handle.
38pub enum AgentMessage {
39    ProcessTask {
40        task: String,
41        reply: oneshot::Sender<TaskResult>,
42    },
43    Cancel {
44        task_id: Uuid,
45    },
46    GetStatus {
47        reply: oneshot::Sender<AgentStatus>,
48    },
49    Shutdown,
50}
51
52/// The result of a completed task.
53#[derive(Debug, Clone)]
54pub struct TaskResult {
55    pub task_id: Uuid,
56    pub success: bool,
57    pub response: String,
58    pub iterations: usize,
59    pub total_usage: TokenUsage,
60    pub total_cost: CostEstimate,
61}
62
/// How serious a budget notification is.
///
/// Passed to [`AgentCallback::on_budget_warning`] alongside the message text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BudgetSeverity {
    /// Usage is getting close to the limit but is still within it.
    Warning,
    /// Usage has gone past the limit.
    Exceeded,
}
71
/// Notification about how full the context window is, or about a compression
/// that has just taken place.
#[derive(Clone, Debug)]
pub enum ContextHealthEvent {
    /// Usage has reached the warning band (>= 70%).
    Warning {
        usage_percent: u8,
        total_tokens: usize,
        context_window: usize,
        /// What the user can do about it (e.g. "Use /compact to compress context").
        hint: String,
    },
    /// Usage has reached the critical band (>= 90%).
    Critical {
        usage_percent: u8,
        total_tokens: usize,
        context_window: usize,
        /// What the user can do about it.
        hint: String,
    },
    /// The context was compressed a moment ago.
    Compressed {
        messages_compressed: usize,
        was_llm_summarized: bool,
        pinned_preserved: usize,
    },
}
98
99/// Callback trait for user interaction (approval, display).
100#[async_trait::async_trait]
101pub trait AgentCallback: Send + Sync {
102    /// Display a message from the assistant to the user.
103    async fn on_assistant_message(&self, message: &str);
104
105    /// Display a streaming token from the assistant.
106    async fn on_token(&self, token: &str);
107
108    /// Request approval for an action. Returns the user's decision.
109    async fn request_approval(&self, action: &ActionRequest) -> ApprovalDecision;
110
111    /// Notify about a tool execution.
112    async fn on_tool_start(&self, tool_name: &str, args: &serde_json::Value);
113
114    /// Notify about a tool result.
115    async fn on_tool_result(&self, tool_name: &str, output: &ToolOutput, duration_ms: u64);
116
117    /// Notify about agent status changes.
118    async fn on_status_change(&self, status: AgentStatus);
119
120    /// Notify about token usage and cost after each LLM call.
121    async fn on_usage_update(&self, usage: &TokenUsage, cost: &CostEstimate);
122
123    /// Notify about a decision explanation for a tool selection.
124    async fn on_decision_explanation(&self, explanation: &DecisionExplanation);
125
126    /// Notify about a budget warning or exceeded condition.
127    /// Default is a no-op for backward compatibility.
128    async fn on_budget_warning(&self, _message: &str, _severity: BudgetSeverity) {}
129
130    /// Notify about progress during tool execution (streaming output, file operations, etc.).
131    /// Default is a no-op for backward compatibility.
132    async fn on_progress(&self, _progress: &ProgressUpdate) {}
133
134    /// Request clarification from the user. Returns the user's answer.
135    /// Called when the agent needs more information to proceed.
136    /// Default returns empty string for backward compatibility.
137    async fn on_clarification_request(&self, _question: &str) -> String {
138        String::new()
139    }
140
141    /// Called at the start of each ReAct loop iteration with the current iteration
142    /// number and the configured maximum. Used by the TUI sidebar to show live progress.
143    /// Default is a no-op for backward compatibility.
144    async fn on_iteration_start(&self, _iteration: usize, _max_iterations: usize) {}
145
146    /// Called before an LLM call with estimated token count and cost.
147    /// Only called when estimated cost exceeds $0.05 to avoid noise.
148    /// Default is a no-op for backward compatibility.
149    async fn on_cost_prediction(&self, _estimated_tokens: usize, _estimated_cost: f64) {}
150
151    /// Notify about context window health changes (warnings, compression events).
152    /// Default is a no-op for backward compatibility.
153    async fn on_context_health(&self, _event: &ContextHealthEvent) {}
154
155    /// A channel digest has been generated and is ready for review.
156    /// Called when the digest system completes a summary for the configured period.
157    /// Default is a no-op for backward compatibility.
158    async fn on_channel_digest(&self, _digest: &serde_json::Value) {}
159
160    /// A message on a channel needs immediate user attention (escalation).
161    ///
162    /// Called when the intelligence layer classifies a message at or above the
163    /// escalation threshold. Uses `&str` parameters rather than `ClassifiedMessage`
164    /// to keep the callback trait decoupled from the classification system — callers
165    /// can format the alert data however they choose.
166    ///
167    /// Default is a no-op for backward compatibility.
168    async fn on_channel_alert(&self, _channel: &str, _sender: &str, _summary: &str) {}
169
170    /// A scheduled follow-up reminder has been triggered.
171    /// Called when a cron-scheduled reminder fires for a previously classified
172    /// message that requires follow-up.
173    /// Default is a no-op for backward compatibility.
174    async fn on_reminder(&self, _reminder: &serde_json::Value) {}
175
176    // --- Plan mode callbacks ---
177
178    /// Called when plan generation starts.
179    /// Default is a no-op for backward compatibility.
180    async fn on_plan_generating(&self, _goal: &str) {}
181
182    /// Called when a plan is ready for user review.
183    /// Returns the user's decision on the plan.
184    /// Default auto-approves for backward compatibility.
185    async fn on_plan_review(
186        &self,
187        _plan: &crate::plan::ExecutionPlan,
188    ) -> crate::plan::PlanDecision {
189        crate::plan::PlanDecision::Approve
190    }
191
192    /// Called when a plan step starts executing.
193    /// Default is a no-op for backward compatibility.
194    async fn on_plan_step_start(&self, _step_index: usize, _step: &crate::plan::PlanStep) {}
195
196    /// Called when a plan step finishes (success or failure).
197    /// Default is a no-op for backward compatibility.
198    async fn on_plan_step_complete(&self, _step_index: usize, _step: &crate::plan::PlanStep) {}
199}
200
201/// A tool executor function type. The agent holds tool executors and their definitions.
202pub type ToolExecutor = Box<
203    dyn Fn(
204            serde_json::Value,
205        ) -> std::pin::Pin<
206            Box<dyn std::future::Future<Output = Result<ToolOutput, ToolError>> + Send>,
207        > + Send
208        + Sync,
209>;
210
211/// A registered tool with its definition and executor.
212pub struct RegisteredTool {
213    pub definition: ToolDefinition,
214    pub risk_level: RiskLevel,
215    pub executor: ToolExecutor,
216}
217
218/// The Agent orchestrator running the Think → Act → Observe loop.
219pub struct Agent {
220    brain: Brain,
221    memory: MemorySystem,
222    safety: SafetyGuardian,
223    tools: HashMap<String, RegisteredTool>,
224    state: AgentState,
225    #[allow(dead_code)]
226    config: AgentConfig,
227    cancellation: CancellationToken,
228    callback: Arc<dyn AgentCallback>,
229    /// LLM-based context summarizer for intelligent compression.
230    summarizer: ContextSummarizer,
231    /// Token budget manager for cost control.
232    budget: crate::brain::TokenBudgetManager,
233    /// Cross-session knowledge distiller for learning from corrections/facts.
234    knowledge: crate::memory::KnowledgeDistiller,
235    /// Per-tool token usage tracking for budget breakdown.
236    tool_token_usage: HashMap<String, usize>,
237    /// Optional cron scheduler for time-based task triggers.
238    cron_scheduler: Option<CronScheduler>,
239    /// Optional heartbeat manager for periodic task triggers.
240    heartbeat_manager: Option<HeartbeatManager>,
241    /// Background job manager for long-running tasks.
242    job_manager: JobManager,
243    /// Consecutive failure tracker: (tool_name, failure_count).
244    /// Resets when a different tool succeeds or a different tool is called.
245    consecutive_failures: (String, usize),
246    /// Recent decision explanations for transparency (capped at 50).
247    recent_explanations: Vec<DecisionExplanation>,
248    /// Whether plan mode is active (generate plan before executing).
249    plan_mode: bool,
250    /// The current plan being generated, reviewed, or executed.
251    current_plan: Option<crate::plan::ExecutionPlan>,
252}
253
254impl Agent {
255    pub fn new(
256        provider: Arc<dyn LlmProvider>,
257        config: AgentConfig,
258        callback: Arc<dyn AgentCallback>,
259    ) -> Self {
260        let summarizer = ContextSummarizer::new(Arc::clone(&provider));
261        let brain = Brain::new(provider, crate::brain::DEFAULT_SYSTEM_PROMPT);
262        let memory = MemorySystem::new(config.memory.window_size);
263        let safety = SafetyGuardian::new(config.safety.clone());
264        let max_iter = config.safety.max_iterations;
265        let budget = crate::brain::TokenBudgetManager::new(config.budget.as_ref());
266        let knowledge = crate::memory::KnowledgeDistiller::new(config.knowledge.as_ref());
267
268        let cron_scheduler = config.scheduler.as_ref().and_then(|sc| {
269            if sc.enabled {
270                let mut scheduler = CronScheduler::new();
271                for job_config in &sc.cron_jobs {
272                    if let Err(e) = scheduler.add_job(job_config.clone()) {
273                        warn!("Failed to add cron job '{}': {}", job_config.name, e);
274                    }
275                }
276                Some(scheduler)
277            } else {
278                None
279            }
280        });
281        let heartbeat_manager = config.scheduler.as_ref().and_then(|sc| {
282            sc.heartbeat
283                .as_ref()
284                .map(|hb| HeartbeatManager::new(hb.clone()))
285        });
286        let max_bg_jobs = config
287            .scheduler
288            .as_ref()
289            .map(|sc| sc.max_background_jobs)
290            .unwrap_or(10);
291        let job_manager = JobManager::new(max_bg_jobs);
292        let plan_mode_enabled = config.plan.as_ref().map(|p| p.enabled).unwrap_or(false);
293
294        Self {
295            brain,
296            memory,
297            safety,
298            tools: HashMap::new(),
299            state: AgentState::new(max_iter),
300            config,
301            cancellation: CancellationToken::new(),
302            callback,
303            summarizer,
304            budget,
305            knowledge,
306            tool_token_usage: HashMap::new(),
307            cron_scheduler,
308            heartbeat_manager,
309            job_manager,
310            consecutive_failures: (String::new(), 0),
311            recent_explanations: Vec::new(),
312            plan_mode: plan_mode_enabled,
313            current_plan: None,
314        }
315    }
316
317    /// Register a tool with the agent.
318    pub fn register_tool(&mut self, tool: RegisteredTool) {
319        self.tools.insert(tool.definition.name.clone(), tool);
320    }
321
322    /// Get tool definitions for the LLM.
323    pub fn tool_definitions(&self) -> Vec<ToolDefinition> {
324        let mut defs: Vec<ToolDefinition> =
325            self.tools.values().map(|t| t.definition.clone()).collect();
326
327        // Add the ask_user pseudo-tool so the LLM knows it can ask clarifying questions.
328        defs.push(ToolDefinition {
329            name: "ask_user".to_string(),
330            description: "Ask the user a clarifying question when you need more information to proceed. Use this when the task is ambiguous or you need to confirm something before taking action.".to_string(),
331            parameters: serde_json::json!({
332                "type": "object",
333                "properties": {
334                    "question": {
335                        "type": "string",
336                        "description": "The question to ask the user"
337                    }
338                },
339                "required": ["question"]
340            }),
341        });
342
343        defs
344    }
345
346    /// Process a user task through the agent loop.
347    pub async fn process_task(&mut self, task: &str) -> Result<TaskResult, RustantError> {
348        // Plan mode: generate and review plan before executing
349        if self.plan_mode {
350            return self.process_task_with_plan(task).await;
351        }
352
353        let task_id = Uuid::new_v4();
354        info!(task_id = %task_id, task = task, "Starting task processing");
355
356        self.state.start_task(task);
357        self.state.task_id = Some(task_id);
358        self.memory.start_new_task(task);
359        self.budget.reset_task();
360        self.tool_token_usage.clear();
361
362        // Run knowledge distillation from long-term memory and inject into brain
363        self.knowledge.distill(&self.memory.long_term);
364        let mut knowledge_addendum = self.knowledge.rules_for_prompt();
365
366        // Inject a tool-routing hint based on the cached task classification.
367        // Appended to the knowledge addendum (system prompt) instead of persisted
368        // in memory, so it never gets displaced by compression and can't end up
369        // between tool_call and tool_result messages.
370        if let Some(ref classification) = self.state.task_classification {
371            if let Some(hint) = Self::tool_routing_hint_from_classification(classification) {
372                knowledge_addendum.push_str("\n\n");
373                knowledge_addendum.push_str(&hint);
374            }
375        }
376        self.brain.set_knowledge_addendum(knowledge_addendum);
377
378        self.memory.add_message(Message::user(task));
379
380        self.callback.on_status_change(AgentStatus::Thinking).await;
381
382        let mut final_response = String::new();
383
384        loop {
385            // Check cancellation
386            if self.cancellation.is_cancelled() {
387                self.state.set_error();
388                return Err(RustantError::Agent(AgentError::Cancelled));
389            }
390
391            // Check iteration limit
392            if !self.state.increment_iteration() {
393                warn!(
394                    task_id = %task_id,
395                    iterations = self.state.iteration,
396                    "Maximum iterations reached"
397                );
398                self.state.set_error();
399                return Err(RustantError::Agent(AgentError::MaxIterationsReached {
400                    max: self.state.max_iterations,
401                }));
402            }
403
404            debug!(
405                task_id = %task_id,
406                iteration = self.state.iteration,
407                "Agent loop iteration"
408            );
409
410            // Notify about iteration progress for live UI updates
411            self.callback
412                .on_iteration_start(self.state.iteration, self.state.max_iterations)
413                .await;
414
415            // --- THINK ---
416            self.state.status = AgentStatus::Thinking;
417            self.callback.on_status_change(AgentStatus::Thinking).await;
418
419            let conversation = self.memory.context_messages();
420            let tools = Some(self.tool_definitions());
421
422            // Context health check before LLM call
423            {
424                let context_window = self.brain.provider().context_window();
425                let breakdown = self.memory.context_breakdown(context_window);
426                let usage_percent = (breakdown.usage_ratio() * 100.0) as u8;
427                if usage_percent >= 90 {
428                    self.callback
429                        .on_context_health(&ContextHealthEvent::Critical {
430                            usage_percent,
431                            total_tokens: breakdown.total_tokens,
432                            context_window: breakdown.context_window,
433                            hint: "Context nearly full — auto-compression imminent. Use /pin to protect important messages.".to_string(),
434                        })
435                        .await;
436                } else if usage_percent >= 70 {
437                    self.callback
438                        .on_context_health(&ContextHealthEvent::Warning {
439                            usage_percent,
440                            total_tokens: breakdown.total_tokens,
441                            context_window: breakdown.context_window,
442                            hint: "Context filling up. Use /compact to compress now, or /pin to protect key messages.".to_string(),
443                        })
444                        .await;
445                }
446            }
447
448            // Pre-call budget check
449            let estimated_tokens = self.brain.estimate_tokens(&conversation);
450            let (input_rate, output_rate) = self.brain.provider_cost_rates();
451            let budget_result = self
452                .budget
453                .check_budget(estimated_tokens, input_rate, output_rate);
454            match &budget_result {
455                crate::brain::BudgetCheckResult::Exceeded { message } => {
456                    let top = self.top_tool_consumers(3);
457                    let enriched = if top.is_empty() {
458                        message.clone()
459                    } else {
460                        format!("{}. Top consumers: {}", message, top)
461                    };
462                    self.callback
463                        .on_budget_warning(&enriched, BudgetSeverity::Exceeded)
464                        .await;
465                    if self.budget.should_halt_on_exceed() {
466                        warn!("Budget exceeded, halting: {}", enriched);
467                        return Err(RustantError::Agent(AgentError::BudgetExceeded {
468                            message: enriched,
469                        }));
470                    }
471                    warn!("Budget warning (soft limit): {}", enriched);
472                }
473                crate::brain::BudgetCheckResult::Warning { message, .. } => {
474                    let top = self.top_tool_consumers(3);
475                    let enriched = if top.is_empty() {
476                        message.clone()
477                    } else {
478                        format!("{}. Top consumers: {}", message, top)
479                    };
480                    self.callback
481                        .on_budget_warning(&enriched, BudgetSeverity::Warning)
482                        .await;
483                    debug!("Budget warning: {}", enriched);
484                }
485                crate::brain::BudgetCheckResult::Ok => {}
486            }
487
488            // Cost prediction before LLM call
489            {
490                let est_tokens = estimated_tokens + 500; // +500 for expected response
491                let est_cost = est_tokens as f64 * input_rate;
492                if est_cost > 0.05 {
493                    self.callback.on_cost_prediction(est_tokens, est_cost).await;
494                }
495            }
496
497            let response = if self.config.llm.use_streaming {
498                self.think_streaming(&conversation, tools).await?
499            } else {
500                self.brain.think_with_retry(&conversation, tools, 3).await?
501            };
502
503            // Record usage in budget manager and emit live update
504            self.budget.record_usage(
505                &response.usage,
506                &CostEstimate {
507                    input_cost: response.usage.input_tokens as f64 * input_rate,
508                    output_cost: response.usage.output_tokens as f64 * output_rate,
509                },
510            );
511            self.callback
512                .on_usage_update(self.brain.total_usage(), self.brain.total_cost())
513                .await;
514
515            // --- DECIDE ---
516            self.state.status = AgentStatus::Deciding;
517            match &response.message.content {
518                Content::Text { text } => {
519                    // LLM produced a text response — task may be complete
520                    info!(task_id = %task_id, "Agent produced text response");
521                    self.callback.on_assistant_message(text).await;
522                    self.memory.add_message(response.message.clone());
523                    final_response = text.clone();
524                    // Text response means the agent is done thinking
525                    break;
526                }
527                Content::ToolCall {
528                    id,
529                    name,
530                    arguments,
531                } => {
532                    // LLM wants to call a tool
533                    info!(
534                        task_id = %task_id,
535                        tool = name,
536                        "Agent requesting tool execution"
537                    );
538                    self.memory.add_message(response.message.clone());
539
540                    // Build and emit decision explanation
541                    let explanation = self.build_decision_explanation(name, arguments);
542                    self.callback.on_decision_explanation(&explanation).await;
543                    self.record_explanation(explanation);
544
545                    // --- Tool call auto-correction ---
546                    // When the LLM calls a generic tool (document_read, file_read,
547                    // shell_exec) but the task clearly maps to a specific macOS tool,
548                    // reroute immediately. This is necessary because some LLMs
549                    // (notably gpt-4o) stubbornly call the wrong tool even with
550                    // explicit system prompt instructions.
551                    let (actual_name, actual_args) = if let Some((corrected_name, corrected_args)) =
552                        Self::auto_correct_tool_call(name, arguments, &self.state)
553                    {
554                        if corrected_name != *name {
555                            info!(
556                                original_tool = name,
557                                corrected_tool = corrected_name,
558                                "Auto-routing to correct macOS tool"
559                            );
560                            self.callback
561                                .on_assistant_message(&format!(
562                                    "[Routed: {} → {}]",
563                                    name, corrected_name
564                                ))
565                                .await;
566                            (corrected_name, corrected_args)
567                        } else {
568                            (name.to_string(), arguments.clone())
569                        }
570                    } else {
571                        (name.to_string(), arguments.clone())
572                    };
573
574                    // --- ACT ---
575                    let result = self.execute_tool(id, &actual_name, &actual_args).await;
576                    if let Err(ref e) = result {
577                        debug!(tool = %actual_name, error = %e, "Tool execution failed");
578                    }
579
580                    // --- OBSERVE ---
581                    let result_tokens = match &result {
582                        Ok(output) => {
583                            let result_msg = Message::tool_result(id, &output.content, false);
584                            let tokens = output.content.len() / 4; // rough estimate
585                            self.memory.add_message(result_msg);
586                            tokens
587                        }
588                        Err(e) => {
589                            let error_msg = format!("Tool error: {}", e);
590                            let tokens = error_msg.len() / 4;
591                            let result_msg = Message::tool_result(id, &error_msg, true);
592                            self.memory.add_message(result_msg);
593                            tokens
594                        }
595                    };
596                    *self.tool_token_usage.entry(name.to_string()).or_insert(0) += result_tokens;
597
598                    // Track consecutive failures for circuit breaker
599                    if result.is_err() {
600                        if self.consecutive_failures.0 == *name {
601                            self.consecutive_failures.1 += 1;
602                        } else {
603                            self.consecutive_failures = (name.to_string(), 1);
604                        }
605                    } else {
606                        self.consecutive_failures = (String::new(), 0);
607                    }
608
609                    // Check context compression
610                    self.check_and_compress().await;
611
612                    // Continue loop — agent needs to observe and think again
613                }
614                Content::MultiPart { parts } => {
615                    // Handle multi-part responses (text + tool calls)
616                    self.memory.add_message(response.message.clone());
617
618                    let mut has_tool_call = false;
619                    for part in parts {
620                        match part {
621                            Content::Text { text } => {
622                                self.callback.on_assistant_message(text).await;
623                                final_response = text.clone();
624                            }
625                            Content::ToolCall {
626                                id,
627                                name,
628                                arguments,
629                            } => {
630                                has_tool_call = true;
631
632                                // Build and emit decision explanation (same as single ToolCall path)
633                                let explanation = self.build_decision_explanation(name, arguments);
634                                self.callback.on_decision_explanation(&explanation).await;
635                                self.record_explanation(explanation);
636
637                                // Auto-routing (same as single ToolCall path)
638                                let (actual_name, actual_args) = if let Some((cn, ca)) =
639                                    Self::auto_correct_tool_call(name, arguments, &self.state)
640                                {
641                                    if cn != *name {
642                                        info!(
643                                            original_tool = name,
644                                            corrected_tool = cn,
645                                            "Auto-routing to correct macOS tool (multipart)"
646                                        );
647                                        self.callback
648                                            .on_assistant_message(&format!(
649                                                "[Routed: {} → {}]",
650                                                name, cn
651                                            ))
652                                            .await;
653                                        (cn, ca)
654                                    } else {
655                                        (name.to_string(), arguments.clone())
656                                    }
657                                } else {
658                                    (name.to_string(), arguments.clone())
659                                };
660
661                                let result =
662                                    self.execute_tool(id, &actual_name, &actual_args).await;
663                                let result_tokens = match &result {
664                                    Ok(output) => {
665                                        let msg = Message::tool_result(id, &output.content, false);
666                                        let tokens = output.content.len() / 4;
667                                        self.memory.add_message(msg);
668                                        tokens
669                                    }
670                                    Err(e) => {
671                                        let error_msg = format!("Tool error: {}", e);
672                                        let tokens = error_msg.len() / 4;
673                                        let msg = Message::tool_result(id, &error_msg, true);
674                                        self.memory.add_message(msg);
675                                        tokens
676                                    }
677                                };
678
679                                // Track failures and token usage
680                                if result.is_err() {
681                                    if self.consecutive_failures.0 == *name {
682                                        self.consecutive_failures.1 += 1;
683                                    } else {
684                                        self.consecutive_failures = (name.to_string(), 1);
685                                    }
686                                } else {
687                                    self.consecutive_failures = (String::new(), 0);
688                                }
689                                *self.tool_token_usage.entry(name.to_string()).or_insert(0) +=
690                                    result_tokens;
691                            }
692                            _ => {}
693                        }
694                    }
695
696                    if !has_tool_call {
697                        break; // Only text, we're done
698                    }
699
700                    // Check context compression after multipart tool calls
701                    self.check_and_compress().await;
702
703                    // Continue loop — agent needs to observe and think again
704                }
705                Content::ToolResult { .. } => {
706                    // Shouldn't happen from LLM directly, but handle gracefully
707                    warn!("Received unexpected ToolResult from LLM");
708                    break;
709                }
710            }
711        }
712
713        self.state.complete();
714        self.callback.on_status_change(AgentStatus::Complete).await;
715
716        info!(
717            task_id = %task_id,
718            iterations = self.state.iteration,
719            total_tokens = self.brain.total_usage().total(),
720            total_cost = format!("${:.4}", self.brain.total_cost().total()),
721            "Task completed"
722        );
723
724        Ok(TaskResult {
725            task_id,
726            success: true,
727            response: final_response,
728            iterations: self.state.iteration,
729            total_usage: *self.brain.total_usage(),
730            total_cost: *self.brain.total_cost(),
731        })
732    }
733
734    /// Perform a streaming think operation, sending tokens to the callback as they arrive.
735    /// Returns a CompletionResponse equivalent to the non-streaming path.
736    /// Includes retry logic with exponential backoff for transient errors
737    /// (rate limits, timeouts, connection failures).
738    async fn think_streaming(
739        &mut self,
740        conversation: &[Message],
741        tools: Option<Vec<ToolDefinition>>,
742    ) -> Result<CompletionResponse, LlmError> {
743        const MAX_RETRIES: usize = 3;
744        let mut last_error: Option<LlmError> = None;
745
746        for attempt in 0..=MAX_RETRIES {
747            match self.think_streaming_once(conversation, tools.clone()).await {
748                Ok(response) => return Ok(response),
749                Err(e) if Self::is_streaming_retryable(&e) => {
750                    if attempt < MAX_RETRIES {
751                        let backoff_secs = std::cmp::min(1u64 << attempt, 32);
752                        let wait = match &e {
753                            LlmError::RateLimited { retry_after_secs } => {
754                                std::cmp::max(*retry_after_secs, backoff_secs)
755                            }
756                            _ => backoff_secs,
757                        };
758                        info!(
759                            attempt = attempt + 1,
760                            max_retries = MAX_RETRIES,
761                            backoff_secs = wait,
762                            error = %e,
763                            "Retrying streaming after transient error"
764                        );
765                        self.callback
766                            .on_token(&format!("\n[Retrying in {}s due to: {}]\n", wait, e))
767                            .await;
768                        tokio::time::sleep(std::time::Duration::from_secs(wait)).await;
769                        last_error = Some(e);
770                    } else {
771                        return Err(e);
772                    }
773                }
774                Err(e) => return Err(e),
775            }
776        }
777
778        Err(last_error.unwrap_or(LlmError::Connection {
779            message: "Max streaming retries exceeded".to_string(),
780        }))
781    }
782
783    /// Check if a streaming error is transient and should be retried.
784    fn is_streaming_retryable(error: &LlmError) -> bool {
785        if Brain::is_retryable(error) {
786            return true;
787        }
788        // Streaming errors may wrap retryable conditions as strings
789        if let LlmError::Streaming { message } = error {
790            let msg = message.to_lowercase();
791            return msg.contains("rate limit")
792                || msg.contains("429")
793                || msg.contains("timeout")
794                || msg.contains("timed out")
795                || msg.contains("connection")
796                || msg.contains("temporarily unavailable")
797                || msg.contains("503")
798                || msg.contains("502");
799        }
800        false
801    }
802
803    /// Single attempt at streaming think — extracted for retry wrapping.
804    async fn think_streaming_once(
805        &mut self,
806        conversation: &[Message],
807        tools: Option<Vec<ToolDefinition>>,
808    ) -> Result<CompletionResponse, LlmError> {
809        let (tx, mut rx) = mpsc::channel(64);
810
811        // Build messages and request manually to avoid double borrow
812        let messages = self.brain.build_messages(conversation);
813        let token_estimate = self.brain.provider().estimate_tokens(&messages);
814        let context_limit = self.brain.provider().context_window();
815
816        if token_estimate > context_limit {
817            return Err(LlmError::ContextOverflow {
818                used: token_estimate,
819                limit: context_limit,
820            });
821        }
822
823        let request = crate::types::CompletionRequest {
824            messages,
825            tools,
826            temperature: 0.7,
827            max_tokens: None,
828            stop_sequences: Vec::new(),
829            model: None,
830        };
831
832        // Run the streaming completion in a background task so the producer
833        // (complete_streaming) and consumer (rx.recv loop) run concurrently.
834        // Without this, awaiting complete_streaming drops the tx sender before
835        // the consumer reads any events, resulting in empty text.
836        let provider = self.brain.provider_arc();
837        let producer = tokio::spawn(async move { provider.complete_streaming(request, tx).await });
838
839        // Consume events from the channel concurrently with the producer
840        let mut text_parts = String::new();
841        let mut usage = TokenUsage::default();
842        // Track streaming tool calls: id -> (name, accumulated_arguments)
843        let mut tool_calls: std::collections::HashMap<String, (String, String)> =
844            std::collections::HashMap::new();
845        let mut tool_call_order: Vec<String> = Vec::new(); // preserve order
846                                                           // Raw provider-specific function call data (e.g., Gemini thought_signature)
847        let mut raw_function_calls: std::collections::HashMap<String, serde_json::Value> =
848            std::collections::HashMap::new();
849
850        while let Some(event) = rx.recv().await {
851            match event {
852                StreamEvent::Token(token) => {
853                    self.callback.on_token(&token).await;
854                    text_parts.push_str(&token);
855                }
856                StreamEvent::ToolCallStart {
857                    id,
858                    name,
859                    raw_function_call,
860                } => {
861                    tool_call_order.push(id.clone());
862                    tool_calls.insert(id.clone(), (name, String::new()));
863                    if let Some(raw_fc) = raw_function_call {
864                        raw_function_calls.insert(id, raw_fc);
865                    }
866                }
867                StreamEvent::ToolCallDelta {
868                    id,
869                    arguments_delta,
870                } => {
871                    if let Some((_, ref mut args)) = tool_calls.get_mut(&id) {
872                        args.push_str(&arguments_delta);
873                    }
874                }
875                StreamEvent::ToolCallEnd { id: _ } => {
876                    // Tool call complete — arguments are now fully accumulated
877                }
878                StreamEvent::Done { usage: u } => {
879                    usage = u;
880                    break;
881                }
882                StreamEvent::Error(e) => {
883                    return Err(LlmError::Streaming { message: e });
884                }
885            }
886        }
887
888        // Wait for the producer to finish and propagate errors
889        producer.await.map_err(|e| LlmError::Streaming {
890            message: format!("Streaming task panicked: {}", e),
891        })??;
892
893        // Track usage in brain
894        self.brain.track_usage(&usage);
895
896        // Build raw provider-specific parts (e.g., Gemini thought_signature) BEFORE
897        // consuming text_parts, since we need to reference it for the raw parts array.
898        let raw_parts_metadata = if !raw_function_calls.is_empty() {
899            let mut raw_parts = Vec::new();
900            if !text_parts.is_empty() {
901                raw_parts.push(serde_json::json!({"text": &text_parts}));
902            }
903            for id in &tool_call_order {
904                if let Some(raw_fc) = raw_function_calls.get(id) {
905                    raw_parts.push(raw_fc.clone());
906                }
907            }
908            Some(serde_json::Value::Array(raw_parts))
909        } else {
910            None
911        };
912
913        // Build the response content based on what was streamed
914        let content = if !tool_call_order.is_empty() {
915            // Use the first tool call (single tool call is most common)
916            let first_id = &tool_call_order[0];
917            if let Some((name, args_str)) = tool_calls.get(first_id) {
918                let arguments: serde_json::Value =
919                    serde_json::from_str(args_str).unwrap_or(serde_json::json!({}));
920                if text_parts.is_empty() {
921                    Content::tool_call(first_id, name, arguments)
922                } else {
923                    Content::MultiPart {
924                        parts: vec![
925                            Content::text(&text_parts),
926                            Content::tool_call(first_id, name, arguments),
927                        ],
928                    }
929                }
930            } else {
931                Content::text(text_parts)
932            }
933        } else {
934            Content::text(text_parts)
935        };
936        let finish_reason = if tool_call_order.is_empty() {
937            "stop"
938        } else {
939            "tool_calls"
940        };
941
942        let mut message = Message::new(Role::Assistant, content);
943
944        // Attach raw provider-specific function call data (e.g., Gemini thought_signature)
945        // so the provider can echo it back in subsequent requests.
946        if let Some(raw_parts) = raw_parts_metadata {
947            message = message.with_metadata("gemini_raw_parts", raw_parts);
948        }
949
950        Ok(CompletionResponse {
951            message,
952            usage,
953            model: self.brain.model_name().to_string(),
954            finish_reason: Some(finish_reason.to_string()),
955        })
956    }
957
    /// Execute a tool with safety checks.
    ///
    /// Steps, in order:
    /// 1. The `ask_user` pseudo-tool is answered directly through the clarification
    ///    callback and returns before any lookup or safety check.
    /// 2. The tool is looked up in the registry.
    /// 3. An action request with rich approval context is built and passed to the
    ///    safety guardian's permission check; when approval is required, the callback is
    ///    asked for a decision.
    /// 4. The safety contract pre-check runs.
    /// 5. The tool's executor is invoked with a clone of `arguments`, timed, and the
    ///    outcome is logged, reported to the callback and (for moderately sized
    ///    successful output) stored as a long-term fact.
    ///
    /// `_call_id` is accepted but not used in this method.
    ///
    /// # Errors
    /// - `ToolError::NotFound` when `tool_name` is not registered.
    /// - `ToolError::PermissionDenied` when the permission check denies the action, the
    ///   user rejects it, or the contract pre-check is not satisfied.
    /// - Any error returned by the tool's executor (returned unchanged).
    async fn execute_tool(
        &mut self,
        _call_id: &str,
        tool_name: &str,
        arguments: &serde_json::Value,
    ) -> Result<ToolOutput, ToolError> {
        // Handle ask_user pseudo-tool before regular tool lookup.
        // This bypasses safety checks since it's read-only user interaction.
        if tool_name == "ask_user" {
            self.state.status = AgentStatus::WaitingForClarification;
            self.callback
                .on_status_change(AgentStatus::WaitingForClarification)
                .await;
            // Fall back to a generic prompt when no string "question" argument is given.
            let question = arguments
                .get("question")
                .and_then(|v| v.as_str())
                .unwrap_or("Can you provide more details?");
            let answer = self.callback.on_clarification_request(question).await;
            // Restore the executing status before handing the answer back as tool output.
            self.state.status = AgentStatus::Executing;
            self.callback.on_status_change(AgentStatus::Executing).await;
            return Ok(ToolOutput::text(answer));
        }

        // Look up the tool
        let tool = self
            .tools
            .get(tool_name)
            .ok_or_else(|| ToolError::NotFound {
                name: tool_name.to_string(),
            })?;

        // Build rich approval context from action details
        let details = Self::parse_action_details(tool_name, arguments);
        let approval_context = Self::build_approval_context(tool_name, &details, tool.risk_level);

        // Build action request with rich context
        let action = SafetyGuardian::create_rich_action_request(
            tool_name,
            tool.risk_level,
            format!("Execute tool: {}", tool_name),
            details,
            approval_context,
        );

        // Check permissions
        let perm = self.safety.check_permission(&action);
        match perm {
            PermissionResult::Allowed => {
                // Proceed
            }
            PermissionResult::Denied { reason } => {
                // Emit explanation for safety denial decision
                let mut builder = ExplanationBuilder::new(DecisionType::ErrorRecovery {
                    error: format!("Permission denied for tool '{}'", tool_name),
                    strategy: "Returning error to LLM for re-planning".to_string(),
                });
                builder.add_reasoning_step(format!("Denied: {}", reason), None);
                builder.set_confidence(1.0);
                let explanation = builder.build();
                self.callback.on_decision_explanation(&explanation).await;
                self.record_explanation(explanation);

                return Err(ToolError::PermissionDenied {
                    name: tool_name.to_string(),
                    reason,
                });
            }
            PermissionResult::RequiresApproval { context: _ } => {
                self.state.status = AgentStatus::WaitingForApproval;
                self.callback
                    .on_status_change(AgentStatus::WaitingForApproval)
                    .await;

                let decision = self.callback.request_approval(&action).await;
                // Anything other than an explicit Deny is logged as approved.
                let approved = decision != ApprovalDecision::Deny;
                self.safety.log_approval_decision(tool_name, approved);

                match decision {
                    ApprovalDecision::Approve => {
                        // Single approval, proceed
                    }
                    ApprovalDecision::ApproveAllSimilar => {
                        // Add to session allowlist for future auto-approval
                        self.safety
                            .add_session_allowlist(tool_name.to_string(), tool.risk_level);
                        info!(
                            tool = tool_name,
                            risk = %tool.risk_level,
                            "Added tool to session allowlist (approve all similar)"
                        );
                    }
                    ApprovalDecision::Deny => {
                        // Emit explanation for user denial decision
                        let mut builder = ExplanationBuilder::new(DecisionType::ErrorRecovery {
                            error: format!("User denied approval for tool '{}'", tool_name),
                            strategy: "Returning error to LLM for re-planning".to_string(),
                        });
                        builder.add_reasoning_step(
                            "User rejected the action in approval dialog".to_string(),
                            None,
                        );
                        builder.set_confidence(1.0);
                        let explanation = builder.build();
                        self.callback.on_decision_explanation(&explanation).await;
                        self.record_explanation(explanation);

                        // Record correction for cross-session learning:
                        // the agent's proposed action was rejected by the user.
                        // The serialized arguments are cut to their first 200 characters.
                        self.memory.long_term.add_correction(
                            format!(
                                "Attempted tool '{}' with args: {}",
                                tool_name,
                                arguments.to_string().chars().take(200).collect::<String>()
                            ),
                            "User denied this action".to_string(),
                            format!(
                                "Tool '{}' denied by user; goal: {:?}",
                                tool_name, self.memory.working.current_goal
                            ),
                        );

                        return Err(ToolError::PermissionDenied {
                            name: tool_name.to_string(),
                            reason: "User rejected the action".to_string(),
                        });
                    }
                }
            }
        }

        // Check safety contract pre-conditions
        // (the tool is looked up again here rather than reusing the earlier `tool` borrow)
        let tool_entry = self
            .tools
            .get(tool_name)
            .ok_or_else(|| ToolError::NotFound {
                name: tool_name.to_string(),
            })?;
        let risk_level = tool_entry.risk_level;
        let contract_result = self
            .safety
            .contract_enforcer_mut()
            .check_pre(tool_name, risk_level, arguments);
        if contract_result != ContractCheckResult::Satisfied {
            warn!(
                tool = tool_name,
                result = ?contract_result,
                "Safety contract violation (pre-check)"
            );

            // Emit explanation for contract violation
            let mut builder = ExplanationBuilder::new(DecisionType::ErrorRecovery {
                error: format!("Contract violation: {:?}", contract_result),
                strategy: "Returning error to LLM for re-planning".to_string(),
            });
            builder.set_confidence(1.0);
            let explanation = builder.build();
            self.callback.on_decision_explanation(&explanation).await;
            self.record_explanation(explanation);

            // A contract violation is surfaced to the caller as a permission denial.
            return Err(ToolError::PermissionDenied {
                name: tool_name.to_string(),
                reason: format!("Safety contract violation: {:?}", contract_result),
            });
        }

        // Execute the tool
        self.state.status = AgentStatus::Executing;
        self.callback.on_status_change(AgentStatus::Executing).await;
        self.callback.on_tool_start(tool_name, arguments).await;

        // Timer covers only the executor call below.
        let start = Instant::now();

        // Re-fetch the executor (borrow checker requires separate borrow from the one above)
        let executor = &self
            .tools
            .get(tool_name)
            .ok_or_else(|| ToolError::NotFound {
                name: tool_name.to_string(),
            })?
            .executor;
        let result = (executor)(arguments.clone()).await;
        let duration_ms = start.elapsed().as_millis() as u64;

        // Record execution in contract enforcer (done for both success and failure).
        // NOTE(review): the second argument is a constant 0.0 — presumably a cost figure
        // that is not tracked here; confirm against the enforcer's API.
        self.safety
            .contract_enforcer_mut()
            .record_execution(risk_level, 0.0);

        match &result {
            Ok(output) => {
                self.safety.log_execution(tool_name, true, duration_ms);
                self.safety
                    .record_behavioral_outcome(tool_name, risk_level, true);
                self.callback
                    .on_tool_result(tool_name, output, duration_ms)
                    .await;

                // Record fact from successful tool execution for cross-session learning.
                // Only record non-trivial (>10 bytes) and non-huge (<5000 bytes) outputs
                // to avoid noise and memory bloat. The stored summary is cut to the
                // first 200 characters (with a trailing "...") when the output is longer.
                if output.content.len() > 10 && output.content.len() < 5000 {
                    let summary = if output.content.chars().count() > 200 {
                        format!("{}...", truncate_str(&output.content, 200))
                    } else {
                        output.content.clone()
                    };
                    self.memory.long_term.add_fact(
                        crate::memory::Fact::new(
                            format!("Tool '{}' result: {}", tool_name, summary),
                            format!("tool:{}", tool_name),
                        )
                        .with_tags(vec!["tool_result".to_string(), tool_name.to_string()]),
                    );
                }
            }
            Err(e) => {
                self.safety.log_execution(tool_name, false, duration_ms);
                self.safety
                    .record_behavioral_outcome(tool_name, risk_level, false);
                // The callback receives the failure wrapped as an error-flavoured output.
                let error_output = ToolOutput::error(e.to_string());
                self.callback
                    .on_tool_result(tool_name, &error_output, duration_ms)
                    .await;
            }
        }

        // Hand the executor's own result (Ok or Err) back to the caller unchanged.
        result
    }
1187
1188    /// Record a decision explanation, capping at 50 entries.
1189    fn record_explanation(&mut self, explanation: DecisionExplanation) {
1190        if self.recent_explanations.len() >= 50 {
1191            self.recent_explanations.remove(0);
1192        }
1193        self.recent_explanations.push(explanation);
1194    }
1195
1196    /// Build rich approval context from action details, providing users with
1197    /// reasoning, consequences, and reversibility information.
1198    fn build_approval_context(
1199        tool_name: &str,
1200        details: &ActionDetails,
1201        risk_level: RiskLevel,
1202    ) -> ApprovalContext {
1203        let mut ctx = ApprovalContext::new();
1204
1205        // Derive consequences from action details
1206        match details {
1207            ActionDetails::FileWrite { path, size_bytes } => {
1208                ctx = ctx
1209                    .with_reasoning(format!(
1210                        "Writing {} bytes to {}",
1211                        size_bytes,
1212                        path.display()
1213                    ))
1214                    .with_consequence(format!(
1215                        "File '{}' will be created or overwritten",
1216                        path.display()
1217                    ))
1218                    .with_reversibility(ReversibilityInfo {
1219                        is_reversible: true,
1220                        undo_description: Some(
1221                            "Revert via git checkout or checkpoint restore".to_string(),
1222                        ),
1223                        undo_window: None,
1224                    });
1225            }
1226            ActionDetails::FileDelete { path } => {
1227                ctx = ctx
1228                    .with_reasoning(format!("Deleting file {}", path.display()))
1229                    .with_consequence(format!(
1230                        "File '{}' will be permanently removed",
1231                        path.display()
1232                    ))
1233                    .with_reversibility(ReversibilityInfo {
1234                        is_reversible: true,
1235                        undo_description: Some(
1236                            "Restore via git checkout or checkpoint".to_string(),
1237                        ),
1238                        undo_window: None,
1239                    });
1240            }
1241            ActionDetails::ShellCommand { command } => {
1242                ctx = ctx
1243                    .with_reasoning(format!("Executing shell command: {}", command))
1244                    .with_consequence("Shell command will run in the agent workspace".to_string());
1245                if risk_level >= RiskLevel::Execute {
1246                    ctx = ctx.with_consequence(
1247                        "Command may modify system state or produce side effects".to_string(),
1248                    );
1249                }
1250            }
1251            ActionDetails::NetworkRequest { host, method } => {
1252                ctx = ctx
1253                    .with_reasoning(format!("Making {} request to {}", method, host))
1254                    .with_consequence(format!("Network request will be sent to {}", host));
1255            }
1256            ActionDetails::GitOperation { operation } => {
1257                ctx = ctx
1258                    .with_reasoning(format!("Git operation: {}", operation))
1259                    .with_reversibility(ReversibilityInfo {
1260                        is_reversible: true,
1261                        undo_description: Some(
1262                            "Git operations are generally reversible via reflog".to_string(),
1263                        ),
1264                        undo_window: None,
1265                    });
1266            }
1267            _ => {
1268                ctx = ctx.with_reasoning(format!("Executing {} tool", tool_name));
1269            }
1270        }
1271
1272        // Add preview for destructive tools
1273        ctx = ctx.with_preview_from_tool(tool_name, details);
1274
1275        ctx
1276    }
1277
1278    /// Parse tool arguments into a specific `ActionDetails` variant based on tool name.
1279    /// This enables `build_approval_context()` to produce rich reasoning, consequences,
1280    /// and reversibility info instead of always falling through to the `Other` catch-all.
1281    fn parse_action_details(tool_name: &str, arguments: &serde_json::Value) -> ActionDetails {
1282        match tool_name {
1283            "file_read" | "file_list" | "file_search" => {
1284                if let Some(path) = arguments.get("path").and_then(|v| v.as_str()) {
1285                    ActionDetails::FileRead { path: path.into() }
1286                } else {
1287                    ActionDetails::Other {
1288                        info: arguments.to_string(),
1289                    }
1290                }
1291            }
1292            "file_write" | "file_patch" => {
1293                let path = arguments
1294                    .get("path")
1295                    .and_then(|v| v.as_str())
1296                    .unwrap_or("unknown");
1297                let size = arguments
1298                    .get("content")
1299                    .and_then(|v| v.as_str())
1300                    .map(|s| s.len())
1301                    .unwrap_or(0);
1302                ActionDetails::FileWrite {
1303                    path: path.into(),
1304                    size_bytes: size,
1305                }
1306            }
1307            "shell_exec" => {
1308                let cmd = arguments
1309                    .get("command")
1310                    .and_then(|v| v.as_str())
1311                    .unwrap_or("(unknown)");
1312                ActionDetails::ShellCommand {
1313                    command: cmd.to_string(),
1314                }
1315            }
1316            "git_status" | "git_diff" => ActionDetails::GitOperation {
1317                operation: tool_name.to_string(),
1318            },
1319            "git_commit" => {
1320                let msg = arguments
1321                    .get("message")
1322                    .and_then(|v| v.as_str())
1323                    .unwrap_or("");
1324                let truncated = truncate_str(msg, 80);
1325                ActionDetails::GitOperation {
1326                    operation: format!("commit: {}", truncated),
1327                }
1328            }
1329            // macOS native tools
1330            "macos_calendar" | "macos_reminders" | "macos_notes" => {
1331                let action = arguments
1332                    .get("action")
1333                    .and_then(|v| v.as_str())
1334                    .unwrap_or("list");
1335                let title = arguments
1336                    .get("title")
1337                    .and_then(|v| v.as_str())
1338                    .unwrap_or("");
1339                ActionDetails::Other {
1340                    info: format!("{} {} {}", tool_name, action, title)
1341                        .trim()
1342                        .to_string(),
1343                }
1344            }
1345            "macos_app_control" => {
1346                let action = arguments
1347                    .get("action")
1348                    .and_then(|v| v.as_str())
1349                    .unwrap_or("list_running");
1350                let app = arguments
1351                    .get("app_name")
1352                    .and_then(|v| v.as_str())
1353                    .unwrap_or("");
1354                ActionDetails::ShellCommand {
1355                    command: format!("{} {}", action, app).trim().to_string(),
1356                }
1357            }
1358            "macos_clipboard" => {
1359                let action = arguments
1360                    .get("action")
1361                    .and_then(|v| v.as_str())
1362                    .unwrap_or("read");
1363                ActionDetails::Other {
1364                    info: format!("clipboard {}", action),
1365                }
1366            }
1367            "macos_screenshot" => {
1368                let path = arguments
1369                    .get("path")
1370                    .and_then(|v| v.as_str())
1371                    .unwrap_or("screenshot.png");
1372                ActionDetails::FileWrite {
1373                    path: path.into(),
1374                    size_bytes: 0,
1375                }
1376            }
1377            "macos_finder" => {
1378                let action = arguments
1379                    .get("action")
1380                    .and_then(|v| v.as_str())
1381                    .unwrap_or("reveal");
1382                let path = arguments
1383                    .get("path")
1384                    .and_then(|v| v.as_str())
1385                    .unwrap_or(".");
1386                if action == "trash" {
1387                    ActionDetails::FileDelete { path: path.into() }
1388                } else {
1389                    ActionDetails::Other {
1390                        info: format!("Finder: {} {}", action, path),
1391                    }
1392                }
1393            }
1394            "macos_notification" | "macos_system_info" | "macos_spotlight" => {
1395                ActionDetails::Other {
1396                    info: arguments
1397                        .as_object()
1398                        .map(|o| {
1399                            o.iter()
1400                                .map(|(k, v)| {
1401                                    format!("{}={}", k, v.as_str().unwrap_or(&v.to_string()))
1402                                })
1403                                .collect::<Vec<_>>()
1404                                .join(", ")
1405                        })
1406                        .unwrap_or_default(),
1407                }
1408            }
1409            "macos_mail" => {
1410                let action = arguments["action"]
1411                    .as_str()
1412                    .unwrap_or("unknown")
1413                    .to_string();
1414                if action == "send" {
1415                    let to = arguments["to"].as_str().unwrap_or("unknown").to_string();
1416                    let subject = arguments["subject"]
1417                        .as_str()
1418                        .unwrap_or("(no subject)")
1419                        .to_string();
1420                    ActionDetails::Other {
1421                        info: format!("SEND EMAIL to {} — subject: {}", to, subject),
1422                    }
1423                } else {
1424                    ActionDetails::Other {
1425                        info: format!("macos_mail: {}", action),
1426                    }
1427                }
1428            }
1429            "macos_safari" => {
1430                let action = arguments["action"]
1431                    .as_str()
1432                    .unwrap_or("unknown")
1433                    .to_string();
1434                if action == "run_javascript" {
1435                    ActionDetails::ShellCommand {
1436                        command: format!(
1437                            "Safari JS: {}",
1438                            arguments["script"].as_str().unwrap_or("(unknown)")
1439                        ),
1440                    }
1441                } else if action == "navigate" {
1442                    ActionDetails::BrowserAction {
1443                        action: "navigate".to_string(),
1444                        url: arguments["url"].as_str().map(|s| s.to_string()),
1445                        selector: None,
1446                    }
1447                } else {
1448                    ActionDetails::Other {
1449                        info: format!("macos_safari: {}", action),
1450                    }
1451                }
1452            }
1453            "macos_screen_analyze" => {
1454                let action = arguments["action"].as_str().unwrap_or("ocr").to_string();
1455                let app = arguments["app_name"]
1456                    .as_str()
1457                    .map(|s| s.to_string())
1458                    .unwrap_or_else(|| "screen".to_string());
1459                ActionDetails::GuiAction {
1460                    app_name: app,
1461                    action,
1462                    element: None,
1463                }
1464            }
1465            "macos_contacts" => {
1466                let action = arguments["action"].as_str().unwrap_or("search").to_string();
1467                let query = arguments["query"]
1468                    .as_str()
1469                    .or_else(|| arguments["name"].as_str())
1470                    .map(|q| format!("'{}'", q))
1471                    .unwrap_or_default();
1472                ActionDetails::Other {
1473                    info: format!("Contacts: {} {}", action, query),
1474                }
1475            }
1476            "macos_gui_scripting" | "macos_accessibility" => {
1477                let app_name = arguments["app_name"]
1478                    .as_str()
1479                    .unwrap_or("unknown")
1480                    .to_string();
1481                let action = arguments["action"]
1482                    .as_str()
1483                    .unwrap_or("unknown")
1484                    .to_string();
1485                let element = arguments["element_description"]
1486                    .as_str()
1487                    .map(|s| s.to_string());
1488                ActionDetails::GuiAction {
1489                    app_name,
1490                    action,
1491                    element,
1492                }
1493            }
1494            // Browser automation tools → BrowserAction for rich approval context.
1495            name if name.starts_with("browser_") => {
1496                let action = name.strip_prefix("browser_").unwrap_or(name).to_string();
1497                let url = arguments["url"].as_str().map(|s| s.to_string());
1498                let selector = arguments["selector"]
1499                    .as_str()
1500                    .or_else(|| arguments["ref"].as_str())
1501                    .map(|s| s.to_string());
1502                ActionDetails::BrowserAction {
1503                    action,
1504                    url,
1505                    selector,
1506                }
1507            }
1508            // Web tools → NetworkRequest for approval context.
1509            "web_search" | "web_fetch" => {
1510                let host = if tool_name == "web_search" {
1511                    "api.duckduckgo.com".to_string()
1512                } else {
1513                    // Extract hostname from URL for web_fetch
1514                    let url_str = arguments["url"].as_str().unwrap_or("unknown URL");
1515                    url_str
1516                        .strip_prefix("https://")
1517                        .or_else(|| url_str.strip_prefix("http://"))
1518                        .and_then(|s| s.split('/').next())
1519                        .unwrap_or(url_str)
1520                        .to_string()
1521                };
1522                ActionDetails::NetworkRequest {
1523                    host,
1524                    method: if tool_name == "web_search" {
1525                        "SEARCH".to_string()
1526                    } else {
1527                        "GET".to_string()
1528                    },
1529                }
1530            }
1531            // iMessage send → ChannelReply for approval gating.
1532            "imessage_send" => {
1533                let recipient = arguments["recipient"]
1534                    .as_str()
1535                    .unwrap_or("unknown")
1536                    .to_string();
1537                let preview = arguments["message"]
1538                    .as_str()
1539                    .map(|s| {
1540                        if s.len() > 100 {
1541                            format!("{}...", &s[..97])
1542                        } else {
1543                            s.to_string()
1544                        }
1545                    })
1546                    .unwrap_or_default();
1547                ActionDetails::ChannelReply {
1548                    channel: "iMessage".to_string(),
1549                    recipient,
1550                    preview,
1551                    priority: MessagePriority::Normal,
1552                }
1553            }
1554            // Slack tool → ChannelReply for send/reply, Other for reads.
1555            "slack" => {
1556                let action = arguments
1557                    .get("action")
1558                    .and_then(|v| v.as_str())
1559                    .unwrap_or("send_message");
1560                match action {
1561                    "send_message" | "reply_thread" => {
1562                        let recipient = arguments["channel"]
1563                            .as_str()
1564                            .unwrap_or("unknown")
1565                            .to_string();
1566                        let preview = arguments["message"]
1567                            .as_str()
1568                            .map(|s| {
1569                                if s.len() > 100 {
1570                                    format!("{}...", &s[..97])
1571                                } else {
1572                                    s.to_string()
1573                                }
1574                            })
1575                            .unwrap_or_default();
1576                        ActionDetails::ChannelReply {
1577                            channel: "Slack".to_string(),
1578                            recipient,
1579                            preview,
1580                            priority: MessagePriority::Normal,
1581                        }
1582                    }
1583                    "add_reaction" => ActionDetails::ChannelReply {
1584                        channel: "Slack".to_string(),
1585                        recipient: arguments["channel"]
1586                            .as_str()
1587                            .unwrap_or("unknown")
1588                            .to_string(),
1589                        preview: format!(":{}:", arguments["emoji"].as_str().unwrap_or("?")),
1590                        priority: MessagePriority::Normal,
1591                    },
1592                    _ => ActionDetails::Other {
1593                        info: format!("slack:{}", action),
1594                    },
1595                }
1596            }
1597            // ArXiv research → NetworkRequest for search/fetch, FileWrite for save.
1598            "arxiv_research" => {
1599                let action = arguments
1600                    .get("action")
1601                    .and_then(|v| v.as_str())
1602                    .unwrap_or("search");
1603                match action {
1604                    "save" | "remove" | "collections" | "digest_config" => {
1605                        ActionDetails::FileWrite {
1606                            path: ".rustant/arxiv/library.json".into(),
1607                            size_bytes: 0,
1608                        }
1609                    }
1610                    _ => ActionDetails::NetworkRequest {
1611                        host: "export.arxiv.org".to_string(),
1612                        method: "GET".to_string(),
1613                    },
1614                }
1615            }
1616            // Knowledge graph — write actions modify state file
1617            "knowledge_graph" => {
1618                let action = arguments
1619                    .get("action")
1620                    .and_then(|v| v.as_str())
1621                    .unwrap_or("list");
1622                match action {
1623                    "add_node" | "update_node" | "remove_node" | "add_edge" | "remove_edge"
1624                    | "import_arxiv" => ActionDetails::FileWrite {
1625                        path: ".rustant/knowledge/graph.json".into(),
1626                        size_bytes: 0,
1627                    },
1628                    _ => ActionDetails::FileRead {
1629                        path: ".rustant/knowledge/graph.json".into(),
1630                    },
1631                }
1632            }
1633            // Experiment tracker — write actions modify state file
1634            "experiment_tracker" => {
1635                let action = arguments
1636                    .get("action")
1637                    .and_then(|v| v.as_str())
1638                    .unwrap_or("list_experiments");
1639                match action {
1640                    "add_hypothesis"
1641                    | "update_hypothesis"
1642                    | "add_experiment"
1643                    | "start_experiment"
1644                    | "complete_experiment"
1645                    | "fail_experiment"
1646                    | "record_evidence" => ActionDetails::FileWrite {
1647                        path: ".rustant/experiments/tracker.json".into(),
1648                        size_bytes: 0,
1649                    },
1650                    _ => ActionDetails::FileRead {
1651                        path: ".rustant/experiments/tracker.json".into(),
1652                    },
1653                }
1654            }
1655            // Code intelligence — read-only analysis tool
1656            "code_intelligence" => {
1657                let path = arguments
1658                    .get("path")
1659                    .and_then(|v| v.as_str())
1660                    .unwrap_or(".");
1661                ActionDetails::FileRead { path: path.into() }
1662            }
1663            // Content engine — write actions modify state file
1664            "content_engine" => {
1665                let action = arguments
1666                    .get("action")
1667                    .and_then(|v| v.as_str())
1668                    .unwrap_or("list");
1669                match action {
1670                    "create" | "update" | "set_status" | "delete" | "schedule" | "calendar_add"
1671                    | "calendar_remove" => ActionDetails::FileWrite {
1672                        path: ".rustant/content/library.json".into(),
1673                        size_bytes: 0,
1674                    },
1675                    _ => ActionDetails::FileRead {
1676                        path: ".rustant/content/library.json".into(),
1677                    },
1678                }
1679            }
1680            // Skill tracker — write actions modify state file
1681            "skill_tracker" => {
1682                let action = arguments
1683                    .get("action")
1684                    .and_then(|v| v.as_str())
1685                    .unwrap_or("list_skills");
1686                match action {
1687                    "add_skill" | "log_practice" | "learning_path" => ActionDetails::FileWrite {
1688                        path: ".rustant/skills/tracker.json".into(),
1689                        size_bytes: 0,
1690                    },
1691                    _ => ActionDetails::FileRead {
1692                        path: ".rustant/skills/tracker.json".into(),
1693                    },
1694                }
1695            }
1696            // Career intel — write actions modify state file
1697            "career_intel" => {
1698                let action = arguments
1699                    .get("action")
1700                    .and_then(|v| v.as_str())
1701                    .unwrap_or("progress_report");
1702                match action {
1703                    "set_goal" | "log_achievement" | "add_portfolio" | "network_note" => {
1704                        ActionDetails::FileWrite {
1705                            path: ".rustant/career/intel.json".into(),
1706                            size_bytes: 0,
1707                        }
1708                    }
1709                    _ => ActionDetails::FileRead {
1710                        path: ".rustant/career/intel.json".into(),
1711                    },
1712                }
1713            }
1714            // System monitor — health_check uses network, others modify state
1715            "system_monitor" => {
1716                let action = arguments
1717                    .get("action")
1718                    .and_then(|v| v.as_str())
1719                    .unwrap_or("list_services");
1720                match action {
1721                    "health_check" => ActionDetails::NetworkRequest {
1722                        host: "service health check".to_string(),
1723                        method: "GET".to_string(),
1724                    },
1725                    "add_service" | "log_incident" => ActionDetails::FileWrite {
1726                        path: ".rustant/monitoring/topology.json".into(),
1727                        size_bytes: 0,
1728                    },
1729                    _ => ActionDetails::FileRead {
1730                        path: ".rustant/monitoring/topology.json".into(),
1731                    },
1732                }
1733            }
1734            // Life planner — write actions modify state file
1735            "life_planner" => {
1736                let action = arguments
1737                    .get("action")
1738                    .and_then(|v| v.as_str())
1739                    .unwrap_or("daily_plan");
1740                match action {
1741                    "set_energy_profile" | "add_deadline" | "log_habit" | "context_switch_log" => {
1742                        ActionDetails::FileWrite {
1743                            path: ".rustant/life/planner.json".into(),
1744                            size_bytes: 0,
1745                        }
1746                    }
1747                    _ => ActionDetails::FileRead {
1748                        path: ".rustant/life/planner.json".into(),
1749                    },
1750                }
1751            }
1752            // Privacy manager — delete_data is destructive, others vary
1753            "privacy_manager" => {
1754                let action = arguments
1755                    .get("action")
1756                    .and_then(|v| v.as_str())
1757                    .unwrap_or("list_boundaries");
1758                match action {
1759                    "delete_data" => {
1760                        let domain = arguments
1761                            .get("domain")
1762                            .and_then(|v| v.as_str())
1763                            .unwrap_or("unknown");
1764                        ActionDetails::FileDelete {
1765                            path: format!(".rustant/{}/", domain).into(),
1766                        }
1767                    }
1768                    "set_boundary" | "encrypt_store" => ActionDetails::FileWrite {
1769                        path: ".rustant/privacy/config.json".into(),
1770                        size_bytes: 0,
1771                    },
1772                    _ => ActionDetails::FileRead {
1773                        path: ".rustant/privacy/config.json".into(),
1774                    },
1775                }
1776            }
1777            // Self-improvement — some actions write, others read
1778            "self_improvement" => {
1779                let action = arguments
1780                    .get("action")
1781                    .and_then(|v| v.as_str())
1782                    .unwrap_or("analyze_patterns");
1783                match action {
1784                    "set_preference" | "feedback" | "reset_baseline" => ActionDetails::FileWrite {
1785                        path: ".rustant/meta/improvement.json".into(),
1786                        size_bytes: 0,
1787                    },
1788                    _ => ActionDetails::FileRead {
1789                        path: ".rustant/meta/improvement.json".into(),
1790                    },
1791                }
1792            }
1793            _ => ActionDetails::Other {
1794                info: arguments.to_string(),
1795            },
1796        }
1797    }
1798
1799    /// Provide a tool-routing hint based on the cached task classification.
1800    /// Returns Some(hint) if the classification maps to a specific tool or workflow.
1801    /// This prevents the LLM from choosing generic tools (shell_exec, document_read)
1802    /// for tasks that have purpose-built tools.
1803    ///
1804    /// Uses the pre-computed `TaskClassification` from `AgentState` instead of
1805    /// running ~300 `.contains()` calls on every invocation.
1806    ///
1807    /// Match a cached task classification to a workflow template routing hint.
1808    /// This is platform-independent (workflows work on all platforms).
1809    fn workflow_routing_hint(classification: &TaskClassification) -> Option<String> {
1810        let workflow = match classification {
1811            TaskClassification::Workflow(name) => name.as_str(),
1812            _ => return None,
1813        };
1814
1815        Some(format!(
1816            "WORKFLOW ROUTING: For this task, run the '{}' workflow. \
1817             Use shell_exec to run: `rustant workflow run {}` — or accomplish \
1818             the task directly step by step using available tools.",
1819            workflow, workflow
1820        ))
1821    }
1822
1823    #[cfg(target_os = "macos")]
1824    fn tool_routing_hint_from_classification(
1825        classification: &TaskClassification,
1826    ) -> Option<String> {
1827        // Workflow routing (platform-independent, checked first)
1828        if let Some(hint) = Self::workflow_routing_hint(classification) {
1829            return Some(hint);
1830        }
1831
1832        let tool_hint = match classification {
1833            TaskClassification::Clipboard => "For this task, call the 'macos_clipboard' tool with {\"action\":\"read\"} to read the clipboard or {\"action\":\"write\",\"content\":\"...\"} to write to it.",
1834            TaskClassification::SystemInfo => "For this task, call the 'macos_system_info' tool with the appropriate action: \"battery\", \"disk\", \"memory\", \"cpu\", \"network\", or \"version\".",
1835            TaskClassification::AppControl => "For this task, call the 'macos_app_control' tool with the appropriate action: \"list_running\", \"open\", \"quit\", or \"activate\".",
1836            TaskClassification::Meeting => "For this task, call 'macos_meeting_recorder'. Use action 'record_and_transcribe' to start (announces via TTS, records with silence detection, auto-transcribes to Notes.app). Use 'stop' to stop manually. Use 'status' to check state.",
1837            TaskClassification::Calendar => "For this task, call the 'macos_calendar' tool with the appropriate action.",
1838            TaskClassification::Reminders => "For this task, call the 'macos_reminders' tool with the appropriate action.",
1839            TaskClassification::Notes => "For this task, call the 'macos_notes' tool with the appropriate action.",
1840            TaskClassification::Screenshot => "For this task, call the 'macos_screenshot' tool with the appropriate action.",
1841            TaskClassification::Notification => "For this task, call the 'macos_notification' tool.",
1842            TaskClassification::Spotlight => "For this task, call the 'macos_spotlight' tool to search files using Spotlight.",
1843            TaskClassification::FocusMode => "For this task, call the 'macos_focus_mode' tool.",
1844            TaskClassification::Music => "For this task, call the 'macos_music' tool with the appropriate action.",
1845            TaskClassification::Email => "For this task, call the 'macos_mail' tool with the appropriate action.",
1846            TaskClassification::Finder => "For this task, call the 'macos_finder' tool with the appropriate action.",
1847            TaskClassification::Contacts => "For this task, call the 'macos_contacts' tool with the appropriate action.",
1848            TaskClassification::WebSearch => "For this task, call the 'web_search' tool with {\"query\": \"your search terms\"}. Do NOT use macos_safari or shell_exec for web searches — use the dedicated web_search tool which queries DuckDuckGo.",
1849            TaskClassification::WebFetch => "For this task, call the 'web_fetch' tool with {\"url\": \"https://...\"} to retrieve page content. Do NOT use macos_safari or shell_exec — use the dedicated web_fetch tool.",
1850            TaskClassification::Safari => "For this task, call the 'macos_safari' tool with the appropriate action. Note: for simple web searches use 'web_search' instead, and for fetching page content use 'web_fetch' instead.",
1851            TaskClassification::Slack => "For this task, call the 'slack' tool with the appropriate action (send_message, read_messages, list_channels, reply_thread, list_users, add_reaction). Do NOT use macos_gui_scripting or macos_app_control to interact with Slack.",
1852            TaskClassification::Messaging => "For this task, call the appropriate iMessage tool: 'imessage_read', 'imessage_send', or 'imessage_contacts'.",
1853            TaskClassification::ArxivResearch => "For this task, call the 'arxiv_research' tool with {\"action\": \"search\", \"query\": \"your search terms\", \"max_results\": 10}. This tool uses the arXiv API directly — do NOT use macos_safari, shell_exec, or curl. Other actions: fetch (get by ID), analyze (LLM summary), trending (recent papers), paper_to_code, paper_to_notebook, save/library/remove, export_bibtex.",
1854            TaskClassification::KnowledgeGraph => "For this task, call the 'knowledge_graph' tool. Actions: add_node, get_node, update_node, remove_node, add_edge, remove_edge, neighbors, search, list, path, stats, import_arxiv, export_dot.",
1855            TaskClassification::ExperimentTracking => "For this task, call the 'experiment_tracker' tool. Actions: add_hypothesis, update_hypothesis, list_hypotheses, get_hypothesis, add_experiment, start_experiment, complete_experiment, fail_experiment, get_experiment, list_experiments, record_evidence, compare_experiments, summary, export_markdown.",
1856            TaskClassification::CodeIntelligence => "For this task, call the 'code_intelligence' tool. Actions: analyze_architecture, detect_patterns, translate_snippet, compare_implementations, tech_debt_report, api_surface, dependency_map.",
1857            TaskClassification::ContentEngine => "For this task, call the 'content_engine' tool. Actions: create, update, set_status, get, list, search, delete, schedule, calendar_add, calendar_list, calendar_remove, stats, adapt, export_markdown.",
1858            TaskClassification::SkillTracker => "For this task, call the 'skill_tracker' tool. Actions: add_skill, log_practice, assess, list_skills, knowledge_gaps, learning_path, progress_report, daily_practice.",
1859            TaskClassification::CareerIntel => "For this task, call the 'career_intel' tool. Actions: set_goal, log_achievement, add_portfolio, gap_analysis, market_scan, network_note, progress_report, strategy_review.",
1860            TaskClassification::SystemMonitor => "For this task, call the 'system_monitor' tool. Actions: add_service, topology, health_check, log_incident, correlate, generate_runbook, impact_analysis, list_services.",
1861            TaskClassification::LifePlanner => "For this task, call the 'life_planner' tool. Actions: set_energy_profile, add_deadline, log_habit, daily_plan, weekly_review, context_switch_log, balance_report, optimize_schedule.",
1862            TaskClassification::PrivacyManager => "For this task, call the 'privacy_manager' tool. Actions: set_boundary, list_boundaries, audit_access, compliance_check, export_data, delete_data, encrypt_store, privacy_report.",
1863            TaskClassification::SelfImprovement => "For this task, call the 'self_improvement' tool. Actions: analyze_patterns, performance_report, suggest_improvements, set_preference, get_preferences, cognitive_load, feedback, reset_baseline.",
1864            _ => return None,
1865        };
1866
1867        Some(format!("TOOL ROUTING: {}", tool_hint))
1868    }
1869
1870    /// Non-macOS fallback — workflow routing + cross-platform tool routing.
1871    #[cfg(not(target_os = "macos"))]
1872    fn tool_routing_hint_from_classification(
1873        classification: &TaskClassification,
1874    ) -> Option<String> {
1875        // Workflow routing (platform-independent, checked first)
1876        if let Some(hint) = Self::workflow_routing_hint(classification) {
1877            return Some(hint);
1878        }
1879
1880        let tool_hint = match classification {
1881            TaskClassification::WebSearch => "For this task, call the 'web_search' tool with {\"query\": \"your search terms\"}. Do NOT use shell_exec for web searches — use the dedicated web_search tool which queries DuckDuckGo.",
1882            TaskClassification::WebFetch => "For this task, call the 'web_fetch' tool with {\"url\": \"https://...\"} to retrieve page content. Do NOT use shell_exec — use the dedicated web_fetch tool.",
1883            TaskClassification::Slack => "For this task, call the 'slack' tool with the appropriate action (send_message, read_messages, list_channels, reply_thread, list_users, add_reaction). Do NOT use shell_exec to interact with Slack.",
1884            TaskClassification::ArxivResearch => "For this task, call the 'arxiv_research' tool with {\"action\": \"search\", \"query\": \"your search terms\", \"max_results\": 10}. This tool uses the arXiv API directly — do NOT use shell_exec, or curl. Other actions: fetch (get by ID), analyze (LLM summary), trending (recent papers), paper_to_code, paper_to_notebook, save/library/remove, export_bibtex.",
1885            TaskClassification::KnowledgeGraph => "For this task, call the 'knowledge_graph' tool. Actions: add_node, get_node, update_node, remove_node, add_edge, remove_edge, neighbors, search, list, path, stats, import_arxiv, export_dot.",
1886            TaskClassification::ExperimentTracking => "For this task, call the 'experiment_tracker' tool. Actions: add_hypothesis, update_hypothesis, list_hypotheses, get_hypothesis, add_experiment, start_experiment, complete_experiment, fail_experiment, get_experiment, list_experiments, record_evidence, compare_experiments, summary, export_markdown.",
1887            TaskClassification::CodeIntelligence => "For this task, call the 'code_intelligence' tool. Actions: analyze_architecture, detect_patterns, translate_snippet, compare_implementations, tech_debt_report, api_surface, dependency_map.",
1888            TaskClassification::ContentEngine => "For this task, call the 'content_engine' tool. Actions: create, update, set_status, get, list, search, delete, schedule, calendar_add, calendar_list, calendar_remove, stats, adapt, export_markdown.",
1889            TaskClassification::SkillTracker => "For this task, call the 'skill_tracker' tool. Actions: add_skill, log_practice, assess, list_skills, knowledge_gaps, learning_path, progress_report, daily_practice.",
1890            TaskClassification::CareerIntel => "For this task, call the 'career_intel' tool. Actions: set_goal, log_achievement, add_portfolio, gap_analysis, market_scan, network_note, progress_report, strategy_review.",
1891            TaskClassification::SystemMonitor => "For this task, call the 'system_monitor' tool. Actions: add_service, topology, health_check, log_incident, correlate, generate_runbook, impact_analysis, list_services.",
1892            TaskClassification::LifePlanner => "For this task, call the 'life_planner' tool. Actions: set_energy_profile, add_deadline, log_habit, daily_plan, weekly_review, context_switch_log, balance_report, optimize_schedule.",
1893            TaskClassification::PrivacyManager => "For this task, call the 'privacy_manager' tool. Actions: set_boundary, list_boundaries, audit_access, compliance_check, export_data, delete_data, encrypt_store, privacy_report.",
1894            TaskClassification::SelfImprovement => "For this task, call the 'self_improvement' tool. Actions: analyze_patterns, performance_report, suggest_improvements, set_preference, get_preferences, cognitive_load, feedback, reset_baseline.",
1895            _ => return None,
1896        };
1897
1898        Some(format!("TOOL ROUTING: {}", tool_hint))
1899    }
1900
1901    /// Auto-correct a tool call when the LLM is stuck calling the wrong tool.
1902    /// Returns Some((corrected_name, corrected_args)) if a correction is possible.
1903    /// Uses the cached `TaskClassification` from `AgentState` for O(1) matching.
1904    #[cfg(target_os = "macos")]
1905    fn auto_correct_tool_call(
1906        failed_tool: &str,
1907        _args: &serde_json::Value,
1908        state: &AgentState,
1909    ) -> Option<(String, serde_json::Value)> {
1910        let classification = state.task_classification.as_ref()?;
1911        let task = state.current_goal.as_deref().unwrap_or("");
1912
1913        match classification {
1914            // Redirect GUI scripting / app control / shell to slack for Slack tasks
1915            TaskClassification::Slack
1916                if matches!(
1917                    failed_tool,
1918                    "macos_gui_scripting" | "macos_app_control" | "shell_exec"
1919                ) =>
1920            {
1921                Some((
1922                    "slack".to_string(),
1923                    serde_json::json!({"action": "send_message"}),
1924                ))
1925            }
1926            // Redirect Safari/shell/curl to arxiv_research for paper tasks
1927            TaskClassification::ArxivResearch
1928                if matches!(
1929                    failed_tool,
1930                    "macos_safari" | "shell_exec" | "web_fetch" | "web_search"
1931                ) =>
1932            {
1933                Some((
1934                    "arxiv_research".to_string(),
1935                    serde_json::json!({"action": "search", "query": task, "max_results": 10}),
1936                ))
1937            }
1938            // Redirect Safari/shell to web_search for general web searches
1939            TaskClassification::WebSearch
1940                if matches!(failed_tool, "macos_safari" | "shell_exec") =>
1941            {
1942                Some(("web_search".to_string(), serde_json::json!({"query": task})))
1943            }
1944            // Redirect generic file/shell tools to clipboard
1945            TaskClassification::Clipboard
1946                if matches!(failed_tool, "document_read" | "file_read" | "shell_exec") =>
1947            {
1948                Some((
1949                    "macos_clipboard".to_string(),
1950                    serde_json::json!({"action": "read"}),
1951                ))
1952            }
1953            // Redirect to system_info based on classification
1954            TaskClassification::SystemInfo
1955                if matches!(failed_tool, "document_read" | "file_read" | "shell_exec") =>
1956            {
1957                // Use the task text to pick the right sub-action
1958                let lower = task.to_lowercase();
1959                let action = if lower.contains("battery") {
1960                    "battery"
1961                } else if lower.contains("disk") {
1962                    "disk"
1963                } else if lower.contains("cpu") || lower.contains("processor") {
1964                    "cpu"
1965                } else if lower.contains("memory") || lower.contains("ram") {
1966                    "memory"
1967                } else {
1968                    "version"
1969                };
1970                Some((
1971                    "macos_system_info".to_string(),
1972                    serde_json::json!({"action": action}),
1973                ))
1974            }
1975            // Redirect to app_control
1976            TaskClassification::AppControl
1977                if matches!(failed_tool, "document_read" | "file_read" | "shell_exec") =>
1978            {
1979                Some((
1980                    "macos_app_control".to_string(),
1981                    serde_json::json!({"action": "list_running"}),
1982                ))
1983            }
1984            _ => None,
1985        }
1986    }
1987
1988    /// Non-macOS fallback — Slack auto-correction only.
1989    #[cfg(not(target_os = "macos"))]
1990    fn auto_correct_tool_call(
1991        failed_tool: &str,
1992        _args: &serde_json::Value,
1993        state: &AgentState,
1994    ) -> Option<(String, serde_json::Value)> {
1995        let classification = state.task_classification.as_ref()?;
1996
1997        if matches!(classification, TaskClassification::Slack)
1998            && matches!(failed_tool, "shell_exec" | "web_fetch")
1999        {
2000            return Some((
2001                "slack".to_string(),
2002                serde_json::json!({"action": "send_message"}),
2003            ));
2004        }
2005
2006        None
2007    }
2008
2009    /// Build a decision explanation for a tool selection.
2010    fn build_decision_explanation(
2011        &self,
2012        tool_name: &str,
2013        arguments: &serde_json::Value,
2014    ) -> DecisionExplanation {
2015        let risk_level = self
2016            .tools
2017            .get(tool_name)
2018            .map(|t| t.risk_level)
2019            .unwrap_or(RiskLevel::Execute);
2020
2021        let mut builder = ExplanationBuilder::new(DecisionType::ToolSelection {
2022            selected_tool: tool_name.to_string(),
2023        });
2024
2025        // Add reasoning based on the tool and arguments
2026        builder.add_reasoning_step(
2027            format!("Selected tool '{}' (risk: {})", tool_name, risk_level),
2028            None,
2029        );
2030
2031        // Add argument summary as evidence
2032        if let Some(obj) = arguments.as_object() {
2033            let param_keys: Vec<&str> = obj.keys().map(|k| k.as_str()).collect();
2034            if !param_keys.is_empty() {
2035                builder.add_reasoning_step(
2036                    format!("Parameters: {}", param_keys.join(", ")),
2037                    Some(&arguments.to_string()),
2038                );
2039            }
2040        }
2041
2042        // Context factors from memory and safety state
2043        if let Some(goal) = &self.memory.working.current_goal {
2044            builder.add_context_factor(
2045                &format!("Current goal: {}", goal),
2046                FactorInfluence::Positive,
2047            );
2048        }
2049
2050        builder.add_context_factor(
2051            &format!("Approval mode: {}", self.safety.approval_mode()),
2052            FactorInfluence::Neutral,
2053        );
2054
2055        builder.add_context_factor(
2056            &format!(
2057                "Iteration {}/{}",
2058                self.state.iteration, self.state.max_iterations
2059            ),
2060            if self.state.iteration as f64 / self.state.max_iterations as f64 > 0.8 {
2061                FactorInfluence::Negative
2062            } else {
2063                FactorInfluence::Neutral
2064            },
2065        );
2066
2067        // List other available tools as considered alternatives
2068        for (name, tool) in &self.tools {
2069            if name != tool_name && tool.risk_level <= risk_level {
2070                builder.add_alternative(name, "Not selected by LLM for this step", tool.risk_level);
2071            }
2072        }
2073
2074        // Improved confidence scoring using multiple signals
2075        let confidence = self.calculate_tool_confidence(tool_name, risk_level);
2076        builder.set_confidence(confidence);
2077
2078        builder.build()
2079    }
2080
2081    /// Calculate confidence score for a tool call based on multiple factors.
2082    ///
2083    /// Considers risk level, prior usage in this session, and iteration depth.
2084    fn calculate_tool_confidence(&self, tool_name: &str, risk_level: RiskLevel) -> f32 {
2085        // Base confidence from risk level
2086        let mut confidence: f32 = match risk_level {
2087            RiskLevel::ReadOnly => 0.90,
2088            RiskLevel::Write => 0.75,
2089            RiskLevel::Execute => 0.65,
2090            RiskLevel::Network => 0.70,
2091            RiskLevel::Destructive => 0.45,
2092        };
2093
2094        // +0.05 if tool has been used successfully before in this session
2095        if self.tool_token_usage.contains_key(tool_name) {
2096            confidence += 0.05;
2097        }
2098
2099        // -0.1 if iteration count is high (>10), suggesting the agent may be looping
2100        if self.state.iteration > 10 {
2101            confidence -= 0.1;
2102        }
2103
2104        // -0.05 if approaching iteration limit (>80% of max)
2105        if self.state.max_iterations > 0
2106            && (self.state.iteration as f64 / self.state.max_iterations as f64) > 0.8
2107        {
2108            confidence -= 0.05;
2109        }
2110
2111        confidence.clamp(0.0, 1.0)
2112    }
2113
    /// Get the current agent state.
    ///
    /// Returns a shared borrow of the agent's `AgentState`; nothing is copied.
    pub fn state(&self) -> &AgentState {
        &self.state
    }
2118
    /// Get a cancellation token for this agent.
    ///
    /// Returns a clone of the agent's current `CancellationToken`, so the caller
    /// can hold it independently of the agent borrow.
    pub fn cancellation_token(&self) -> CancellationToken {
        self.cancellation.clone()
    }
2123
    /// Cancel the current task.
    ///
    /// Triggers the agent's cancellation token. Only `&self` is needed, so this
    /// can be called while the agent is borrowed immutably.
    pub fn cancel(&self) {
        self.cancellation.cancel();
    }
2128
    /// Reset the cancellation token so the agent can process another task.
    /// Must be called before `process_task()` if a previous task was cancelled.
    ///
    /// The old token is replaced by a brand-new one, so tokens previously handed
    /// out by `cancellation_token()` no longer refer to the agent's current token.
    pub fn reset_cancellation(&mut self) {
        self.cancellation = CancellationToken::new();
    }
2134
    /// Get the brain reference (for usage stats).
    ///
    /// Returns a shared borrow of the agent's `Brain`.
    pub fn brain(&self) -> &Brain {
        &self.brain
    }
2139
    /// Get the safety guardian reference (for audit log).
    ///
    /// Returns a shared borrow of the agent's `SafetyGuardian`.
    pub fn safety(&self) -> &SafetyGuardian {
        &self.safety
    }
2144
    /// Get a mutable reference to the safety guardian (for contract setup).
    ///
    /// Returns an exclusive borrow of the agent's `SafetyGuardian`.
    pub fn safety_mut(&mut self) -> &mut SafetyGuardian {
        &mut self.safety
    }
2149
    /// Get the memory system reference.
    ///
    /// Returns a shared borrow of the agent's `MemorySystem`.
    pub fn memory(&self) -> &MemorySystem {
        &self.memory
    }
2154
    /// Get a mutable reference to the memory system.
    ///
    /// Returns an exclusive borrow of the agent's `MemorySystem`.
    pub fn memory_mut(&mut self) -> &mut MemorySystem {
        &mut self.memory
    }
2159
    /// Get a reference to the agent configuration.
    ///
    /// Returns a shared borrow of the agent's `AgentConfig`.
    pub fn config(&self) -> &AgentConfig {
        &self.config
    }
2164
    /// Get a mutable reference to the agent configuration.
    ///
    /// Returns an exclusive borrow of the agent's `AgentConfig`.
    pub fn config_mut(&mut self) -> &mut AgentConfig {
        &mut self.config
    }
2169
    /// Get a reference to the cron scheduler (if enabled).
    ///
    /// Returns `None` when the agent holds no cron scheduler.
    pub fn cron_scheduler(&self) -> Option<&CronScheduler> {
        self.cron_scheduler.as_ref()
    }
2174
    /// Get a mutable reference to the cron scheduler (if enabled).
    ///
    /// Returns `None` when the agent holds no cron scheduler.
    pub fn cron_scheduler_mut(&mut self) -> Option<&mut CronScheduler> {
        self.cron_scheduler.as_mut()
    }
2179
    /// Get a reference to the job manager.
    ///
    /// Unlike the cron scheduler, the job manager is not optional.
    pub fn job_manager(&self) -> &JobManager {
        &self.job_manager
    }
2184
    /// Get a mutable reference to the job manager.
    ///
    /// Returns an exclusive borrow of the agent's `JobManager`.
    pub fn job_manager_mut(&mut self) -> &mut JobManager {
        &mut self.job_manager
    }
2189
2190    /// Check scheduler for due tasks and return their task strings.
2191    pub fn check_scheduler(&mut self) -> Vec<String> {
2192        let mut due_tasks = Vec::new();
2193
2194        // Check cron scheduler
2195        if let Some(ref scheduler) = self.cron_scheduler {
2196            let due_jobs: Vec<String> = scheduler
2197                .due_jobs()
2198                .iter()
2199                .map(|j| j.config.name.clone())
2200                .collect();
2201            for name in &due_jobs {
2202                if let Some(ref scheduler) = self.cron_scheduler {
2203                    if let Some(job) = scheduler.get_job(name) {
2204                        due_tasks.push(job.config.task.clone());
2205                    }
2206                }
2207            }
2208            // Mark them executed
2209            if let Some(ref mut scheduler) = self.cron_scheduler {
2210                for name in &due_jobs {
2211                    let _ = scheduler.mark_executed(name);
2212                }
2213            }
2214        }
2215
2216        // Check heartbeat tasks
2217        if let Some(ref mut heartbeat) = self.heartbeat_manager {
2218            let ready: Vec<(String, String)> = heartbeat
2219                .ready_tasks()
2220                .iter()
2221                .map(|t| (t.name.clone(), t.action.clone()))
2222                .collect();
2223            for (name, action) in &ready {
2224                if let Some(ref task_condition) = heartbeat
2225                    .config()
2226                    .tasks
2227                    .iter()
2228                    .find(|t| t.name == *name)
2229                    .and_then(|t| t.condition.clone())
2230                {
2231                    if HeartbeatManager::check_condition(task_condition) {
2232                        due_tasks.push(action.clone());
2233                        heartbeat.mark_executed(name);
2234                    }
2235                } else {
2236                    due_tasks.push(action.clone());
2237                    heartbeat.mark_executed(name);
2238                }
2239            }
2240        }
2241
2242        due_tasks
2243    }
2244
2245    /// Save scheduler state (cron jobs + background jobs) to the given directory.
2246    pub fn save_scheduler_state(
2247        &self,
2248        state_dir: &std::path::Path,
2249    ) -> Result<(), crate::error::SchedulerError> {
2250        if let Some(ref scheduler) = self.cron_scheduler {
2251            crate::scheduler::save_state(scheduler, &self.job_manager, state_dir)
2252        } else {
2253            // Nothing to save when scheduler is disabled
2254            Ok(())
2255        }
2256    }
2257
2258    /// Load scheduler state from disk and replace current scheduler/job_manager.
2259    pub fn load_scheduler_state(&mut self, state_dir: &std::path::Path) {
2260        if self.cron_scheduler.is_some() {
2261            let (loaded_scheduler, loaded_jm) = crate::scheduler::load_state(state_dir);
2262            if !loaded_scheduler.is_empty() {
2263                self.cron_scheduler = Some(loaded_scheduler);
2264                info!("Restored cron scheduler state from {:?}", state_dir);
2265            }
2266            if !loaded_jm.is_empty() {
2267                self.job_manager = loaded_jm;
2268                info!("Restored job manager state from {:?}", state_dir);
2269            }
2270        }
2271    }
2272
    /// Get recent decision explanations for transparency.
    ///
    /// Returns the stored explanations as a borrowed slice.
    pub fn recent_explanations(&self) -> &[DecisionExplanation] {
        &self.recent_explanations
    }
2277
    /// Get per-tool token usage breakdown (tool_name -> estimated tokens).
    ///
    /// Returns a borrow of the underlying map; iteration order is unspecified.
    pub fn tool_token_breakdown(&self) -> &HashMap<String, usize> {
        &self.tool_token_usage
    }
2282
2283    /// Format top token consumers as a summary string.
2284    pub fn top_tool_consumers(&self, n: usize) -> String {
2285        if self.tool_token_usage.is_empty() {
2286            return String::new();
2287        }
2288        let total: usize = self.tool_token_usage.values().sum();
2289        if total == 0 {
2290            return String::new();
2291        }
2292        let mut sorted: Vec<_> = self.tool_token_usage.iter().collect();
2293        sorted.sort_by(|a, b| b.1.cmp(a.1));
2294        let top: Vec<String> = sorted
2295            .iter()
2296            .take(n)
2297            .map(|(name, tokens)| {
2298                let pct = (**tokens as f64 / total as f64 * 100.0) as u8;
2299                format!("{} ({}%)", name, pct)
2300            })
2301            .collect();
2302        top.join(", ")
2303    }
2304
2305    /// Run a council deliberation if configured and the task is appropriate.
2306    ///
2307    /// Returns `Some(CouncilResult)` if council was used, `None` if skipped.
2308    /// Falls back gracefully if council fails.
2309    pub async fn think_with_council(
2310        &self,
2311        task: &str,
2312        council: &crate::council::PlanningCouncil,
2313    ) -> Option<crate::council::CouncilResult> {
2314        if !crate::council::should_use_council(task) {
2315            debug!(task, "Skipping council — task is not a planning task");
2316            return None;
2317        }
2318
2319        info!(task, "Using council deliberation for planning task");
2320        match council.deliberate(task).await {
2321            Ok(result) => {
2322                info!(
2323                    responses = result.member_responses.len(),
2324                    reviews = result.peer_reviews.len(),
2325                    cost = format!("${:.4}", result.total_cost),
2326                    "Council deliberation succeeded"
2327                );
2328                Some(result)
2329            }
2330            Err(e) => {
2331                warn!(error = %e, "Council deliberation failed, falling back to single model");
2332                None
2333            }
2334        }
2335    }
2336
2337    // --- Plan Mode ---
2338
    /// Toggle plan mode on or off.
    ///
    /// Only stores the flag; any plan already held by the agent is left untouched.
    pub fn set_plan_mode(&mut self, enabled: bool) {
        self.plan_mode = enabled;
    }
2343
    /// Query whether plan mode is active.
    ///
    /// Returns the flag last stored by `set_plan_mode`.
    pub fn plan_mode(&self) -> bool {
        self.plan_mode
    }
2348
    /// Access the current plan, if any.
    ///
    /// Returns `None` when the agent holds no plan.
    pub fn current_plan(&self) -> Option<&crate::plan::ExecutionPlan> {
        self.current_plan.as_ref()
    }
2353
2354    /// Generate a structured execution plan for a task via the LLM.
2355    async fn generate_plan(
2356        &mut self,
2357        task: &str,
2358    ) -> Result<crate::plan::ExecutionPlan, RustantError> {
2359        use crate::plan::{PlanStatus, PLAN_GENERATION_PROMPT};
2360
2361        // Build a prompt with available tools and the task
2362        let tool_list: Vec<String> = self
2363            .tool_definitions()
2364            .iter()
2365            .map(|t| format!("- {} — {}", t.name, t.description))
2366            .collect();
2367        let tools_str = tool_list.join("\n");
2368
2369        let plan_prompt = format!(
2370            "{}\n\nAvailable tools:\n{}\n\nTask: {}",
2371            PLAN_GENERATION_PROMPT, tools_str, task
2372        );
2373
2374        // Use a temporary conversation for plan generation (don't pollute memory)
2375        let messages = vec![Message::system(&plan_prompt), Message::user(task)];
2376
2377        let response = self
2378            .brain
2379            .think_with_retry(&messages, None, 3)
2380            .await
2381            .map_err(RustantError::Llm)?;
2382
2383        // Record usage
2384        self.budget.record_usage(
2385            &response.usage,
2386            &CostEstimate {
2387                input_cost: 0.0,
2388                output_cost: 0.0,
2389            },
2390        );
2391
2392        let text = response.message.content.as_text().unwrap_or("").to_string();
2393        let mut plan = crate::plan::parse_plan_json(&text, task);
2394
2395        // Enforce max_steps from config
2396        let max_steps = self.config.plan.as_ref().map(|p| p.max_steps).unwrap_or(20);
2397        if plan.steps.len() > max_steps {
2398            plan.steps.truncate(max_steps);
2399        }
2400
2401        plan.status = PlanStatus::PendingReview;
2402        Ok(plan)
2403    }
2404
2405    /// Execute an approved plan step by step.
2406    async fn execute_plan(
2407        &mut self,
2408        plan: &mut crate::plan::ExecutionPlan,
2409    ) -> Result<TaskResult, RustantError> {
2410        use crate::plan::{PlanStatus, StepStatus};
2411
2412        plan.status = PlanStatus::Executing;
2413        let task_id = Uuid::new_v4();
2414
2415        while let Some(step_idx) = plan.next_pending_step() {
2416            plan.current_step = Some(step_idx);
2417            let step = &plan.steps[step_idx];
2418            let step_desc = step.description.clone();
2419            let step_tool = step.tool.clone();
2420            let step_args = step.tool_args.clone();
2421
2422            // Notify step start
2423            self.callback
2424                .on_plan_step_start(step_idx, &plan.steps[step_idx])
2425                .await;
2426            plan.steps[step_idx].status = StepStatus::InProgress;
2427
2428            let result = if let Some(tool_name) = &step_tool {
2429                // If we have a tool and args, execute directly
2430                let args = step_args.unwrap_or(serde_json::json!({}));
2431
2432                self.callback.on_tool_start(tool_name, &args).await;
2433                let start = std::time::Instant::now();
2434                let exec_result = self.execute_tool("plan", tool_name, &args).await;
2435                let duration_ms = start.elapsed().as_millis() as u64;
2436
2437                match exec_result {
2438                    Ok(output) => {
2439                        self.callback
2440                            .on_tool_result(tool_name, &output, duration_ms)
2441                            .await;
2442                        Ok(output.content)
2443                    }
2444                    Err(e) => Err(format!("{}", e)),
2445                }
2446            } else {
2447                // No specific tool — let the LLM handle this step
2448                // by running one Think iteration with the step as context
2449                let step_prompt = format!(
2450                    "Execute plan step {}: {}\n\nPrevious step results are in context.",
2451                    step_idx + 1,
2452                    step_desc
2453                );
2454                self.memory.add_message(Message::user(&step_prompt));
2455
2456                let conversation = self.memory.context_messages();
2457                let tools = Some(self.tool_definitions());
2458                let response = if self.config.llm.use_streaming {
2459                    self.think_streaming(&conversation, tools).await
2460                } else {
2461                    self.brain.think_with_retry(&conversation, tools, 3).await
2462                };
2463
2464                match response {
2465                    Ok(resp) => {
2466                        let text = resp
2467                            .message
2468                            .content
2469                            .as_text()
2470                            .unwrap_or("(no output)")
2471                            .to_string();
2472                        self.callback.on_assistant_message(&text).await;
2473                        self.memory.add_message(resp.message);
2474                        Ok(text)
2475                    }
2476                    Err(e) => Err(format!("{}", e)),
2477                }
2478            };
2479
2480            match result {
2481                Ok(output) => {
2482                    plan.complete_step(step_idx, &output);
2483                }
2484                Err(error) => {
2485                    plan.fail_step(step_idx, &error);
2486                    // Notify step failure
2487                    self.callback
2488                        .on_plan_step_complete(step_idx, &plan.steps[step_idx])
2489                        .await;
2490                    // Stop execution on first failure
2491                    plan.status = PlanStatus::Failed;
2492                    break;
2493                }
2494            }
2495
2496            // Notify step completion
2497            self.callback
2498                .on_plan_step_complete(step_idx, &plan.steps[step_idx])
2499                .await;
2500        }
2501
2502        // Update overall status
2503        if plan.status != PlanStatus::Failed {
2504            let all_done = plan
2505                .steps
2506                .iter()
2507                .all(|s| s.status == StepStatus::Completed || s.status == StepStatus::Skipped);
2508            plan.status = if all_done {
2509                PlanStatus::Completed
2510            } else {
2511                PlanStatus::Failed
2512            };
2513        }
2514
2515        let success = plan.status == PlanStatus::Completed;
2516        let response = plan.progress_summary();
2517
2518        Ok(TaskResult {
2519            task_id,
2520            success,
2521            response,
2522            iterations: plan.steps.len(),
2523            total_usage: *self.brain.total_usage(),
2524            total_cost: *self.brain.total_cost(),
2525        })
2526    }
2527
2528    /// Process a task in plan mode: generate → review → execute.
2529    async fn process_task_with_plan(&mut self, task: &str) -> Result<TaskResult, RustantError> {
2530        use crate::plan::{PlanDecision, PlanStatus};
2531
2532        // 1. Generate the plan
2533        self.state.status = AgentStatus::Planning;
2534        self.callback.on_status_change(AgentStatus::Planning).await;
2535        self.callback.on_plan_generating(task).await;
2536
2537        let mut plan = self.generate_plan(task).await?;
2538
2539        // 2. Handle any clarifications
2540        for question in &plan.clarifications.clone() {
2541            let answer = self.callback.on_clarification_request(question).await;
2542            if !answer.is_empty() {
2543                // Add clarification answer to context for potential re-generation
2544                self.memory
2545                    .add_message(Message::user(format!("Q: {} A: {}", question, answer)));
2546            }
2547        }
2548
2549        // 3. Review loop
2550        loop {
2551            let decision = self.callback.on_plan_review(&plan).await;
2552            match decision {
2553                PlanDecision::Approve => break,
2554                PlanDecision::Reject => {
2555                    plan.status = PlanStatus::Cancelled;
2556                    self.current_plan = Some(plan);
2557                    self.state.complete();
2558                    self.callback.on_status_change(AgentStatus::Complete).await;
2559                    let task_id = self.state.task_id.unwrap_or_else(Uuid::new_v4);
2560                    return Ok(TaskResult {
2561                        task_id,
2562                        success: false,
2563                        response: "Plan rejected by user.".to_string(),
2564                        iterations: 0,
2565                        total_usage: *self.brain.total_usage(),
2566                        total_cost: *self.brain.total_cost(),
2567                    });
2568                }
2569                PlanDecision::EditStep(idx, new_desc) => {
2570                    if let Some(step) = plan.steps.get_mut(idx) {
2571                        step.description = new_desc;
2572                        plan.updated_at = chrono::Utc::now();
2573                    }
2574                }
2575                PlanDecision::RemoveStep(idx) => {
2576                    if idx < plan.steps.len() {
2577                        plan.steps.remove(idx);
2578                        // Re-index remaining steps
2579                        for (i, step) in plan.steps.iter_mut().enumerate() {
2580                            step.index = i;
2581                        }
2582                        plan.updated_at = chrono::Utc::now();
2583                    }
2584                }
2585                PlanDecision::AddStep(idx, desc) => {
2586                    let new_step = crate::plan::PlanStep {
2587                        index: idx,
2588                        description: desc,
2589                        ..Default::default()
2590                    };
2591                    if idx <= plan.steps.len() {
2592                        plan.steps.insert(idx, new_step);
2593                    } else {
2594                        plan.steps.push(new_step);
2595                    }
2596                    // Re-index
2597                    for (i, step) in plan.steps.iter_mut().enumerate() {
2598                        step.index = i;
2599                    }
2600                    plan.updated_at = chrono::Utc::now();
2601                }
2602                PlanDecision::ReorderSteps(new_order) => {
2603                    let old_steps = plan.steps.clone();
2604                    plan.steps.clear();
2605                    for (i, &old_idx) in new_order.iter().enumerate() {
2606                        if let Some(mut step) = old_steps.get(old_idx).cloned() {
2607                            step.index = i;
2608                            plan.steps.push(step);
2609                        }
2610                    }
2611                    plan.updated_at = chrono::Utc::now();
2612                }
2613                PlanDecision::AskQuestion(question) => {
2614                    // Send question to LLM and display the answer
2615                    let messages = vec![
2616                        Message::system("Answer this question about the plan you generated."),
2617                        Message::user(&question),
2618                    ];
2619                    if let Ok(resp) = self.brain.think_with_retry(&messages, None, 1).await {
2620                        if let Some(answer) = resp.message.content.as_text() {
2621                            self.callback.on_assistant_message(answer).await;
2622                        }
2623                    }
2624                }
2625            }
2626        }
2627
2628        // 4. Execute the approved plan
2629        self.current_plan = Some(plan.clone());
2630        let result = self.execute_plan(&mut plan).await?;
2631        self.current_plan = Some(plan);
2632        self.state.complete();
2633        self.callback.on_status_change(AgentStatus::Complete).await;
2634
2635        Ok(result)
2636    }
2637
2638    /// Check if context compression is needed and perform it.
2639    ///
2640    /// Extracted from the agent loop to avoid duplication between the single-ToolCall
2641    /// and MultiPart code paths.
2642    async fn check_and_compress(&mut self) {
2643        if !self.memory.short_term.needs_compression() {
2644            return;
2645        }
2646
2647        debug!("Triggering LLM-based context compression");
2648        let msgs_to_summarize: Vec<crate::types::Message> = self
2649            .memory
2650            .short_term
2651            .messages_to_summarize()
2652            .into_iter()
2653            .cloned()
2654            .collect();
2655        let msgs_count = msgs_to_summarize.len();
2656        let pinned_count = self.memory.short_term.pinned_count();
2657
2658        let (summary_text, was_llm) = match self.summarizer.summarize(&msgs_to_summarize).await {
2659            Ok(result) => {
2660                info!(
2661                    messages_summarized = result.messages_summarized,
2662                    tokens_saved = result.tokens_saved,
2663                    "Context compression via LLM summarization"
2664                );
2665                (result.text, true)
2666            }
2667            Err(e) => {
2668                warn!(
2669                    error = %e,
2670                    "LLM summarization failed, falling back to truncation"
2671                );
2672                let text = crate::summarizer::smart_fallback_summary(&msgs_to_summarize, 500);
2673                (text, false)
2674            }
2675        };
2676
2677        self.memory.short_term.compress(summary_text);
2678
2679        self.callback
2680            .on_context_health(&ContextHealthEvent::Compressed {
2681                messages_compressed: msgs_count,
2682                was_llm_summarized: was_llm,
2683                pinned_preserved: pinned_count,
2684            })
2685            .await;
2686    }
2687
2688    /// Compact the conversation context by summarizing older messages.
2689    /// Returns (messages_before, messages_after).
2690    pub fn compact(&mut self) -> (usize, usize) {
2691        let before = self.memory.short_term.len();
2692        if before <= 2 {
2693            return (before, before);
2694        }
2695        let msgs: Vec<crate::types::Message> =
2696            self.memory.short_term.messages().iter().cloned().collect();
2697        let summary = crate::summarizer::smart_fallback_summary(&msgs, 500);
2698        self.memory.short_term.compress(summary);
2699        let after = self.memory.short_term.len();
2700        (before, after)
2701    }
2702}
2703
/// A no-op callback for testing.
///
/// Stateless unit struct: its `AgentCallback` implementation ignores every
/// notification and approves every approval request.
pub struct NoOpCallback;
2706
// Every notification hook below is an empty body; the only method that returns
// a meaningful value is `request_approval`.
// NOTE(review): callback methods not listed here (e.g. the plan-related hooks
// used by the agent) presumably rely on default implementations in the
// `AgentCallback` trait — confirm against the trait definition.
#[async_trait::async_trait]
impl AgentCallback for NoOpCallback {
    async fn on_assistant_message(&self, _message: &str) {}
    async fn on_token(&self, _token: &str) {}
    // Unconditionally approves, so tests never block waiting for a decision.
    async fn request_approval(&self, _action: &ActionRequest) -> ApprovalDecision {
        ApprovalDecision::Approve // auto-approve in tests
    }
    async fn on_tool_start(&self, _tool_name: &str, _args: &serde_json::Value) {}
    async fn on_tool_result(&self, _tool_name: &str, _output: &ToolOutput, _duration_ms: u64) {}
    async fn on_status_change(&self, _status: AgentStatus) {}
    async fn on_usage_update(&self, _usage: &TokenUsage, _cost: &CostEstimate) {}
    async fn on_decision_explanation(&self, _explanation: &DecisionExplanation) {}
}
2720
/// A callback that records all events for test assertions.
///
/// Each log is kept behind its own `tokio::sync::Mutex`, so the `&self`
/// callback methods can append to it.
pub struct RecordingCallback {
    /// Text of each assistant message received.
    messages: tokio::sync::Mutex<Vec<String>>,
    /// Name of each tool reported via `on_tool_start`.
    tool_calls: tokio::sync::Mutex<Vec<String>>,
    /// Each status reported via `on_status_change`.
    status_changes: tokio::sync::Mutex<Vec<AgentStatus>>,
    /// A clone of each decision explanation received.
    explanations: tokio::sync::Mutex<Vec<DecisionExplanation>>,
    /// Each budget warning as a `(message, severity)` pair.
    budget_warnings: tokio::sync::Mutex<Vec<(String, BudgetSeverity)>>,
    /// A clone of each context-health event received.
    context_health_events: tokio::sync::Mutex<Vec<ContextHealthEvent>>,
}
2730
2731impl RecordingCallback {
2732    pub fn new() -> Self {
2733        Self {
2734            messages: tokio::sync::Mutex::new(Vec::new()),
2735            tool_calls: tokio::sync::Mutex::new(Vec::new()),
2736            status_changes: tokio::sync::Mutex::new(Vec::new()),
2737            explanations: tokio::sync::Mutex::new(Vec::new()),
2738            budget_warnings: tokio::sync::Mutex::new(Vec::new()),
2739            context_health_events: tokio::sync::Mutex::new(Vec::new()),
2740        }
2741    }
2742
2743    pub async fn messages(&self) -> Vec<String> {
2744        self.messages.lock().await.clone()
2745    }
2746
2747    pub async fn tool_calls(&self) -> Vec<String> {
2748        self.tool_calls.lock().await.clone()
2749    }
2750
2751    pub async fn status_changes(&self) -> Vec<AgentStatus> {
2752        self.status_changes.lock().await.clone()
2753    }
2754
2755    pub async fn explanations(&self) -> Vec<DecisionExplanation> {
2756        self.explanations.lock().await.clone()
2757    }
2758
2759    pub async fn budget_warnings(&self) -> Vec<(String, BudgetSeverity)> {
2760        self.budget_warnings.lock().await.clone()
2761    }
2762
2763    pub async fn context_health_events(&self) -> Vec<ContextHealthEvent> {
2764        self.context_health_events.lock().await.clone()
2765    }
2766}
2767
/// `Default` simply delegates to [`RecordingCallback::new`].
impl Default for RecordingCallback {
    /// Equivalent to `RecordingCallback::new()`.
    fn default() -> Self {
        Self::new()
    }
}
2773
2774#[async_trait::async_trait]
2775impl AgentCallback for RecordingCallback {
2776    async fn on_assistant_message(&self, message: &str) {
2777        self.messages.lock().await.push(message.to_string());
2778    }
2779    async fn on_token(&self, _token: &str) {}
2780    async fn request_approval(&self, _action: &ActionRequest) -> ApprovalDecision {
2781        ApprovalDecision::Approve
2782    }
2783    async fn on_tool_start(&self, tool_name: &str, _args: &serde_json::Value) {
2784        self.tool_calls.lock().await.push(tool_name.to_string());
2785    }
2786    async fn on_tool_result(&self, _tool_name: &str, _output: &ToolOutput, _duration_ms: u64) {}
2787    async fn on_status_change(&self, status: AgentStatus) {
2788        self.status_changes.lock().await.push(status);
2789    }
2790    async fn on_usage_update(&self, _usage: &TokenUsage, _cost: &CostEstimate) {}
2791    async fn on_decision_explanation(&self, explanation: &DecisionExplanation) {
2792        self.explanations.lock().await.push(explanation.clone());
2793    }
2794    async fn on_budget_warning(&self, message: &str, severity: BudgetSeverity) {
2795        self.budget_warnings
2796            .lock()
2797            .await
2798            .push((message.to_string(), severity));
2799    }
2800    async fn on_context_health(&self, event: &ContextHealthEvent) {
2801        self.context_health_events.lock().await.push(event.clone());
2802    }
2803}
2804
2805#[cfg(test)]
2806mod tests {
2807    use super::*;
2808    use crate::brain::MockLlmProvider;
2809
2810    fn create_test_agent(provider: Arc<MockLlmProvider>) -> (Agent, Arc<RecordingCallback>) {
2811        let callback = Arc::new(RecordingCallback::new());
2812        let mut config = AgentConfig::default();
2813        // Use non-streaming for deterministic test behavior
2814        config.llm.use_streaming = false;
2815        let agent = Agent::new(provider, config, callback.clone());
2816        (agent, callback)
2817    }
2818
2819    #[tokio::test]
2820    async fn test_agent_simple_text_response() {
2821        let provider = Arc::new(MockLlmProvider::new());
2822        provider.queue_response(MockLlmProvider::text_response("Hello! I can help you."));
2823
2824        let (mut agent, callback) = create_test_agent(provider);
2825        let result = agent.process_task("Say hello").await.unwrap();
2826
2827        assert!(result.success);
2828        assert_eq!(result.response, "Hello! I can help you.");
2829        assert_eq!(result.iterations, 1);
2830
2831        let messages = callback.messages().await;
2832        assert_eq!(messages.len(), 1);
2833        assert_eq!(messages[0], "Hello! I can help you.");
2834    }
2835
2836    #[tokio::test]
2837    async fn test_agent_tool_call_then_response() {
2838        let provider = Arc::new(MockLlmProvider::new());
2839
2840        // First response: tool call
2841        provider.queue_response(MockLlmProvider::tool_call_response(
2842            "echo",
2843            serde_json::json!({"text": "test"}),
2844        ));
2845        // Second response after tool result: text
2846        provider.queue_response(MockLlmProvider::text_response(
2847            "I executed the echo tool successfully.",
2848        ));
2849
2850        let (mut agent, callback) = create_test_agent(provider);
2851
2852        // Register a simple echo tool
2853        agent.register_tool(RegisteredTool {
2854            definition: ToolDefinition {
2855                name: "echo".to_string(),
2856                description: "Echo input text".to_string(),
2857                parameters: serde_json::json!({
2858                    "type": "object",
2859                    "properties": { "text": { "type": "string" } },
2860                    "required": ["text"]
2861                }),
2862            },
2863            risk_level: RiskLevel::ReadOnly,
2864            executor: Box::new(|args: serde_json::Value| {
2865                Box::pin(async move {
2866                    let text = args["text"].as_str().unwrap_or("no text");
2867                    Ok(ToolOutput::text(format!("Echo: {}", text)))
2868                })
2869            }),
2870        });
2871
2872        let result = agent.process_task("Test echo tool").await.unwrap();
2873
2874        assert!(result.success);
2875        assert_eq!(result.iterations, 2);
2876
2877        let tool_calls = callback.tool_calls().await;
2878        assert_eq!(tool_calls.len(), 1);
2879        assert_eq!(tool_calls[0], "echo");
2880    }
2881
2882    #[tokio::test]
2883    async fn test_agent_tool_not_found() {
2884        let provider = Arc::new(MockLlmProvider::new());
2885        provider.queue_response(MockLlmProvider::tool_call_response(
2886            "nonexistent_tool",
2887            serde_json::json!({}),
2888        ));
2889        // After tool error, agent should respond with text
2890        provider.queue_response(MockLlmProvider::text_response(
2891            "Sorry, that tool doesn't exist.",
2892        ));
2893
2894        let (mut agent, _callback) = create_test_agent(provider);
2895        let result = agent.process_task("Use nonexistent tool").await.unwrap();
2896
2897        // Agent should still complete (with the tool error in context)
2898        assert!(result.success);
2899    }
2900
2901    #[tokio::test]
2902    async fn test_agent_state_tracking() {
2903        let provider = Arc::new(MockLlmProvider::new());
2904        provider.queue_response(MockLlmProvider::text_response("Done"));
2905
2906        let (mut agent, callback) = create_test_agent(provider);
2907
2908        assert_eq!(agent.state().status, AgentStatus::Idle);
2909
2910        agent.process_task("Simple task").await.unwrap();
2911
2912        assert_eq!(agent.state().status, AgentStatus::Complete);
2913
2914        let statuses = callback.status_changes().await;
2915        assert!(statuses.contains(&AgentStatus::Thinking));
2916        assert!(statuses.contains(&AgentStatus::Complete));
2917    }
2918
2919    #[tokio::test]
2920    async fn test_agent_max_iterations() {
2921        let provider = Arc::new(MockLlmProvider::new());
2922        // Queue many tool calls to exhaust iterations (more than max_iterations default of 50)
2923        for _ in 0..55 {
2924            provider.queue_response(MockLlmProvider::tool_call_response(
2925                "echo",
2926                serde_json::json!({"text": "loop"}),
2927            ));
2928        }
2929
2930        let (mut agent, _callback) = create_test_agent(provider);
2931        agent.register_tool(RegisteredTool {
2932            definition: ToolDefinition {
2933                name: "echo".to_string(),
2934                description: "Echo".to_string(),
2935                parameters: serde_json::json!({}),
2936            },
2937            risk_level: RiskLevel::ReadOnly,
2938            executor: Box::new(|_| Box::pin(async { Ok(ToolOutput::text("echoed")) })),
2939        });
2940
2941        let result = agent.process_task("Infinite loop test").await;
2942        assert!(result.is_err());
2943        match result.unwrap_err() {
2944            RustantError::Agent(AgentError::MaxIterationsReached { max }) => {
2945                assert_eq!(max, 50);
2946            }
2947            e => panic!("Expected MaxIterationsReached, got: {:?}", e),
2948        }
2949    }
2950
2951    #[tokio::test]
2952    async fn test_agent_cancellation() {
2953        let provider = Arc::new(MockLlmProvider::new());
2954        // Queue a tool call response so the agent enters the loop
2955        provider.queue_response(MockLlmProvider::tool_call_response(
2956            "echo",
2957            serde_json::json!({"text": "test"}),
2958        ));
2959
2960        let (mut agent, _callback) = create_test_agent(provider);
2961        agent.register_tool(RegisteredTool {
2962            definition: ToolDefinition {
2963                name: "echo".to_string(),
2964                description: "Echo".to_string(),
2965                parameters: serde_json::json!({}),
2966            },
2967            risk_level: RiskLevel::ReadOnly,
2968            executor: Box::new(|_| Box::pin(async { Ok(ToolOutput::text("echoed")) })),
2969        });
2970
2971        // Cancel before processing
2972        agent.cancel();
2973        let result = agent.process_task("Cancelled task").await;
2974        assert!(result.is_err());
2975        match result.unwrap_err() {
2976            RustantError::Agent(AgentError::Cancelled) => {}
2977            e => panic!("Expected Cancelled, got: {:?}", e),
2978        }
2979    }
2980
2981    #[test]
2982    fn test_no_op_callback() {
2983        // Just ensure it compiles and doesn't panic
2984        let _callback = NoOpCallback;
2985    }
2986
2987    #[tokio::test]
2988    async fn test_agent_streaming_mode() {
2989        let provider = Arc::new(MockLlmProvider::new());
2990        provider.queue_response(MockLlmProvider::text_response("streaming response"));
2991
2992        let callback = Arc::new(RecordingCallback::new());
2993        let mut config = AgentConfig::default();
2994        config.llm.use_streaming = true;
2995
2996        let mut agent = Agent::new(provider, config, callback.clone());
2997        let result = agent.process_task("Test streaming").await.unwrap();
2998
2999        assert!(result.success);
3000        assert!(result.response.contains("streaming"));
3001        // Streaming should have triggered on_token callbacks
3002        // (MockLlmProvider splits on whitespace)
3003    }
3004
3005    #[tokio::test]
3006    async fn test_recording_callback() {
3007        let callback = RecordingCallback::new();
3008        callback.on_assistant_message("hello").await;
3009        callback
3010            .on_tool_start("file_read", &serde_json::json!({}))
3011            .await;
3012        callback.on_status_change(AgentStatus::Thinking).await;
3013
3014        assert_eq!(callback.messages().await, vec!["hello"]);
3015        assert_eq!(callback.tool_calls().await, vec!["file_read"]);
3016        assert_eq!(callback.status_changes().await, vec![AgentStatus::Thinking]);
3017    }
3018
3019    // --- Gap 1: Explanation emission tests ---
3020
3021    #[tokio::test]
3022    async fn test_recording_callback_records_explanations() {
3023        let callback = RecordingCallback::new();
3024        let explanation = ExplanationBuilder::new(DecisionType::ToolSelection {
3025            selected_tool: "echo".into(),
3026        })
3027        .build();
3028        callback.on_decision_explanation(&explanation).await;
3029
3030        let explanations = callback.explanations().await;
3031        assert_eq!(explanations.len(), 1);
3032        match &explanations[0].decision_type {
3033            DecisionType::ToolSelection { selected_tool } => {
3034                assert_eq!(selected_tool, "echo");
3035            }
3036            other => panic!("Expected ToolSelection, got {:?}", other),
3037        }
3038    }
3039
3040    #[tokio::test]
3041    async fn test_multipart_tool_call_emits_explanation() {
3042        let provider = Arc::new(MockLlmProvider::new());
3043
3044        // First response: multipart (text + tool call)
3045        provider.queue_response(MockLlmProvider::multipart_response(
3046            "I'll echo for you",
3047            "echo",
3048            serde_json::json!({"text": "test"}),
3049        ));
3050        // Second response after tool result: text
3051        provider.queue_response(MockLlmProvider::text_response("Done."));
3052
3053        let (mut agent, callback) = create_test_agent(provider);
3054        agent.register_tool(RegisteredTool {
3055            definition: ToolDefinition {
3056                name: "echo".to_string(),
3057                description: "Echo input text".to_string(),
3058                parameters: serde_json::json!({
3059                    "type": "object",
3060                    "properties": { "text": { "type": "string" } },
3061                    "required": ["text"]
3062                }),
3063            },
3064            risk_level: RiskLevel::ReadOnly,
3065            executor: Box::new(|args: serde_json::Value| {
3066                Box::pin(async move {
3067                    let text = args["text"].as_str().unwrap_or("no text");
3068                    Ok(ToolOutput::text(format!("Echo: {}", text)))
3069                })
3070            }),
3071        });
3072
3073        agent.process_task("Echo test").await.unwrap();
3074
3075        let explanations = callback.explanations().await;
3076        assert!(
3077            !explanations.is_empty(),
3078            "MultiPart tool calls should emit explanations"
3079        );
3080        // Verify the explanation is for the echo tool
3081        let has_echo = explanations.iter().any(|e| {
3082            matches!(&e.decision_type, DecisionType::ToolSelection { selected_tool } if selected_tool == "echo")
3083        });
3084        assert!(has_echo, "Should have explanation for echo tool selection");
3085    }
3086
3087    #[tokio::test]
3088    async fn test_single_tool_call_emits_explanation() {
3089        let provider = Arc::new(MockLlmProvider::new());
3090        provider.queue_response(MockLlmProvider::tool_call_response(
3091            "echo",
3092            serde_json::json!({"text": "hi"}),
3093        ));
3094        provider.queue_response(MockLlmProvider::text_response("Done."));
3095
3096        let (mut agent, callback) = create_test_agent(provider);
3097        agent.register_tool(RegisteredTool {
3098            definition: ToolDefinition {
3099                name: "echo".to_string(),
3100                description: "Echo".to_string(),
3101                parameters: serde_json::json!({}),
3102            },
3103            risk_level: RiskLevel::ReadOnly,
3104            executor: Box::new(|_| Box::pin(async { Ok(ToolOutput::text("echoed")) })),
3105        });
3106
3107        agent.process_task("Echo test").await.unwrap();
3108
3109        let explanations = callback.explanations().await;
3110        assert!(
3111            !explanations.is_empty(),
3112            "Single tool calls should emit explanations"
3113        );
3114    }
3115
3116    #[tokio::test]
3117    async fn test_contract_violation_emits_error_recovery_explanation() {
3118        use crate::safety::{Invariant, Predicate, SafetyContract};
3119
3120        let provider = Arc::new(MockLlmProvider::new());
3121        provider.queue_response(MockLlmProvider::tool_call_response(
3122            "echo",
3123            serde_json::json!({"text": "test"}),
3124        ));
3125        // After the contract violation error, LLM responds with text
3126        provider.queue_response(MockLlmProvider::text_response("OK, I'll skip that."));
3127
3128        let callback = Arc::new(RecordingCallback::new());
3129        let mut config = AgentConfig::default();
3130        config.llm.use_streaming = false;
3131        let mut agent = Agent::new(provider, config, callback.clone());
3132        agent.register_tool(RegisteredTool {
3133            definition: ToolDefinition {
3134                name: "echo".to_string(),
3135                description: "Echo".to_string(),
3136                parameters: serde_json::json!({}),
3137            },
3138            risk_level: RiskLevel::ReadOnly,
3139            executor: Box::new(|_| Box::pin(async { Ok(ToolOutput::text("echoed")) })),
3140        });
3141
3142        // Set a contract that blocks all tools
3143        agent.safety_mut().set_contract(SafetyContract {
3144            name: "deny-all".into(),
3145            invariants: vec![Invariant {
3146                description: "no tools allowed".into(),
3147                predicate: Predicate::AlwaysFalse,
3148            }],
3149            ..Default::default()
3150        });
3151
3152        agent.process_task("Echo test").await.unwrap();
3153
3154        let explanations = callback.explanations().await;
3155        let has_error_recovery = explanations.iter().any(|e| {
3156            matches!(
3157                &e.decision_type,
3158                DecisionType::ErrorRecovery { error, .. } if error.contains("Contract violation")
3159            )
3160        });
3161        assert!(
3162            has_error_recovery,
3163            "Contract violations should emit ErrorRecovery explanations, got: {:?}",
3164            explanations
3165                .iter()
3166                .map(|e| &e.decision_type)
3167                .collect::<Vec<_>>()
3168        );
3169    }
3170
3171    // --- Gap 4: Budget warning tests ---
3172
3173    #[tokio::test]
3174    async fn test_recording_callback_records_budget_warnings() {
3175        let callback = RecordingCallback::new();
3176        callback
3177            .on_budget_warning(
3178                "Session cost at 85% of $1.00 limit",
3179                BudgetSeverity::Warning,
3180            )
3181            .await;
3182        callback
3183            .on_budget_warning("Budget exceeded!", BudgetSeverity::Exceeded)
3184            .await;
3185
3186        let warnings = callback.budget_warnings().await;
3187        assert_eq!(warnings.len(), 2);
3188        assert!(warnings[0].0.contains("85%"));
3189        assert_eq!(warnings[0].1, BudgetSeverity::Warning);
3190        assert_eq!(warnings[1].1, BudgetSeverity::Exceeded);
3191    }
3192
3193    #[test]
3194    fn test_budget_severity_enum() {
3195        assert_ne!(BudgetSeverity::Warning, BudgetSeverity::Exceeded);
3196        assert_eq!(BudgetSeverity::Warning, BudgetSeverity::Warning);
3197    }
3198
3199    // --- Gap 3: ActionDetails parsing tests ---
3200
3201    #[test]
3202    fn test_parse_action_details_file_read() {
3203        let args = serde_json::json!({"path": "src/lib.rs"});
3204        let details = Agent::parse_action_details("file_read", &args);
3205        match details {
3206            ActionDetails::FileRead { path } => {
3207                assert_eq!(path, std::path::PathBuf::from("src/lib.rs"));
3208            }
3209            other => panic!("Expected FileRead, got {:?}", other),
3210        }
3211    }
3212
3213    #[test]
3214    fn test_parse_action_details_file_list() {
3215        let args = serde_json::json!({"path": "src/"});
3216        let details = Agent::parse_action_details("file_list", &args);
3217        assert!(matches!(details, ActionDetails::FileRead { .. }));
3218    }
3219
3220    #[test]
3221    fn test_parse_action_details_file_write() {
3222        let args = serde_json::json!({"path": "x.rs", "content": "hello"});
3223        let details = Agent::parse_action_details("file_write", &args);
3224        match details {
3225            ActionDetails::FileWrite { path, size_bytes } => {
3226                assert_eq!(path, std::path::PathBuf::from("x.rs"));
3227                assert_eq!(size_bytes, 5); // "hello".len()
3228            }
3229            other => panic!("Expected FileWrite, got {:?}", other),
3230        }
3231    }
3232
3233    #[test]
3234    fn test_parse_action_details_shell_exec() {
3235        let args = serde_json::json!({"command": "cargo test"});
3236        let details = Agent::parse_action_details("shell_exec", &args);
3237        match details {
3238            ActionDetails::ShellCommand { command } => {
3239                assert_eq!(command, "cargo test");
3240            }
3241            other => panic!("Expected ShellCommand, got {:?}", other),
3242        }
3243    }
3244
3245    #[test]
3246    fn test_parse_action_details_git_commit() {
3247        let args = serde_json::json!({"message": "fix bug"});
3248        let details = Agent::parse_action_details("git_commit", &args);
3249        match details {
3250            ActionDetails::GitOperation { operation } => {
3251                assert!(
3252                    operation.contains("commit"),
3253                    "Expected 'commit' in '{}'",
3254                    operation
3255                );
3256                assert!(
3257                    operation.contains("fix bug"),
3258                    "Expected 'fix bug' in '{}'",
3259                    operation
3260                );
3261            }
3262            other => panic!("Expected GitOperation, got {:?}", other),
3263        }
3264    }
3265
3266    #[test]
3267    fn test_parse_action_details_git_status() {
3268        let args = serde_json::json!({});
3269        let details = Agent::parse_action_details("git_status", &args);
3270        assert!(matches!(details, ActionDetails::GitOperation { .. }));
3271    }
3272
3273    #[test]
3274    fn test_parse_action_details_unknown_falls_back() {
3275        let args = serde_json::json!({"foo": "bar"});
3276        let details = Agent::parse_action_details("custom_tool", &args);
3277        assert!(matches!(details, ActionDetails::Other { .. }));
3278    }
3279
3280    #[test]
3281    fn test_build_approval_context_file_write_has_reasoning() {
3282        let details = ActionDetails::FileWrite {
3283            path: "test.rs".into(),
3284            size_bytes: 100,
3285        };
3286        let ctx = Agent::build_approval_context("file_write", &details, RiskLevel::Write);
3287        assert!(
3288            ctx.reasoning.is_some(),
3289            "FileWrite should produce reasoning"
3290        );
3291        let reasoning = ctx.reasoning.unwrap();
3292        assert!(
3293            reasoning.contains("100 bytes"),
3294            "Reasoning should mention size: {}",
3295            reasoning
3296        );
3297        assert!(
3298            !ctx.consequences.is_empty(),
3299            "FileWrite should have consequences"
3300        );
3301    }
3302
3303    #[test]
3304    fn test_build_approval_context_shell_command_has_reasoning() {
3305        let details = ActionDetails::ShellCommand {
3306            command: "rm -rf /tmp/test".to_string(),
3307        };
3308        let ctx = Agent::build_approval_context("shell_exec", &details, RiskLevel::Execute);
3309        assert!(ctx.reasoning.is_some());
3310        let reasoning = ctx.reasoning.unwrap();
3311        assert!(reasoning.contains("rm -rf"));
3312    }
3313
3314    // --- Gap 5: Corrections/Facts production tests ---
3315
    /// A test callback that denies specific tools but approves all others.
    ///
    /// The decision is made in `request_approval` by checking whether the
    /// requested action's `tool_name` appears in `deny_tools`.
    struct SelectiveDenyCallback {
        /// Names of the tools whose approval requests are denied.
        deny_tools: Vec<String>,
    }

    impl SelectiveDenyCallback {
        /// Create a callback that denies exactly the tools named in `deny_tools`.
        fn new(deny_tools: Vec<String>) -> Self {
            Self { deny_tools }
        }
    }
3326
3327    #[async_trait::async_trait]
3328    impl AgentCallback for SelectiveDenyCallback {
3329        async fn on_assistant_message(&self, _message: &str) {}
3330        async fn on_token(&self, _token: &str) {}
3331        async fn request_approval(&self, action: &ActionRequest) -> ApprovalDecision {
3332            if self.deny_tools.contains(&action.tool_name) {
3333                ApprovalDecision::Deny
3334            } else {
3335                ApprovalDecision::Approve
3336            }
3337        }
3338        async fn on_tool_start(&self, _tool_name: &str, _args: &serde_json::Value) {}
3339        async fn on_tool_result(&self, _tool_name: &str, _output: &ToolOutput, _duration_ms: u64) {}
3340        async fn on_status_change(&self, _status: AgentStatus) {}
3341        async fn on_usage_update(&self, _usage: &TokenUsage, _cost: &CostEstimate) {}
3342        async fn on_decision_explanation(&self, _explanation: &DecisionExplanation) {}
3343    }
3344
3345    #[tokio::test]
3346    async fn test_successful_tool_execution_records_fact() {
3347        let provider = Arc::new(MockLlmProvider::new());
3348        provider.queue_response(MockLlmProvider::tool_call_response(
3349            "echo",
3350            serde_json::json!({"text": "important finding about the code"}),
3351        ));
3352        provider.queue_response(MockLlmProvider::text_response("Done."));
3353
3354        let (mut agent, _callback) = create_test_agent(provider);
3355        agent.register_tool(RegisteredTool {
3356            definition: ToolDefinition {
3357                name: "echo".to_string(),
3358                description: "Echo text".to_string(),
3359                parameters: serde_json::json!({}),
3360            },
3361            risk_level: RiskLevel::ReadOnly,
3362            executor: Box::new(|args: serde_json::Value| {
3363                Box::pin(async move {
3364                    let text = args["text"].as_str().unwrap_or("no text");
3365                    Ok(ToolOutput::text(format!("Echo: {}", text)))
3366                })
3367            }),
3368        });
3369
3370        agent.process_task("Test echo").await.unwrap();
3371
3372        assert!(
3373            !agent.memory().long_term.facts.is_empty(),
3374            "Successful tool execution should record a fact"
3375        );
3376        let fact = &agent.memory().long_term.facts[0];
3377        assert!(
3378            fact.content.contains("echo"),
3379            "Fact should mention tool name: {}",
3380            fact.content
3381        );
3382        assert!(
3383            fact.tags.contains(&"tool_result".to_string()),
3384            "Fact should have 'tool_result' tag"
3385        );
3386    }
3387
3388    #[tokio::test]
3389    async fn test_short_tool_output_not_recorded() {
3390        let provider = Arc::new(MockLlmProvider::new());
3391        provider.queue_response(MockLlmProvider::tool_call_response(
3392            "echo",
3393            serde_json::json!({"text": "x"}),
3394        ));
3395        provider.queue_response(MockLlmProvider::text_response("Done."));
3396
3397        let (mut agent, _callback) = create_test_agent(provider);
3398        agent.register_tool(RegisteredTool {
3399            definition: ToolDefinition {
3400                name: "echo".to_string(),
3401                description: "Echo".to_string(),
3402                parameters: serde_json::json!({}),
3403            },
3404            risk_level: RiskLevel::ReadOnly,
3405            // Return very short output (< 10 chars)
3406            executor: Box::new(|_| Box::pin(async { Ok(ToolOutput::text("ok")) })),
3407        });
3408
3409        agent.process_task("Test").await.unwrap();
3410
3411        assert!(
3412            agent.memory().long_term.facts.is_empty(),
3413            "Short tool output (<10 chars) should NOT be recorded as fact"
3414        );
3415    }
3416
3417    #[tokio::test]
3418    async fn test_huge_tool_output_not_recorded() {
3419        let provider = Arc::new(MockLlmProvider::new());
3420        provider.queue_response(MockLlmProvider::tool_call_response(
3421            "echo",
3422            serde_json::json!({"text": "x"}),
3423        ));
3424        provider.queue_response(MockLlmProvider::text_response("Done."));
3425
3426        let (mut agent, _callback) = create_test_agent(provider);
3427        let huge = "x".repeat(10_000);
3428        agent.register_tool(RegisteredTool {
3429            definition: ToolDefinition {
3430                name: "echo".to_string(),
3431                description: "Echo".to_string(),
3432                parameters: serde_json::json!({}),
3433            },
3434            risk_level: RiskLevel::ReadOnly,
3435            executor: Box::new(move |_| {
3436                let h = huge.clone();
3437                Box::pin(async move { Ok(ToolOutput::text(h)) })
3438            }),
3439        });
3440
3441        agent.process_task("Test").await.unwrap();
3442
3443        assert!(
3444            agent.memory().long_term.facts.is_empty(),
3445            "Huge tool output (>5000 chars) should NOT be recorded as fact"
3446        );
3447    }
3448
3449    #[tokio::test]
3450    async fn test_user_denial_records_correction() {
3451        let provider = Arc::new(MockLlmProvider::new());
3452        // First: try a write tool (will require approval, gets denied)
3453        provider.queue_response(MockLlmProvider::tool_call_response(
3454            "file_write",
3455            serde_json::json!({"path": "test.rs", "content": "bad code"}),
3456        ));
3457        // After denial error, agent falls back to text
3458        provider.queue_response(MockLlmProvider::text_response("Understood, I won't write."));
3459
3460        let callback = Arc::new(SelectiveDenyCallback::new(vec!["file_write".to_string()]));
3461        let mut config = AgentConfig::default();
3462        config.llm.use_streaming = false;
3463        // Use Paranoid mode so ALL actions require approval
3464        config.safety.approval_mode = crate::config::ApprovalMode::Paranoid;
3465
3466        let mut agent = Agent::new(provider, config, callback);
3467        agent.register_tool(RegisteredTool {
3468            definition: ToolDefinition {
3469                name: "file_write".to_string(),
3470                description: "Write file".to_string(),
3471                parameters: serde_json::json!({}),
3472            },
3473            risk_level: RiskLevel::Write,
3474            executor: Box::new(|_| Box::pin(async { Ok(ToolOutput::text("written")) })),
3475        });
3476
3477        agent.process_task("Write something").await.unwrap();
3478
3479        assert!(
3480            !agent.memory().long_term.corrections.is_empty(),
3481            "User denial should record a correction"
3482        );
3483        let correction = &agent.memory().long_term.corrections[0];
3484        assert!(
3485            correction.original.contains("file_write"),
3486            "Correction original should mention denied tool: {}",
3487            correction.original
3488        );
3489        assert!(
3490            correction.context.contains("denied"),
3491            "Correction context should mention denial: {}",
3492            correction.context
3493        );
3494    }
3495
3496    #[test]
3497    fn test_scheduler_fields_none_when_disabled() {
3498        let provider = Arc::new(MockLlmProvider::new());
3499        let (agent, _) = create_test_agent(provider);
3500        // Default config has no scheduler section, so fields should be None
3501        assert!(agent.cron_scheduler().is_none());
3502    }
3503
3504    #[test]
3505    fn test_save_scheduler_state_noop_when_disabled() {
3506        let provider = Arc::new(MockLlmProvider::new());
3507        let (agent, _) = create_test_agent(provider);
3508        let dir = tempfile::TempDir::new().unwrap();
3509        // Should succeed silently when scheduler is disabled
3510        assert!(agent.save_scheduler_state(dir.path()).is_ok());
3511    }
3512
3513    #[test]
3514    fn test_load_scheduler_state_noop_when_disabled() {
3515        let provider = Arc::new(MockLlmProvider::new());
3516        let (mut agent, _) = create_test_agent(provider);
3517        let dir = tempfile::TempDir::new().unwrap();
3518        // Should not panic when scheduler is disabled
3519        agent.load_scheduler_state(dir.path());
3520        assert!(agent.cron_scheduler().is_none());
3521    }
3522
3523    #[test]
3524    fn test_save_load_scheduler_roundtrip() {
3525        let provider = Arc::new(MockLlmProvider::new());
3526        let callback = Arc::new(RecordingCallback::new());
3527        let mut config = AgentConfig::default();
3528        config.llm.use_streaming = false;
3529        config.scheduler = Some(crate::config::SchedulerConfig {
3530            enabled: true,
3531            cron_jobs: vec![crate::scheduler::CronJobConfig::new(
3532                "test_job",
3533                "0 0 9 * * * *",
3534                "do something",
3535            )],
3536            ..Default::default()
3537        });
3538        let agent = Agent::new(provider.clone(), config, callback);
3539        assert_eq!(agent.cron_scheduler().unwrap().len(), 1);
3540
3541        let dir = tempfile::TempDir::new().unwrap();
3542        agent.save_scheduler_state(dir.path()).unwrap();
3543
3544        // Create a new agent with an empty scheduler and load state
3545        let callback2 = Arc::new(RecordingCallback::new());
3546        let mut config2 = AgentConfig::default();
3547        config2.llm.use_streaming = false;
3548        config2.scheduler = Some(crate::config::SchedulerConfig {
3549            enabled: true,
3550            cron_jobs: vec![],
3551            ..Default::default()
3552        });
3553        let mut agent2 = Agent::new(provider, config2, callback2);
3554        assert_eq!(agent2.cron_scheduler().unwrap().len(), 0);
3555
3556        agent2.load_scheduler_state(dir.path());
3557        assert_eq!(agent2.cron_scheduler().unwrap().len(), 1);
3558    }
3559}
rustant_core/agent.rs

rustant_core/
agent.rs