imp_core/agent/
mod.rs

1use std::path::{Path, PathBuf};
2use std::sync::{Arc, Mutex};
3use std::time::Instant;
4
5use imp_llm::{
6    AssistantMessage, ContentBlock, Message, Model, StopReason as LlmStopReason, ThinkingLevel,
7    Usage,
8};
9#[cfg(test)]
10use imp_llm::{Context, RequestOptions, StreamEvent};
11use tokio::sync::mpsc;
12
13use imp_llm::provider::RetryPolicy;
14
15use crate::config::{AgentMode, Config, ContextConfig, ContinuePolicy};
16use crate::guardrails::{GuardrailConfig, GuardrailProfile};
17use crate::hooks::{HookBackgroundEvent, HookEvent, HookRunner};
18use crate::mana_review::{
19    ManaMutationAction, ManaMutationRecord, ManaReviewScope, ManaReviewScopeKind, ManaReviewState,
20    ManaUnitSnapshot, TurnManaReview, TurnManaReviewAccumulator,
21};
22use crate::policy::RunPolicy;
23use crate::roles::Role;
24use crate::tools::{LuaToolLoader, ToolRegistry};
25use crate::trace::TraceWriter;
26use crate::workflow::WorkflowContract;
27
28mod events;
29mod loop_policy;
30mod loop_state;
31mod mana_loop;
32#[cfg(not(test))]
33pub(crate) use mana_loop::ManaPolicyDecision;
34#[cfg(test)]
35pub(crate) use mana_loop::{evaluate_mana_policy, ManaPolicyDecision};
36mod recovery;
37mod run_loop;
38mod tool_execution;
39
40pub use events::{
41    AgentEvent, RecoveryCheckpoint, RecoveryCheckpointKind, TimingEvent, TimingStage,
42};
43pub use loop_state::{
44    ContinueReason, LoopDecision, PlannedToolCall, RunFinalStatus, StopReason, ToolExecutionMode,
45    ToolPlan, ToolRisk, TurnPhase, TurnState,
46};
47pub use recovery::{
48    IncompleteToolRecovery, IncompleteToolState, RecoveryLedger, RecoveryReconciliation,
49};
50
/// Commands sent to the agent (from UI or orchestrator).
///
/// Values travel over the bounded `mpsc` command channel created in
/// `Agent::new`; the sending half is exposed through `AgentHandle::command_tx`.
#[derive(Debug, Clone)]
pub enum AgentCommand {
    /// Request that the agent cancel its work.
    /// NOTE(review): the handling code is outside this chunk — confirm exact semantics.
    Cancel,
    /// Steering text for the agent.
    /// NOTE(review): presumably injected into the in-flight run — confirm against the run loop.
    Steer(String),
    /// Follow-up text for the agent.
    /// NOTE(review): presumably queued for after the current turn — confirm against the run loop.
    FollowUp(String),
}
58
59mod turn_assessment;
60
61use turn_assessment::{
62    ContinueRecommendation, ManaEvidence, PostTurnAssessment, RuntimeEvidence, TextFallbackEvidence,
63};
64pub use turn_assessment::{NextActionAssessment, NextActionDebugView};
65
/// The core agent — runs the ReAct loop (reason, act, observe).
///
/// Construct one with [`Agent::new`], which also returns the [`AgentHandle`]
/// holding the other ends of the event and command channels.
pub struct Agent {
    /// LLM model used for this agent.
    pub model: Model,
    /// Requested thinking level (`ThinkingLevel::Medium` by default in `Agent::new`).
    pub thinking_level: ThinkingLevel,
    /// Registry of tools available to the agent.
    pub tools: ToolRegistry,
    /// Conversation history; passed to the `OnAgentEnd` hook by `emit`.
    pub messages: Vec<Message>,
    /// System prompt text (empty by default).
    pub system_prompt: String,
    /// Working directory the agent was created for.
    pub cwd: PathBuf,
    /// Optional token limit. NOTE(review): presumably the per-response output cap — confirm.
    pub max_tokens: Option<u32>,
    /// Optional role assigned to this agent.
    pub role: Option<Role>,
    /// Hook runner fired for lifecycle events (see `emit`).
    pub hooks: HookRunner,
    /// API key for the LLM provider (empty by default).
    pub api_key: String,
    /// Optional auth store for automatic OAuth token refresh before LLM calls.
    pub auth_store: Option<std::sync::Arc<tokio::sync::Mutex<imp_llm::auth::AuthStore>>>,
    /// User-interface backend (`NullInterface` by default).
    pub ui: Arc<dyn crate::ui::UserInterface>,
    /// Context management thresholds (wired from Config via AgentBuilder).
    pub context_config: ContextConfig,
    /// Retry policy for transient LLM stream failures.
    pub retry_policy: RetryPolicy,
    /// Active agent mode — controls which tools are permitted.
    pub mode: AgentMode,
    /// Whether the legacy mana reference skill is available as an optional fallback.
    pub has_mana_skill: bool,
    /// Whether the legacy mana-basics reference skill is available as an optional fallback.
    pub has_mana_basics_skill: bool,
    /// Whether the legacy mana-delegation reference skill is available as an optional fallback.
    pub has_mana_delegation_skill: bool,
    /// Engineering guardrails config.
    pub guardrail_config: GuardrailConfig,
    /// Resolved guardrail profile (None = disabled).
    pub guardrail_profile: Option<GuardrailProfile>,
    /// Cloneable Lua extension tool loader inherited from the session/builder.
    pub lua_tool_loader: Option<LuaToolLoader>,
    /// In-session file content cache, shared across tool calls.
    pub file_cache: Arc<crate::tools::FileCache>,
    /// Shared checkpoint/file-history state, used to capture destructive edit restore points.
    pub checkpoint_state: Arc<crate::tools::CheckpointState>,
    /// Tracks which files have been read; used for staleness and unread-edit warnings.
    pub file_tracker: Arc<std::sync::Mutex<crate::tools::FileTracker>>,
    /// Session-local anchors emitted by read and consumed by anchored edit mode.
    pub anchor_store: Arc<crate::tools::AnchorStore>,
    /// Max lines the read tool may return before truncating. 0 means unlimited.
    pub read_max_lines: usize,
    /// Cache options for LLM requests.
    pub cache_options: imp_llm::CacheOptions,
    /// In-memory recovery checkpoints for this run. Session persistence can seed this ledger later.
    pub recovery_ledger: Arc<std::sync::Mutex<RecoveryLedger>>,
    /// Tracks identical consecutive tool calls to detect loops.
    last_tool_call: std::sync::Arc<std::sync::Mutex<Option<RepeatedToolCallState>>>,
    /// Prevent repeated self-nudges for mana externalization in a single run.
    queued_mana_externalization_nudge: bool,
    /// Policy for imp-local visible auto-continuation after high-confidence turns.
    pub continue_policy: ContinuePolicy,
    /// Prevent repeated confidence-based auto-continue nudges in a single run.
    queued_confidence_continue_nudge: bool,
    /// Prevent repeated execution-debt stop-gate follow-ups in a single run.
    queued_execution_debt_follow_up: bool,
    /// Runtime-side turn-scoped between-turn mana review accumulator.
    turn_mana_review: Arc<std::sync::Mutex<TurnManaReviewAccumulator>>,
    /// Resolved runtime config for tool-specific policy checks.
    pub config: Arc<Config>,
    /// Per-run tool/write policy layered on top of AgentMode.
    pub run_policy: RunPolicy,
    /// Verification gates declared for this run.
    pub verification_gates: Vec<crate::workflow::VerificationGate>,

    /// Lightweight workflow contract for this agent run. Initially informational; future runtime layers use it for policy, verification, and evidence.
    pub workflow_contract: WorkflowContract,
    /// Active trace writer for the current run artifact, if artifact creation succeeded.
    trace_writer: Arc<Mutex<Option<TraceWriter>>>,
    /// Active run artifact id for trace correlation.
    run_id: Arc<Mutex<Option<String>>>,

    /// Sending half of the event channel; the receiver lives in `AgentHandle::event_rx`.
    event_tx: mpsc::Sender<AgentEvent>,
    /// Sending half of the command channel; a clone is handed out via `AgentHandle::command_tx`.
    command_tx: mpsc::Sender<AgentCommand>,
    /// Receiving half of the command channel, owned by the agent.
    command_rx: mpsc::Receiver<AgentCommand>,
    /// Cancellation flag shared with `AgentHandle::cancel_token` (starts as `false`).
    cancel_token: Arc<std::sync::atomic::AtomicBool>,
}
145
/// Handle for controlling the agent from outside.
///
/// Returned by `Agent::new` alongside the agent itself.
pub struct AgentHandle {
    /// Receiving half of the agent's event channel (capacity 256 in `Agent::new`).
    pub event_rx: mpsc::Receiver<AgentEvent>,
    /// Sending half of the agent's command channel (capacity 32 in `Agent::new`).
    pub command_tx: mpsc::Sender<AgentCommand>,
    /// Cancellation flag shared with the agent through the same `Arc`.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
}
152
/// Snapshot of the most recent tool call, stored in `Agent::last_tool_call`
/// to detect identical consecutive calls.
#[derive(Debug, Clone)]
struct RepeatedToolCallState {
    /// Name of the tool that was called.
    tool_name: String,
    /// Arguments of that call as a JSON string.
    /// NOTE(review): presumably the serialized call arguments used for equality — confirm at the use site.
    args_json: String,
    /// Count of consecutive calls. NOTE(review): whether this starts at 0 or 1 is decided outside this chunk.
    consecutive: usize,
}
159
/// Outcome of checking a tool call against the repeated-call tracker.
/// NOTE(review): the producer of these values is outside this chunk.
#[derive(Debug, Clone)]
enum RepeatedToolCallCheck {
    /// No repetition concern.
    Ok,
    /// Carries a warning message string.
    Warn(String),
    /// Carries a tool result message to use for the blocked call.
    Block(imp_llm::ToolResultMessage),
}
166
167impl Agent {
168    pub fn new(model: Model, cwd: PathBuf) -> (Self, AgentHandle) {
169        let (event_tx, event_rx) = mpsc::channel(256);
170        let (command_tx, command_rx) = mpsc::channel(32);
171        let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));
172        let mut hooks = HookRunner::new();
173        let background_event_tx = event_tx.clone();
174        hooks.set_background_reporter(Arc::new(move |event: HookBackgroundEvent| {
175            let background_event_tx = background_event_tx.clone();
176            tokio::spawn(async move {
177                let _ = background_event_tx
178                    .send(AgentEvent::Warning {
179                        message: event.to_string(),
180                    })
181                    .await;
182            });
183        }));
184
185        let agent = Self {
186            model,
187            thinking_level: ThinkingLevel::Medium,
188            tools: ToolRegistry::new(),
189            messages: Vec::new(),
190            system_prompt: String::new(),
191            cwd: cwd.clone(),
192            max_tokens: None,
193            role: None,
194            hooks,
195            api_key: String::new(),
196            ui: Arc::new(crate::ui::NullInterface),
197            context_config: ContextConfig::default(),
198            retry_policy: RetryPolicy::default(),
199            mode: AgentMode::Full,
200            has_mana_skill: false,
201            has_mana_basics_skill: false,
202            has_mana_delegation_skill: false,
203            guardrail_config: GuardrailConfig::default(),
204            guardrail_profile: None,
205            file_cache: Arc::new(crate::tools::FileCache::new()),
206            checkpoint_state: Arc::new(crate::tools::CheckpointState::new()),
207            file_tracker: Arc::new(std::sync::Mutex::new(crate::tools::FileTracker::new())),
208            anchor_store: Arc::new(crate::tools::AnchorStore::new()),
209            read_max_lines: 500,
210            auth_store: None,
211            cache_options: imp_llm::CacheOptions {
212                cache_system_prompt: true,
213                cache_tools: true,
214                cache_recent_turns: 2,
215                extended_ttl: false,
216                global_scope: false,
217            },
218            recovery_ledger: Arc::new(std::sync::Mutex::new(RecoveryLedger::new())),
219            last_tool_call: Arc::new(std::sync::Mutex::new(None)),
220            queued_mana_externalization_nudge: false,
221            continue_policy: ContinuePolicy::Disabled,
222            queued_confidence_continue_nudge: false,
223            queued_execution_debt_follow_up: false,
224            turn_mana_review: Arc::new(std::sync::Mutex::new(TurnManaReviewAccumulator::default())),
225            config: Arc::new(Config::default()),
226            run_policy: RunPolicy::default(),
227            workflow_contract: WorkflowContract::implicit_from(
228                crate::workflow::ImplicitWorkflowContractInput::prompt("").cwd(&cwd),
229            ),
230            verification_gates: Vec::new(),
231            trace_writer: Arc::new(Mutex::new(None)),
232            run_id: Arc::new(Mutex::new(None)),
233            lua_tool_loader: None,
234
235            event_tx,
236            command_tx: command_tx.clone(),
237            command_rx,
238            cancel_token: Arc::clone(&cancel_token),
239        };
240
241        let handle = AgentHandle {
242            event_rx,
243            command_tx,
244            cancel_token,
245        };
246
247        (agent, handle)
248    }
249
250    fn assess_post_turn(
251        &self,
252        message: &AssistantMessage,
253        tool_results: &[imp_llm::ToolResultMessage],
254        _used_tools: bool,
255        mana_review: &TurnManaReview,
256    ) -> PostTurnAssessment {
257        let repeated_action = tool_results_indicate_repeated_action(tool_results);
258        let runtime_execution_stop_reason =
259            tool_results_indicate_execution_blocker(tool_results, self.mode);
260        let work_completed = tool_results_indicate_work_completed(tool_results, self.mode);
261        let execution_debt = tool_results_indicate_execution_debt(tool_results, self.mode);
262        let execution_evidence = tool_results_indicate_execution_evidence(tool_results, self.mode);
263        let planning_only_progress = execution_debt && !execution_evidence;
264        let mana_stop_reason = mana_review_stop_reason(mana_review, self.mode);
265        let planner_text_stop_reason = planner_stop_reason(message, self.mode);
266        let execution_text_stop_reason = execution_stop_reason(message, self.mode);
267
268        let continue_recommendation = if should_queue_mana_externalization_follow_up(
269            message,
270            self.mode,
271            self.has_mana_skill,
272            self.queued_mana_externalization_nudge,
273        ) {
274            Some(ContinueRecommendation {
275                prompt: mana_externalization_follow_up_text().to_string(),
276                reason: ContinueReason::ExternalizationNeeded,
277            })
278        } else if !matches!(self.mode, AgentMode::Planner)
279            && should_queue_execution_debt_follow_up(
280                execution_debt,
281                execution_evidence,
282                self.queued_execution_debt_follow_up,
283                !assistant_message_text(message).trim().is_empty(),
284            )
285        {
286            Some(ContinueRecommendation {
287                prompt: execution_debt_follow_up_text().to_string(),
288                reason: ContinueReason::ExecutionDebt,
289            })
290        } else if should_queue_confidence_continue_follow_up(
291            message,
292            self.mode,
293            self.continue_policy,
294            self.queued_confidence_continue_nudge,
295        ) {
296            Some(ContinueRecommendation {
297                prompt: confidence_continue_follow_up_text().to_string(),
298                reason: ContinueReason::HighConfidenceVisibleNextStep,
299            })
300        } else {
301            None
302        };
303
304        PostTurnAssessment {
305            runtime: RuntimeEvidence {
306                repeated_action,
307                execution_stop_reason: runtime_execution_stop_reason,
308                work_completed,
309                execution_debt,
310                execution_evidence,
311                planning_only_progress,
312            },
313            mana: ManaEvidence {
314                stop_reason: mana_stop_reason,
315            },
316            text_fallback: TextFallbackEvidence {
317                planner_stop_reason: planner_text_stop_reason,
318                execution_stop_reason: execution_text_stop_reason,
319            },
320            continue_recommendation,
321        }
322    }
323
324    fn mark_continue_reason(&mut self, reason: ContinueReason) {
325        match reason {
326            ContinueReason::ExternalizationNeeded => {
327                self.queued_mana_externalization_nudge = true;
328            }
329            ContinueReason::HighConfidenceVisibleNextStep => {
330                self.queued_confidence_continue_nudge = true;
331            }
332            ContinueReason::ExecutionDebt => {
333                self.queued_execution_debt_follow_up = true;
334            }
335            ContinueReason::ToolResultsNeedInterpretation
336            | ContinueReason::QueuedUserFollowUp
337            | ContinueReason::RecoveryContinuation => {}
338        }
339    }
340
341    pub(crate) async fn emit(&self, event: AgentEvent) {
342        // Fire corresponding hooks for lifecycle events
343        match &event {
344            AgentEvent::AgentEnd { .. } => {
345                self.hooks
346                    .fire(&HookEvent::OnAgentEnd {
347                        messages: &self.messages,
348                    })
349                    .await;
350            }
351            AgentEvent::TurnEnd { index, message, .. } => {
352                self.hooks
353                    .fire(&HookEvent::OnTurnEnd {
354                        index: *index,
355                        message,
356                    })
357                    .await;
358            }
359            _ => {}
360        }
361        self.write_trace_event(&event);
362        let _ = self.event_tx.send(event).await;
363    }
364
365    fn write_trace_event(&self, event: &AgentEvent) {
366        let Some(run_id) = self.run_id.lock().ok().and_then(|run_id| run_id.clone()) else {
367            return;
368        };
369        let mut trace_event = event.to_trace_event(run_id);
370        if let Some(workflow_id) = self
371            .workflow_contract
372            .id
373            .as_ref()
374            .or(self.workflow_contract.mana_unit_ref.as_ref())
375        {
376            trace_event = trace_event.with_workflow_id(workflow_id.clone());
377        }
378        if let Ok(mut writer) = self.trace_writer.lock() {
379            if let Some(writer) = writer.as_mut() {
380                let _ = writer.write_event(trace_event);
381                let _ = writer.flush();
382            }
383        }
384    }
385
386    async fn emit_timing(
387        &self,
388        turn: u32,
389        stage: TimingStage,
390        turn_started_at: Instant,
391        llm_request_started_at: Option<Instant>,
392    ) {
393        self.emit_timing_with_details(TimingEvent::new(
394            turn,
395            stage,
396            turn_started_at,
397            llm_request_started_at,
398        ))
399        .await;
400    }
401
402    async fn emit_timing_with_details(&self, timing: TimingEvent) {
403        self.write_trace_event(&AgentEvent::Timing {
404            timing: timing.clone(),
405        });
406        let _ = self.event_tx.send(AgentEvent::Timing { timing }).await;
407    }
408
409    pub async fn emit_recovery_checkpoint(&self, checkpoint: RecoveryCheckpoint) {
410        if let Ok(mut ledger) = self.recovery_ledger.lock() {
411            ledger.record(checkpoint.clone());
412        }
413        self.write_trace_event(&AgentEvent::RecoveryCheckpoint {
414            checkpoint: checkpoint.clone(),
415        });
416        let _ = self
417            .event_tx
418            .send(AgentEvent::RecoveryCheckpoint { checkpoint })
419            .await;
420    }
421
422    fn recovery_checkpoint(
423        turn: u32,
424        kind: RecoveryCheckpointKind,
425        tool_call_id: Option<String>,
426        tool_name: Option<String>,
427        args_hash: Option<String>,
428        success: Option<bool>,
429        error_class: Option<String>,
430    ) -> RecoveryCheckpoint {
431        RecoveryCheckpoint {
432            version: 1,
433            turn,
434            kind,
435            tool_call_id,
436            tool_name,
437            args_hash,
438            success,
439            error_class,
440            timestamp: imp_llm::now(),
441        }
442    }
443
444    fn tool_args_hash(args: &serde_json::Value) -> String {
445        format!("{:016x}", crate::tools::stable_hash(args.to_string()))
446    }
447
448    fn finish_turn_mana_review(&self, turn: u32) -> TurnManaReview {
449        match self.turn_mana_review.lock() {
450            Ok(review) => {
451                let review = review.finalize();
452                if review.turn_index == turn {
453                    review
454                } else {
455                    TurnManaReview::no_change(turn)
456                }
457            }
458            Err(_) => TurnManaReview::no_change(turn),
459        }
460    }
461}
462fn push_stream_text_block(content: &mut Vec<ContentBlock>, text: String) {
463    if text.is_empty() {
464        return;
465    }
466
467    if let Some(ContentBlock::Text { text: existing }) = content.last_mut() {
468        existing.push_str(&text);
469    } else {
470        content.push(ContentBlock::Text { text });
471    }
472}
473
474fn push_stream_thinking_block(content: &mut Vec<ContentBlock>, text: String) {
475    if text.is_empty() {
476        return;
477    }
478
479    if let Some(ContentBlock::Thinking { text: existing }) = content.last_mut() {
480        existing.push_str(&text);
481    } else {
482        content.push(ContentBlock::Thinking { text });
483    }
484}
485
486fn assistant_message_text(message: &AssistantMessage) -> String {
487    message
488        .content
489        .iter()
490        .filter_map(|block| match block {
491            ContentBlock::Text { text } => Some(text.as_str()),
492            _ => None,
493        })
494        .collect::<Vec<_>>()
495        .join("\n")
496}
497
498fn assistant_message_contains_mana_tool_call(message: &AssistantMessage) -> bool {
499    message.content.iter().any(|block| match block {
500        ContentBlock::ToolCall { name, .. } => name == "mana",
501        _ => false,
502    })
503}
504
/// Decides whether to queue the execution-debt follow-up.
///
/// True only when there is execution debt, no execution evidence, no
/// follow-up already queued, and the assistant has finalized its turn.
fn should_queue_execution_debt_follow_up(
    execution_debt: bool,
    execution_evidence: bool,
    already_queued: bool,
    assistant_finalized: bool,
) -> bool {
    matches!(
        (
            execution_debt,
            execution_evidence,
            already_queued,
            assistant_finalized
        ),
        (true, false, false, true)
    )
}
513
514fn should_queue_mana_externalization_follow_up(
515    message: &AssistantMessage,
516    mode: AgentMode,
517    _has_mana_skill: bool,
518    already_queued: bool,
519) -> bool {
520    if already_queued {
521        return false;
522    }
523
524    if !matches!(
525        mode,
526        AgentMode::Full | AgentMode::Planner | AgentMode::Orchestrator
527    ) {
528        return false;
529    }
530
531    if assistant_message_contains_mana_tool_call(message) {
532        return false;
533    }
534
535    let text = assistant_message_text(message);
536    durable_mana_externalization_signal(&text)
537}
538
/// Returns `true` when `text` contains any phrase suggesting durable state
/// or an explicit request to record work in mana.
///
/// Matching is by ASCII-case-insensitive substring.
fn durable_mana_externalization_signal(text: &str) -> bool {
    const DURABLE_STATE_PHRASES: &[&str] = &[
        "acceptance",
        "architecture decision",
        "blocker",
        "blocked by",
        "dependency",
        "dependencies",
        "durable",
        "follow-up work",
        "handoff",
        "migration",
        "orchestration",
        "parallel",
        "phase 1",
        "phase 2",
        "verify gate",
        "worker",
        "workers",
    ];
    const EXPLICIT_MANA_PHRASES: &[&str] = &[
        "create mana",
        "create a mana",
        "externalize",
        "mana unit",
        "mana units",
        "record this",
        "save this plan",
        "split this into units",
        "turn this into mana",
    ];

    let haystack = text.to_ascii_lowercase();
    let mentions_any =
        |phrases: &[&str]| phrases.iter().any(|phrase| haystack.contains(*phrase));

    mentions_any(DURABLE_STATE_PHRASES) || mentions_any(EXPLICIT_MANA_PHRASES)
}
579
/// Prompt text queued when the agent should externalize its plan into mana.
fn mana_externalization_follow_up_text() -> &'static str {
    const PROMPT: &str = "Before you continue: externalize the durable plan or decomposition you just described into mana now. Create or update the relevant unit(s) with native mana actions, prefer root scope for cross-project work, and avoid extra chat restatement when the mana tool/UI already makes the delta obvious.";
    PROMPT
}
583
584fn should_queue_confidence_continue_follow_up(
585    message: &AssistantMessage,
586    mode: AgentMode,
587    continue_policy: ContinuePolicy,
588    already_queued: bool,
589) -> bool {
590    if already_queued || matches!(continue_policy, ContinuePolicy::Disabled) {
591        return false;
592    }
593
594    if !matches!(
595        mode,
596        AgentMode::Full | AgentMode::Planner | AgentMode::Orchestrator
597    ) {
598        return false;
599    }
600
601    if !assistant_message_contains_mana_tool_call(message) {
602        return false;
603    }
604
605    let text = assistant_message_text(message);
606    if text.trim().is_empty() {
607        return false;
608    }
609
610    let lower = text.to_ascii_lowercase();
611    let positive_signal = [
612        "done",
613        "completed",
614        "finished",
615        "updated",
616        "created",
617        "next",
618        "continue",
619        "proceed",
620        "follow-up",
621        "follow up",
622    ]
623    .iter()
624    .filter(|needle| lower.contains(**needle))
625    .count();
626
627    let blocker_signal = [
628        "blocked",
629        "unclear",
630        "need your input",
631        "which should",
632        "approval",
633    ]
634    .iter()
635    .any(|needle| lower.contains(needle));
636
637    if blocker_signal {
638        return false;
639    }
640
641    let threshold = match continue_policy {
642        ContinuePolicy::Disabled => return false,
643        ContinuePolicy::Conservative => 3,
644        ContinuePolicy::Balanced => 2,
645        ContinuePolicy::Aggressive => 1,
646    };
647
648    positive_signal >= threshold
649}
650
/// Prompt text queued for a confidence-based auto-continue nudge.
fn confidence_continue_follow_up_text() -> &'static str {
    const PROMPT: &str = "Confidence is high and the mana delta is already visible. Continue to the next small, well-bounded step now using native mana-backed workflow, unless a consequential decision or blocker appears. Do not re-summarize the same visible mana change in chat unless new context needs to be called out.";
    PROMPT
}
654
/// Prompt text queued when work was planned or recorded but not carried out.
fn execution_debt_follow_up_text() -> &'static str {
    const PROMPT: &str = "You have recorded or planned work, but the requested outcome is not satisfied yet. Continue working until the user's requested outcome is satisfied, or until concrete evidence shows it cannot be completed. Do not stop merely because you recorded a plan, updated mana, or completed one intermediate step.";
    PROMPT
}
658
659fn mana_result_action(result: &imp_llm::ToolResultMessage) -> Option<&str> {
660    if result.tool_name != "mana" {
661        return None;
662    }
663
664    result
665        .details
666        .get("action")
667        .and_then(|value| value.as_str())
668        .or_else(|| {
669            result
670                .details
671                .get("mana_loop_policy")
672                .and_then(|policy| policy.get("action"))
673                .and_then(|value| value.as_str())
674        })
675}
676
677fn mana_review_scope_from_result(result: &imp_llm::ToolResultMessage) -> ManaReviewScope {
678    let display = result
679        .details
680        .get("path")
681        .or_else(|| result.details.get("mana_dir"))
682        .and_then(|value| value.as_str())
683        .unwrap_or("auto")
684        .to_string();
685
686    ManaReviewScope {
687        kind: if display == "auto" {
688            ManaReviewScopeKind::None
689        } else {
690            ManaReviewScopeKind::ExplicitPath
691        },
692        display,
693    }
694}
695
696fn unit_snapshot_from_value(value: &serde_json::Value) -> Option<ManaUnitSnapshot> {
697    serde_json::from_value(value.clone()).ok()
698}
699
700fn unit_snapshot_from_result(result: &imp_llm::ToolResultMessage) -> Option<ManaUnitSnapshot> {
701    result
702        .details
703        .get("unit")
704        .and_then(unit_snapshot_from_value)
705}
706
707fn mana_mutation_action(action: &str) -> Option<ManaMutationAction> {
708    match action {
709        "create" => Some(ManaMutationAction::Create),
710        "close" => Some(ManaMutationAction::Close),
711        "update" => Some(ManaMutationAction::Update),
712        "notes_append" => Some(ManaMutationAction::NotesAppend),
713        "decision_add" => Some(ManaMutationAction::DecisionAdd),
714        "decision_resolve" => Some(ManaMutationAction::DecisionResolve),
715        "reopen" => Some(ManaMutationAction::Reopen),
716        "fail" => Some(ManaMutationAction::Fail),
717        "delete" => Some(ManaMutationAction::Delete),
718        "dep_add" => Some(ManaMutationAction::DepAdd),
719        "dep_remove" => Some(ManaMutationAction::DepRemove),
720        "fact_create" => Some(ManaMutationAction::FactCreate),
721        _ => None,
722    }
723}
724
725fn mutation_record_from_mana_result(
726    result: &imp_llm::ToolResultMessage,
727) -> Option<ManaMutationRecord> {
728    if result.is_error || result.tool_name != "mana" {
729        return None;
730    }
731
732    let action_name = mana_result_action(result)?;
733    let action = mana_mutation_action(action_name)?;
734    let after_unit = unit_snapshot_from_result(result);
735    let deleted_unit = if action == ManaMutationAction::Delete {
736        let id = result.details.get("id")?.as_str()?.to_string();
737        let title = result
738            .details
739            .get("title")
740            .and_then(|value| value.as_str())
741            .unwrap_or(&id)
742            .to_string();
743        Some(crate::mana_review::ManaUnitRef::new(id, title, None))
744    } else {
745        None
746    };
747
748    if after_unit.is_none()
749        && deleted_unit.is_none()
750        && !matches!(
751            action,
752            ManaMutationAction::DepAdd | ManaMutationAction::DepRemove
753        )
754    {
755        return None;
756    }
757
758    Some(ManaMutationRecord {
759        action,
760        scope: mana_review_scope_from_result(result),
761        before_unit: None,
762        after_unit,
763        deleted_unit,
764        parent_unit: None,
765        related_unit: None,
766        field_changes: Vec::new(),
767        notes_appended: Vec::new(),
768        decision_events: Vec::new(),
769    })
770}
771
772fn record_mana_mutation_results(
773    turn_mana_review: &std::sync::Arc<std::sync::Mutex<TurnManaReviewAccumulator>>,
774    tool_results: &[imp_llm::ToolResultMessage],
775) {
776    let Ok(mut review) = turn_mana_review.lock() else {
777        return;
778    };
779
780    for result in tool_results {
781        if let Some(record) = mutation_record_from_mana_result(result) {
782            review.push(record);
783        }
784    }
785}
786
787fn tool_results_indicate_repeated_action(tool_results: &[imp_llm::ToolResultMessage]) -> bool {
788    tool_results.iter().any(|result| {
789        result.is_error
790            && result.content.iter().any(|block| match block {
791                ContentBlock::Text { text } => {
792                    text.contains("Blocked: identical tool call repeated")
793                }
794                _ => false,
795            })
796    })
797}
798
799fn tool_results_indicate_execution_blocker(
800    tool_results: &[imp_llm::ToolResultMessage],
801    mode: AgentMode,
802) -> Option<StopReason> {
803    if !matches!(
804        mode,
805        AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
806    ) {
807        return None;
808    }
809
810    let saw_edit_like_success = tool_results.iter().any(|result| {
811        !result.is_error && matches!(result.tool_name.as_str(), "write" | "edit" | "multi_edit")
812    });
813
814    for result in tool_results {
815        let action = result.details.get("action").and_then(|v| v.as_str());
816
817        if action == Some("verify")
818            && result.details.get("passed").and_then(|v| v.as_bool()) == Some(false)
819        {
820            return Some(StopReason::ExecutionBlocked);
821        }
822
823        if result.tool_name == "ask_user" && !result.is_error {
824            return Some(StopReason::UserBlocker);
825        }
826
827        if result.tool_name == "bash" || result.tool_name == "shell" {
828            let exit_code = result.details.get("exit_code").and_then(|v| v.as_i64());
829            let timed_out = result.details.get("timed_out").and_then(|v| v.as_bool()) == Some(true);
830            let cancelled = result.details.get("cancelled").and_then(|v| v.as_bool()) == Some(true);
831            let command = result
832                .details
833                .get("command")
834                .and_then(|v| v.as_str())
835                .unwrap_or("")
836                .to_ascii_lowercase();
837            let looks_like_check = command.contains("check")
838                || command.contains("test")
839                || command.contains("verify")
840                || command.contains("pytest")
841                || command.contains("cargo test")
842                || command.contains("cargo check");
843
844            if looks_like_check
845                && (timed_out || cancelled || exit_code.is_some_and(|code| code != 0))
846            {
847                return Some(StopReason::ExecutionBlocked);
848            }
849
850            if saw_edit_like_success
851                && (timed_out || cancelled || exit_code.is_some_and(|code| code != 0))
852            {
853                return Some(StopReason::ExecutionBlocked);
854            }
855        }
856    }
857
858    None
859}
860
861fn tool_results_indicate_execution_debt(
862    tool_results: &[imp_llm::ToolResultMessage],
863    mode: AgentMode,
864) -> bool {
865    if !matches!(
866        mode,
867        AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
868    ) {
869        return false;
870    }
871
872    tool_results.iter().any(|result| {
873        !result.is_error
874            && result.tool_name == "mana"
875            && result
876                .details
877                .get("action")
878                .and_then(|v| v.as_str())
879                .is_some_and(|action| {
880                    matches!(
881                        mana_loop::classify_mana_action(action),
882                        mana_loop::ManaActionClass::ProgressCheckpoint
883                            | mana_loop::ManaActionClass::GraphMutation
884                            | mana_loop::ManaActionClass::DecisionFact
885                    )
886                })
887    })
888}
889
890fn tool_results_indicate_execution_evidence(
891    tool_results: &[imp_llm::ToolResultMessage],
892    mode: AgentMode,
893) -> bool {
894    if !matches!(
895        mode,
896        AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
897    ) {
898        return false;
899    }
900
901    tool_results.iter().any(|result| {
902        if result.is_error {
903            return false;
904        }
905
906        match result.tool_name.as_str() {
907            "write" | "edit" | "multi_edit" | "openrouter_secret_run" => true,
908            "bash" | "shell" => true,
909            "mana" => result
910                .details
911                .get("action")
912                .and_then(|v| v.as_str())
913                .is_some_and(|action| {
914                    matches!(
915                        mana_loop::classify_mana_action(action),
916                        mana_loop::ManaActionClass::Lifecycle
917                            | mana_loop::ManaActionClass::Orchestration
918                    )
919                }),
920            _ => false,
921        }
922    })
923}
924
925fn tool_results_indicate_work_completed(
926    tool_results: &[imp_llm::ToolResultMessage],
927    mode: AgentMode,
928) -> bool {
929    if !matches!(
930        mode,
931        AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
932    ) {
933        return false;
934    }
935
936    let mut saw_edit_like_success = false;
937    let mut saw_successful_check = false;
938
939    for result in tool_results {
940        if result.is_error {
941            continue;
942        }
943
944        if matches!(result.tool_name.as_str(), "write" | "edit" | "multi_edit") {
945            saw_edit_like_success = true;
946        }
947
948        let action = result.details.get("action").and_then(|v| v.as_str());
949        let has_closed_unit = result
950            .details
951            .get("unit")
952            .and_then(|unit| unit.get("status"))
953            .and_then(|v| v.as_str())
954            == Some("closed");
955
956        if let Some(command) = result.details.get("command").and_then(|v| v.as_str()) {
957            let exit_code_ok = result.details.get("exit_code").and_then(|v| v.as_i64()) == Some(0);
958            let command_lower = command.to_ascii_lowercase();
959            let looks_like_check = command_lower.contains("check")
960                || command_lower.contains("test")
961                || command_lower.contains("verify")
962                || command_lower.contains("pytest")
963                || command_lower.contains("cargo test")
964                || command_lower.contains("cargo check");
965            if exit_code_ok && looks_like_check {
966                saw_successful_check = true;
967            }
968        }
969
970        match action {
971            Some("close") => return true,
972            Some("verify")
973                if result.details.get("passed").and_then(|v| v.as_bool()) == Some(true) =>
974            {
975                return true;
976            }
977            _ if has_closed_unit => return true,
978            _ => {}
979        }
980    }
981
982    saw_edit_like_success && saw_successful_check
983}
984
985fn mana_review_stop_reason(mana_review: &TurnManaReview, mode: AgentMode) -> Option<StopReason> {
986    match mana_review.state {
987        ManaReviewState::NeedsDecision => Some(StopReason::UserBlocker),
988        ManaReviewState::Changed if matches!(mode, AgentMode::Planner) => {
989            if !mana_review.proposed_children.is_empty()
990                || !mana_review.touched_units.is_empty()
991                || !mana_review.material_field_changes.is_empty()
992                || !mana_review.notes_appended.is_empty()
993                || !mana_review.decision_events.is_empty()
994            {
995                Some(StopReason::DecompositionCompleted)
996            } else {
997                None
998            }
999        }
1000        _ => None,
1001    }
1002}
1003
1004fn planner_stop_reason(message: &AssistantMessage, mode: AgentMode) -> Option<StopReason> {
1005    if !matches!(mode, AgentMode::Planner) {
1006        return None;
1007    }
1008
1009    classify_stop_reason_from_text(message, true)
1010}
1011
1012fn execution_stop_reason(message: &AssistantMessage, mode: AgentMode) -> Option<StopReason> {
1013    if !matches!(
1014        mode,
1015        AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
1016    ) {
1017        return None;
1018    }
1019
1020    match classify_stop_reason_from_text(message, false) {
1021        Some(reason @ (StopReason::UserBlocker | StopReason::WorkCompleted)) => Some(reason),
1022        _ => None,
1023    }
1024}
1025
1026fn classify_stop_reason_from_text(
1027    message: &AssistantMessage,
1028    planner_mode: bool,
1029) -> Option<StopReason> {
1030    let text = assistant_message_text(message);
1031    if text.trim().is_empty() {
1032        return None;
1033    }
1034
1035    let lower = text.to_ascii_lowercase();
1036
1037    let blocker_signal = [
1038        "blocked",
1039        "need your input",
1040        "which should",
1041        "waiting on you",
1042        "approval",
1043        "before i continue",
1044        "before continuing",
1045    ]
1046    .iter()
1047    .any(|needle| lower.contains(needle));
1048    if blocker_signal {
1049        return Some(StopReason::UserBlocker);
1050    }
1051
1052    if planner_mode {
1053        let decomposition_complete_signal = [
1054            "externalized into mana",
1055            "created the units",
1056            "created child units",
1057            "decomposition is complete",
1058            "plan is complete",
1059            "ready for handoff",
1060        ]
1061        .iter()
1062        .any(|needle| lower.contains(needle));
1063        if decomposition_complete_signal {
1064            return Some(StopReason::DecompositionCompleted);
1065        }
1066    } else {
1067        let work_complete_signal = [
1068            "all done",
1069            "done",
1070            "completed",
1071            "finished",
1072            "implemented",
1073            "fixed",
1074            "handled",
1075        ]
1076        .iter()
1077        .any(|needle| lower.contains(needle));
1078        if work_complete_signal {
1079            return Some(StopReason::WorkCompleted);
1080        }
1081    }
1082
1083    None
1084}
1085
1086/// Build an AssistantMessage from accumulated stream parts while preserving
1087/// the original block order emitted by the model.
1088fn build_assistant_message(
1089    content: &[ContentBlock],
1090    tool_calls: &[(String, String, serde_json::Value)],
1091    usage: Option<Usage>,
1092) -> AssistantMessage {
1093    let stop_reason = if tool_calls.is_empty() {
1094        LlmStopReason::EndTurn
1095    } else {
1096        LlmStopReason::ToolUse
1097    };
1098
1099    AssistantMessage {
1100        content: content.to_vec(),
1101        usage,
1102        stop_reason,
1103        timestamp: imp_llm::now(),
1104    }
1105}
1106
1107fn clone_model(model: &Model) -> Model {
1108    Model {
1109        meta: model.meta.clone(),
1110        provider: Arc::clone(&model.provider),
1111    }
1112}
1113
1114fn extract_file_path(cwd: &Path, args: &serde_json::Value) -> Option<PathBuf> {
1115    let raw_path = args.get("path")?.as_str()?;
1116    if raw_path.is_empty() {
1117        return None;
1118    }
1119
1120    let path = PathBuf::from(raw_path);
1121    if path.is_absolute() {
1122        Some(path)
1123    } else {
1124        Some(cwd.join(path))
1125    }
1126}
1127
/// Returns a hint to prefer the native mana tool when `command` is a `mana`
/// invocation of a recognised subcommand.
///
/// The command is trimmed, must start with the word `mana` (so `manage ...`
/// does not match), and its first argument must be one of the listed
/// subcommands. Anything else yields `None`.
fn mana_bash_equivalent_hint(command: &str) -> Option<&'static str> {
    const NATIVE_ACTIONS: &[&str] = &[
        "status",
        "list",
        "ls",
        "show",
        "read",
        "create",
        "close",
        "update",
        "run",
        "run_state",
        "evaluate",
        "agents",
        "logs",
        "next",
        "claim",
        "release",
        "tree",
    ];

    let after_binary = command.trim().strip_prefix("mana")?;

    // `mana` must be a whole word: either nothing follows or whitespace does.
    let is_whole_word = after_binary.is_empty() || after_binary.starts_with(char::is_whitespace);
    if !is_whole_word {
        return None;
    }

    let subcommand = after_binary.split_whitespace().next()?;
    if NATIVE_ACTIONS.contains(&subcommand) {
        Some("Use the native mana tool instead of `bash` for this mana command. For orchestration, the native tool supports canonical target selection (`id`, `targets`, or all ready work) plus background run tracking.")
    } else {
        None
    }
}
1144
1145fn mana_skill_follow_up_hint(
1146    prompt: &str,
1147    mode: AgentMode,
1148    tools_available: bool,
1149    _has_mana_skill: bool,
1150    _has_mana_basics_skill: bool,
1151    _has_mana_delegation_skill: bool,
1152) -> Option<&'static str> {
1153    if !tools_available {
1154        return None;
1155    }
1156
1157    let lower = prompt.to_ascii_lowercase();
1158
1159    let orchestration_signal = [
1160        "decompose",
1161        "decomposition",
1162        "split this",
1163        "break this up",
1164        "break it up",
1165        "parallel",
1166        "parallel helper",
1167        "bounded helper",
1168        "orchestrate",
1169        "orchestration",
1170        "create a unit",
1171        "create units",
1172        "mana run",
1173    ]
1174    .iter()
1175    .any(|needle| lower.contains(needle));
1176
1177    let mana_signal = [
1178        " mana ",
1179        "mana status",
1180        "mana list",
1181        "mana show",
1182        "mana update",
1183        "mana create",
1184        "mana run",
1185    ]
1186    .iter()
1187    .any(|needle| lower.contains(needle));
1188
1189    match mode {
1190        AgentMode::Full | AgentMode::Orchestrator | AgentMode::Planner
1191            if orchestration_signal || mana_signal =>
1192        {
1193            Some("Before you continue: use native mana `guide` or `template` actions if you need extra help with unit design, decomposition, retries, or worker handoff.")
1194        }
1195        AgentMode::Worker | AgentMode::Auditor if mana_signal => {
1196            Some("Before you continue: use the native mana tool and stay within this mode's allowed mana workflow. Use the `guide` action if you need help.")
1197        }
1198        _ => None,
1199    }
1200}
1201
1202#[cfg(test)]
1203mod tests {
1204    use super::*;
1205    use crate::agent::turn_assessment::NextAction;
1206    use crate::builder::AgentBuilder;
1207    use std::pin::Pin;
1208    use std::sync::{
1209        atomic::{AtomicUsize, Ordering},
1210        Arc, Mutex as StdMutex,
1211    };
1212    use std::time::Duration;
1213
1214    use async_trait::async_trait;
1215    use futures_core::Stream;
1216    use imp_llm::auth::{ApiKey, AuthStore};
1217    use imp_llm::model::{Capabilities, ModelMeta, ModelPricing};
1218    use imp_llm::provider::Provider;
1219    use imp_llm::ToolResultMessage;
1220    use tokio::sync::{Mutex, Notify};
1221
1222    /// A mock provider that returns pre-programmed responses.
1223    /// Each call to `stream()` pops the next response from the queue.
1224    struct MockProvider {
1225        responses: Mutex<Vec<Vec<imp_llm::Result<StreamEvent>>>>,
1226    }
1227
1228    impl MockProvider {
1229        fn new(responses: Vec<Vec<StreamEvent>>) -> Self {
1230            Self {
1231                responses: Mutex::new(
1232                    responses
1233                        .into_iter()
1234                        .map(|events| events.into_iter().map(Ok).collect())
1235                        .collect(),
1236                ),
1237            }
1238        }
1239
1240        fn new_results(responses: Vec<Vec<imp_llm::Result<StreamEvent>>>) -> Self {
1241            Self {
1242                responses: Mutex::new(responses),
1243            }
1244        }
1245    }
1246
1247    #[async_trait]
1248    impl Provider for MockProvider {
1249        fn stream(
1250            &self,
1251            _model: &Model,
1252            _context: Context,
1253            _options: RequestOptions,
1254            _api_key: &str,
1255        ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<StreamEvent>> + Send>> {
1256            // We need to get the next response synchronously. Use try_lock since
1257            // tests are single-threaded per agent run.
1258            let mut responses = self.responses.try_lock().expect("MockProvider lock");
1259            let events = if responses.is_empty() {
1260                vec![Ok(StreamEvent::Error {
1261                    error: "No more mock responses".to_string(),
1262                })]
1263            } else {
1264                responses.remove(0)
1265            };
1266            let stream = futures::stream::iter(events);
1267            Box::pin(stream)
1268        }
1269
1270        async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
1271            Ok("mock-key".to_string())
1272        }
1273
1274        fn id(&self) -> &str {
1275            "mock"
1276        }
1277
1278        fn models(&self) -> &[ModelMeta] {
1279            &[]
1280        }
1281    }
1282
1283    fn test_model(provider: Arc<dyn Provider>) -> Model {
1284        test_model_with_context_window(provider, 200_000)
1285    }
1286
1287    fn test_model_with_context_window(provider: Arc<dyn Provider>, context_window: u32) -> Model {
1288        Model {
1289            meta: ModelMeta {
1290                id: "test-model".to_string(),
1291                provider: "mock".to_string(),
1292                name: "Test Model".to_string(),
1293                context_window,
1294                max_output_tokens: 16_384,
1295                pricing: ModelPricing {
1296                    input_per_mtok: 3.0,
1297                    output_per_mtok: 15.0,
1298                    cache_read_per_mtok: 0.3,
1299                    cache_write_per_mtok: 3.75,
1300                },
1301                capabilities: Capabilities {
1302                    reasoning: true,
1303                    images: false,
1304                    tool_use: true,
1305                },
1306            },
1307            provider,
1308        }
1309    }
1310
1311    fn text_response(text: &str, input_tokens: u32, output_tokens: u32) -> Vec<StreamEvent> {
1312        vec![
1313            StreamEvent::MessageStart {
1314                model: "test-model".to_string(),
1315            },
1316            StreamEvent::TextDelta {
1317                text: text.to_string(),
1318            },
1319            StreamEvent::MessageEnd {
1320                message: AssistantMessage {
1321                    content: vec![ContentBlock::Text {
1322                        text: text.to_string(),
1323                    }],
1324                    usage: Some(Usage {
1325                        input_tokens,
1326                        output_tokens,
1327                        cache_read_tokens: 0,
1328                        cache_write_tokens: 0,
1329                    }),
1330                    stop_reason: LlmStopReason::EndTurn,
1331                    timestamp: 1000,
1332                },
1333            },
1334        ]
1335    }
1336
1337    fn tool_call_response(
1338        call_id: &str,
1339        tool_name: &str,
1340        args: serde_json::Value,
1341        input_tokens: u32,
1342        output_tokens: u32,
1343    ) -> Vec<StreamEvent> {
1344        vec![
1345            StreamEvent::MessageStart {
1346                model: "test-model".to_string(),
1347            },
1348            StreamEvent::ToolCall {
1349                id: call_id.to_string(),
1350                name: tool_name.to_string(),
1351                arguments: args.clone(),
1352            },
1353            StreamEvent::MessageEnd {
1354                message: AssistantMessage {
1355                    content: vec![ContentBlock::ToolCall {
1356                        id: call_id.to_string(),
1357                        name: tool_name.to_string(),
1358                        arguments: args,
1359                    }],
1360                    usage: Some(Usage {
1361                        input_tokens,
1362                        output_tokens,
1363                        cache_read_tokens: 0,
1364                        cache_write_tokens: 0,
1365                    }),
1366                    stop_reason: LlmStopReason::ToolUse,
1367                    timestamp: 1000,
1368                },
1369            },
1370        ]
1371    }
1372
1373    fn multi_tool_call_response(
1374        calls: &[(&str, &str, serde_json::Value)],
1375        input_tokens: u32,
1376        output_tokens: u32,
1377    ) -> Vec<StreamEvent> {
1378        let mut events = vec![StreamEvent::MessageStart {
1379            model: "test-model".to_string(),
1380        }];
1381
1382        let mut content = Vec::new();
1383        for (id, name, args) in calls {
1384            events.push(StreamEvent::ToolCall {
1385                id: id.to_string(),
1386                name: name.to_string(),
1387                arguments: args.clone(),
1388            });
1389            content.push(ContentBlock::ToolCall {
1390                id: id.to_string(),
1391                name: name.to_string(),
1392                arguments: args.clone(),
1393            });
1394        }
1395
1396        events.push(StreamEvent::MessageEnd {
1397            message: AssistantMessage {
1398                content,
1399                usage: Some(Usage {
1400                    input_tokens,
1401                    output_tokens,
1402                    cache_read_tokens: 0,
1403                    cache_write_tokens: 0,
1404                }),
1405                stop_reason: LlmStopReason::ToolUse,
1406                timestamp: 1000,
1407            },
1408        });
1409
1410        events
1411    }
1412
1413    fn make_assistant_tool_call(
1414        call_id: &str,
1415        tool_name: &str,
1416        args: serde_json::Value,
1417    ) -> Message {
1418        Message::Assistant(AssistantMessage {
1419            content: vec![ContentBlock::ToolCall {
1420                id: call_id.to_string(),
1421                name: tool_name.to_string(),
1422                arguments: args,
1423            }],
1424            usage: None,
1425            stop_reason: LlmStopReason::ToolUse,
1426            timestamp: imp_llm::now(),
1427        })
1428    }
1429
1430    fn make_tool_result(call_id: &str, tool_name: &str, output: &str) -> Message {
1431        Message::ToolResult(imp_llm::ToolResultMessage {
1432            tool_call_id: call_id.to_string(),
1433            tool_name: tool_name.to_string(),
1434            content: vec![ContentBlock::Text {
1435                text: output.to_string(),
1436            }],
1437            is_error: false,
1438            details: serde_json::Value::Null,
1439            timestamp: imp_llm::now(),
1440        })
1441    }
1442
1443    fn tool_result_text(message: &Message) -> Option<&str> {
1444        match message {
1445            Message::ToolResult(result) => result.content.iter().find_map(|block| match block {
1446                ContentBlock::Text { text } => Some(text.as_str()),
1447                _ => None,
1448            }),
1449            _ => None,
1450        }
1451    }
1452
1453    /// A simple echo tool for testing.
1454    struct EchoTool;
1455
1456    #[async_trait]
1457    impl crate::tools::Tool for EchoTool {
1458        fn name(&self) -> &str {
1459            "echo"
1460        }
1461        fn label(&self) -> &str {
1462            "Echo"
1463        }
1464        fn description(&self) -> &str {
1465            "Echoes back the input"
1466        }
1467        fn parameters(&self) -> serde_json::Value {
1468            serde_json::json!({
1469                "type": "object",
1470                "properties": {
1471                    "text": { "type": "string" }
1472                },
1473                "required": ["text"]
1474            })
1475        }
1476        fn is_readonly(&self) -> bool {
1477            true
1478        }
1479        async fn execute(
1480            &self,
1481            _call_id: &str,
1482            params: serde_json::Value,
1483            _ctx: crate::tools::ToolContext,
1484        ) -> crate::error::Result<crate::tools::ToolOutput> {
1485            let text = params["text"].as_str().unwrap_or("no text");
1486            Ok(crate::tools::ToolOutput::text(format!("echo: {text}")))
1487        }
1488    }
1489
1490    /// A mutable tool for testing write partitioning.
1491    #[allow(dead_code)]
1492    struct WriteTool;
1493
1494    #[async_trait]
1495    impl crate::tools::Tool for WriteTool {
1496        fn name(&self) -> &str {
1497            "write"
1498        }
1499        fn label(&self) -> &str {
1500            "Write"
1501        }
1502        fn description(&self) -> &str {
1503            "Writes data"
1504        }
1505        fn parameters(&self) -> serde_json::Value {
1506            serde_json::json!({
1507                "type": "object",
1508                "properties": {
1509                    "data": { "type": "string" }
1510                },
1511                "required": ["data"]
1512            })
1513        }
1514        fn is_readonly(&self) -> bool {
1515            false
1516        }
1517        async fn execute(
1518            &self,
1519            _call_id: &str,
1520            params: serde_json::Value,
1521            _ctx: crate::tools::ToolContext,
1522        ) -> crate::error::Result<crate::tools::ToolOutput> {
1523            let data = params["data"].as_str().unwrap_or("no data");
1524            Ok(crate::tools::ToolOutput::text(format!("wrote: {data}")))
1525        }
1526    }
1527
1528    struct ConcurrentReadonlyState {
1529        readonly_expected: usize,
1530        readonly_started: AtomicUsize,
1531        readonly_finished: AtomicUsize,
1532        mutable_observed_finished: AtomicUsize,
1533        log: StdMutex<Vec<String>>,
1534        notify: Notify,
1535    }
1536
1537    impl ConcurrentReadonlyState {
1538        fn new(readonly_expected: usize) -> Self {
1539            Self {
1540                readonly_expected,
1541                readonly_started: AtomicUsize::new(0),
1542                readonly_finished: AtomicUsize::new(0),
1543                mutable_observed_finished: AtomicUsize::new(0),
1544                log: StdMutex::new(Vec::new()),
1545                notify: Notify::new(),
1546            }
1547        }
1548
1549        fn record(&self, entry: impl Into<String>) {
1550            self.log
1551                .lock()
1552                .expect("concurrent log lock")
1553                .push(entry.into());
1554        }
1555
1556        async fn wait_for_all_readonly_to_start(&self) {
1557            while self.readonly_started.load(Ordering::SeqCst) < self.readonly_expected {
1558                self.notify.notified().await;
1559            }
1560        }
1561    }
1562
    /// Read-only test tool whose `execute` does not finish until
    /// `shared.readonly_expected` read-only executions have started.
    struct CoordinatedReadonlyTool {
        /// Used as both the tool name and its label, and as a log prefix.
        name: &'static str,
        /// Counters, log, and notifier shared with the other coordinated tools.
        shared: Arc<ConcurrentReadonlyState>,
    }

    #[async_trait]
    impl crate::tools::Tool for CoordinatedReadonlyTool {
        fn name(&self) -> &str {
            self.name
        }
        fn label(&self) -> &str {
            self.name
        }
        fn description(&self) -> &str {
            "Read-only tool used to verify concurrent execution"
        }
        // JSON schema: an object with one required string field, `text`.
        fn parameters(&self) -> serde_json::Value {
            serde_json::json!({
                "type": "object",
                "properties": {
                    "text": { "type": "string" }
                },
                "required": ["text"]
            })
        }
        fn is_readonly(&self) -> bool {
            true
        }
        /// Logs `<name>:start`, bumps the started counter, wakes current
        /// waiters, then blocks until all expected read-only executions have
        /// started. Afterwards it logs `<name>:end`, bumps the finished counter,
        /// and returns `<name>: <text>` (falling back to the tool name when
        /// `text` is absent or not a string).
        async fn execute(
            &self,
            _call_id: &str,
            params: serde_json::Value,
            _ctx: crate::tools::ToolContext,
        ) -> crate::error::Result<crate::tools::ToolOutput> {
            self.shared.record(format!("{}:start", self.name));
            // Increment before notifying so that woken waiters observe the new count.
            self.shared.readonly_started.fetch_add(1, Ordering::SeqCst);
            self.shared.notify.notify_waiters();
            // With `readonly_expected > 1` this cannot return until other
            // read-only executions have also started.
            self.shared.wait_for_all_readonly_to_start().await;
            self.shared.record(format!("{}:end", self.name));
            self.shared.readonly_finished.fetch_add(1, Ordering::SeqCst);

            let text = params["text"].as_str().unwrap_or(self.name);
            Ok(crate::tools::ToolOutput::text(format!(
                "{}: {text}",
                self.name
            )))
        }
    }
1611
    /// Non-read-only test tool that snapshots how many read-only executions had
    /// finished at the moment it ran.
    struct CoordinatedMutableTool {
        /// Counters and log shared with the coordinated read-only tools.
        shared: Arc<ConcurrentReadonlyState>,
    }

    #[async_trait]
    impl crate::tools::Tool for CoordinatedMutableTool {
        fn name(&self) -> &str {
            "write_after_reads"
        }
        fn label(&self) -> &str {
            "Write After Reads"
        }
        fn description(&self) -> &str {
            "Mutable tool used to verify read-only tools finish first"
        }
        // JSON schema: an object with one required string field, `data`.
        fn parameters(&self) -> serde_json::Value {
            serde_json::json!({
                "type": "object",
                "properties": {
                    "data": { "type": "string" }
                },
                "required": ["data"]
            })
        }
        fn is_readonly(&self) -> bool {
            false
        }
        /// Stores the current `readonly_finished` count in
        /// `mutable_observed_finished`, logs `write_after_reads:start`, and
        /// returns `wrote after reads: <data>` (falling back to `"no data"`).
        async fn execute(
            &self,
            _call_id: &str,
            params: serde_json::Value,
            _ctx: crate::tools::ToolContext,
        ) -> crate::error::Result<crate::tools::ToolOutput> {
            // Snapshot first, before anything else in this call, so the stored
            // value reflects the state at entry.
            let finished = self.shared.readonly_finished.load(Ordering::SeqCst);
            self.shared
                .mutable_observed_finished
                .store(finished, Ordering::SeqCst);
            self.shared.record("write_after_reads:start");

            let data = params["data"].as_str().unwrap_or("no data");
            Ok(crate::tools::ToolOutput::text(format!(
                "wrote after reads: {data}"
            )))
        }
    }
1657
1658    /// Collect all events from the handle until the channel closes.
1659    async fn collect_events(mut handle: AgentHandle) -> Vec<AgentEvent> {
1660        let mut events = Vec::new();
1661        while let Some(event) = handle.event_rx.recv().await {
1662            events.push(event);
1663        }
1664        events
1665    }
1666
1667    #[test]
1668    fn agent_queues_mana_hint_for_planner_requests() {
1669        let provider = Arc::new(MockProvider::new(vec![
1670            text_response("Loaded mana skill", 100, 20),
1671            text_response("Done", 120, 25),
1672        ]));
1673
1674        let model = test_model(provider);
1675        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
1676        agent.has_mana_skill = true;
1677        agent.mode = AgentMode::Planner;
1678
1679        let rt = tokio::runtime::Runtime::new().unwrap();
1680        rt.block_on(async {
1681            agent
1682                .run("Please split this into units for workers".to_string())
1683                .await
1684                .unwrap();
1685        });
1686
1687        let user_texts: Vec<String> = agent
1688            .messages
1689            .iter()
1690            .filter_map(|message| match message {
1691                Message::User(user) => user.content.iter().find_map(|block| match block {
1692                    ContentBlock::Text { text } => Some(text.clone()),
1693                    _ => None,
1694                }),
1695                _ => None,
1696            })
1697            .collect();
1698
1699        assert_eq!(user_texts.len(), 1);
1700        assert_eq!(user_texts[0], "Please split this into units for workers");
1701    }
1702
1703    #[tokio::test]
1704    async fn agent_queues_mana_externalization_follow_up_after_planning_turn() {
1705        let provider = Arc::new(MockProvider::new(vec![
1706            text_response("Here is the plan: split this into phases, add dependencies, and define verify steps.", 100, 20),
1707            text_response("Externalized into mana.", 120, 25),
1708        ]));
1709
1710        let model = test_model(provider);
1711        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
1712        agent.has_mana_skill = true;
1713        agent.mode = AgentMode::Planner;
1714
1715        agent.run("Plan the rollout".to_string()).await.unwrap();
1716
1717        let user_texts: Vec<String> = agent
1718            .messages
1719            .iter()
1720            .filter_map(|message| match message {
1721                Message::User(user) => user.content.iter().find_map(|block| match block {
1722                    ContentBlock::Text { text } => Some(text.clone()),
1723                    _ => None,
1724                }),
1725                _ => None,
1726            })
1727            .collect();
1728
1729        assert_eq!(user_texts.len(), 2);
1730        assert_eq!(user_texts[0], "Plan the rollout");
1731        assert!(user_texts[1].contains("externalize the durable plan"));
1732    }
1733
1734    #[tokio::test]
1735    async fn turn_assessment_debug_view_reports_execution_blocker() {
1736        let (agent, _handle) = Agent::new(
1737            test_model(Arc::new(MockProvider::new(vec![]))),
1738            PathBuf::from("/tmp"),
1739        );
1740        let assessment = agent.assess_post_turn(
1741            &AssistantMessage {
1742                content: vec![ContentBlock::Text {
1743                    text: "Verify failed.".to_string(),
1744                }],
1745                usage: None,
1746                stop_reason: LlmStopReason::EndTurn,
1747                timestamp: 0,
1748            },
1749            &[imp_llm::ToolResultMessage {
1750                tool_call_id: "call_verify".to_string(),
1751                tool_name: "mana".to_string(),
1752                content: vec![ContentBlock::Text {
1753                    text: "Verify failed".to_string(),
1754                }],
1755                is_error: true,
1756                details: serde_json::json!({
1757                    "action": "verify",
1758                    "passed": false,
1759                    "exit_code": 1
1760                }),
1761                timestamp: 0,
1762            }],
1763            true,
1764            &TurnManaReview::no_change(0),
1765        );
1766
1767        let debug = assessment.debug_view();
1768        assert_eq!(
1769            debug.runtime.execution_stop_reason.as_deref(),
1770            Some("execution_blocked")
1771        );
1772        assert_eq!(
1773            debug.chosen_action,
1774            NextActionDebugView::Stop {
1775                reason: "execution_blocked".to_string(),
1776            }
1777        );
1778    }
1779
1780    #[test]
1781    fn turn_assessment_debug_view_reports_continue_recommendation() {
1782        let assessment = PostTurnAssessment {
1783            runtime: RuntimeEvidence {
1784                repeated_action: false,
1785                execution_stop_reason: None,
1786                work_completed: false,
1787                execution_debt: false,
1788                execution_evidence: false,
1789                planning_only_progress: false,
1790            },
1791            mana: ManaEvidence { stop_reason: None },
1792            text_fallback: TextFallbackEvidence {
1793                planner_stop_reason: None,
1794                execution_stop_reason: None,
1795            },
1796            continue_recommendation: Some(ContinueRecommendation {
1797                prompt: "continue".to_string(),
1798                reason: ContinueReason::HighConfidenceVisibleNextStep,
1799            }),
1800        };
1801
1802        let debug = assessment.debug_view();
1803        let recommendation = debug
1804            .continue_recommendation
1805            .expect("continue recommendation present");
1806        assert_eq!(recommendation.reason, "high_confidence_visible_next_step");
1807        assert!(matches!(
1808            debug.chosen_action,
1809            NextActionDebugView::Continue { .. }
1810        ));
1811    }
1812
1813    #[tokio::test]
1814    async fn agent_run_artifacts_writes_trace_and_evidence_packet() {
1815        let temp = tempfile::TempDir::new().unwrap();
1816        let provider = Arc::new(MockProvider::new(vec![text_response("done", 10, 5)]));
1817        let model = test_model(provider);
1818        let (mut agent, _handle) = AgentBuilder::new(
1819            Config::default(),
1820            temp.path().to_path_buf(),
1821            model,
1822            String::new(),
1823        )
1824        .verify_command("printf verify-ok", true)
1825        .build()
1826        .unwrap();
1827        agent.workflow_contract.autonomy_mode = crate::workflow::AutonomyMode::AllowAll;
1828
1829        agent.run("Do the work".to_string()).await.unwrap();
1830
1831        let runs_dir = temp.path().join(".imp").join("runs");
1832        let mut runs = std::fs::read_dir(&runs_dir)
1833            .unwrap()
1834            .collect::<Result<Vec<_>, _>>()
1835            .unwrap();
1836        assert_eq!(runs.len(), 1);
1837        let run_dir = runs.pop().unwrap().path();
1838        let trace = std::fs::read_to_string(run_dir.join("trace.jsonl")).unwrap();
1839        assert!(trace.contains("agent.start"));
1840        assert!(trace.contains("agent.end"));
1841        let evidence = std::fs::read_to_string(run_dir.join("evidence.md")).unwrap();
1842        assert!(evidence.contains("# Evidence Packet"));
1843        assert!(evidence.contains("Do the work"));
1844        assert!(evidence.contains("**Autonomy:** allow-all"));
1845        assert!(evidence.contains("allow-all mode was active"));
1846        assert!(evidence.contains("hard-rail bypass: none recorded"));
1847        assert!(evidence.contains("policy.checked trace events"));
1848        assert!(evidence.contains("trace.jsonl"));
1849        assert!(evidence.contains("verify-ok"));
1850        assert!(evidence.contains("passed"));
1851        assert!(run_dir.join("verification/verify-1/status.json").exists());
1852        assert!(run_dir.join("workflow-contract.json").exists());
1853    }
1854
1855    #[tokio::test]
1856    async fn default_safe_compatibility_allows_readonly_tool() {
1857        let provider = Arc::new(MockProvider::new(vec![
1858            tool_call_response(
1859                "call_read",
1860                "echo",
1861                serde_json::json!({"text": "hello"}),
1862                100,
1863                30,
1864            ),
1865            text_response("done", 100, 10),
1866        ]));
1867        let model = test_model(provider);
1868        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
1869        agent.tools.register(Arc::new(EchoTool));
1870
1871        let events_task = tokio::spawn(collect_events(handle));
1872        agent.run("Echo hello".to_string()).await.unwrap();
1873        drop(agent);
1874        let events = events_task.await.unwrap();
1875
1876        let policy = first_policy_record(&events).expect("policy checked");
1877        assert_eq!(policy.autonomy_mode, crate::workflow::AutonomyMode::Safe);
1878        assert!(policy.decision.is_allowed());
1879        let result = first_tool_result(&events).expect("tool end event");
1880        assert!(!result.is_error);
1881        assert_eq!(
1882            tool_result_text(&Message::ToolResult(result.clone())),
1883            Some("echo: hello")
1884        );
1885    }
1886
1887    #[tokio::test]
1888    async fn default_safe_compatibility_allows_write_tool() {
1889        let provider = Arc::new(MockProvider::new(vec![
1890            tool_call_response(
1891                "call_write",
1892                "write",
1893                serde_json::json!({"data": "hello"}),
1894                100,
1895                30,
1896            ),
1897            text_response("done", 100, 10),
1898        ]));
1899        let model = test_model(provider);
1900        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
1901        agent.tools.register(Arc::new(WriteTool));
1902
1903        let events_task = tokio::spawn(collect_events(handle));
1904        agent.run("Write hello".to_string()).await.unwrap();
1905        drop(agent);
1906        let events = events_task.await.unwrap();
1907
1908        let policy = first_policy_record(&events).expect("policy checked");
1909        assert_eq!(policy.autonomy_mode, crate::workflow::AutonomyMode::Safe);
1910        assert!(policy.decision.is_allowed());
1911        let result = first_tool_result(&events).expect("tool end event");
1912        assert!(!result.is_error);
1913        assert_eq!(
1914            tool_result_text(&Message::ToolResult(result.clone())),
1915            Some("wrote: hello")
1916        );
1917    }
1918
1919    #[tokio::test]
1920    async fn default_safe_compatibility_preserves_run_policy_tool_deny() {
1921        let provider = Arc::new(MockProvider::new(vec![
1922            tool_call_response(
1923                "call_echo",
1924                "echo",
1925                serde_json::json!({"text": "hello"}),
1926                100,
1927                30,
1928            ),
1929            text_response("done", 100, 10),
1930        ]));
1931        let model = test_model(provider);
1932        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
1933        agent.tools.register(Arc::new(EchoTool));
1934        agent.run_policy = crate::policy::RunPolicy::new().deny_tool("echo");
1935
1936        let events_task = tokio::spawn(collect_events(handle));
1937        agent.run("Echo hello".to_string()).await.unwrap();
1938        drop(agent);
1939        let events = events_task.await.unwrap();
1940
1941        let policy = first_policy_record(&events).expect("policy checked");
1942        assert_eq!(policy.autonomy_mode, crate::workflow::AutonomyMode::Safe);
1943        assert!(matches!(
1944            policy.decision,
1945            crate::reference_monitor::ToolPolicyDecision::Deny { .. }
1946        ));
1947        let result = first_tool_result(&events).expect("tool end event");
1948        assert!(result.is_error);
1949        assert_eq!(
1950            tool_result_text(&Message::ToolResult(result.clone())),
1951            Some("Tool `echo` denied by run policy.")
1952        );
1953    }
1954
1955    #[tokio::test]
1956    async fn default_safe_compatibility_preserves_agent_mode_tool_deny() {
1957        let provider = Arc::new(MockProvider::new(vec![
1958            tool_call_response(
1959                "call_write",
1960                "write",
1961                serde_json::json!({"data": "hello"}),
1962                100,
1963                30,
1964            ),
1965            text_response("done", 100, 10),
1966        ]));
1967        let model = test_model(provider);
1968        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
1969        agent.mode = AgentMode::Reviewer;
1970        agent.tools.register(Arc::new(WriteTool));
1971
1972        let events_task = tokio::spawn(collect_events(handle));
1973        agent.run("Write hello".to_string()).await.unwrap();
1974        drop(agent);
1975        let events = events_task.await.unwrap();
1976
1977        let policy = first_policy_record(&events).expect("policy checked");
1978        assert_eq!(policy.autonomy_mode, crate::workflow::AutonomyMode::Safe);
1979        assert!(matches!(
1980            policy.decision,
1981            crate::reference_monitor::ToolPolicyDecision::Deny { .. }
1982        ));
1983        let result = first_tool_result(&events).expect("tool end event");
1984        assert!(result.is_error);
1985        assert_eq!(
1986            tool_result_text(&Message::ToolResult(result.clone())),
1987            Some("Tool 'write' is not available in reviewer mode")
1988        );
1989    }
1990
1991    fn first_policy_record(
1992        events: &[AgentEvent],
1993    ) -> Option<&crate::reference_monitor::PolicyTraceRecord> {
1994        events.iter().find_map(|event| match event {
1995            AgentEvent::PolicyChecked { record } => Some(record),
1996            _ => None,
1997        })
1998    }
1999
2000    fn first_tool_result(events: &[AgentEvent]) -> Option<&ToolResultMessage> {
2001        events.iter().find_map(|event| match event {
2002            AgentEvent::ToolExecutionEnd { result, .. } => Some(result),
2003            _ => None,
2004        })
2005    }
2006
2007    #[tokio::test]
2008    async fn tool_execution_policy_routes_run_policy_deny_through_reference_monitor() {
2009        let provider = Arc::new(MockProvider::new(vec![
2010            tool_call_response(
2011                "call_1",
2012                "echo",
2013                serde_json::json!({"text": "hello"}),
2014                100,
2015                30,
2016            ),
2017            text_response("done", 100, 10),
2018        ]));
2019        let model = test_model(provider);
2020        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2021        agent.tools.register(Arc::new(EchoTool));
2022        agent.run_policy = crate::policy::RunPolicy::new().deny_tool("echo");
2023
2024        let events_task = tokio::spawn(collect_events(handle));
2025        agent.run("Echo hello".to_string()).await.unwrap();
2026        drop(agent);
2027        let events = events_task.await.unwrap();
2028
2029        let policy_event = events
2030            .iter()
2031            .find_map(|event| match event {
2032                AgentEvent::PolicyChecked { record } => Some(record),
2033                _ => None,
2034            })
2035            .expect("policy checked event");
2036        assert_eq!(policy_event.tool_name, "echo");
2037        assert!(policy_event.args_hash.is_some());
2038        assert!(matches!(
2039            policy_event.decision,
2040            crate::reference_monitor::ToolPolicyDecision::Deny { .. }
2041        ));
2042
2043        let result = events
2044            .iter()
2045            .find_map(|event| match event {
2046                AgentEvent::ToolExecutionEnd { result, .. } => Some(result),
2047                _ => None,
2048            })
2049            .expect("tool end event");
2050        assert!(result.is_error);
2051        assert_eq!(
2052            tool_result_text(&Message::ToolResult(result.clone())),
2053            Some("Tool `echo` denied by run policy.")
2054        );
2055
2056        let checkpoint = events.iter().rev().find_map(|event| match event {
2057            AgentEvent::RecoveryCheckpoint { checkpoint } => checkpoint.error_class.as_deref(),
2058            _ => None,
2059        });
2060        assert_eq!(checkpoint, Some("run_policy_blocked"));
2061    }
2062
2063    #[tokio::test]
2064    async fn tool_execution_policy_routes_agent_mode_deny_through_reference_monitor() {
2065        let provider = Arc::new(MockProvider::new(vec![
2066            tool_call_response(
2067                "call_1",
2068                "write",
2069                serde_json::json!({"data": "hello"}),
2070                100,
2071                30,
2072            ),
2073            text_response("done", 100, 10),
2074        ]));
2075        let model = test_model(provider);
2076        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2077        agent.mode = AgentMode::Reviewer;
2078        agent.tools.register(Arc::new(WriteTool));
2079
2080        let events_task = tokio::spawn(collect_events(handle));
2081        agent.run("Write hello".to_string()).await.unwrap();
2082        drop(agent);
2083        let events = events_task.await.unwrap();
2084
2085        let policy_event = events
2086            .iter()
2087            .find_map(|event| match event {
2088                AgentEvent::PolicyChecked { record } => Some(record),
2089                _ => None,
2090            })
2091            .expect("policy checked event");
2092        assert_eq!(policy_event.tool_name, "write");
2093        assert_eq!(
2094            policy_event.autonomy_mode,
2095            crate::workflow::AutonomyMode::default()
2096        );
2097        assert!(matches!(
2098            policy_event.decision,
2099            crate::reference_monitor::ToolPolicyDecision::Deny { .. }
2100        ));
2101
2102        let result = events
2103            .iter()
2104            .find_map(|event| match event {
2105                AgentEvent::ToolExecutionEnd { result, .. } => Some(result),
2106                _ => None,
2107            })
2108            .expect("tool end event");
2109        assert!(result.is_error);
2110        assert_eq!(
2111            tool_result_text(&Message::ToolResult(result.clone())),
2112            Some("Tool 'write' is not available in reviewer mode")
2113        );
2114
2115        let checkpoint = events.iter().rev().find_map(|event| match event {
2116            AgentEvent::RecoveryCheckpoint { checkpoint } => checkpoint.error_class.as_deref(),
2117            _ => None,
2118        });
2119        assert_eq!(checkpoint, Some("mode_blocked"));
2120    }
2121
2122    #[tokio::test]
2123    async fn emits_turn_assessment_event_for_execution_blocker() {
2124        let provider = Arc::new(MockProvider::new(vec![
2125            tool_call_response(
2126                "call_check",
2127                "bash",
2128                serde_json::json!({"command": "cargo check -p definitely_missing_crate", "timeout": 1}),
2129                100,
2130                20,
2131            ),
2132            text_response("The check failed.", 120, 20),
2133        ]));
2134
2135        let model = test_model(provider);
2136        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2137        agent.mode = AgentMode::Full;
2138        agent.tools.register(Arc::new(crate::tools::bash::BashTool));
2139
2140        let events_task = tokio::spawn(collect_events(handle));
2141        agent.run("Run the check".to_string()).await.unwrap();
2142        drop(agent);
2143        let events = events_task.await.unwrap();
2144
2145        let assessment = events.iter().find_map(|event| match event {
2146            AgentEvent::TurnAssessment { assessment, .. } => Some(assessment),
2147            _ => None,
2148        });
2149
2150        let assessment = assessment.expect("turn assessment emitted");
2151        assert_eq!(
2152            assessment.runtime.execution_stop_reason.as_deref(),
2153            Some("execution_blocked")
2154        );
2155        assert_eq!(
2156            assessment.chosen_action,
2157            NextActionDebugView::Stop {
2158                reason: "execution_blocked".to_string(),
2159            }
2160        );
2161    }
2162
2163    #[tokio::test]
2164    async fn emits_turn_assessment_event_for_continue_recommendation() {
2165        let provider = Arc::new(MockProvider::new(vec![
2166            vec![
2167                StreamEvent::MessageStart {
2168                    model: "test-model".to_string(),
2169                },
2170                StreamEvent::ToolCall {
2171                    id: "call_1".to_string(),
2172                    name: "mana".to_string(),
2173                    arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2174                },
2175                StreamEvent::TextDelta {
2176                    text: "Done. Updated mana and next step is ready to continue.".to_string(),
2177                },
2178                StreamEvent::MessageEnd {
2179                    message: AssistantMessage {
2180                        content: vec![
2181                            ContentBlock::ToolCall {
2182                                id: "call_1".to_string(),
2183                                name: "mana".to_string(),
2184                                arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2185                            },
2186                            ContentBlock::Text {
2187                                text: "Done. Updated mana and next step is ready to continue."
2188                                    .to_string(),
2189                            },
2190                        ],
2191                        usage: Some(Usage {
2192                            input_tokens: 100,
2193                            output_tokens: 20,
2194                            cache_read_tokens: 0,
2195                            cache_write_tokens: 0,
2196                        }),
2197                        stop_reason: LlmStopReason::ToolUse,
2198                        timestamp: 1000,
2199                    },
2200                },
2201            ],
2202            text_response("Stopped after visible mana turn.", 120, 25),
2203        ]));
2204
2205        let model = test_model(provider);
2206        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2207        agent.mode = AgentMode::Planner;
2208        agent.continue_policy = ContinuePolicy::Balanced;
2209        agent
2210            .tools
2211            .register(Arc::new(crate::tools::mana::ManaTool::default()));
2212
2213        let events_task = tokio::spawn(collect_events(handle));
2214        agent.run("Do the next thing".to_string()).await.unwrap();
2215        drop(agent);
2216        let events = events_task.await.unwrap();
2217
2218        let assessment = events.iter().find_map(|event| match event {
2219            AgentEvent::TurnAssessment { assessment, .. } => Some(assessment),
2220            _ => None,
2221        });
2222
2223        let assessment = assessment.expect("turn assessment emitted");
2224        let recommendation = assessment
2225            .continue_recommendation
2226            .as_ref()
2227            .expect("continue recommendation present");
2228        assert_eq!(recommendation.reason, "high_confidence_visible_next_step");
2229        assert!(matches!(
2230            assessment.chosen_action,
2231            NextActionDebugView::Continue { .. }
2232        ));
2233    }
2234
2235    #[test]
2236    fn post_turn_assessment_prefers_execution_blocker_over_completion() {
2237        let assessment = PostTurnAssessment {
2238            runtime: RuntimeEvidence {
2239                repeated_action: false,
2240                execution_stop_reason: Some(StopReason::ExecutionBlocked),
2241                work_completed: true,
2242                execution_debt: false,
2243                execution_evidence: false,
2244                planning_only_progress: false,
2245            },
2246            mana: ManaEvidence {
2247                stop_reason: Some(StopReason::DecompositionCompleted),
2248            },
2249            text_fallback: TextFallbackEvidence {
2250                planner_stop_reason: Some(StopReason::DecompositionCompleted),
2251                execution_stop_reason: Some(StopReason::WorkCompleted),
2252            },
2253            continue_recommendation: Some(ContinueRecommendation {
2254                prompt: "continue".to_string(),
2255                reason: ContinueReason::HighConfidenceVisibleNextStep,
2256            }),
2257        };
2258
2259        assert_eq!(
2260            assessment.into_next_action(),
2261            NextAction::Stop {
2262                reason: StopReason::ExecutionBlocked,
2263            }
2264        );
2265    }
2266
2267    #[test]
2268    fn post_turn_assessment_emits_continue_when_no_stop_reason_exists() {
2269        let assessment = PostTurnAssessment {
2270            runtime: RuntimeEvidence {
2271                repeated_action: false,
2272                execution_stop_reason: None,
2273                work_completed: false,
2274                execution_debt: false,
2275                execution_evidence: false,
2276                planning_only_progress: false,
2277            },
2278            mana: ManaEvidence { stop_reason: None },
2279            text_fallback: TextFallbackEvidence {
2280                planner_stop_reason: None,
2281                execution_stop_reason: None,
2282            },
2283            continue_recommendation: Some(ContinueRecommendation {
2284                prompt: "continue".to_string(),
2285                reason: ContinueReason::HighConfidenceVisibleNextStep,
2286            }),
2287        };
2288
2289        assert_eq!(
2290            assessment.into_next_action(),
2291            NextAction::Continue {
2292                prompt: "continue".to_string(),
2293                reason: ContinueReason::HighConfidenceVisibleNextStep,
2294            }
2295        );
2296    }
2297
2298    #[test]
2299    fn execution_debt_follow_up_is_preferred_before_stopping_for_planning_only_progress() {
2300        let assessment = PostTurnAssessment {
2301            runtime: RuntimeEvidence {
2302                repeated_action: false,
2303                execution_stop_reason: None,
2304                work_completed: false,
2305                execution_debt: true,
2306                execution_evidence: false,
2307                planning_only_progress: false,
2308            },
2309            mana: ManaEvidence { stop_reason: None },
2310            text_fallback: TextFallbackEvidence {
2311                planner_stop_reason: None,
2312                execution_stop_reason: None,
2313            },
2314            continue_recommendation: Some(ContinueRecommendation {
2315                prompt: execution_debt_follow_up_text().to_string(),
2316                reason: ContinueReason::ExecutionDebt,
2317            }),
2318        };
2319
2320        assert_eq!(
2321            assessment.into_next_action(),
2322            NextAction::Continue {
2323                prompt: execution_debt_follow_up_text().to_string(),
2324                reason: ContinueReason::ExecutionDebt,
2325            }
2326        );
2327    }
2328
2329    #[test]
2330    fn mana_planning_without_execution_creates_execution_debt_follow_up() {
2331        let result = imp_llm::ToolResultMessage {
2332            tool_call_id: "call_mana".to_string(),
2333            tool_name: "mana".to_string(),
2334            content: vec![ContentBlock::Text {
2335                text: "Created task".to_string(),
2336            }],
2337            is_error: false,
2338            details: serde_json::json!({ "action": "create" }),
2339            timestamp: 0,
2340        };
2341
2342        assert!(tool_results_indicate_execution_debt(
2343            std::slice::from_ref(&result),
2344            AgentMode::Full
2345        ));
2346        assert!(!tool_results_indicate_execution_evidence(
2347            std::slice::from_ref(&result),
2348            AgentMode::Full
2349        ));
2350        assert!(should_queue_execution_debt_follow_up(
2351            true, false, false, true
2352        ));
2353    }
2354
2355    #[test]
2356    fn mutating_tool_call_satisfies_execution_evidence() {
2357        let result = imp_llm::ToolResultMessage {
2358            tool_call_id: "call_edit".to_string(),
2359            tool_name: "edit".to_string(),
2360            content: vec![ContentBlock::Text {
2361                text: "diff".to_string(),
2362            }],
2363            is_error: false,
2364            details: serde_json::json!({ "path": "src/lib.rs" }),
2365            timestamp: 0,
2366        };
2367
2368        assert!(tool_results_indicate_execution_evidence(
2369            &[result],
2370            AgentMode::Full
2371        ));
2372        assert!(!should_queue_execution_debt_follow_up(
2373            true, true, false, true
2374        ));
2375    }
2376
2377    #[test]
2378    fn tool_results_indicate_execution_blocker_detects_failed_verify() {
2379        let result = imp_llm::ToolResultMessage {
2380            tool_call_id: "call_verify".to_string(),
2381            tool_name: "mana".to_string(),
2382            content: vec![ContentBlock::Text {
2383                text: "Verify failed".to_string(),
2384            }],
2385            is_error: true,
2386            details: serde_json::json!({
2387                "action": "verify",
2388                "passed": false,
2389                "exit_code": 1
2390            }),
2391            timestamp: 0,
2392        };
2393
2394        assert_eq!(
2395            tool_results_indicate_execution_blocker(&[result], AgentMode::Full),
2396            Some(StopReason::ExecutionBlocked)
2397        );
2398    }
2399
2400    #[test]
2401    fn tool_results_indicate_execution_blocker_detects_ask_tool_as_user_blocker() {
2402        let result = imp_llm::ToolResultMessage {
2403            tool_call_id: "call_ask".to_string(),
2404            tool_name: "ask_user".to_string(),
2405            content: vec![ContentBlock::Text {
2406                text: "blue".to_string(),
2407            }],
2408            is_error: false,
2409            details: serde_json::Value::Null,
2410            timestamp: 0,
2411        };
2412
2413        assert_eq!(
2414            tool_results_indicate_execution_blocker(&[result], AgentMode::Full),
2415            Some(StopReason::UserBlocker)
2416        );
2417    }
2418
2419    #[test]
2420    fn tool_results_indicate_work_completed_detects_edit_plus_successful_check() {
2421        let edit_result = imp_llm::ToolResultMessage {
2422            tool_call_id: "call_edit".to_string(),
2423            tool_name: "edit".to_string(),
2424            content: vec![ContentBlock::Text {
2425                text: "diff output".to_string(),
2426            }],
2427            is_error: false,
2428            details: serde_json::json!({
2429                "path": "/tmp/file.rs"
2430            }),
2431            timestamp: 0,
2432        };
2433        let check_result = imp_llm::ToolResultMessage {
2434            tool_call_id: "call_check".to_string(),
2435            tool_name: "bash".to_string(),
2436            content: vec![ContentBlock::Text {
2437                text: "ok".to_string(),
2438            }],
2439            is_error: false,
2440            details: serde_json::json!({
2441                "exit_code": 0,
2442                "command": "cargo check -p imp-core"
2443            }),
2444            timestamp: 0,
2445        };
2446
2447        assert!(tool_results_indicate_work_completed(
2448            &[edit_result, check_result],
2449            AgentMode::Full
2450        ));
2451    }
2452
2453    #[test]
2454    fn tool_results_indicate_work_completed_detects_closed_unit_details() {
2455        let result = imp_llm::ToolResultMessage {
2456            tool_call_id: "call_close".to_string(),
2457            tool_name: "mana".to_string(),
2458            content: vec![ContentBlock::Text {
2459                text: "Closed unit 1".to_string(),
2460            }],
2461            is_error: false,
2462            details: serde_json::json!({
2463                "action": "close",
2464                "unit": {
2465                    "id": "1",
2466                    "title": "Test unit",
2467                    "status": "closed"
2468                }
2469            }),
2470            timestamp: 0,
2471        };
2472
2473        assert!(tool_results_indicate_work_completed(
2474            &[result],
2475            AgentMode::Full
2476        ));
2477    }
2478
2479    #[test]
2480    fn mana_review_needs_decision_maps_to_user_blocker() {
2481        let review = TurnManaReview {
2482            turn_index: 0,
2483            state: ManaReviewState::NeedsDecision,
2484            scope: crate::mana_review::ManaReviewScope::default(),
2485            anchor_unit: None,
2486            touched_units: Vec::new(),
2487            proposed_children: Vec::new(),
2488            material_field_changes: Vec::new(),
2489            notes_appended: Vec::new(),
2490            decision_events: Vec::new(),
2491            unresolved_consequential_choices: Vec::new(),
2492            next_question: Some("Which path should we take?".to_string()),
2493        };
2494
2495        assert_eq!(
2496            mana_review_stop_reason(&review, AgentMode::Planner),
2497            Some(StopReason::UserBlocker)
2498        );
2499    }
2500
2501    #[test]
2502    fn mana_review_changed_with_planner_children_maps_to_decomposition_completed() {
2503        let review = TurnManaReview {
2504            turn_index: 0,
2505            state: ManaReviewState::Changed,
2506            scope: crate::mana_review::ManaReviewScope::default(),
2507            anchor_unit: None,
2508            touched_units: Vec::new(),
2509            proposed_children: vec![crate::mana_review::TurnManaProposedChild {
2510                unit: crate::mana_review::ManaUnitRef::new(
2511                    "28.6.1",
2512                    "child",
2513                    Some("job".to_string()),
2514                ),
2515                parent: crate::mana_review::ManaUnitRef::new(
2516                    "28.6",
2517                    "parent",
2518                    Some("epic".to_string()),
2519                ),
2520                child_kind: crate::mana_review::ManaReviewUnitKind::Job,
2521                child_origin: crate::mana_review::ManaUnitOrigin::CreatedInTurn,
2522            }],
2523            material_field_changes: Vec::new(),
2524            notes_appended: Vec::new(),
2525            decision_events: Vec::new(),
2526            unresolved_consequential_choices: Vec::new(),
2527            next_question: None,
2528        };
2529
2530        assert_eq!(
2531            mana_review_stop_reason(&review, AgentMode::Planner),
2532            Some(StopReason::DecompositionCompleted)
2533        );
2534    }
2535
2536    #[tokio::test]
2537    async fn planner_stops_after_decomposition_is_externalized() {
2538        let provider = Arc::new(MockProvider::new(vec![text_response(
2539            "Externalized into mana. Plan is complete and ready for handoff.",
2540            100,
2541            20,
2542        )]));
2543
2544        let model = test_model(provider);
2545        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2546        agent.mode = AgentMode::Planner;
2547        agent.has_mana_skill = true;
2548
2549        agent.run("Plan the rollout".to_string()).await.unwrap();
2550
2551        let user_texts: Vec<String> = agent
2552            .messages
2553            .iter()
2554            .filter_map(|message| match message {
2555                Message::User(user) => user.content.iter().find_map(|block| match block {
2556                    ContentBlock::Text { text } => Some(text.clone()),
2557                    _ => None,
2558                }),
2559                _ => None,
2560            })
2561            .collect();
2562
2563        assert_eq!(user_texts, vec!["Plan the rollout".to_string()]);
2564    }
2565
2566    #[tokio::test]
2567    async fn planner_stops_for_user_blocker_instead_of_auto_follow_up() {
2568        let provider = Arc::new(MockProvider::new(vec![text_response(
2569            "Blocked: I need your input on which auth direction we should choose before continuing.",
2570            100,
2571            20,
2572        )]));
2573
2574        let model = test_model(provider);
2575        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2576        agent.mode = AgentMode::Planner;
2577        agent.has_mana_skill = true;
2578
2579        agent.run("Plan the rollout".to_string()).await.unwrap();
2580
2581        let user_texts: Vec<String> = agent
2582            .messages
2583            .iter()
2584            .filter_map(|message| match message {
2585                Message::User(user) => user.content.iter().find_map(|block| match block {
2586                    ContentBlock::Text { text } => Some(text.clone()),
2587                    _ => None,
2588                }),
2589                _ => None,
2590            })
2591            .collect();
2592
2593        assert_eq!(user_texts, vec!["Plan the rollout".to_string()]);
2594    }
2595
2596    #[tokio::test]
2597    async fn agent_queues_confidence_continue_follow_up_after_visible_mana_turn() {
2598        let provider = Arc::new(MockProvider::new(vec![
2599            vec![
2600                StreamEvent::MessageStart {
2601                    model: "test-model".to_string(),
2602                },
2603                StreamEvent::ToolCall {
2604                    id: "call_1".to_string(),
2605                    name: "mana".to_string(),
2606                    arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2607                },
2608                StreamEvent::TextDelta {
2609                    text: "Done. Updated mana and next step is ready to continue.".to_string(),
2610                },
2611                StreamEvent::MessageEnd {
2612                    message: AssistantMessage {
2613                        content: vec![
2614                            ContentBlock::ToolCall {
2615                                id: "call_1".to_string(),
2616                                name: "mana".to_string(),
2617                                arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2618                            },
2619                            ContentBlock::Text {
2620                                text: "Done. Updated mana and next step is ready to continue."
2621                                    .to_string(),
2622                            },
2623                        ],
2624                        usage: Some(Usage {
2625                            input_tokens: 100,
2626                            output_tokens: 20,
2627                            cache_read_tokens: 0,
2628                            cache_write_tokens: 0,
2629                        }),
2630                        stop_reason: LlmStopReason::ToolUse,
2631                        timestamp: 1000,
2632                    },
2633                },
2634            ],
2635            text_response("Continuing.", 120, 25),
2636        ]));
2637
2638        let model = test_model(provider);
2639        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2640        agent.mode = AgentMode::Planner;
2641        agent.continue_policy = ContinuePolicy::Balanced;
2642        agent
2643            .tools
2644            .register(Arc::new(crate::tools::mana::ManaTool::default()));
2645
2646        agent.run("Do the next thing".to_string()).await.unwrap();
2647
2648        let user_texts: Vec<String> = agent
2649            .messages
2650            .iter()
2651            .filter_map(|message| match message {
2652                Message::User(user) => user.content.iter().find_map(|block| match block {
2653                    ContentBlock::Text { text } => Some(text.clone()),
2654                    _ => None,
2655                }),
2656                _ => None,
2657            })
2658            .collect();
2659
2660        assert_eq!(user_texts.len(), 2);
2661        assert!(user_texts[1].contains("Confidence is high"));
2662    }
2663
2664    #[tokio::test]
2665    async fn agent_does_not_queue_confidence_continue_when_policy_disabled() {
2666        let provider = Arc::new(MockProvider::new(vec![
2667            vec![
2668                StreamEvent::MessageStart {
2669                    model: "test-model".to_string(),
2670                },
2671                StreamEvent::ToolCall {
2672                    id: "call_1".to_string(),
2673                    name: "mana".to_string(),
2674                    arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2675                },
2676                StreamEvent::TextDelta {
2677                    text: "Done. Updated mana and next step is ready to continue.".to_string(),
2678                },
2679                StreamEvent::MessageEnd {
2680                    message: AssistantMessage {
2681                        content: vec![
2682                            ContentBlock::ToolCall {
2683                                id: "call_1".to_string(),
2684                                name: "mana".to_string(),
2685                                arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2686                            },
2687                            ContentBlock::Text {
2688                                text: "Done. Updated mana and next step is ready to continue."
2689                                    .to_string(),
2690                            },
2691                        ],
2692                        usage: Some(Usage {
2693                            input_tokens: 100,
2694                            output_tokens: 20,
2695                            cache_read_tokens: 0,
2696                            cache_write_tokens: 0,
2697                        }),
2698                        stop_reason: LlmStopReason::ToolUse,
2699                        timestamp: 1000,
2700                    },
2701                },
2702            ],
2703            text_response("Stopped after visible mana turn.", 120, 25),
2704        ]));
2705
2706        let model = test_model(provider);
2707        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2708        agent.mode = AgentMode::Planner;
2709        agent.continue_policy = ContinuePolicy::Disabled;
2710        agent
2711            .tools
2712            .register(Arc::new(crate::tools::mana::ManaTool::default()));
2713
2714        agent.run("Do the next thing".to_string()).await.unwrap();
2715
2716        let user_texts: Vec<String> = agent
2717            .messages
2718            .iter()
2719            .filter_map(|message| match message {
2720                Message::User(user) => user.content.iter().find_map(|block| match block {
2721                    ContentBlock::Text { text } => Some(text.clone()),
2722                    _ => None,
2723                }),
2724                _ => None,
2725            })
2726            .collect();
2727
2728        assert_eq!(user_texts, vec!["Do the next thing".to_string()]);
2729    }
2730
2731    #[tokio::test]
2732    async fn agent_does_not_queue_externalization_follow_up_after_mana_tool_turn() {
2733        let provider = Arc::new(MockProvider::new(vec![
2734            tool_call_response(
2735                "call_1",
2736                "mana",
2737                serde_json::json!({"action": "status"}),
2738                100,
2739                20,
2740            ),
2741            text_response("Done after mana", 120, 25),
2742        ]));
2743
2744        let model = test_model(provider);
2745        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2746        agent.has_mana_skill = true;
2747        agent.mode = AgentMode::Planner;
2748        agent
2749            .tools
2750            .register(Arc::new(crate::tools::mana::ManaTool::default()));
2751
2752        agent.run("Plan the rollout".to_string()).await.unwrap();
2753
2754        let user_texts: Vec<String> = agent
2755            .messages
2756            .iter()
2757            .filter_map(|message| match message {
2758                Message::User(user) => user.content.iter().find_map(|block| match block {
2759                    ContentBlock::Text { text } => Some(text.clone()),
2760                    _ => None,
2761                }),
2762                _ => None,
2763            })
2764            .collect();
2765
2766        assert_eq!(user_texts, vec!["Plan the rollout".to_string()]);
2767    }
2768
2769    #[tokio::test]
2770    async fn agent_queues_mana_basics_hint_for_worker_mana_requests() {
2771        let provider = Arc::new(MockProvider::new(vec![
2772            text_response("Loaded basics skill", 100, 20),
2773            text_response("Done", 120, 25),
2774        ]));
2775
2776        let model = test_model(provider);
2777        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2778        agent.has_mana_basics_skill = true;
2779        agent.mode = AgentMode::Worker;
2780
2781        agent
2782            .run("Check mana status and logs for my unit".to_string())
2783            .await
2784            .unwrap();
2785
2786        let user_texts: Vec<String> = agent
2787            .messages
2788            .iter()
2789            .filter_map(|message| match message {
2790                Message::User(user) => user.content.iter().find_map(|block| match block {
2791                    ContentBlock::Text { text } => Some(text.clone()),
2792                    _ => None,
2793                }),
2794                _ => None,
2795            })
2796            .collect();
2797
2798        assert_eq!(user_texts.len(), 1);
2799        assert_eq!(user_texts[0], "Check mana status and logs for my unit");
2800    }
2801
2802    #[tokio::test]
2803    async fn agent_does_not_queue_mana_hint_without_matching_signal() {
2804        let provider = Arc::new(MockProvider::new(vec![text_response("No nudge", 100, 20)]));
2805
2806        let model = test_model(provider);
2807        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2808        agent.has_mana_skill = true;
2809        agent.mode = AgentMode::Planner;
2810
2811        agent
2812            .run("Explain how this parser works".to_string())
2813            .await
2814            .unwrap();
2815
2816        let user_texts: Vec<String> = agent
2817            .messages
2818            .iter()
2819            .filter_map(|message| match message {
2820                Message::User(user) => user.content.iter().find_map(|block| match block {
2821                    ContentBlock::Text { text } => Some(text.clone()),
2822                    _ => None,
2823                }),
2824                _ => None,
2825            })
2826            .collect();
2827
2828        assert_eq!(
2829            user_texts,
2830            vec!["Explain how this parser works".to_string()]
2831        );
2832    }
2833
2834    #[tokio::test]
2835    async fn agent_does_not_queue_mana_basics_hint_when_no_tools_available() {
2836        let provider = Arc::new(MockProvider::new(vec![text_response(
2837            "Loaded basics skill",
2838            100,
2839            20,
2840        )]));
2841
2842        let model = test_model(provider);
2843        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2844        agent.has_mana_basics_skill = true;
2845        agent.mode = AgentMode::Worker;
2846        agent.tools.retain(|_| false);
2847
2848        agent
2849            .run("Check mana status and logs for my unit".to_string())
2850            .await
2851            .unwrap();
2852
2853        let user_texts: Vec<String> = agent
2854            .messages
2855            .iter()
2856            .filter_map(|message| match message {
2857                Message::User(user) => user.content.iter().find_map(|block| match block {
2858                    ContentBlock::Text { text } => Some(text.clone()),
2859                    _ => None,
2860                }),
2861                _ => None,
2862            })
2863            .collect();
2864
2865        assert_eq!(
2866            user_texts,
2867            vec!["Check mana status and logs for my unit".to_string()]
2868        );
2869    }
2870
2871    #[tokio::test]
2872    async fn single_text_turn_with_no_tools_exits_cleanly() {
2873        let provider = Arc::new(MockProvider::new(vec![text_response("SMOKE_OK", 50, 10)]));
2874        let model = test_model(provider);
2875        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2876        agent.mode = AgentMode::Worker;
2877        agent.has_mana_basics_skill = true;
2878        agent.tools.retain(|_| false);
2879
2880        let events_task = tokio::spawn(collect_events(handle));
2881        let result = agent.run("Check mana status and finish".to_string()).await;
2882        drop(agent);
2883
2884        assert!(result.is_ok());
2885
2886        let events = events_task.await.unwrap();
2887        assert!(events
2888            .iter()
2889            .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
2890        assert!(!events.iter().any(|e| matches!(
2891            e,
2892            AgentEvent::Error { error } if error.contains("Max turns exceeded")
2893        )));
2894    }
2895
2896    #[tokio::test]
2897    async fn agent_emits_timing_events_in_order() {
2898        let provider = Arc::new(MockProvider::new(vec![text_response("timed", 10, 5)]));
2899        let model = test_model(provider);
2900        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2901
2902        let events_task = tokio::spawn(collect_events(handle));
2903        agent.run("time this".to_string()).await.unwrap();
2904        drop(agent);
2905
2906        let events = events_task.await.unwrap();
2907        let timings: Vec<_> = events
2908            .iter()
2909            .filter_map(|event| match event {
2910                AgentEvent::Timing { timing } => Some(timing.clone()),
2911                _ => None,
2912            })
2913            .collect();
2914
2915        assert!(timings.len() >= 7);
2916        assert_eq!(timings[0].stage, TimingStage::ContextAssemblyStart);
2917        assert_eq!(timings[1].stage, TimingStage::ContextAssemblyEnd);
2918        assert_eq!(timings[2].stage, TimingStage::LlmRequestStart);
2919        assert_eq!(timings[3].stage, TimingStage::FirstStreamEvent);
2920        assert_eq!(timings[4].stage, TimingStage::FirstTextDelta);
2921        assert!(timings
2922            .iter()
2923            .any(|timing| timing.stage == TimingStage::MessageEnd));
2924        assert!(timings
2925            .iter()
2926            .any(|timing| timing.stage == TimingStage::PostTurnAssessmentEnd));
2927
2928        for timing in timings {
2929            assert_eq!(timing.turn, 0);
2930            if let Some(since_llm_request_start_ms) = timing.since_llm_request_start_ms {
2931                assert!(timing.since_turn_start_ms >= since_llm_request_start_ms);
2932            }
2933        }
2934    }
2935
2936    #[tokio::test]
2937    async fn agent_streams_message_delta_before_message_end() {
2938        let provider = Arc::new(MockProvider::new_results(vec![vec![
2939            Ok(StreamEvent::MessageStart {
2940                model: "test-model".to_string(),
2941            }),
2942            Ok(StreamEvent::TextDelta {
2943                text: "streaming".to_string(),
2944            }),
2945            Ok(StreamEvent::MessageEnd {
2946                message: AssistantMessage {
2947                    content: vec![ContentBlock::Text {
2948                        text: "streaming".to_string(),
2949                    }],
2950                    usage: Some(Usage {
2951                        input_tokens: 10,
2952                        output_tokens: 5,
2953                        cache_read_tokens: 0,
2954                        cache_write_tokens: 0,
2955                    }),
2956                    stop_reason: LlmStopReason::EndTurn,
2957                    timestamp: 1000,
2958                },
2959            }),
2960        ]]));
2961
2962        let model = test_model(provider);
2963        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2964
2965        let events_task = tokio::spawn(collect_events(handle));
2966        agent.run("Say hi".to_string()).await.unwrap();
2967        drop(agent);
2968
2969        let events = events_task.await.unwrap();
2970        let text_delta_idx = events.iter().position(|event| {
2971            matches!(
2972                event,
2973                AgentEvent::MessageDelta {
2974                    delta: StreamEvent::TextDelta { text }
2975                } if text == "streaming"
2976            )
2977        });
2978        let turn_end_idx = events
2979            .iter()
2980            .position(|event| matches!(event, AgentEvent::TurnEnd { .. }));
2981
2982        assert!(text_delta_idx.is_some());
2983        assert!(turn_end_idx.is_some());
2984        assert!(text_delta_idx.unwrap() < turn_end_idx.unwrap());
2985    }
2986
2987    #[tokio::test]
2988    async fn agent_retries_before_first_meaningful_event_but_not_after() {
2989        let provider = Arc::new(MockProvider::new_results(vec![
2990            vec![
2991                Ok(StreamEvent::MessageStart {
2992                    model: "test-model".to_string(),
2993                }),
2994                Err(imp_llm::Error::Stream("startup failure".into())),
2995            ],
2996            vec![
2997                Ok(StreamEvent::MessageStart {
2998                    model: "test-model".to_string(),
2999                }),
3000                Ok(StreamEvent::TextDelta {
3001                    text: "recovered".to_string(),
3002                }),
3003                Ok(StreamEvent::MessageEnd {
3004                    message: AssistantMessage {
3005                        content: vec![ContentBlock::Text {
3006                            text: "recovered".to_string(),
3007                        }],
3008                        usage: Some(Usage {
3009                            input_tokens: 10,
3010                            output_tokens: 5,
3011                            cache_read_tokens: 0,
3012                            cache_write_tokens: 0,
3013                        }),
3014                        stop_reason: LlmStopReason::EndTurn,
3015                        timestamp: 1000,
3016                    },
3017                }),
3018            ],
3019        ]));
3020
3021        let model = test_model(provider);
3022        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3023
3024        let events_task = tokio::spawn(collect_events(handle));
3025        agent.run("Recover".to_string()).await.unwrap();
3026        drop(agent);
3027
3028        let events = events_task.await.unwrap();
3029        let text_delta = events.iter().position(|e| {
3030            matches!(
3031                e,
3032                AgentEvent::MessageDelta {
3033                    delta: StreamEvent::TextDelta { text }
3034                } if text == "recovered"
3035            )
3036        });
3037        let turn_end = events
3038            .iter()
3039            .position(|e| matches!(e, AgentEvent::TurnEnd { .. }));
3040
3041        assert!(text_delta.is_some());
3042        assert!(turn_end.is_some());
3043        assert!(text_delta.unwrap() < turn_end.unwrap());
3044    }
3045
3046    #[tokio::test]
3047    async fn agent_surfaces_error_after_partial_stream_without_retrying() {
3048        let provider = Arc::new(MockProvider::new_results(vec![vec![
3049            Ok(StreamEvent::TextDelta {
3050                text: "partial".to_string(),
3051            }),
3052            Err(imp_llm::Error::Stream("mid-stream failure".into())),
3053        ]]));
3054
3055        let model = test_model(provider);
3056        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3057
3058        let events_task = tokio::spawn(collect_events(handle));
3059        let result = agent.run("Fail midway".to_string()).await;
3060        drop(agent);
3061
3062        assert!(result.is_err());
3063
3064        let events = events_task.await.unwrap();
3065        let text_delta = events.iter().position(|e| {
3066            matches!(
3067                e,
3068                AgentEvent::MessageDelta {
3069                    delta: StreamEvent::TextDelta { text }
3070                } if text == "partial"
3071            )
3072        });
3073        let error_idx = events.iter().position(|e| {
3074            matches!(
3075                e,
3076                AgentEvent::Error { error }
3077                if error.contains("Provider stream failed after partial output")
3078                    && error.contains("mid-stream failure")
3079            )
3080        });
3081
3082        assert!(text_delta.is_some());
3083        assert!(error_idx.is_some());
3084        assert!(text_delta.unwrap() < error_idx.unwrap());
3085    }
3086
3087    #[tokio::test]
3088    async fn agent_treats_silent_eof_without_message_end_as_error() {
3089        let provider = Arc::new(MockProvider::new_results(vec![vec![Ok(
3090            StreamEvent::TextDelta {
3091                text: "partial".to_string(),
3092            },
3093        )]]));
3094
3095        let model = test_model(provider);
3096        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3097
3098        let events_task = tokio::spawn(collect_events(handle));
3099        let result = agent.run("Fail with silent eof".to_string()).await;
3100        drop(agent);
3101
3102        assert!(result.is_err());
3103
3104        let events = events_task.await.unwrap();
3105        let text_delta = events.iter().position(|e| {
3106            matches!(
3107                e,
3108                AgentEvent::MessageDelta {
3109                    delta: StreamEvent::TextDelta { text }
3110                } if text == "partial"
3111            )
3112        });
3113        let error_idx = events.iter().position(|e| {
3114            matches!(
3115                e,
3116                AgentEvent::Error { error }
3117                if error.contains("missing terminal completion event")
3118            )
3119        });
3120        let turn_end_idx = events
3121            .iter()
3122            .position(|e| matches!(e, AgentEvent::TurnEnd { .. }));
3123
3124        assert!(text_delta.is_some());
3125        assert!(error_idx.is_some());
3126        assert!(turn_end_idx.is_none());
3127        assert!(text_delta.unwrap() < error_idx.unwrap());
3128    }
3129
3130    // ── Test 1: Simple text response ───────────────────────────────
3131
3132    #[tokio::test]
3133    async fn agent_simple_text_response() {
3134        let provider = Arc::new(MockProvider::new(vec![text_response(
3135            "Hello, world!",
3136            100,
3137            20,
3138        )]));
3139
3140        let model = test_model(provider);
3141        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3142
3143        let events_task = tokio::spawn(collect_events(handle));
3144        agent.run("Say hello".to_string()).await.unwrap();
3145        drop(agent); // close event channel
3146
3147        let events = events_task.await.unwrap();
3148
3149        // Verify event order: AgentStart → TurnStart → deltas → TurnEnd → AgentEnd
3150        assert!(matches!(events[0], AgentEvent::AgentStart { .. }));
3151
3152        let turn_start = events
3153            .iter()
3154            .position(|e| matches!(e, AgentEvent::TurnStart { index: 0 }));
3155        assert!(turn_start.is_some());
3156
3157        let turn_end = events
3158            .iter()
3159            .position(|e| matches!(e, AgentEvent::TurnEnd { index: 0, .. }));
3160        assert!(turn_end.is_some());
3161        assert!(turn_end.unwrap() > turn_start.unwrap());
3162
3163        let agent_end = events
3164            .iter()
3165            .position(|e| matches!(e, AgentEvent::AgentEnd { .. }));
3166        assert!(agent_end.is_some());
3167        assert!(agent_end.unwrap() > turn_end.unwrap());
3168
3169        // Verify usage
3170        if let AgentEvent::AgentEnd { usage, cost, .. } = &events[agent_end.unwrap()] {
3171            assert_eq!(usage.input_tokens, 100);
3172            assert_eq!(usage.output_tokens, 20);
3173            assert!(cost.total > 0.0);
3174        } else {
3175            panic!("Expected AgentEnd");
3176        }
3177
3178        // Only one turn (no tool calls)
3179        let turn_starts: Vec<_> = events
3180            .iter()
3181            .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3182            .collect();
3183        assert_eq!(turn_starts.len(), 1);
3184    }
3185
3186    // ── Test 2: Single tool call → result → text response ──────────
3187
3188    #[tokio::test]
3189    async fn agent_single_tool_call() {
3190        let provider = Arc::new(MockProvider::new(vec![
3191            // Turn 0: model calls echo tool
3192            tool_call_response(
3193                "call_1",
3194                "echo",
3195                serde_json::json!({"text": "hello"}),
3196                100,
3197                30,
3198            ),
3199            // Turn 1: model responds with text after seeing tool result
3200            text_response("The echo said: hello", 200, 25),
3201        ]));
3202
3203        let model = test_model(provider);
3204        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3205        agent.tools.register(Arc::new(EchoTool));
3206
3207        let events_task = tokio::spawn(collect_events(handle));
3208        agent.run("Echo hello".to_string()).await.unwrap();
3209        drop(agent);
3210
3211        let events = events_task.await.unwrap();
3212
3213        // Should have 2 TurnStart events (turn 0 with tool, turn 1 with text)
3214        let turn_starts: Vec<_> = events
3215            .iter()
3216            .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3217            .collect();
3218        assert_eq!(turn_starts.len(), 2);
3219
3220        // Should have tool execution events
3221        let tool_starts: Vec<_> = events
3222            .iter()
3223            .filter(|e| matches!(e, AgentEvent::ToolExecutionStart { .. }))
3224            .collect();
3225        assert_eq!(tool_starts.len(), 1);
3226
3227        let tool_ends: Vec<_> = events
3228            .iter()
3229            .filter(|e| matches!(e, AgentEvent::ToolExecutionEnd { .. }))
3230            .collect();
3231        assert_eq!(tool_ends.len(), 1);
3232
3233        // Verify accumulated usage across turns (100 + 200 input, 30 + 25 output)
3234        if let Some(AgentEvent::AgentEnd { usage, .. }) = events
3235            .iter()
3236            .find(|e| matches!(e, AgentEvent::AgentEnd { .. }))
3237        {
3238            assert_eq!(usage.input_tokens, 300);
3239            assert_eq!(usage.output_tokens, 55);
3240        } else {
3241            panic!("Expected AgentEnd");
3242        }
3243    }
3244
3245    // ── Test 3: Multiple tool calls → follow-up tool calls → done ──
3246
3247    #[tokio::test]
3248    async fn agent_multiple_tool_calls() {
3249        let provider = Arc::new(MockProvider::new(vec![
3250            // Turn 0: model calls echo twice
3251            multi_tool_call_response(
3252                &[
3253                    ("call_1", "echo", serde_json::json!({"text": "first"})),
3254                    ("call_2", "echo", serde_json::json!({"text": "second"})),
3255                ],
3256                100,
3257                40,
3258            ),
3259            // Turn 1: model calls echo once more
3260            tool_call_response(
3261                "call_3",
3262                "echo",
3263                serde_json::json!({"text": "third"}),
3264                200,
3265                20,
3266            ),
3267            // Turn 2: model responds with final text
3268            text_response("All done!", 300, 10),
3269        ]));
3270
3271        let model = test_model(provider);
3272        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3273        agent.tools.register(Arc::new(EchoTool));
3274
3275        let events_task = tokio::spawn(collect_events(handle));
3276        agent.run("Echo three things".to_string()).await.unwrap();
3277        drop(agent);
3278
3279        let events = events_task.await.unwrap();
3280
3281        // 3 turns
3282        let turn_starts: Vec<_> = events
3283            .iter()
3284            .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3285            .collect();
3286        assert_eq!(turn_starts.len(), 3);
3287
3288        // 3 tool executions total
3289        let tool_starts: Vec<_> = events
3290            .iter()
3291            .filter(|e| matches!(e, AgentEvent::ToolExecutionStart { .. }))
3292            .collect();
3293        assert_eq!(tool_starts.len(), 3);
3294
3295        // Total usage: 100+200+300=600 input, 40+20+10=70 output
3296        if let Some(AgentEvent::AgentEnd { usage, .. }) = events
3297            .iter()
3298            .find(|e| matches!(e, AgentEvent::AgentEnd { .. }))
3299        {
3300            assert_eq!(usage.input_tokens, 600);
3301            assert_eq!(usage.output_tokens, 70);
3302        } else {
3303            panic!("Expected AgentEnd");
3304        }
3305    }
3306
3307    // ── Test 4: Cancel command mid-run ─────────────────────────────
3308
3309    #[tokio::test]
3310    async fn execution_stops_after_failed_verify_tool_result_without_blocked_text() {
3311        let provider = Arc::new(MockProvider::new(vec![
3312            tool_call_response(
3313                "call_verify",
3314                "mana",
3315                serde_json::json!({"action": "verify", "id": "1"}),
3316                100,
3317                20,
3318            ),
3319            text_response("Verify failed.", 120, 20),
3320        ]));
3321
3322        let model = test_model(provider);
3323        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3324        agent.mode = AgentMode::Full;
3325        agent
3326            .tools
3327            .register(Arc::new(crate::tools::mana::ManaTool::default()));
3328
3329        agent.run("Verify the unit".to_string()).await.unwrap();
3330
3331        let user_texts: Vec<String> = agent
3332            .messages
3333            .iter()
3334            .filter_map(|message| match message {
3335                Message::User(user) => user.content.iter().find_map(|block| match block {
3336                    ContentBlock::Text { text } => Some(text.clone()),
3337                    _ => None,
3338                }),
3339                _ => None,
3340            })
3341            .collect();
3342
3343        assert_eq!(user_texts, vec!["Verify the unit".to_string()]);
3344    }
3345
3346    #[tokio::test]
3347    async fn execution_stops_after_mana_close_tool_result_without_done_text() {
3348        let provider = Arc::new(MockProvider::new(vec![
3349            tool_call_response(
3350                "call_close",
3351                "mana",
3352                serde_json::json!({"action": "close", "id": "1"}),
3353                100,
3354                20,
3355            ),
3356            text_response("Unit closed.", 120, 20),
3357        ]));
3358
3359        let model = test_model(provider);
3360        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3361        agent.mode = AgentMode::Full;
3362        agent
3363            .tools
3364            .register(Arc::new(crate::tools::mana::ManaTool::default()));
3365
3366        agent.run("Close the unit".to_string()).await.unwrap();
3367
3368        let user_texts: Vec<String> = agent
3369            .messages
3370            .iter()
3371            .filter_map(|message| match message {
3372                Message::User(user) => user.content.iter().find_map(|block| match block {
3373                    ContentBlock::Text { text } => Some(text.clone()),
3374                    _ => None,
3375                }),
3376                _ => None,
3377            })
3378            .collect();
3379
3380        assert_eq!(user_texts, vec!["Close the unit".to_string()]);
3381    }
3382
3383    #[tokio::test]
3384    async fn execution_stops_after_work_completed_text() {
3385        let provider = Arc::new(MockProvider::new(vec![text_response(
3386            "All done! Implemented the change and finished the task.",
3387            100,
3388            20,
3389        )]));
3390
3391        let model = test_model(provider);
3392        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3393        agent.mode = AgentMode::Full;
3394
3395        agent.run("Implement the change".to_string()).await.unwrap();
3396
3397        let user_texts: Vec<String> = agent
3398            .messages
3399            .iter()
3400            .filter_map(|message| match message {
3401                Message::User(user) => user.content.iter().find_map(|block| match block {
3402                    ContentBlock::Text { text } => Some(text.clone()),
3403                    _ => None,
3404                }),
3405                _ => None,
3406            })
3407            .collect();
3408
3409        assert_eq!(user_texts, vec!["Implement the change".to_string()]);
3410    }
3411
3412    #[tokio::test]
3413    async fn execution_stops_for_user_blocker_text() {
3414        let provider = Arc::new(MockProvider::new(vec![text_response(
3415            "Blocked: I need your input on which path to take before continuing.",
3416            100,
3417            20,
3418        )]));
3419
3420        let model = test_model(provider);
3421        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3422        agent.mode = AgentMode::Full;
3423
3424        agent.run("Implement the change".to_string()).await.unwrap();
3425
3426        let user_texts: Vec<String> = agent
3427            .messages
3428            .iter()
3429            .filter_map(|message| match message {
3430                Message::User(user) => user.content.iter().find_map(|block| match block {
3431                    ContentBlock::Text { text } => Some(text.clone()),
3432                    _ => None,
3433                }),
3434                _ => None,
3435            })
3436            .collect();
3437
3438        assert_eq!(user_texts, vec!["Implement the change".to_string()]);
3439    }
3440
3441    #[tokio::test]
3442    async fn agent_follow_up_runs_after_current_work_finishes() {
3443        let provider = Arc::new(MockProvider::new(vec![
3444            tool_call_response(
3445                "call_1",
3446                "echo",
3447                serde_json::json!({"text": "hello"}),
3448                100,
3449                20,
3450            ),
3451            text_response("Handled follow-up", 120, 25),
3452        ]));
3453
3454        let model = test_model(provider);
3455        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3456        agent.tools.register(Arc::new(EchoTool));
3457
3458        handle
3459            .command_tx
3460            .send(AgentCommand::FollowUp("What next?".into()))
3461            .await
3462            .unwrap();
3463
3464        let events_task = tokio::spawn(collect_events(handle));
3465        agent.run("Do the first thing".to_string()).await.unwrap();
3466        drop(agent);
3467
3468        let events = events_task.await.unwrap();
3469        let turn_starts: Vec<_> = events
3470            .iter()
3471            .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3472            .collect();
3473        assert_eq!(turn_starts.len(), 2);
3474    }
3475
3476    #[tokio::test]
3477    async fn agent_follow_up_preserves_order_with_multiple_messages() {
3478        let provider = Arc::new(MockProvider::new(vec![
3479            tool_call_response(
3480                "call_1",
3481                "echo",
3482                serde_json::json!({"text": "hello"}),
3483                100,
3484                20,
3485            ),
3486            text_response("First follow-up handled", 120, 25),
3487            text_response("Second follow-up handled", 130, 30),
3488        ]));
3489
3490        let model = test_model(provider);
3491        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3492        agent.tools.register(Arc::new(EchoTool));
3493
3494        handle
3495            .command_tx
3496            .send(AgentCommand::FollowUp("follow up one".into()))
3497            .await
3498            .unwrap();
3499        handle
3500            .command_tx
3501            .send(AgentCommand::FollowUp("follow up two".into()))
3502            .await
3503            .unwrap();
3504
3505        agent.run("Do the first thing".to_string()).await.unwrap();
3506
3507        let user_texts: Vec<String> = agent
3508            .messages
3509            .iter()
3510            .filter_map(|message| match message {
3511                Message::User(user) => user.content.iter().find_map(|block| match block {
3512                    ContentBlock::Text { text } => Some(text.clone()),
3513                    _ => None,
3514                }),
3515                _ => None,
3516            })
3517            .collect();
3518
3519        assert_eq!(
3520            user_texts,
3521            vec![
3522                "Do the first thing".to_string(),
3523                "follow up one".to_string(),
3524                "follow up two".to_string()
3525            ]
3526        );
3527    }
3528
3529    #[tokio::test]
3530    async fn agent_cancel_still_wins_over_follow_up_queue() {
3531        let provider = Arc::new(MockProvider::new(vec![tool_call_response(
3532            "call_1",
3533            "echo",
3534            serde_json::json!({"text": "hello"}),
3535            100,
3536            20,
3537        )]));
3538
3539        let model = test_model(provider);
3540        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3541        agent.tools.register(Arc::new(EchoTool));
3542
3543        handle
3544            .command_tx
3545            .send(AgentCommand::FollowUp("queued later".into()))
3546            .await
3547            .unwrap();
3548        handle.command_tx.send(AgentCommand::Cancel).await.unwrap();
3549
3550        let result = agent.run("Do something".to_string()).await;
3551        assert!(matches!(result, Err(crate::error::Error::Cancelled)));
3552    }
3553
3554    #[test]
3555    fn mana_bash_equivalent_hint_handles_release_and_tree() {
3556        assert!(mana_bash_equivalent_hint("mana release 1").is_some());
3557        assert!(mana_bash_equivalent_hint("mana tree").is_some());
3558    }
3559
3560    #[test]
3561    fn mana_bash_equivalent_hint_ignores_non_mana_prefixes() {
3562        assert!(mana_bash_equivalent_hint("manatee status").is_none());
3563        assert!(mana_bash_equivalent_hint("./mana status").is_none());
3564    }
3565
3566    #[tokio::test]
3567    async fn agent_blocks_bash_mana_when_native_action_exists() {
3568        let provider = Arc::new(MockProvider::new(vec![
3569            tool_call_response(
3570                "call_1",
3571                "bash",
3572                serde_json::json!({"command": "mana status", "timeout": 5}),
3573                100,
3574                20,
3575            ),
3576            text_response("Recovered after native-mana hint", 120, 25),
3577        ]));
3578
3579        let model = test_model(provider);
3580        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3581        agent.tools.register(Arc::new(crate::tools::bash::BashTool));
3582
3583        let events_task = tokio::spawn(collect_events(handle));
3584        agent.run("Check mana state".to_string()).await.unwrap();
3585        drop(agent);
3586
3587        let events = events_task.await.unwrap();
3588        let tool_end = events.iter().find_map(|e| match e {
3589            AgentEvent::ToolExecutionEnd { result, .. } => Some(result),
3590            _ => None,
3591        });
3592        let tool_end = tool_end.expect("expected ToolExecutionEnd");
3593        assert!(tool_end.is_error);
3594        let text = tool_end
3595            .content
3596            .iter()
3597            .find_map(|b| match b {
3598                ContentBlock::Text { text } => Some(text.as_str()),
3599                _ => None,
3600            })
3601            .unwrap_or("");
3602        assert!(text.contains("Use the native mana tool"));
3603    }
3604
3605    #[tokio::test]
3606    async fn agent_allows_non_mana_bash_commands() {
3607        let provider = Arc::new(MockProvider::new(vec![
3608            tool_call_response(
3609                "call_1",
3610                "bash",
3611                serde_json::json!({"command": "printf 'ok'", "timeout": 5}),
3612                100,
3613                20,
3614            ),
3615            text_response("done", 120, 25),
3616        ]));
3617
3618        let model = test_model(provider);
3619        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3620        agent.tools.register(Arc::new(crate::tools::bash::BashTool));
3621
3622        agent.run("Run a shell command".to_string()).await.unwrap();
3623
3624        let tool_result = agent
3625            .messages
3626            .iter()
3627            .find_map(|message| match message {
3628                Message::ToolResult(result) => Some(result),
3629                _ => None,
3630            })
3631            .expect("expected tool result");
3632        assert!(!tool_result.is_error);
3633    }
3634
3635    #[tokio::test]
3636    async fn agent_cancel_mid_run() {
3637        let provider = Arc::new(MockProvider::new(vec![
3638            // Turn 0: tool call (agent will process this, then see Cancel before turn 1)
3639            tool_call_response(
3640                "call_1",
3641                "echo",
3642                serde_json::json!({"text": "hello"}),
3643                100,
3644                20,
3645            ),
3646            // Turn 1: this should never be reached
3647            text_response("Should not see this", 100, 20),
3648        ]));
3649
3650        let model = test_model(provider);
3651        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3652        agent.tools.register(Arc::new(EchoTool));
3653
3654        // Send cancel before the second turn
3655        handle.command_tx.send(AgentCommand::Cancel).await.unwrap();
3656
3657        let events_task = tokio::spawn(collect_events(handle));
3658        let result = agent.run("Do something".to_string()).await;
3659        drop(agent);
3660
3661        // Should return Cancelled error
3662        assert!(matches!(result, Err(crate::error::Error::Cancelled)));
3663
3664        let events = events_task.await.unwrap();
3665
3666        // Should have AgentEnd
3667        assert!(events
3668            .iter()
3669            .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
3670
3671        // Should NOT have a second turn
3672        let turn_starts: Vec<_> = events
3673            .iter()
3674            .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3675            .collect();
3676        assert!(turn_starts.len() <= 1);
3677    }
3678
3679    #[tokio::test]
3680    async fn single_text_turn_exits_cleanly() {
3681        let provider = Arc::new(MockProvider::new(vec![text_response("SMOKE_OK", 50, 10)]));
3682        let model = test_model(provider);
3683        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3684
3685        let events_task = tokio::spawn(collect_events(handle));
3686        let result = agent.run("Reply once and stop".to_string()).await;
3687        drop(agent);
3688
3689        assert!(result.is_ok());
3690
3691        let events = events_task.await.unwrap();
3692        assert!(events
3693            .iter()
3694            .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
3695        assert!(!events.iter().any(|e| matches!(
3696            e,
3697            AgentEvent::Error { error } if error.contains("Max turns exceeded")
3698        )));
3699    }
3700
3701    // ── Test 6: Unknown tool → error result → model self-corrects ──
3702
3703    #[tokio::test]
3704    async fn agent_unknown_tool_self_corrects() {
3705        let provider = Arc::new(MockProvider::new(vec![
3706            // Turn 0: model calls a tool that doesn't exist
3707            tool_call_response(
3708                "call_1",
3709                "nonexistent",
3710                serde_json::json!({"foo": "bar"}),
3711                100,
3712                20,
3713            ),
3714            // Turn 1: model self-corrects and responds with text
3715            text_response("Sorry, I used the wrong tool. Here's the answer.", 200, 30),
3716        ]));
3717
3718        let model = test_model(provider);
3719        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3720        // Deliberately NOT registering the "nonexistent" tool
3721
3722        let events_task = tokio::spawn(collect_events(handle));
3723        agent.run("Do something".to_string()).await.unwrap();
3724        drop(agent);
3725
3726        let events = events_task.await.unwrap();
3727
3728        // The tool execution should produce an error result
3729        let tool_end = events
3730            .iter()
3731            .find(|e| matches!(e, AgentEvent::ToolExecutionEnd { .. }));
3732        assert!(tool_end.is_some());
3733        if let Some(AgentEvent::ToolExecutionEnd { result, .. }) = tool_end {
3734            assert!(result.is_error);
3735            let text = result.content.iter().find_map(|c| {
3736                if let ContentBlock::Text { text } = c {
3737                    Some(text.as_str())
3738                } else {
3739                    None
3740                }
3741            });
3742            assert!(text.unwrap().contains("Unknown tool"));
3743        }
3744
3745        // Model should have self-corrected in turn 1
3746        let turn_starts: Vec<_> = events
3747            .iter()
3748            .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3749            .collect();
3750        assert_eq!(turn_starts.len(), 2);
3751
3752        // Should complete successfully
3753        assert!(events
3754            .iter()
3755            .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
3756    }
3757
3758    #[tokio::test]
3759    async fn agent_concurrent_readonly() {
3760        let shared = Arc::new(ConcurrentReadonlyState::new(3));
3761        let provider = Arc::new(MockProvider::new(vec![
3762            multi_tool_call_response(
3763                &[
3764                    ("call_ro_1", "echo_a", serde_json::json!({"text": "first"})),
3765                    (
3766                        "call_write",
3767                        "write_after_reads",
3768                        serde_json::json!({"data": "mutate"}),
3769                    ),
3770                    ("call_ro_2", "echo_b", serde_json::json!({"text": "second"})),
3771                    ("call_ro_3", "echo_c", serde_json::json!({"text": "third"})),
3772                ],
3773                100,
3774                40,
3775            ),
3776            text_response("All tools finished", 150, 20),
3777        ]));
3778
3779        let model = test_model(provider);
3780        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3781        drop(handle);
3782
3783        agent.tools.register(Arc::new(CoordinatedReadonlyTool {
3784            name: "echo_a",
3785            shared: shared.clone(),
3786        }));
3787        agent.tools.register(Arc::new(CoordinatedReadonlyTool {
3788            name: "echo_b",
3789            shared: shared.clone(),
3790        }));
3791        agent.tools.register(Arc::new(CoordinatedReadonlyTool {
3792            name: "echo_c",
3793            shared: shared.clone(),
3794        }));
3795        agent.tools.register(Arc::new(CoordinatedMutableTool {
3796            shared: shared.clone(),
3797        }));
3798
3799        tokio::time::timeout(
3800            Duration::from_millis(250),
3801            agent.run("Run all tools".to_string()),
3802        )
3803        .await
3804        .expect("read-only tools should not block each other")
3805        .expect("agent should complete successfully");
3806
3807        let tool_result_ids: Vec<_> = agent
3808            .messages
3809            .iter()
3810            .filter_map(|message| match message {
3811                Message::ToolResult(result) => Some(result.tool_call_id.as_str()),
3812                _ => None,
3813            })
3814            .collect();
3815        assert_eq!(
3816            tool_result_ids,
3817            vec!["call_ro_1", "call_write", "call_ro_2", "call_ro_3"]
3818        );
3819
3820        assert_eq!(shared.readonly_started.load(Ordering::SeqCst), 3);
3821        assert_eq!(shared.readonly_finished.load(Ordering::SeqCst), 3);
3822        assert_eq!(shared.mutable_observed_finished.load(Ordering::SeqCst), 3);
3823
3824        let log = shared.log.lock().expect("concurrent log lock").clone();
3825        assert_eq!(
3826            log.last().map(String::as_str),
3827            Some("write_after_reads:start")
3828        );
3829    }
3830
3831    // ── Event ordering validation ──────────────────────────────────
3832
3833    #[tokio::test]
3834    async fn agent_event_ordering() {
3835        let provider = Arc::new(MockProvider::new(vec![
3836            tool_call_response(
3837                "call_1",
3838                "echo",
3839                serde_json::json!({"text": "hello"}),
3840                50,
3841                10,
3842            ),
3843            text_response("Done", 50, 10),
3844        ]));
3845
3846        let model = test_model(provider);
3847        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3848        agent.tools.register(Arc::new(EchoTool));
3849
3850        let events_task = tokio::spawn(collect_events(handle));
3851        agent.run("test".to_string()).await.unwrap();
3852        drop(agent);
3853
3854        let events = events_task.await.unwrap();
3855
3856        // Extract event types in order
3857        let types: Vec<&str> = events
3858            .iter()
3859            .map(|e| match e {
3860                AgentEvent::AgentStart { .. } => "AgentStart",
3861                AgentEvent::AgentEnd { .. } => "AgentEnd",
3862                AgentEvent::TurnStart { .. } => "TurnStart",
3863                AgentEvent::TurnEnd { .. } => "TurnEnd",
3864                AgentEvent::MessageDelta { .. } => "MessageDelta",
3865                AgentEvent::ToolExecutionStart { .. } => "ToolExecStart",
3866                AgentEvent::ToolExecutionEnd { .. } => "ToolExecEnd",
3867                AgentEvent::Warning { .. } => "Warning",
3868                AgentEvent::EvidenceWritten { .. } => "EvidenceWritten",
3869                AgentEvent::VerificationStarted { .. } => "VerificationStarted",
3870                AgentEvent::VerificationCompleted { .. } => "VerificationCompleted",
3871                AgentEvent::PolicyChecked { .. } => "PolicyChecked",
3872                AgentEvent::Error { .. } => "Error",
3873                _ => "Other",
3874            })
3875            .collect();
3876
3877        // Must start with AgentStart
3878        assert_eq!(types[0], "AgentStart");
3879
3880        // Must end with AgentEnd
3881        assert_eq!(types[types.len() - 1], "AgentEnd");
3882
3883        // TurnStart must come before TurnEnd for each turn
3884        let mut turn_start_indices: Vec<usize> = Vec::new();
3885        let mut turn_end_indices: Vec<usize> = Vec::new();
3886        for (i, t) in types.iter().enumerate() {
3887            if *t == "TurnStart" {
3888                turn_start_indices.push(i);
3889            }
3890            if *t == "TurnEnd" {
3891                turn_end_indices.push(i);
3892            }
3893        }
3894        assert_eq!(turn_start_indices.len(), 2);
3895        assert_eq!(turn_end_indices.len(), 2);
3896        for i in 0..turn_start_indices.len() {
3897            assert!(turn_start_indices[i] < turn_end_indices[i]);
3898        }
3899
3900        // ToolExecStart must come before ToolExecEnd
3901        let tool_start = types.iter().position(|t| *t == "ToolExecStart");
3902        let tool_end = types.iter().position(|t| *t == "ToolExecEnd");
3903        assert!(tool_start.is_some());
3904        assert!(tool_end.is_some());
3905        assert!(tool_start.unwrap() < tool_end.unwrap());
3906    }
3907
3908    #[tokio::test]
3909    async fn agent_fires_hooks() {
3910        let provider = Arc::new(MockProvider::new(vec![text_response("hooked", 100, 20)]));
3911        let model = test_model(provider);
3912        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3913        drop(handle);
3914
3915        let hook_calls = Arc::new(AtomicUsize::new(0));
3916        let hook_calls_for_callback = hook_calls.clone();
3917        agent.hooks.register(crate::hooks::HookDefinition {
3918            event: "before_llm_call".to_string(),
3919            match_pattern: None,
3920            action: crate::hooks::HookAction::Callback(Arc::new(move |_event| {
3921                hook_calls_for_callback.fetch_add(1, Ordering::SeqCst);
3922                crate::hooks::HookResult::default()
3923            })),
3924            blocking: true,
3925            threshold: None,
3926        });
3927
3928        agent.run("Run once".to_string()).await.unwrap();
3929
3930        assert_eq!(hook_calls.load(Ordering::SeqCst), 1);
3931    }
3932
3933    #[tokio::test]
3934    async fn agent_context_masking() {
3935        let provider = Arc::new(MockProvider::new(vec![text_response("done", 100, 20)]));
3936
3937        let mut seeded_messages = Vec::new();
3938        for index in 0..12 {
3939            let call_id = format!("call_{index}");
3940            seeded_messages.push(make_assistant_tool_call(
3941                &call_id,
3942                "read",
3943                serde_json::json!({"path": format!("src/file_{index}.rs")}),
3944            ));
3945            seeded_messages.push(make_tool_result(&call_id, "read", &"x".repeat(400)));
3946        }
3947
3948        let mut usage_messages = seeded_messages.clone();
3949        usage_messages.push(Message::user("trigger masking"));
3950        let provisional_model = test_model(provider.clone());
3951        let usage = crate::context::context_usage(&usage_messages, &provisional_model);
3952        let context_window = ((usage.used as f64) / 0.7).ceil() as u32;
3953
3954        let model = test_model_with_context_window(provider, context_window.max(1));
3955        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3956        drop(handle);
3957        agent.messages = seeded_messages;
3958
3959        agent.run("trigger masking".to_string()).await.unwrap();
3960
3961        let masked = tool_result_text(&agent.messages[1]).expect("first tool result text");
3962        assert!(masked.starts_with("[Output omitted"));
3963
3964        let recent_index = (10 * 2) + 1;
3965        let recent =
3966            tool_result_text(&agent.messages[recent_index]).expect("recent tool result text");
3967        let expected_recent = "x".repeat(400);
3968        assert_eq!(recent, expected_recent.as_str());
3969    }
3970
3971    #[tokio::test]
3972    async fn agent_masks_observations_when_context_is_tight() {
3973        let provider = Arc::new(MockProvider::new(vec![text_response("done", 100, 20)]));
3974
3975        let mut seeded_messages = Vec::new();
3976        for index in 0..12 {
3977            let call_id = format!("call_{index}");
3978            seeded_messages.push(make_assistant_tool_call(
3979                &call_id,
3980                "read",
3981                serde_json::json!({"path": format!("src/file_{index}.rs")}),
3982            ));
3983            seeded_messages.push(make_tool_result(&call_id, "read", &"x".repeat(400)));
3984        }
3985
3986        let mut usage_messages = seeded_messages.clone();
3987        usage_messages.push(Message::user("trigger masking"));
3988        let provisional_model = test_model(provider.clone());
3989        let usage_before = crate::context::context_usage(&usage_messages, &provisional_model);
3990
3991        let mut masked_messages = usage_messages.clone();
3992        crate::context::mask_observations(&mut masked_messages, 10);
3993        let usage_after = crate::context::context_usage(&masked_messages, &provisional_model);
3994
3995        assert!(usage_before.used > usage_after.used);
3996
3997        // Pick a window where masking definitely triggers.
3998        let context_window = ((usage_before.used as f64) / 0.7).ceil() as u32;
3999
4000        let model = test_model_with_context_window(provider, context_window.max(1));
4001        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4002        let events_task = tokio::spawn(collect_events(handle));
4003        agent.messages = seeded_messages;
4004
4005        agent.run("trigger masking".to_string()).await.unwrap();
4006        drop(agent);
4007
4008        let events = events_task.await.unwrap();
4009
4010        assert!(
4011            events
4012                .iter()
4013                .any(|e| matches!(e, AgentEvent::TurnStart { index: 0 })),
4014            "agent should still run normally"
4015        );
4016    }
4017
4018    // ── Usage/cost accumulation ────────────────────────────────────
4019
4020    #[tokio::test]
4021    async fn agent_usage_cost_accumulation() {
4022        let provider = Arc::new(MockProvider::new(vec![
4023            tool_call_response(
4024                "call_1",
4025                "echo",
4026                serde_json::json!({"text": "a"}),
4027                1_000_000, // 1M input tokens
4028                500_000,   // 500k output tokens
4029            ),
4030            text_response("done", 1_000_000, 500_000),
4031        ]));
4032
4033        let model = test_model(provider);
4034        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4035        agent.tools.register(Arc::new(EchoTool));
4036
4037        let events_task = tokio::spawn(collect_events(handle));
4038        agent.run("test".to_string()).await.unwrap();
4039        drop(agent);
4040
4041        let events = events_task.await.unwrap();
4042
4043        if let Some(AgentEvent::AgentEnd { usage, cost, .. }) = events
4044            .iter()
4045            .find(|e| matches!(e, AgentEvent::AgentEnd { .. }))
4046        {
4047            // 2M input, 1M output
4048            assert_eq!(usage.input_tokens, 2_000_000);
4049            assert_eq!(usage.output_tokens, 1_000_000);
4050
4051            // Cost: 2M * $3/Mtok input = $6, 1M * $15/Mtok output = $15, total = $21
4052            assert!((cost.input - 6.0).abs() < 1e-10);
4053            assert!((cost.output - 15.0).abs() < 1e-10);
4054            assert!((cost.total - 21.0).abs() < 1e-10);
4055        } else {
4056            panic!("Expected AgentEnd");
4057        }
4058    }
4059
4060    // ── Retry policy tests ─────────────────────────────────────────
4061
4062    /// A mock provider that returns a fixed sequence of results. Each call to
4063    /// `stream()` returns the next item: an `Err` for errors, or a pre-built
4064    /// event sequence for success.
4065    struct RetryMockProvider {
4066        calls: Mutex<Vec<std::result::Result<Vec<StreamEvent>, imp_llm::Error>>>,
4067    }
4068
4069    impl RetryMockProvider {
4070        fn new(calls: Vec<std::result::Result<Vec<StreamEvent>, imp_llm::Error>>) -> Self {
4071            Self {
4072                calls: Mutex::new(calls),
4073            }
4074        }
4075    }
4076
4077    #[async_trait]
4078    impl Provider for RetryMockProvider {
4079        fn stream(
4080            &self,
4081            _model: &Model,
4082            _context: Context,
4083            _options: RequestOptions,
4084            _api_key: &str,
4085        ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<StreamEvent>> + Send>> {
4086            let mut calls = self.calls.try_lock().expect("RetryMockProvider lock");
4087            let outcome = if calls.is_empty() {
4088                Ok(vec![StreamEvent::Error {
4089                    error: "No more mock responses".to_string(),
4090                }])
4091            } else {
4092                calls.remove(0)
4093            };
4094            match outcome {
4095                Ok(events) => Box::pin(futures::stream::iter(
4096                    events.into_iter().map(imp_llm::Result::Ok),
4097                )),
4098                Err(e) => Box::pin(futures::stream::once(async move {
4099                    imp_llm::Result::<StreamEvent>::Err(e)
4100                })),
4101            }
4102        }
4103
4104        async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
4105            Ok("mock-key".to_string())
4106        }
4107
4108        fn id(&self) -> &str {
4109            "retry-mock"
4110        }
4111
4112        fn models(&self) -> &[ModelMeta] {
4113            &[]
4114        }
4115    }
4116
4117    /// Provider that fails N times with a rate-limit error, then succeeds.
4118    #[tokio::test]
4119    async fn retry_succeeds_after_transient_failures() {
4120        use imp_llm::provider::RetryPolicy;
4121
4122        let provider = Arc::new(RetryMockProvider::new(vec![
4123            // First two calls fail with a rate-limit error
4124            Err(imp_llm::Error::RateLimited {
4125                retry_after_secs: Some(0),
4126            }),
4127            Err(imp_llm::Error::RateLimited {
4128                retry_after_secs: Some(0),
4129            }),
4130            // Third call succeeds
4131            Ok(text_response("Hello after retries", 100, 20)),
4132        ]));
4133
4134        let model = test_model(provider);
4135        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4136        // Zero delays so the test runs fast
4137        agent.retry_policy = RetryPolicy {
4138            max_retries: 3,
4139            base_delay: std::time::Duration::from_millis(0),
4140            max_delay: std::time::Duration::from_secs(30),
4141            retry_on: vec![],
4142        };
4143
4144        let events_task = tokio::spawn(collect_events(handle));
4145        agent.run("Say hello".to_string()).await.unwrap();
4146        drop(agent);
4147
4148        let events = events_task.await.unwrap();
4149
4150        // Agent should have completed successfully
4151        assert!(events
4152            .iter()
4153            .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
4154
4155        // The final text should be present in TurnEnd
4156        let turn_end = events.iter().find_map(|e| match e {
4157            AgentEvent::TurnEnd { message, .. } => Some(message),
4158            _ => None,
4159        });
4160        assert!(turn_end.is_some());
4161        let content_text = turn_end
4162            .unwrap()
4163            .content
4164            .iter()
4165            .find_map(|b| match b {
4166                ContentBlock::Text { text } => Some(text.as_str()),
4167                _ => None,
4168            })
4169            .unwrap_or("");
4170        assert!(
4171            content_text.contains("Hello after retries"),
4172            "expected final text, got: {content_text}"
4173        );
4174    }
4175
4176    /// When max_retries is exhausted the agent returns an error.
4177    #[tokio::test]
4178    async fn retry_fails_when_max_retries_exhausted() {
4179        use imp_llm::provider::RetryPolicy;
4180
4181        let provider = Arc::new(RetryMockProvider::new(vec![
4182            Err(imp_llm::Error::RateLimited {
4183                retry_after_secs: Some(0),
4184            }),
4185            Err(imp_llm::Error::RateLimited {
4186                retry_after_secs: Some(0),
4187            }),
4188            Err(imp_llm::Error::RateLimited {
4189                retry_after_secs: Some(0),
4190            }),
4191            Err(imp_llm::Error::RateLimited {
4192                retry_after_secs: Some(0),
4193            }),
4194        ]));
4195
4196        let model = test_model(provider);
4197        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4198        agent.retry_policy = RetryPolicy {
4199            max_retries: 2, // only 2 retries allowed
4200            base_delay: std::time::Duration::from_millis(0),
4201            max_delay: std::time::Duration::from_secs(30),
4202            retry_on: vec![],
4203        };
4204        drop(handle);
4205
4206        let result = agent.run("Fail".to_string()).await;
4207        assert!(
4208            result.is_err(),
4209            "should have failed after exhausting retries"
4210        );
4211    }
4212
4213    /// Auth errors (HTTP 401/403) must NOT be retried.
4214    #[tokio::test]
4215    async fn retry_does_not_retry_auth_errors() {
4216        use imp_llm::provider::RetryPolicy;
4217        use std::sync::atomic::{AtomicUsize, Ordering};
4218
4219        let call_count = Arc::new(AtomicUsize::new(0));
4220        let call_count_clone = call_count.clone();
4221
4222        struct CountingAuthFailProvider {
4223            calls: AtomicUsize,
4224            success_after: usize,
4225        }
4226
4227        #[async_trait]
4228        impl Provider for CountingAuthFailProvider {
4229            fn stream(
4230                &self,
4231                _model: &Model,
4232                _context: Context,
4233                _options: RequestOptions,
4234                _api_key: &str,
4235            ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<StreamEvent>> + Send>> {
4236                let n = self.calls.fetch_add(1, Ordering::SeqCst);
4237                if n < self.success_after {
4238                    Box::pin(futures::stream::once(async {
4239                        Err(imp_llm::Error::Auth("Invalid API key".to_string()))
4240                    }))
4241                } else {
4242                    Box::pin(futures::stream::iter(
4243                        text_response("ok", 10, 5).into_iter().map(Ok),
4244                    ))
4245                }
4246            }
4247
4248            async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
4249                Ok("mock-key".to_string())
4250            }
4251
4252            fn id(&self) -> &str {
4253                "auth-fail-mock"
4254            }
4255
4256            fn models(&self) -> &[ModelMeta] {
4257                &[]
4258            }
4259        }
4260
4261        let _ = call_count_clone; // silence unused warning
4262
4263        let provider = Arc::new(CountingAuthFailProvider {
4264            calls: AtomicUsize::new(0),
4265            success_after: 999, // would succeed eventually, but we expect no retry
4266        });
4267        let call_ref = &provider.calls;
4268
4269        let model = test_model(provider.clone());
4270        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4271        agent.retry_policy = RetryPolicy {
4272            max_retries: 5, // generous, to confirm auth errors bypass retry entirely
4273            base_delay: std::time::Duration::from_millis(0),
4274            max_delay: std::time::Duration::from_secs(30),
4275            retry_on: vec![],
4276        };
4277        drop(handle);
4278
4279        let result = agent.run("Auth test".to_string()).await;
4280        assert!(result.is_err(), "should fail on auth error");
4281
4282        // The provider should have been called exactly once — no retries.
4283        assert_eq!(
4284            call_ref.load(std::sync::atomic::Ordering::SeqCst),
4285            1,
4286            "auth errors should not be retried"
4287        );
4288    }
4289}
4290
4291// ── Integration tests: full ReAct cycle with real tools ─────────────
4292
4293#[cfg(test)]
4294mod integration {
4295    use super::*;
4296    use std::path::PathBuf;
4297    use std::pin::Pin;
4298    use std::sync::Arc;
4299
4300    use async_trait::async_trait;
4301    use futures_core::Stream;
4302    use imp_llm::auth::{ApiKey, AuthStore};
4303    use imp_llm::model::{Capabilities, ModelMeta, ModelPricing};
4304    use imp_llm::provider::Provider;
4305    use tokio::sync::Mutex;
4306
4307    use crate::tools::{bash::BashTool, edit::EditTool, read::ReadTool, write::WriteTool};
4308
4309    // ── Shared test helpers (duplicated from unit tests to keep modules independent) ──
4310
4311    struct MockProvider {
4312        responses: Mutex<Vec<Vec<StreamEvent>>>,
4313    }
4314
4315    impl MockProvider {
4316        fn new(responses: Vec<Vec<StreamEvent>>) -> Self {
4317            Self {
4318                responses: Mutex::new(responses),
4319            }
4320        }
4321    }
4322
4323    #[async_trait]
4324    impl Provider for MockProvider {
4325        fn stream(
4326            &self,
4327            _model: &Model,
4328            _context: Context,
4329            _options: RequestOptions,
4330            _api_key: &str,
4331        ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<StreamEvent>> + Send>> {
4332            let mut responses = self.responses.try_lock().expect("MockProvider lock");
4333            let events = if responses.is_empty() {
4334                vec![StreamEvent::Error {
4335                    error: "No more mock responses".to_string(),
4336                }]
4337            } else {
4338                responses.remove(0)
4339            };
4340            Box::pin(futures::stream::iter(events.into_iter().map(Ok)))
4341        }
4342
4343        async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
4344            Ok("mock-key".to_string())
4345        }
4346
4347        fn id(&self) -> &str {
4348            "mock"
4349        }
4350
4351        fn models(&self) -> &[ModelMeta] {
4352            &[]
4353        }
4354    }
4355
4356    fn test_model(provider: Arc<dyn Provider>) -> Model {
4357        Model {
4358            meta: ModelMeta {
4359                id: "test-model".to_string(),
4360                provider: "mock".to_string(),
4361                name: "Test Model".to_string(),
4362                context_window: 200_000,
4363                max_output_tokens: 16_384,
4364                pricing: ModelPricing {
4365                    input_per_mtok: 3.0,
4366                    output_per_mtok: 15.0,
4367                    cache_read_per_mtok: 0.3,
4368                    cache_write_per_mtok: 3.75,
4369                },
4370                capabilities: Capabilities {
4371                    reasoning: true,
4372                    images: false,
4373                    tool_use: true,
4374                },
4375            },
4376            provider,
4377        }
4378    }
4379
4380    fn text_response(text: &str, input_tokens: u32, output_tokens: u32) -> Vec<StreamEvent> {
4381        vec![
4382            StreamEvent::MessageStart {
4383                model: "test-model".to_string(),
4384            },
4385            StreamEvent::TextDelta {
4386                text: text.to_string(),
4387            },
4388            StreamEvent::MessageEnd {
4389                message: AssistantMessage {
4390                    content: vec![ContentBlock::Text {
4391                        text: text.to_string(),
4392                    }],
4393                    usage: Some(Usage {
4394                        input_tokens,
4395                        output_tokens,
4396                        cache_read_tokens: 0,
4397                        cache_write_tokens: 0,
4398                    }),
4399                    stop_reason: LlmStopReason::EndTurn,
4400                    timestamp: 1000,
4401                },
4402            },
4403        ]
4404    }
4405
4406    fn tool_call_response(
4407        call_id: &str,
4408        tool_name: &str,
4409        args: serde_json::Value,
4410        input_tokens: u32,
4411        output_tokens: u32,
4412    ) -> Vec<StreamEvent> {
4413        vec![
4414            StreamEvent::MessageStart {
4415                model: "test-model".to_string(),
4416            },
4417            StreamEvent::ToolCall {
4418                id: call_id.to_string(),
4419                name: tool_name.to_string(),
4420                arguments: args.clone(),
4421            },
4422            StreamEvent::MessageEnd {
4423                message: AssistantMessage {
4424                    content: vec![ContentBlock::ToolCall {
4425                        id: call_id.to_string(),
4426                        name: tool_name.to_string(),
4427                        arguments: args,
4428                    }],
4429                    usage: Some(Usage {
4430                        input_tokens,
4431                        output_tokens,
4432                        cache_read_tokens: 0,
4433                        cache_write_tokens: 0,
4434                    }),
4435                    stop_reason: LlmStopReason::ToolUse,
4436                    timestamp: 1000,
4437                },
4438            },
4439        ]
4440    }
4441
4442    /// Create an agent pre-loaded with the reduced default tool set used by tests.
4443    fn create_agent_with_tools(provider: Arc<dyn Provider>, cwd: PathBuf) -> (Agent, AgentHandle) {
4444        let model = test_model(provider);
4445        let (mut agent, handle) = Agent::new(model, cwd);
4446        agent.tools.register(Arc::new(WriteTool));
4447        agent.tools.register(Arc::new(ReadTool));
4448        agent.tools.register(Arc::new(EditTool));
4449        agent.tools.register(Arc::new(BashTool));
4450        (agent, handle)
4451    }
4452
4453    /// Create an agent with reduced tools only (used for synthetic A/B tests).
4454    fn create_agent_with_reduced_tools(
4455        provider: Arc<dyn Provider>,
4456        cwd: PathBuf,
4457    ) -> (Agent, AgentHandle) {
4458        let model = test_model(provider);
4459        let (mut agent, handle) = Agent::new(model, cwd);
4460        agent.tools.register(Arc::new(WriteTool));
4461        agent.tools.register(Arc::new(ReadTool));
4462        agent.tools.register(Arc::new(EditTool));
4463        agent.tools.register(Arc::new(BashTool));
4464        (agent, handle)
4465    }
4466
4467    // ── Test 1: Write then read a file ─────────────────────────────
4468
4469    #[tokio::test]
4470    async fn agent_reads_and_writes_file() {
4471        let tmp = tempfile::tempdir().unwrap();
4472        let provider = Arc::new(MockProvider::new(vec![
4473            tool_call_response(
4474                "call_write",
4475                "write",
4476                serde_json::json!({"path": "test.txt", "content": "hello world"}),
4477                100,
4478                20,
4479            ),
4480            tool_call_response(
4481                "call_read",
4482                "read",
4483                serde_json::json!({"path": "test.txt"}),
4484                100,
4485                20,
4486            ),
4487            text_response("The file contains: hello world", 100, 20),
4488        ]));
4489
4490        let (mut agent, handle) = create_agent_with_tools(provider, tmp.path().to_path_buf());
4491        drop(handle);
4492
4493        agent
4494            .run("Write and read a file".to_string())
4495            .await
4496            .unwrap();
4497
4498        // File should exist on disk with correct content
4499        let on_disk = std::fs::read_to_string(tmp.path().join("test.txt")).unwrap();
4500        assert_eq!(on_disk, "hello world");
4501
4502        // Read tool result should contain the file content
4503        let read_result = agent
4504            .messages
4505            .iter()
4506            .find_map(|m| match m {
4507                Message::ToolResult(r) if r.tool_call_id == "call_read" => Some(r),
4508                _ => None,
4509            })
4510            .expect("should have a read tool result");
4511        let read_text = read_result
4512            .content
4513            .iter()
4514            .find_map(|b| match b {
4515                ContentBlock::Text { text } => Some(text.as_str()),
4516                _ => None,
4517            })
4518            .unwrap();
4519        assert!(
4520            read_text.contains("hello world"),
4521            "read result should contain file content, got: {read_text}"
4522        );
4523
4524        // 3 assistant messages = 3 turns (write, read, final text)
4525        let assistant_count = agent
4526            .messages
4527            .iter()
4528            .filter(|m| matches!(m, Message::Assistant(_)))
4529            .count();
4530        assert_eq!(assistant_count, 3);
4531    }
4532
4533    // ── Test 2: Edit tool modifies a file ──────────────────────────
4534
4535    #[tokio::test]
4536    async fn agent_edit_tool_modifies_file() {
4537        let tmp = tempfile::tempdir().unwrap();
4538        let provider = Arc::new(MockProvider::new(vec![
4539            tool_call_response(
4540                "call_write",
4541                "write",
4542                serde_json::json!({
4543                    "path": "src/main.rs",
4544                    "content": "fn main() {\n    println!(\"old\");\n}"
4545                }),
4546                100,
4547                20,
4548            ),
4549            tool_call_response(
4550                "call_edit",
4551                "edit",
4552                serde_json::json!({
4553                    "path": "src/main.rs",
4554                    "oldText": "old",
4555                    "newText": "new"
4556                }),
4557                100,
4558                20,
4559            ),
4560            tool_call_response(
4561                "call_read",
4562                "read",
4563                serde_json::json!({"path": "src/main.rs"}),
4564                100,
4565                20,
4566            ),
4567            text_response("Done", 100, 20),
4568        ]));
4569
4570        let (mut agent, handle) = create_agent_with_tools(provider, tmp.path().to_path_buf());
4571        drop(handle);
4572
4573        agent.run("Edit a file".to_string()).await.unwrap();
4574
4575        // File should contain "new" not "old"
4576        let on_disk = std::fs::read_to_string(tmp.path().join("src/main.rs")).unwrap();
4577        assert!(on_disk.contains("new"), "file should contain 'new'");
4578        assert!(!on_disk.contains("old"), "file should not contain 'old'");
4579
4580        // Edit tool result should include a diff
4581        let edit_result = agent
4582            .messages
4583            .iter()
4584            .find_map(|m| match m {
4585                Message::ToolResult(r) if r.tool_call_id == "call_edit" => Some(r),
4586                _ => None,
4587            })
4588            .expect("should have an edit tool result");
4589        let edit_text = edit_result
4590            .content
4591            .iter()
4592            .find_map(|b| match b {
4593                ContentBlock::Text { text } => Some(text.as_str()),
4594                _ => None,
4595            })
4596            .unwrap();
4597        assert!(
4598            edit_text.contains("---") || edit_text.contains("+++"),
4599            "edit result should include a diff, got: {edit_text}"
4600        );
4601    }
4602
4603    // ── Test 3: Bash search finds a pattern (synthetic A/B baseline) ──────
4604
4605    #[tokio::test]
4606    async fn agent_bash_search_finds_pattern() {
4607        let tmp = tempfile::tempdir().unwrap();
4608        std::fs::write(
4609            tmp.path().join("search_me.txt"),
4610            "line one\nunique_pattern_xyz here\nline three\n",
4611        )
4612        .unwrap();
4613        let provider = Arc::new(MockProvider::new(vec![
4614            tool_call_response(
4615                "call_bash",
4616                "bash",
4617                serde_json::json!({"command": "grep --no-color -rn 'unique_pattern_xyz' ."}),
4618                100,
4619                20,
4620            ),
4621            text_response("Found it!", 100, 20),
4622        ]));
4623
4624        let (mut agent, handle) =
4625            create_agent_with_reduced_tools(provider, tmp.path().to_path_buf());
4626        drop(handle);
4627
4628        agent.run("Search for a pattern".to_string()).await.unwrap();
4629
4630        let bash_result = agent
4631            .messages
4632            .iter()
4633            .find_map(|m| match m {
4634                Message::ToolResult(r) if r.tool_call_id == "call_bash" => Some(r),
4635                _ => None,
4636            })
4637            .expect("should have a bash tool result");
4638        let bash_text = bash_result
4639            .content
4640            .iter()
4641            .find_map(|b| match b {
4642                ContentBlock::Text { text } => Some(text.as_str()),
4643                _ => None,
4644            })
4645            .unwrap();
4646        assert!(
4647            !bash_text.trim().is_empty(),
4648            "bash grep output should not be empty"
4649        );
4650    }
4651
4652    // ── Test 3b: repeated identical tool calls warn and then block ────────
4653
4654    #[tokio::test]
4655    async fn agent_repeated_tool_calls_warn_then_block() {
4656        let tmp = tempfile::tempdir().unwrap();
4657        std::fs::write(tmp.path().join("repeat.txt"), "same content\n").unwrap();
4658
4659        let provider = Arc::new(MockProvider::new(vec![
4660            tool_call_response(
4661                "call_1",
4662                "read",
4663                serde_json::json!({"path": "repeat.txt"}),
4664                100,
4665                20,
4666            ),
4667            tool_call_response(
4668                "call_2",
4669                "read",
4670                serde_json::json!({"path": "repeat.txt"}),
4671                100,
4672                20,
4673            ),
4674            tool_call_response(
4675                "call_3",
4676                "read",
4677                serde_json::json!({"path": "repeat.txt"}),
4678                100,
4679                20,
4680            ),
4681            tool_call_response(
4682                "call_4",
4683                "read",
4684                serde_json::json!({"path": "repeat.txt"}),
4685                100,
4686                20,
4687            ),
4688            text_response("Done", 100, 20),
4689        ]));
4690
4691        let (mut agent, handle) =
4692            create_agent_with_reduced_tools(provider, tmp.path().to_path_buf());
4693        drop(handle);
4694
4695        agent
4696            .run("Read the same file repeatedly".to_string())
4697            .await
4698            .unwrap();
4699
4700        let third = agent
4701            .messages
4702            .iter()
4703            .find_map(|m| match m {
4704                Message::ToolResult(r) if r.tool_call_id == "call_3" => Some(r),
4705                _ => None,
4706            })
4707            .expect("third tool result");
4708        let fourth = agent
4709            .messages
4710            .iter()
4711            .find_map(|m| match m {
4712                Message::ToolResult(r) if r.tool_call_id == "call_4" => Some(r),
4713                _ => None,
4714            })
4715            .expect("fourth tool result");
4716
4717        let third_text = third
4718            .content
4719            .iter()
4720            .filter_map(|b| match b {
4721                ContentBlock::Text { text } => Some(text.as_str()),
4722                _ => None,
4723            })
4724            .collect::<Vec<_>>()
4725            .join("\n");
4726        let fourth_text = fourth
4727            .content
4728            .iter()
4729            .filter_map(|b| match b {
4730                ContentBlock::Text { text } => Some(text.as_str()),
4731                _ => None,
4732            })
4733            .collect::<Vec<_>>()
4734            .join("\n");
4735
4736        assert!(third_text.contains("Warning: identical tool call repeated 3 times"));
4737        assert!(fourth.is_error);
4738        assert!(fourth_text.contains("Blocked: identical tool call repeated 4 times"));
4739        assert_eq!(
4740            agent
4741                .messages
4742                .iter()
4743                .filter(|message| matches!(message, Message::User(_)))
4744                .count(),
4745            1,
4746            "agent should stop after repeated-action block rather than enqueueing more follow-ups"
4747        );
4748    }
4749
4750    #[test]
4751    fn tool_results_indicate_repeated_action_detects_blocked_repeat_message() {
4752        let result = imp_llm::ToolResultMessage {
4753            tool_call_id: "call_repeat".to_string(),
4754            tool_name: "read".to_string(),
4755            content: vec![ContentBlock::Text {
4756                text: "Blocked: identical tool call repeated 4 times in a row for 'read'."
4757                    .to_string(),
4758            }],
4759            is_error: true,
4760            details: serde_json::Value::Null,
4761            timestamp: 0,
4762        };
4763
4764        assert!(tool_results_indicate_repeated_action(&[result]));
4765    }
4766
4767    // ── Test 4: Bash runs a command ────────────────────────────────
4768
4769    #[tokio::test]
4770    async fn agent_bash_runs_command() {
4771        let tmp = tempfile::tempdir().unwrap();
4772        let provider = Arc::new(MockProvider::new(vec![
4773            tool_call_response(
4774                "call_bash",
4775                "bash",
4776                serde_json::json!({"command": "echo hello && echo world"}),
4777                100,
4778                20,
4779            ),
4780            text_response("Done", 100, 20),
4781        ]));
4782
4783        let (mut agent, handle) = create_agent_with_tools(provider, tmp.path().to_path_buf());
4784        drop(handle);
4785
4786        agent.run("Run a command".to_string()).await.unwrap();
4787
4788        // Bash result should contain the command output
4789        let bash_result = agent
4790            .messages
4791            .iter()
4792            .find_map(|m| match m {
4793                Message::ToolResult(r) if r.tool_call_id == "call_bash" => Some(r),
4794                _ => None,
4795            })
4796            .expect("should have a bash tool result");
4797        let bash_text = bash_result
4798            .content
4799            .iter()
4800            .find_map(|b| match b {
4801                ContentBlock::Text { text } => Some(text.as_str()),
4802                _ => None,
4803            })
4804            .unwrap();
4805        assert!(
4806            bash_text.contains("hello"),
4807            "bash output should contain 'hello', got: {bash_text}"
4808        );
4809        assert!(
4810            bash_text.contains("world"),
4811            "bash output should contain 'world', got: {bash_text}"
4812        );
4813
4814        // Details should include exit_code: 0
4815        assert_eq!(bash_result.details["exit_code"], 0);
4816    }
4817
4818    // ── Test 5: Tool error → agent self-corrects ───────────────────
4819
4820    #[tokio::test]
4821    async fn agent_handles_tool_error_gracefully() {
4822        let tmp = tempfile::tempdir().unwrap();
4823        let provider = Arc::new(MockProvider::new(vec![
4824            tool_call_response(
4825                "call_read",
4826                "read",
4827                serde_json::json!({"path": "nonexistent.txt"}),
4828                100,
4829                20,
4830            ),
4831            text_response("File not found, let me try something else", 100, 20),
4832        ]));
4833
4834        let (mut agent, handle) = create_agent_with_tools(provider, tmp.path().to_path_buf());
4835        drop(handle);
4836
4837        agent.run("Read a file".to_string()).await.unwrap();
4838
4839        // Read tool result should have is_error=true
4840        let read_result = agent
4841            .messages
4842            .iter()
4843            .find_map(|m| match m {
4844                Message::ToolResult(r) if r.tool_call_id == "call_read" => Some(r),
4845                _ => None,
4846            })
4847            .expect("should have a read tool result");
4848        assert!(
4849            read_result.is_error,
4850            "reading nonexistent file should produce an error result"
4851        );
4852
4853        // Agent should continue to turn 1 and self-correct with text
4854        let assistant_count = agent
4855            .messages
4856            .iter()
4857            .filter(|m| matches!(m, Message::Assistant(_)))
4858            .count();
4859        assert_eq!(
4860            assistant_count, 2,
4861            "agent should have 2 turns: error + recovery"
4862        );
4863
4864        // Agent completed successfully (no Err return)
4865    }
4866}
4867
4868// ── Mode enforcement tests ─────────────────────────────────────────
4869
4870#[cfg(test)]
4871mod mode_tests {
4872    use super::*;
4873    use std::path::PathBuf;
4874    use std::pin::Pin;
4875    use std::sync::Arc;
4876
4877    use async_trait::async_trait;
4878    use futures_core::Stream;
4879    use imp_llm::auth::{ApiKey, AuthStore};
4880    use imp_llm::model::ModelMeta;
4881    use imp_llm::provider::Provider;
4882    use tokio::sync::Mutex;
4883
4884    // ── Mock provider (same shape as in tests) ─────────────────────
4885
4886    struct MockProvider {
4887        responses: Mutex<Vec<Vec<imp_llm::StreamEvent>>>,
4888    }
4889
4890    impl MockProvider {
4891        fn new(responses: Vec<Vec<imp_llm::StreamEvent>>) -> Self {
4892            Self {
4893                responses: Mutex::new(responses),
4894            }
4895        }
4896    }
4897
4898    #[async_trait]
4899    impl Provider for MockProvider {
4900        fn stream(
4901            &self,
4902            _model: &imp_llm::Model,
4903            _context: imp_llm::Context,
4904            _options: imp_llm::RequestOptions,
4905            _api_key: &str,
4906        ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<imp_llm::StreamEvent>> + Send>> {
4907            let mut responses = self.responses.try_lock().expect("MockProvider lock");
4908            let events = if responses.is_empty() {
4909                vec![imp_llm::StreamEvent::Error {
4910                    error: "No more mock responses".to_string(),
4911                }]
4912            } else {
4913                responses.remove(0)
4914            };
4915            Box::pin(futures::stream::iter(events.into_iter().map(Ok)))
4916        }
4917
4918        async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
4919            Ok("mock-key".to_string())
4920        }
4921
4922        fn id(&self) -> &str {
4923            "mock"
4924        }
4925
4926        fn models(&self) -> &[imp_llm::model::ModelMeta] {
4927            &[]
4928        }
4929    }
4930
4931    fn test_model(provider: Arc<dyn Provider>) -> imp_llm::Model {
4932        imp_llm::Model {
4933            meta: ModelMeta {
4934                id: "test-model".to_string(),
4935                provider: "mock".to_string(),
4936                name: "Test Model".to_string(),
4937                context_window: 200_000,
4938                max_output_tokens: 16_384,
4939                pricing: imp_llm::model::ModelPricing {
4940                    input_per_mtok: 3.0,
4941                    output_per_mtok: 15.0,
4942                    cache_read_per_mtok: 0.3,
4943                    cache_write_per_mtok: 3.75,
4944                },
4945                capabilities: imp_llm::model::Capabilities {
4946                    reasoning: true,
4947                    images: false,
4948                    tool_use: true,
4949                },
4950            },
4951            provider,
4952        }
4953    }
4954
4955    fn text_response(text: &str, input: u32, output: u32) -> Vec<imp_llm::StreamEvent> {
4956        vec![
4957            imp_llm::StreamEvent::MessageStart {
4958                model: "test-model".to_string(),
4959            },
4960            imp_llm::StreamEvent::TextDelta {
4961                text: text.to_string(),
4962            },
4963            imp_llm::StreamEvent::MessageEnd {
4964                message: imp_llm::AssistantMessage {
4965                    content: vec![imp_llm::ContentBlock::Text {
4966                        text: text.to_string(),
4967                    }],
4968                    usage: Some(imp_llm::Usage {
4969                        input_tokens: input,
4970                        output_tokens: output,
4971                        cache_read_tokens: 0,
4972                        cache_write_tokens: 0,
4973                    }),
4974                    stop_reason: imp_llm::StopReason::EndTurn,
4975                    timestamp: 1000,
4976                },
4977            },
4978        ]
4979    }
4980
4981    fn tool_call_response(
4982        call_id: &str,
4983        tool_name: &str,
4984        args: serde_json::Value,
4985        input: u32,
4986        output: u32,
4987    ) -> Vec<imp_llm::StreamEvent> {
4988        vec![
4989            imp_llm::StreamEvent::MessageStart {
4990                model: "test-model".to_string(),
4991            },
4992            imp_llm::StreamEvent::ToolCall {
4993                id: call_id.to_string(),
4994                name: tool_name.to_string(),
4995                arguments: args.clone(),
4996            },
4997            imp_llm::StreamEvent::MessageEnd {
4998                message: imp_llm::AssistantMessage {
4999                    content: vec![imp_llm::ContentBlock::ToolCall {
5000                        id: call_id.to_string(),
5001                        name: tool_name.to_string(),
5002                        arguments: args,
5003                    }],
5004                    usage: Some(imp_llm::Usage {
5005                        input_tokens: input,
5006                        output_tokens: output,
5007                        cache_read_tokens: 0,
5008                        cache_write_tokens: 0,
5009                    }),
5010                    stop_reason: imp_llm::StopReason::ToolUse,
5011                    timestamp: 1000,
5012                },
5013            },
5014        ]
5015    }
5016
5017    async fn collect_events(mut handle: AgentHandle) -> Vec<AgentEvent> {
5018        let mut events = Vec::new();
5019        while let Some(event) = handle.event_rx.recv().await {
5020            events.push(event);
5021        }
5022        events
5023    }
5024
5025    // ── Tool fixtures ───────────────────────────────────────────────
5026
5027    struct EchoTool;
5028
5029    #[async_trait]
5030    impl crate::tools::Tool for EchoTool {
5031        fn name(&self) -> &str {
5032            "echo"
5033        }
5034        fn label(&self) -> &str {
5035            "Echo"
5036        }
5037        fn description(&self) -> &str {
5038            "Echoes back the input"
5039        }
5040        fn parameters(&self) -> serde_json::Value {
5041            serde_json::json!({
5042                "type": "object",
5043                "properties": { "text": { "type": "string" } },
5044                "required": ["text"]
5045            })
5046        }
5047        fn is_readonly(&self) -> bool {
5048            true
5049        }
5050        async fn execute(
5051            &self,
5052            _call_id: &str,
5053            params: serde_json::Value,
5054            _ctx: crate::tools::ToolContext,
5055        ) -> crate::error::Result<crate::tools::ToolOutput> {
5056            let text = params["text"].as_str().unwrap_or("no text");
5057            Ok(crate::tools::ToolOutput::text(format!("echo: {text}")))
5058        }
5059    }
5060
5061    struct NamedWriteTool(&'static str);
5062
5063    #[async_trait]
5064    impl crate::tools::Tool for NamedWriteTool {
5065        fn name(&self) -> &str {
5066            self.0
5067        }
5068        fn label(&self) -> &str {
5069            self.0
5070        }
5071        fn description(&self) -> &str {
5072            "A write tool"
5073        }
5074        fn parameters(&self) -> serde_json::Value {
5075            serde_json::json!({"type": "object", "properties": {"data": {"type": "string"}}})
5076        }
5077        fn is_readonly(&self) -> bool {
5078            false
5079        }
5080        async fn execute(
5081            &self,
5082            _call_id: &str,
5083            _params: serde_json::Value,
5084            _ctx: crate::tools::ToolContext,
5085        ) -> crate::error::Result<crate::tools::ToolOutput> {
5086            Ok(crate::tools::ToolOutput::text("written"))
5087        }
5088    }
5089
5090    fn single_text_model(text: &str) -> Arc<MockProvider> {
5091        Arc::new(MockProvider::new(vec![text_response(text, 50, 10)]))
5092    }
5093
5094    /// Test: Full mode registers all tools (no filtering).
5095    #[tokio::test]
5096    async fn agent_mode_enforcement_full_registers_all_tools() {
5097        use crate::config::AgentMode;
5098
5099        let provider = single_text_model("ok");
5100        let model = test_model(provider);
5101        let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
5102        agent.mode = AgentMode::Full;
5103
5104        // Register a mix of tools
5105        agent.tools.register(Arc::new(EchoTool)); // "echo" - not in any allow-list
5106        agent.tools.register(Arc::new(NamedWriteTool("write")));
5107
5108        // Full mode allows everything — both tools should be present
5109        assert!(
5110            agent.tools.get("echo").is_some(),
5111            "echo should be registered"
5112        );
5113        assert!(
5114            agent.tools.get("write").is_some(),
5115            "write should be registered"
5116        );
5117        assert!(agent.mode.allows_tool("echo"));
5118        assert!(agent.mode.allows_tool("write"));
5119        assert!(agent.mode.allows_tool("any_future_tool"));
5120    }
5121
5122    /// Test: Orchestrator mode excludes write-category tools at registration time.
5123    #[test]
5124    fn agent_mode_enforcement_orchestrator_excludes_write_tools() {
5125        use crate::config::AgentMode;
5126        use crate::tools::ToolRegistry;
5127
5128        let mut registry = ToolRegistry::new();
5129        registry.register(Arc::new(EchoTool)); // "echo" — not in orchestrator allow-list
5130        registry.register(Arc::new(NamedWriteTool("write")));
5131        registry.register(Arc::new(NamedWriteTool("edit")));
5132        registry.register(Arc::new(NamedWriteTool("bash")));
5133
5134        // Apply the mode filter exactly as AgentBuilder would
5135        let mode = AgentMode::Orchestrator;
5136        registry.retain(|name| mode.allows_tool(name));
5137
5138        // Write-category tools must be absent
5139        assert!(
5140            registry.get("write").is_none(),
5141            "write must be filtered out"
5142        );
5143        assert!(registry.get("edit").is_none(), "edit must be filtered out");
5144        assert!(registry.get("bash").is_none(), "bash must be filtered out");
5145        // echo is not in any mode allow-list either
5146        assert!(registry.get("echo").is_none(), "echo must be filtered out");
5147    }
5148
5149    /// Test: Execution-time guard blocks a disallowed tool call and returns an error result.
5150    #[tokio::test]
5151    async fn agent_mode_enforcement_guard_blocks_disallowed() {
5152        use crate::config::AgentMode;
5153
5154        let provider = Arc::new(MockProvider::new(vec![
5155            // Turn 0: model calls "write" — disallowed in orchestrator mode
5156            tool_call_response(
5157                "call_1",
5158                "write",
5159                serde_json::json!({"data": "content"}),
5160                50,
5161                10,
5162            ),
5163            // Turn 1: model responds after seeing the error
5164            text_response("Understood, I cannot write directly.", 50, 10),
5165        ]));
5166
5167        let model = test_model(provider);
5168        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
5169        agent.mode = AgentMode::Orchestrator;
5170        // Register write so it passes schema validation — the mode guard fires first
5171        agent.tools.register(Arc::new(NamedWriteTool("write")));
5172
5173        let events_task = tokio::spawn(collect_events(handle));
5174        agent.run("Write something".to_string()).await.unwrap();
5175        drop(agent);
5176
5177        let events = events_task.await.unwrap();
5178
5179        // The tool execution end event should carry an error result
5180        let tool_end = events
5181            .iter()
5182            .find(|e| matches!(e, AgentEvent::ToolExecutionEnd { .. }));
5183        assert!(tool_end.is_some(), "should have a ToolExecutionEnd event");
5184
5185        if let Some(AgentEvent::ToolExecutionEnd { result, .. }) = tool_end {
5186            assert!(result.is_error, "mode guard should produce an error result");
5187            let text = result.content.iter().find_map(|c| {
5188                if let ContentBlock::Text { text } = c {
5189                    Some(text.as_str())
5190                } else {
5191                    None
5192                }
5193            });
5194            let text = text.expect("error result should have text");
5195            assert!(
5196                text.contains("write") && text.contains("mode"),
5197                "error should name the tool and mention mode, got: {text}"
5198            );
5199        }
5200    }
5201
5202    /// Test: Execution-time guard allows a permitted tool call through cleanly.
5203    #[tokio::test]
5204    async fn agent_mode_enforcement_guard_allows_permitted() {
5205        use crate::config::AgentMode;
5206
5207        let provider = Arc::new(MockProvider::new(vec![
5208            // Turn 0: model calls "read" — allowed in orchestrator mode
5209            tool_call_response(
5210                "call_1",
5211                "echo",
5212                serde_json::json!({"text": "hello"}),
5213                50,
5214                10,
5215            ),
5216            text_response("Echo succeeded", 50, 10),
5217        ]));
5218
5219        let model = test_model(provider);
5220        let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
5221        // Full mode keeps custom tools available
5222        agent.mode = AgentMode::Full;
5223        agent.tools.register(Arc::new(EchoTool));
5224
5225        let events_task = tokio::spawn(collect_events(handle));
5226        agent.run("Echo something".to_string()).await.unwrap();
5227        drop(agent);
5228
5229        let events = events_task.await.unwrap();
5230
5231        // Tool should have succeeded (not an error)
5232        let tool_end = events
5233            .iter()
5234            .find(|e| matches!(e, AgentEvent::ToolExecutionEnd { .. }));
5235        assert!(tool_end.is_some());
5236
5237        if let Some(AgentEvent::ToolExecutionEnd { result, .. }) = tool_end {
5238            assert!(!result.is_error, "permitted tool should succeed");
5239        }
5240    }
5241
5242    /// Test: System prompt filters tool descriptions by mode.
5243    #[test]
5244    fn agent_mode_enforcement_system_prompt_filters() {
5245        use crate::config::AgentMode;
5246        use crate::system_prompt::{assemble, AssembleParams};
5247        use crate::tools::ToolRegistry;
5248
5249        let mut registry = ToolRegistry::new();
5250        registry.register(Arc::new(NamedWriteTool("write")));
5251        registry.register(Arc::new(NamedWriteTool("edit")));
5252        registry.register(Arc::new(NamedWriteTool("bash")));
5253
5254        // Provide read-category tools too
5255        struct ReadTool;
5256        #[async_trait]
5257        impl crate::tools::Tool for ReadTool {
5258            fn name(&self) -> &str {
5259                "read"
5260            }
5261            fn label(&self) -> &str {
5262                "Read"
5263            }
5264            fn description(&self) -> &str {
5265                "Read a file"
5266            }
5267            fn parameters(&self) -> serde_json::Value {
5268                serde_json::json!({"type": "object"})
5269            }
5270            fn is_readonly(&self) -> bool {
5271                true
5272            }
5273            async fn execute(
5274                &self,
5275                _: &str,
5276                _: serde_json::Value,
5277                _: crate::tools::ToolContext,
5278            ) -> crate::error::Result<crate::tools::ToolOutput> {
5279                Ok(crate::tools::ToolOutput::text(""))
5280            }
5281        }
5282        registry.register(Arc::new(ReadTool));
5283
5284        let mode = AgentMode::Orchestrator;
5285        let result = assemble(&AssembleParams {
5286            tools: &registry,
5287            agents_md: &[],
5288            skills: &[],
5289            facts: &[],
5290            project_memory_status: None,
5291            personality: None,
5292            soul: None,
5293            task: None,
5294            role: None,
5295            mode: &mode,
5296            memory: None,
5297            user_profile: None,
5298            cwd: None,
5299            learning_enabled: false,
5300            guardrail_profile: None,
5301        });
5302
5303        // Orchestrator allows "read" — should appear in system prompt
5304        assert!(
5305            result.text.contains("- read:"),
5306            "read should be in orchestrator prompt"
5307        );
5308
5309        // Write tools must be absent from the system prompt
5310        assert!(
5311            !result.text.contains("- write:"),
5312            "write must not appear in orchestrator prompt"
5313        );
5314        assert!(
5315            !result.text.contains("- edit:"),
5316            "edit must not appear in orchestrator prompt"
5317        );
5318        assert!(
5319            !result.text.contains("- bash:"),
5320            "bash must not appear in orchestrator prompt"
5321        );
5322    }
5323
5324    /// Test: System prompt includes mode instructions for non-Full modes.
5325    #[test]
5326    fn agent_mode_enforcement_system_prompt_instructions() {
5327        use crate::config::AgentMode;
5328        use crate::system_prompt::{assemble, AssembleParams};
5329        use crate::tools::ToolRegistry;
5330
5331        let registry = ToolRegistry::new();
5332
5333        // Full mode — no extra instructions
5334        let full_result = assemble(&AssembleParams {
5335            tools: &registry,
5336            agents_md: &[],
5337            skills: &[],
5338            facts: &[],
5339            project_memory_status: None,
5340            personality: None,
5341            soul: None,
5342            task: None,
5343            role: None,
5344            mode: &AgentMode::Full,
5345            memory: None,
5346            user_profile: None,
5347            cwd: None,
5348            learning_enabled: false,
5349            guardrail_profile: None,
5350        });
5351        // Full mode has no instructions
5352        assert!(
5353            !full_result.text.contains("orchestrator"),
5354            "Full mode should not mention orchestrator"
5355        );
5356        assert!(
5357            !full_result.text.contains("You are a worker agent."),
5358            "Full mode should not include worker mode instructions"
5359        );
5360
5361        // Orchestrator mode — should include mode instructions
5362        let orch_result = assemble(&AssembleParams {
5363            tools: &registry,
5364            agents_md: &[],
5365            skills: &[],
5366            facts: &[],
5367            project_memory_status: None,
5368            personality: None,
5369            soul: None,
5370            task: None,
5371            role: None,
5372            mode: &AgentMode::Orchestrator,
5373            memory: None,
5374            user_profile: None,
5375            cwd: None,
5376            learning_enabled: false,
5377            guardrail_profile: None,
5378        });
5379        assert!(
5380            orch_result.text.contains("orchestrator"),
5381            "orchestrator prompt should contain mode instructions, got: {}",
5382            orch_result.text
5383        );
5384
5385        // Worker mode — should include mode instructions
5386        let worker_result = assemble(&AssembleParams {
5387            tools: &registry,
5388            agents_md: &[],
5389            skills: &[],
5390            facts: &[],
5391            project_memory_status: None,
5392            personality: None,
5393            soul: None,
5394            task: None,
5395            role: None,
5396            mode: &AgentMode::Worker,
5397            memory: None,
5398            user_profile: None,
5399            cwd: None,
5400            learning_enabled: false,
5401            guardrail_profile: None,
5402        });
5403        assert!(
5404            worker_result.text.contains("worker"),
5405            "worker prompt should contain mode instructions"
5406        );
5407
5408        // Reviewer mode — should include mode instructions
5409        let reviewer_result = assemble(&AssembleParams {
5410            tools: &registry,
5411            agents_md: &[],
5412            skills: &[],
5413            facts: &[],
5414            project_memory_status: None,
5415            personality: None,
5416            soul: None,
5417            task: None,
5418            role: None,
5419            mode: &AgentMode::Reviewer,
5420            memory: None,
5421            user_profile: None,
5422            cwd: None,
5423            learning_enabled: false,
5424            guardrail_profile: None,
5425        });
5426        assert!(
5427            reviewer_result.text.contains("reviewer") || reviewer_result.text.contains("read"),
5428            "reviewer prompt should contain mode instructions"
5429        );
5430    }
5431}
imp_core/agent/mod.rs

imp_core/agent/
mod.rs