1use std::path::{Path, PathBuf};
2use std::sync::{Arc, Mutex};
3use std::time::Instant;
4
5use imp_llm::{
6 AssistantMessage, ContentBlock, Message, Model, StopReason as LlmStopReason, ThinkingLevel,
7 Usage,
8};
9#[cfg(test)]
10use imp_llm::{Context, RequestOptions, StreamEvent};
11use tokio::sync::mpsc;
12
13use imp_llm::provider::RetryPolicy;
14
15use crate::config::{AgentMode, Config, ContextConfig, ContinuePolicy};
16use crate::guardrails::{GuardrailConfig, GuardrailProfile};
17use crate::hooks::{HookBackgroundEvent, HookEvent, HookRunner};
18use crate::mana_review::{
19 ManaMutationAction, ManaMutationRecord, ManaReviewScope, ManaReviewScopeKind, ManaReviewState,
20 ManaUnitSnapshot, TurnManaReview, TurnManaReviewAccumulator,
21};
22use crate::policy::RunPolicy;
23use crate::roles::Role;
24use crate::tools::{LuaToolLoader, ToolRegistry};
25use crate::trace::TraceWriter;
26use crate::workflow::WorkflowContract;
27
28mod events;
29mod loop_policy;
30mod loop_state;
31mod mana_loop;
32#[cfg(not(test))]
33pub(crate) use mana_loop::ManaPolicyDecision;
34#[cfg(test)]
35pub(crate) use mana_loop::{evaluate_mana_policy, ManaPolicyDecision};
36mod recovery;
37mod run_loop;
38mod tool_execution;
39
40pub use events::{
41 AgentEvent, RecoveryCheckpoint, RecoveryCheckpointKind, TimingEvent, TimingStage,
42};
43pub use loop_state::{
44 ContinueReason, LoopDecision, PlannedToolCall, RunFinalStatus, StopReason, ToolExecutionMode,
45 ToolPlan, ToolRisk, TurnPhase, TurnState,
46};
47pub use recovery::{
48 IncompleteToolRecovery, IncompleteToolState, RecoveryLedger, RecoveryReconciliation,
49};
50
/// Commands that can be sent to an [`Agent`] over its command channel
/// (`AgentHandle::command_tx` / `Agent::command_rx`).
///
/// NOTE(review): how each command is consumed is implemented outside this
/// section of the file; the per-variant notes below are inferred from the
/// variant names and should be confirmed against the run loop.
#[derive(Debug, Clone)]
pub enum AgentCommand {
    /// Ask the agent to cancel (presumably the current run — confirm).
    Cancel,
    /// Carries steering text for the agent (presumably injected mid-run — confirm).
    Steer(String),
    /// Carries follow-up text for the agent (presumably queued for a later turn — confirm).
    FollowUp(String),
}
58
59mod turn_assessment;
60
61use turn_assessment::{
62 ContinueRecommendation, ManaEvidence, PostTurnAssessment, RuntimeEvidence, TextFallbackEvidence,
63};
64pub use turn_assessment::{NextActionAssessment, NextActionDebugView};
65
/// State for a single agent: model, conversation, tools, policies, and the
/// channels used to talk to whoever holds the matching [`AgentHandle`].
///
/// Construct with [`Agent::new`], which fills every field with a default and
/// returns the paired handle.
pub struct Agent {
    /// Model (metadata + provider) used for requests.
    pub model: Model,
    /// Requested thinking level; `Agent::new` starts at `ThinkingLevel::Medium`.
    pub thinking_level: ThinkingLevel,
    /// Registry of tools available to the agent.
    pub tools: ToolRegistry,
    /// Conversation history; passed to the `OnAgentEnd` hook by `emit`.
    pub messages: Vec<Message>,
    /// System prompt text (empty by default).
    pub system_prompt: String,
    /// Working directory the agent was created with.
    pub cwd: PathBuf,
    /// Optional token limit (presumably per response — confirm against the run loop).
    pub max_tokens: Option<u32>,
    /// Optional role assigned to this agent.
    pub role: Option<Role>,
    /// Hook runner; `emit` fires `OnAgentEnd` / `OnTurnEnd` hooks through it.
    pub hooks: HookRunner,
    /// Provider API key (empty by default).
    pub api_key: String,
    /// Optional shared auth store.
    pub auth_store: Option<std::sync::Arc<tokio::sync::Mutex<imp_llm::auth::AuthStore>>>,
    /// User-interface implementation; defaults to `crate::ui::NullInterface`.
    pub ui: Arc<dyn crate::ui::UserInterface>,
    /// Context-management configuration.
    pub context_config: ContextConfig,
    /// Retry policy (presumably for provider requests — confirm).
    pub retry_policy: RetryPolicy,
    /// Operating mode; gates most of the post-turn heuristics in this file.
    pub mode: AgentMode,
    /// Whether the mana skill is present (passed into follow-up heuristics).
    pub has_mana_skill: bool,
    /// Whether the mana-basics skill is present.
    pub has_mana_basics_skill: bool,
    /// Whether the mana-delegation skill is present.
    pub has_mana_delegation_skill: bool,
    /// Guardrail configuration.
    pub guardrail_config: GuardrailConfig,
    /// Optional guardrail profile.
    pub guardrail_profile: Option<GuardrailProfile>,
    /// Optional loader for Lua-defined tools.
    pub lua_tool_loader: Option<LuaToolLoader>,
    /// Shared file cache handed to tools.
    pub file_cache: Arc<crate::tools::FileCache>,
    /// Shared checkpoint state handed to tools.
    pub checkpoint_state: Arc<crate::tools::CheckpointState>,
    /// Shared file tracker handed to tools.
    pub file_tracker: Arc<std::sync::Mutex<crate::tools::FileTracker>>,
    /// Shared anchor store handed to tools.
    pub anchor_store: Arc<crate::tools::AnchorStore>,
    /// Line limit for reads (presumably the read tool — confirm); `Agent::new` sets 500.
    pub read_max_lines: usize,
    /// Prompt-cache options sent to the provider.
    pub cache_options: imp_llm::CacheOptions,
    /// Ledger that `emit_recovery_checkpoint` records every checkpoint into.
    pub recovery_ledger: Arc<std::sync::Mutex<RecoveryLedger>>,
    /// Most recent tool call and how often it repeated consecutively
    /// (maintained outside this section of the file).
    last_tool_call: std::sync::Arc<std::sync::Mutex<Option<RepeatedToolCallState>>>,
    /// Set once an externalization follow-up has been queued so that
    /// `assess_post_turn` does not recommend it again.
    queued_mana_externalization_nudge: bool,
    /// Policy for confidence-based auto-continue; `Disabled` by default.
    pub continue_policy: ContinuePolicy,
    /// Set once a confidence-continue follow-up has been queued.
    queued_confidence_continue_nudge: bool,
    /// Set once an execution-debt follow-up has been queued.
    queued_execution_debt_follow_up: bool,
    /// Accumulates mana mutations for the current turn; finalized by
    /// `finish_turn_mana_review`.
    turn_mana_review: Arc<std::sync::Mutex<TurnManaReviewAccumulator>>,
    /// Shared application configuration.
    pub config: Arc<Config>,
    /// Run policy.
    pub run_policy: RunPolicy,
    /// Verification gates attached to this run.
    pub verification_gates: Vec<crate::workflow::VerificationGate>,

    /// Workflow contract; its `id` (or `mana_unit_ref`) is stamped on trace events.
    pub workflow_contract: WorkflowContract,
    /// Optional trace writer; trace events are written and flushed through it.
    trace_writer: Arc<Mutex<Option<TraceWriter>>>,
    /// Current run id; while `None`, `write_trace_event` is a no-op.
    run_id: Arc<Mutex<Option<String>>>,

    /// Sender for events observed via `AgentHandle::event_rx`.
    event_tx: mpsc::Sender<AgentEvent>,
    /// Sender half of the command channel (a clone of the handle's sender).
    command_tx: mpsc::Sender<AgentCommand>,
    /// Receiver for commands sent through the handle.
    command_rx: mpsc::Receiver<AgentCommand>,
    /// Cancellation flag shared with the handle.
    cancel_token: Arc<std::sync::atomic::AtomicBool>,
}
145
/// Caller-side counterpart of an [`Agent`], returned alongside it by
/// [`Agent::new`].
pub struct AgentHandle {
    /// Receives every [`AgentEvent`] the agent sends on its event channel.
    pub event_rx: mpsc::Receiver<AgentEvent>,
    /// Sends [`AgentCommand`]s to the agent's command receiver.
    pub command_tx: mpsc::Sender<AgentCommand>,
    /// Same `AtomicBool` instance the agent holds as its cancel token.
    pub cancel_token: Arc<std::sync::atomic::AtomicBool>,
}
152
/// Snapshot of the most recent tool call, stored in `Agent::last_tool_call`.
///
/// NOTE(review): the code that updates and compares this state is outside
/// this section of the file; field notes are inferred from names and types.
#[derive(Debug, Clone)]
struct RepeatedToolCallState {
    /// Name of the tool that was called.
    tool_name: String,
    /// Tool arguments rendered as a JSON string (presumably used for equality checks — confirm).
    args_json: String,
    /// Count of consecutive occurrences (presumably of the identical call — confirm).
    consecutive: usize,
}
159
/// Outcome of a repeated-tool-call check.
///
/// NOTE(review): the producer and consumer of this enum are outside this
/// section of the file; variant notes are inferred from names and payloads.
#[derive(Debug, Clone)]
enum RepeatedToolCallCheck {
    /// Nothing to report.
    Ok,
    /// Carries a warning message (presumably surfaced while the call still runs — confirm).
    Warn(String),
    /// Carries a ready-made tool result (presumably returned instead of executing the call — confirm).
    Block(imp_llm::ToolResultMessage),
}
166
167impl Agent {
168 pub fn new(model: Model, cwd: PathBuf) -> (Self, AgentHandle) {
169 let (event_tx, event_rx) = mpsc::channel(256);
170 let (command_tx, command_rx) = mpsc::channel(32);
171 let cancel_token = Arc::new(std::sync::atomic::AtomicBool::new(false));
172 let mut hooks = HookRunner::new();
173 let background_event_tx = event_tx.clone();
174 hooks.set_background_reporter(Arc::new(move |event: HookBackgroundEvent| {
175 let background_event_tx = background_event_tx.clone();
176 tokio::spawn(async move {
177 let _ = background_event_tx
178 .send(AgentEvent::Warning {
179 message: event.to_string(),
180 })
181 .await;
182 });
183 }));
184
185 let agent = Self {
186 model,
187 thinking_level: ThinkingLevel::Medium,
188 tools: ToolRegistry::new(),
189 messages: Vec::new(),
190 system_prompt: String::new(),
191 cwd: cwd.clone(),
192 max_tokens: None,
193 role: None,
194 hooks,
195 api_key: String::new(),
196 ui: Arc::new(crate::ui::NullInterface),
197 context_config: ContextConfig::default(),
198 retry_policy: RetryPolicy::default(),
199 mode: AgentMode::Full,
200 has_mana_skill: false,
201 has_mana_basics_skill: false,
202 has_mana_delegation_skill: false,
203 guardrail_config: GuardrailConfig::default(),
204 guardrail_profile: None,
205 file_cache: Arc::new(crate::tools::FileCache::new()),
206 checkpoint_state: Arc::new(crate::tools::CheckpointState::new()),
207 file_tracker: Arc::new(std::sync::Mutex::new(crate::tools::FileTracker::new())),
208 anchor_store: Arc::new(crate::tools::AnchorStore::new()),
209 read_max_lines: 500,
210 auth_store: None,
211 cache_options: imp_llm::CacheOptions {
212 cache_system_prompt: true,
213 cache_tools: true,
214 cache_recent_turns: 2,
215 extended_ttl: false,
216 global_scope: false,
217 },
218 recovery_ledger: Arc::new(std::sync::Mutex::new(RecoveryLedger::new())),
219 last_tool_call: Arc::new(std::sync::Mutex::new(None)),
220 queued_mana_externalization_nudge: false,
221 continue_policy: ContinuePolicy::Disabled,
222 queued_confidence_continue_nudge: false,
223 queued_execution_debt_follow_up: false,
224 turn_mana_review: Arc::new(std::sync::Mutex::new(TurnManaReviewAccumulator::default())),
225 config: Arc::new(Config::default()),
226 run_policy: RunPolicy::default(),
227 workflow_contract: WorkflowContract::implicit_from(
228 crate::workflow::ImplicitWorkflowContractInput::prompt("").cwd(&cwd),
229 ),
230 verification_gates: Vec::new(),
231 trace_writer: Arc::new(Mutex::new(None)),
232 run_id: Arc::new(Mutex::new(None)),
233 lua_tool_loader: None,
234
235 event_tx,
236 command_tx: command_tx.clone(),
237 command_rx,
238 cancel_token: Arc::clone(&cancel_token),
239 };
240
241 let handle = AgentHandle {
242 event_rx,
243 command_tx,
244 cancel_token,
245 };
246
247 (agent, handle)
248 }
249
250 fn assess_post_turn(
251 &self,
252 message: &AssistantMessage,
253 tool_results: &[imp_llm::ToolResultMessage],
254 _used_tools: bool,
255 mana_review: &TurnManaReview,
256 ) -> PostTurnAssessment {
257 let repeated_action = tool_results_indicate_repeated_action(tool_results);
258 let runtime_execution_stop_reason =
259 tool_results_indicate_execution_blocker(tool_results, self.mode);
260 let work_completed = tool_results_indicate_work_completed(tool_results, self.mode);
261 let execution_debt = tool_results_indicate_execution_debt(tool_results, self.mode);
262 let execution_evidence = tool_results_indicate_execution_evidence(tool_results, self.mode);
263 let planning_only_progress = execution_debt && !execution_evidence;
264 let mana_stop_reason = mana_review_stop_reason(mana_review, self.mode);
265 let planner_text_stop_reason = planner_stop_reason(message, self.mode);
266 let execution_text_stop_reason = execution_stop_reason(message, self.mode);
267
268 let continue_recommendation = if should_queue_mana_externalization_follow_up(
269 message,
270 self.mode,
271 self.has_mana_skill,
272 self.queued_mana_externalization_nudge,
273 ) {
274 Some(ContinueRecommendation {
275 prompt: mana_externalization_follow_up_text().to_string(),
276 reason: ContinueReason::ExternalizationNeeded,
277 })
278 } else if !matches!(self.mode, AgentMode::Planner)
279 && should_queue_execution_debt_follow_up(
280 execution_debt,
281 execution_evidence,
282 self.queued_execution_debt_follow_up,
283 !assistant_message_text(message).trim().is_empty(),
284 )
285 {
286 Some(ContinueRecommendation {
287 prompt: execution_debt_follow_up_text().to_string(),
288 reason: ContinueReason::ExecutionDebt,
289 })
290 } else if should_queue_confidence_continue_follow_up(
291 message,
292 self.mode,
293 self.continue_policy,
294 self.queued_confidence_continue_nudge,
295 ) {
296 Some(ContinueRecommendation {
297 prompt: confidence_continue_follow_up_text().to_string(),
298 reason: ContinueReason::HighConfidenceVisibleNextStep,
299 })
300 } else {
301 None
302 };
303
304 PostTurnAssessment {
305 runtime: RuntimeEvidence {
306 repeated_action,
307 execution_stop_reason: runtime_execution_stop_reason,
308 work_completed,
309 execution_debt,
310 execution_evidence,
311 planning_only_progress,
312 },
313 mana: ManaEvidence {
314 stop_reason: mana_stop_reason,
315 },
316 text_fallback: TextFallbackEvidence {
317 planner_stop_reason: planner_text_stop_reason,
318 execution_stop_reason: execution_text_stop_reason,
319 },
320 continue_recommendation,
321 }
322 }
323
324 fn mark_continue_reason(&mut self, reason: ContinueReason) {
325 match reason {
326 ContinueReason::ExternalizationNeeded => {
327 self.queued_mana_externalization_nudge = true;
328 }
329 ContinueReason::HighConfidenceVisibleNextStep => {
330 self.queued_confidence_continue_nudge = true;
331 }
332 ContinueReason::ExecutionDebt => {
333 self.queued_execution_debt_follow_up = true;
334 }
335 ContinueReason::ToolResultsNeedInterpretation
336 | ContinueReason::QueuedUserFollowUp
337 | ContinueReason::RecoveryContinuation => {}
338 }
339 }
340
341 pub(crate) async fn emit(&self, event: AgentEvent) {
342 match &event {
344 AgentEvent::AgentEnd { .. } => {
345 self.hooks
346 .fire(&HookEvent::OnAgentEnd {
347 messages: &self.messages,
348 })
349 .await;
350 }
351 AgentEvent::TurnEnd { index, message, .. } => {
352 self.hooks
353 .fire(&HookEvent::OnTurnEnd {
354 index: *index,
355 message,
356 })
357 .await;
358 }
359 _ => {}
360 }
361 self.write_trace_event(&event);
362 let _ = self.event_tx.send(event).await;
363 }
364
365 fn write_trace_event(&self, event: &AgentEvent) {
366 let Some(run_id) = self.run_id.lock().ok().and_then(|run_id| run_id.clone()) else {
367 return;
368 };
369 let mut trace_event = event.to_trace_event(run_id);
370 if let Some(workflow_id) = self
371 .workflow_contract
372 .id
373 .as_ref()
374 .or(self.workflow_contract.mana_unit_ref.as_ref())
375 {
376 trace_event = trace_event.with_workflow_id(workflow_id.clone());
377 }
378 if let Ok(mut writer) = self.trace_writer.lock() {
379 if let Some(writer) = writer.as_mut() {
380 let _ = writer.write_event(trace_event);
381 let _ = writer.flush();
382 }
383 }
384 }
385
386 async fn emit_timing(
387 &self,
388 turn: u32,
389 stage: TimingStage,
390 turn_started_at: Instant,
391 llm_request_started_at: Option<Instant>,
392 ) {
393 self.emit_timing_with_details(TimingEvent::new(
394 turn,
395 stage,
396 turn_started_at,
397 llm_request_started_at,
398 ))
399 .await;
400 }
401
402 async fn emit_timing_with_details(&self, timing: TimingEvent) {
403 self.write_trace_event(&AgentEvent::Timing {
404 timing: timing.clone(),
405 });
406 let _ = self.event_tx.send(AgentEvent::Timing { timing }).await;
407 }
408
409 pub async fn emit_recovery_checkpoint(&self, checkpoint: RecoveryCheckpoint) {
410 if let Ok(mut ledger) = self.recovery_ledger.lock() {
411 ledger.record(checkpoint.clone());
412 }
413 self.write_trace_event(&AgentEvent::RecoveryCheckpoint {
414 checkpoint: checkpoint.clone(),
415 });
416 let _ = self
417 .event_tx
418 .send(AgentEvent::RecoveryCheckpoint { checkpoint })
419 .await;
420 }
421
422 fn recovery_checkpoint(
423 turn: u32,
424 kind: RecoveryCheckpointKind,
425 tool_call_id: Option<String>,
426 tool_name: Option<String>,
427 args_hash: Option<String>,
428 success: Option<bool>,
429 error_class: Option<String>,
430 ) -> RecoveryCheckpoint {
431 RecoveryCheckpoint {
432 version: 1,
433 turn,
434 kind,
435 tool_call_id,
436 tool_name,
437 args_hash,
438 success,
439 error_class,
440 timestamp: imp_llm::now(),
441 }
442 }
443
444 fn tool_args_hash(args: &serde_json::Value) -> String {
445 format!("{:016x}", crate::tools::stable_hash(args.to_string()))
446 }
447
448 fn finish_turn_mana_review(&self, turn: u32) -> TurnManaReview {
449 match self.turn_mana_review.lock() {
450 Ok(review) => {
451 let review = review.finalize();
452 if review.turn_index == turn {
453 review
454 } else {
455 TurnManaReview::no_change(turn)
456 }
457 }
458 Err(_) => TurnManaReview::no_change(turn),
459 }
460 }
461}
462fn push_stream_text_block(content: &mut Vec<ContentBlock>, text: String) {
463 if text.is_empty() {
464 return;
465 }
466
467 if let Some(ContentBlock::Text { text: existing }) = content.last_mut() {
468 existing.push_str(&text);
469 } else {
470 content.push(ContentBlock::Text { text });
471 }
472}
473
474fn push_stream_thinking_block(content: &mut Vec<ContentBlock>, text: String) {
475 if text.is_empty() {
476 return;
477 }
478
479 if let Some(ContentBlock::Thinking { text: existing }) = content.last_mut() {
480 existing.push_str(&text);
481 } else {
482 content.push(ContentBlock::Thinking { text });
483 }
484}
485
486fn assistant_message_text(message: &AssistantMessage) -> String {
487 message
488 .content
489 .iter()
490 .filter_map(|block| match block {
491 ContentBlock::Text { text } => Some(text.as_str()),
492 _ => None,
493 })
494 .collect::<Vec<_>>()
495 .join("\n")
496}
497
498fn assistant_message_contains_mana_tool_call(message: &AssistantMessage) -> bool {
499 message.content.iter().any(|block| match block {
500 ContentBlock::ToolCall { name, .. } => name == "mana",
501 _ => false,
502 })
503}
504
/// Decides whether to queue the execution-debt follow-up.
///
/// True only when the turn left execution debt, showed no execution evidence,
/// no such follow-up is already queued, and the assistant finalized its turn.
fn should_queue_execution_debt_follow_up(
    execution_debt: bool,
    execution_evidence: bool,
    already_queued: bool,
    assistant_finalized: bool,
) -> bool {
    if already_queued || execution_evidence {
        return false;
    }
    execution_debt && assistant_finalized
}
513
514fn should_queue_mana_externalization_follow_up(
515 message: &AssistantMessage,
516 mode: AgentMode,
517 _has_mana_skill: bool,
518 already_queued: bool,
519) -> bool {
520 if already_queued {
521 return false;
522 }
523
524 if !matches!(
525 mode,
526 AgentMode::Full | AgentMode::Planner | AgentMode::Orchestrator
527 ) {
528 return false;
529 }
530
531 if assistant_message_contains_mana_tool_call(message) {
532 return false;
533 }
534
535 let text = assistant_message_text(message);
536 durable_mana_externalization_signal(&text)
537}
538
/// Returns `true` when `text` contains any phrase suggesting durable state or
/// an explicit request to record work in mana.
///
/// Matching is ASCII-case-insensitive substring search, so a needle also
/// matches inside a longer word.
fn durable_mana_externalization_signal(text: &str) -> bool {
    // Phrases that hint at state worth persisting.
    const DURABLE_STATE_NEEDLES: &[&str] = &[
        "acceptance",
        "architecture decision",
        "blocker",
        "blocked by",
        "dependency",
        "dependencies",
        "durable",
        "follow-up work",
        "handoff",
        "migration",
        "orchestration",
        "parallel",
        "phase 1",
        "phase 2",
        "verify gate",
        "worker",
        "workers",
    ];
    // Phrases that explicitly ask for mana externalization.
    const EXPLICIT_MANA_NEEDLES: &[&str] = &[
        "create mana",
        "create a mana",
        "externalize",
        "mana unit",
        "mana units",
        "record this",
        "save this plan",
        "split this into units",
        "turn this into mana",
    ];

    let haystack = text.to_ascii_lowercase();
    DURABLE_STATE_NEEDLES
        .iter()
        .chain(EXPLICIT_MANA_NEEDLES.iter())
        .any(|needle| haystack.contains(*needle))
}
579
/// Prompt text for the mana-externalization follow-up.
fn mana_externalization_follow_up_text() -> &'static str {
    const PROMPT: &str = "Before you continue: externalize the durable plan or decomposition you just described into mana now. Create or update the relevant unit(s) with native mana actions, prefer root scope for cross-project work, and avoid extra chat restatement when the mana tool/UI already makes the delta obvious.";
    PROMPT
}
583
584fn should_queue_confidence_continue_follow_up(
585 message: &AssistantMessage,
586 mode: AgentMode,
587 continue_policy: ContinuePolicy,
588 already_queued: bool,
589) -> bool {
590 if already_queued || matches!(continue_policy, ContinuePolicy::Disabled) {
591 return false;
592 }
593
594 if !matches!(
595 mode,
596 AgentMode::Full | AgentMode::Planner | AgentMode::Orchestrator
597 ) {
598 return false;
599 }
600
601 if !assistant_message_contains_mana_tool_call(message) {
602 return false;
603 }
604
605 let text = assistant_message_text(message);
606 if text.trim().is_empty() {
607 return false;
608 }
609
610 let lower = text.to_ascii_lowercase();
611 let positive_signal = [
612 "done",
613 "completed",
614 "finished",
615 "updated",
616 "created",
617 "next",
618 "continue",
619 "proceed",
620 "follow-up",
621 "follow up",
622 ]
623 .iter()
624 .filter(|needle| lower.contains(**needle))
625 .count();
626
627 let blocker_signal = [
628 "blocked",
629 "unclear",
630 "need your input",
631 "which should",
632 "approval",
633 ]
634 .iter()
635 .any(|needle| lower.contains(needle));
636
637 if blocker_signal {
638 return false;
639 }
640
641 let threshold = match continue_policy {
642 ContinuePolicy::Disabled => return false,
643 ContinuePolicy::Conservative => 3,
644 ContinuePolicy::Balanced => 2,
645 ContinuePolicy::Aggressive => 1,
646 };
647
648 positive_signal >= threshold
649}
650
/// Prompt text for the confidence-continue follow-up.
fn confidence_continue_follow_up_text() -> &'static str {
    const PROMPT: &str = "Confidence is high and the mana delta is already visible. Continue to the next small, well-bounded step now using native mana-backed workflow, unless a consequential decision or blocker appears. Do not re-summarize the same visible mana change in chat unless new context needs to be called out.";
    PROMPT
}
654
/// Prompt text for the execution-debt follow-up.
fn execution_debt_follow_up_text() -> &'static str {
    const PROMPT: &str = "You have recorded or planned work, but the requested outcome is not satisfied yet. Continue working until the user's requested outcome is satisfied, or until concrete evidence shows it cannot be completed. Do not stop merely because you recorded a plan, updated mana, or completed one intermediate step.";
    PROMPT
}
658
659fn mana_result_action(result: &imp_llm::ToolResultMessage) -> Option<&str> {
660 if result.tool_name != "mana" {
661 return None;
662 }
663
664 result
665 .details
666 .get("action")
667 .and_then(|value| value.as_str())
668 .or_else(|| {
669 result
670 .details
671 .get("mana_loop_policy")
672 .and_then(|policy| policy.get("action"))
673 .and_then(|value| value.as_str())
674 })
675}
676
677fn mana_review_scope_from_result(result: &imp_llm::ToolResultMessage) -> ManaReviewScope {
678 let display = result
679 .details
680 .get("path")
681 .or_else(|| result.details.get("mana_dir"))
682 .and_then(|value| value.as_str())
683 .unwrap_or("auto")
684 .to_string();
685
686 ManaReviewScope {
687 kind: if display == "auto" {
688 ManaReviewScopeKind::None
689 } else {
690 ManaReviewScopeKind::ExplicitPath
691 },
692 display,
693 }
694}
695
696fn unit_snapshot_from_value(value: &serde_json::Value) -> Option<ManaUnitSnapshot> {
697 serde_json::from_value(value.clone()).ok()
698}
699
700fn unit_snapshot_from_result(result: &imp_llm::ToolResultMessage) -> Option<ManaUnitSnapshot> {
701 result
702 .details
703 .get("unit")
704 .and_then(unit_snapshot_from_value)
705}
706
707fn mana_mutation_action(action: &str) -> Option<ManaMutationAction> {
708 match action {
709 "create" => Some(ManaMutationAction::Create),
710 "close" => Some(ManaMutationAction::Close),
711 "update" => Some(ManaMutationAction::Update),
712 "notes_append" => Some(ManaMutationAction::NotesAppend),
713 "decision_add" => Some(ManaMutationAction::DecisionAdd),
714 "decision_resolve" => Some(ManaMutationAction::DecisionResolve),
715 "reopen" => Some(ManaMutationAction::Reopen),
716 "fail" => Some(ManaMutationAction::Fail),
717 "delete" => Some(ManaMutationAction::Delete),
718 "dep_add" => Some(ManaMutationAction::DepAdd),
719 "dep_remove" => Some(ManaMutationAction::DepRemove),
720 "fact_create" => Some(ManaMutationAction::FactCreate),
721 _ => None,
722 }
723}
724
725fn mutation_record_from_mana_result(
726 result: &imp_llm::ToolResultMessage,
727) -> Option<ManaMutationRecord> {
728 if result.is_error || result.tool_name != "mana" {
729 return None;
730 }
731
732 let action_name = mana_result_action(result)?;
733 let action = mana_mutation_action(action_name)?;
734 let after_unit = unit_snapshot_from_result(result);
735 let deleted_unit = if action == ManaMutationAction::Delete {
736 let id = result.details.get("id")?.as_str()?.to_string();
737 let title = result
738 .details
739 .get("title")
740 .and_then(|value| value.as_str())
741 .unwrap_or(&id)
742 .to_string();
743 Some(crate::mana_review::ManaUnitRef::new(id, title, None))
744 } else {
745 None
746 };
747
748 if after_unit.is_none()
749 && deleted_unit.is_none()
750 && !matches!(
751 action,
752 ManaMutationAction::DepAdd | ManaMutationAction::DepRemove
753 )
754 {
755 return None;
756 }
757
758 Some(ManaMutationRecord {
759 action,
760 scope: mana_review_scope_from_result(result),
761 before_unit: None,
762 after_unit,
763 deleted_unit,
764 parent_unit: None,
765 related_unit: None,
766 field_changes: Vec::new(),
767 notes_appended: Vec::new(),
768 decision_events: Vec::new(),
769 })
770}
771
772fn record_mana_mutation_results(
773 turn_mana_review: &std::sync::Arc<std::sync::Mutex<TurnManaReviewAccumulator>>,
774 tool_results: &[imp_llm::ToolResultMessage],
775) {
776 let Ok(mut review) = turn_mana_review.lock() else {
777 return;
778 };
779
780 for result in tool_results {
781 if let Some(record) = mutation_record_from_mana_result(result) {
782 review.push(record);
783 }
784 }
785}
786
787fn tool_results_indicate_repeated_action(tool_results: &[imp_llm::ToolResultMessage]) -> bool {
788 tool_results.iter().any(|result| {
789 result.is_error
790 && result.content.iter().any(|block| match block {
791 ContentBlock::Text { text } => {
792 text.contains("Blocked: identical tool call repeated")
793 }
794 _ => false,
795 })
796 })
797}
798
799fn tool_results_indicate_execution_blocker(
800 tool_results: &[imp_llm::ToolResultMessage],
801 mode: AgentMode,
802) -> Option<StopReason> {
803 if !matches!(
804 mode,
805 AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
806 ) {
807 return None;
808 }
809
810 let saw_edit_like_success = tool_results.iter().any(|result| {
811 !result.is_error && matches!(result.tool_name.as_str(), "write" | "edit" | "multi_edit")
812 });
813
814 for result in tool_results {
815 let action = result.details.get("action").and_then(|v| v.as_str());
816
817 if action == Some("verify")
818 && result.details.get("passed").and_then(|v| v.as_bool()) == Some(false)
819 {
820 return Some(StopReason::ExecutionBlocked);
821 }
822
823 if result.tool_name == "ask_user" && !result.is_error {
824 return Some(StopReason::UserBlocker);
825 }
826
827 if result.tool_name == "bash" || result.tool_name == "shell" {
828 let exit_code = result.details.get("exit_code").and_then(|v| v.as_i64());
829 let timed_out = result.details.get("timed_out").and_then(|v| v.as_bool()) == Some(true);
830 let cancelled = result.details.get("cancelled").and_then(|v| v.as_bool()) == Some(true);
831 let command = result
832 .details
833 .get("command")
834 .and_then(|v| v.as_str())
835 .unwrap_or("")
836 .to_ascii_lowercase();
837 let looks_like_check = command.contains("check")
838 || command.contains("test")
839 || command.contains("verify")
840 || command.contains("pytest")
841 || command.contains("cargo test")
842 || command.contains("cargo check");
843
844 if looks_like_check
845 && (timed_out || cancelled || exit_code.is_some_and(|code| code != 0))
846 {
847 return Some(StopReason::ExecutionBlocked);
848 }
849
850 if saw_edit_like_success
851 && (timed_out || cancelled || exit_code.is_some_and(|code| code != 0))
852 {
853 return Some(StopReason::ExecutionBlocked);
854 }
855 }
856 }
857
858 None
859}
860
861fn tool_results_indicate_execution_debt(
862 tool_results: &[imp_llm::ToolResultMessage],
863 mode: AgentMode,
864) -> bool {
865 if !matches!(
866 mode,
867 AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
868 ) {
869 return false;
870 }
871
872 tool_results.iter().any(|result| {
873 !result.is_error
874 && result.tool_name == "mana"
875 && result
876 .details
877 .get("action")
878 .and_then(|v| v.as_str())
879 .is_some_and(|action| {
880 matches!(
881 mana_loop::classify_mana_action(action),
882 mana_loop::ManaActionClass::ProgressCheckpoint
883 | mana_loop::ManaActionClass::GraphMutation
884 | mana_loop::ManaActionClass::DecisionFact
885 )
886 })
887 })
888}
889
890fn tool_results_indicate_execution_evidence(
891 tool_results: &[imp_llm::ToolResultMessage],
892 mode: AgentMode,
893) -> bool {
894 if !matches!(
895 mode,
896 AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
897 ) {
898 return false;
899 }
900
901 tool_results.iter().any(|result| {
902 if result.is_error {
903 return false;
904 }
905
906 match result.tool_name.as_str() {
907 "write" | "edit" | "multi_edit" | "openrouter_secret_run" => true,
908 "bash" | "shell" => true,
909 "mana" => result
910 .details
911 .get("action")
912 .and_then(|v| v.as_str())
913 .is_some_and(|action| {
914 matches!(
915 mana_loop::classify_mana_action(action),
916 mana_loop::ManaActionClass::Lifecycle
917 | mana_loop::ManaActionClass::Orchestration
918 )
919 }),
920 _ => false,
921 }
922 })
923}
924
925fn tool_results_indicate_work_completed(
926 tool_results: &[imp_llm::ToolResultMessage],
927 mode: AgentMode,
928) -> bool {
929 if !matches!(
930 mode,
931 AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
932 ) {
933 return false;
934 }
935
936 let mut saw_edit_like_success = false;
937 let mut saw_successful_check = false;
938
939 for result in tool_results {
940 if result.is_error {
941 continue;
942 }
943
944 if matches!(result.tool_name.as_str(), "write" | "edit" | "multi_edit") {
945 saw_edit_like_success = true;
946 }
947
948 let action = result.details.get("action").and_then(|v| v.as_str());
949 let has_closed_unit = result
950 .details
951 .get("unit")
952 .and_then(|unit| unit.get("status"))
953 .and_then(|v| v.as_str())
954 == Some("closed");
955
956 if let Some(command) = result.details.get("command").and_then(|v| v.as_str()) {
957 let exit_code_ok = result.details.get("exit_code").and_then(|v| v.as_i64()) == Some(0);
958 let command_lower = command.to_ascii_lowercase();
959 let looks_like_check = command_lower.contains("check")
960 || command_lower.contains("test")
961 || command_lower.contains("verify")
962 || command_lower.contains("pytest")
963 || command_lower.contains("cargo test")
964 || command_lower.contains("cargo check");
965 if exit_code_ok && looks_like_check {
966 saw_successful_check = true;
967 }
968 }
969
970 match action {
971 Some("close") => return true,
972 Some("verify")
973 if result.details.get("passed").and_then(|v| v.as_bool()) == Some(true) =>
974 {
975 return true;
976 }
977 _ if has_closed_unit => return true,
978 _ => {}
979 }
980 }
981
982 saw_edit_like_success && saw_successful_check
983}
984
985fn mana_review_stop_reason(mana_review: &TurnManaReview, mode: AgentMode) -> Option<StopReason> {
986 match mana_review.state {
987 ManaReviewState::NeedsDecision => Some(StopReason::UserBlocker),
988 ManaReviewState::Changed if matches!(mode, AgentMode::Planner) => {
989 if !mana_review.proposed_children.is_empty()
990 || !mana_review.touched_units.is_empty()
991 || !mana_review.material_field_changes.is_empty()
992 || !mana_review.notes_appended.is_empty()
993 || !mana_review.decision_events.is_empty()
994 {
995 Some(StopReason::DecompositionCompleted)
996 } else {
997 None
998 }
999 }
1000 _ => None,
1001 }
1002}
1003
1004fn planner_stop_reason(message: &AssistantMessage, mode: AgentMode) -> Option<StopReason> {
1005 if !matches!(mode, AgentMode::Planner) {
1006 return None;
1007 }
1008
1009 classify_stop_reason_from_text(message, true)
1010}
1011
1012fn execution_stop_reason(message: &AssistantMessage, mode: AgentMode) -> Option<StopReason> {
1013 if !matches!(
1014 mode,
1015 AgentMode::Full | AgentMode::Orchestrator | AgentMode::Worker
1016 ) {
1017 return None;
1018 }
1019
1020 match classify_stop_reason_from_text(message, false) {
1021 Some(reason @ (StopReason::UserBlocker | StopReason::WorkCompleted)) => Some(reason),
1022 _ => None,
1023 }
1024}
1025
1026fn classify_stop_reason_from_text(
1027 message: &AssistantMessage,
1028 planner_mode: bool,
1029) -> Option<StopReason> {
1030 let text = assistant_message_text(message);
1031 if text.trim().is_empty() {
1032 return None;
1033 }
1034
1035 let lower = text.to_ascii_lowercase();
1036
1037 let blocker_signal = [
1038 "blocked",
1039 "need your input",
1040 "which should",
1041 "waiting on you",
1042 "approval",
1043 "before i continue",
1044 "before continuing",
1045 ]
1046 .iter()
1047 .any(|needle| lower.contains(needle));
1048 if blocker_signal {
1049 return Some(StopReason::UserBlocker);
1050 }
1051
1052 if planner_mode {
1053 let decomposition_complete_signal = [
1054 "externalized into mana",
1055 "created the units",
1056 "created child units",
1057 "decomposition is complete",
1058 "plan is complete",
1059 "ready for handoff",
1060 ]
1061 .iter()
1062 .any(|needle| lower.contains(needle));
1063 if decomposition_complete_signal {
1064 return Some(StopReason::DecompositionCompleted);
1065 }
1066 } else {
1067 let work_complete_signal = [
1068 "all done",
1069 "done",
1070 "completed",
1071 "finished",
1072 "implemented",
1073 "fixed",
1074 "handled",
1075 ]
1076 .iter()
1077 .any(|needle| lower.contains(needle));
1078 if work_complete_signal {
1079 return Some(StopReason::WorkCompleted);
1080 }
1081 }
1082
1083 None
1084}
1085
1086fn build_assistant_message(
1089 content: &[ContentBlock],
1090 tool_calls: &[(String, String, serde_json::Value)],
1091 usage: Option<Usage>,
1092) -> AssistantMessage {
1093 let stop_reason = if tool_calls.is_empty() {
1094 LlmStopReason::EndTurn
1095 } else {
1096 LlmStopReason::ToolUse
1097 };
1098
1099 AssistantMessage {
1100 content: content.to_vec(),
1101 usage,
1102 stop_reason,
1103 timestamp: imp_llm::now(),
1104 }
1105}
1106
1107fn clone_model(model: &Model) -> Model {
1108 Model {
1109 meta: model.meta.clone(),
1110 provider: Arc::clone(&model.provider),
1111 }
1112}
1113
1114fn extract_file_path(cwd: &Path, args: &serde_json::Value) -> Option<PathBuf> {
1115 let raw_path = args.get("path")?.as_str()?;
1116 if raw_path.is_empty() {
1117 return None;
1118 }
1119
1120 let path = PathBuf::from(raw_path);
1121 if path.is_absolute() {
1122 Some(path)
1123 } else {
1124 Some(cwd.join(path))
1125 }
1126}
1127
/// Returns a hint to use the native mana tool when `command` is a `mana` CLI
/// invocation with a subcommand the native tool covers.
///
/// The command must, after trimming, start with the word `mana` followed by
/// whitespace and one of the listed subcommands. Anything else (including a
/// bare `mana` or a longer word such as `manage`) yields `None`.
fn mana_bash_equivalent_hint(command: &str) -> Option<&'static str> {
    const HINT: &str = "Use the native mana tool instead of `bash` for this mana command. For orchestration, the native tool supports canonical target selection (`id`, `targets`, or all ready work) plus background run tracking.";
    const NATIVE_ACTIONS: &[&str] = &[
        "status",
        "list",
        "ls",
        "show",
        "read",
        "create",
        "close",
        "update",
        "run",
        "run_state",
        "evaluate",
        "agents",
        "logs",
        "next",
        "claim",
        "release",
        "tree",
    ];

    let rest = command.trim().strip_prefix("mana")?;
    // `mana` must be a whole word, not the prefix of a longer one.
    if !rest.is_empty() && !rest.starts_with(char::is_whitespace) {
        return None;
    }

    let action = rest.split_whitespace().next()?;
    NATIVE_ACTIONS.contains(&action).then_some(HINT)
}
1144
1145fn mana_skill_follow_up_hint(
1146 prompt: &str,
1147 mode: AgentMode,
1148 tools_available: bool,
1149 _has_mana_skill: bool,
1150 _has_mana_basics_skill: bool,
1151 _has_mana_delegation_skill: bool,
1152) -> Option<&'static str> {
1153 if !tools_available {
1154 return None;
1155 }
1156
1157 let lower = prompt.to_ascii_lowercase();
1158
1159 let orchestration_signal = [
1160 "decompose",
1161 "decomposition",
1162 "split this",
1163 "break this up",
1164 "break it up",
1165 "parallel",
1166 "parallel helper",
1167 "bounded helper",
1168 "orchestrate",
1169 "orchestration",
1170 "create a unit",
1171 "create units",
1172 "mana run",
1173 ]
1174 .iter()
1175 .any(|needle| lower.contains(needle));
1176
1177 let mana_signal = [
1178 " mana ",
1179 "mana status",
1180 "mana list",
1181 "mana show",
1182 "mana update",
1183 "mana create",
1184 "mana run",
1185 ]
1186 .iter()
1187 .any(|needle| lower.contains(needle));
1188
1189 match mode {
1190 AgentMode::Full | AgentMode::Orchestrator | AgentMode::Planner
1191 if orchestration_signal || mana_signal =>
1192 {
1193 Some("Before you continue: use native mana `guide` or `template` actions if you need extra help with unit design, decomposition, retries, or worker handoff.")
1194 }
1195 AgentMode::Worker | AgentMode::Auditor if mana_signal => {
1196 Some("Before you continue: use the native mana tool and stay within this mode's allowed mana workflow. Use the `guide` action if you need help.")
1197 }
1198 _ => None,
1199 }
1200}
1201
1202#[cfg(test)]
1203mod tests {
1204 use super::*;
1205 use crate::agent::turn_assessment::NextAction;
1206 use crate::builder::AgentBuilder;
1207 use std::pin::Pin;
1208 use std::sync::{
1209 atomic::{AtomicUsize, Ordering},
1210 Arc, Mutex as StdMutex,
1211 };
1212 use std::time::Duration;
1213
1214 use async_trait::async_trait;
1215 use futures_core::Stream;
1216 use imp_llm::auth::{ApiKey, AuthStore};
1217 use imp_llm::model::{Capabilities, ModelMeta, ModelPricing};
1218 use imp_llm::provider::Provider;
1219 use imp_llm::ToolResultMessage;
1220 use tokio::sync::{Mutex, Notify};
1221
1222 struct MockProvider {
1225 responses: Mutex<Vec<Vec<imp_llm::Result<StreamEvent>>>>,
1226 }
1227
1228 impl MockProvider {
1229 fn new(responses: Vec<Vec<StreamEvent>>) -> Self {
1230 Self {
1231 responses: Mutex::new(
1232 responses
1233 .into_iter()
1234 .map(|events| events.into_iter().map(Ok).collect())
1235 .collect(),
1236 ),
1237 }
1238 }
1239
1240 fn new_results(responses: Vec<Vec<imp_llm::Result<StreamEvent>>>) -> Self {
1241 Self {
1242 responses: Mutex::new(responses),
1243 }
1244 }
1245 }
1246
1247 #[async_trait]
1248 impl Provider for MockProvider {
1249 fn stream(
1250 &self,
1251 _model: &Model,
1252 _context: Context,
1253 _options: RequestOptions,
1254 _api_key: &str,
1255 ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<StreamEvent>> + Send>> {
1256 let mut responses = self.responses.try_lock().expect("MockProvider lock");
1259 let events = if responses.is_empty() {
1260 vec![Ok(StreamEvent::Error {
1261 error: "No more mock responses".to_string(),
1262 })]
1263 } else {
1264 responses.remove(0)
1265 };
1266 let stream = futures::stream::iter(events);
1267 Box::pin(stream)
1268 }
1269
1270 async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
1271 Ok("mock-key".to_string())
1272 }
1273
1274 fn id(&self) -> &str {
1275 "mock"
1276 }
1277
1278 fn models(&self) -> &[ModelMeta] {
1279 &[]
1280 }
1281 }
1282
1283 fn test_model(provider: Arc<dyn Provider>) -> Model {
1284 test_model_with_context_window(provider, 200_000)
1285 }
1286
1287 fn test_model_with_context_window(provider: Arc<dyn Provider>, context_window: u32) -> Model {
1288 Model {
1289 meta: ModelMeta {
1290 id: "test-model".to_string(),
1291 provider: "mock".to_string(),
1292 name: "Test Model".to_string(),
1293 context_window,
1294 max_output_tokens: 16_384,
1295 pricing: ModelPricing {
1296 input_per_mtok: 3.0,
1297 output_per_mtok: 15.0,
1298 cache_read_per_mtok: 0.3,
1299 cache_write_per_mtok: 3.75,
1300 },
1301 capabilities: Capabilities {
1302 reasoning: true,
1303 images: false,
1304 tool_use: true,
1305 },
1306 },
1307 provider,
1308 }
1309 }
1310
1311 fn text_response(text: &str, input_tokens: u32, output_tokens: u32) -> Vec<StreamEvent> {
1312 vec![
1313 StreamEvent::MessageStart {
1314 model: "test-model".to_string(),
1315 },
1316 StreamEvent::TextDelta {
1317 text: text.to_string(),
1318 },
1319 StreamEvent::MessageEnd {
1320 message: AssistantMessage {
1321 content: vec![ContentBlock::Text {
1322 text: text.to_string(),
1323 }],
1324 usage: Some(Usage {
1325 input_tokens,
1326 output_tokens,
1327 cache_read_tokens: 0,
1328 cache_write_tokens: 0,
1329 }),
1330 stop_reason: LlmStopReason::EndTurn,
1331 timestamp: 1000,
1332 },
1333 },
1334 ]
1335 }
1336
1337 fn tool_call_response(
1338 call_id: &str,
1339 tool_name: &str,
1340 args: serde_json::Value,
1341 input_tokens: u32,
1342 output_tokens: u32,
1343 ) -> Vec<StreamEvent> {
1344 vec![
1345 StreamEvent::MessageStart {
1346 model: "test-model".to_string(),
1347 },
1348 StreamEvent::ToolCall {
1349 id: call_id.to_string(),
1350 name: tool_name.to_string(),
1351 arguments: args.clone(),
1352 },
1353 StreamEvent::MessageEnd {
1354 message: AssistantMessage {
1355 content: vec![ContentBlock::ToolCall {
1356 id: call_id.to_string(),
1357 name: tool_name.to_string(),
1358 arguments: args,
1359 }],
1360 usage: Some(Usage {
1361 input_tokens,
1362 output_tokens,
1363 cache_read_tokens: 0,
1364 cache_write_tokens: 0,
1365 }),
1366 stop_reason: LlmStopReason::ToolUse,
1367 timestamp: 1000,
1368 },
1369 },
1370 ]
1371 }
1372
1373 fn multi_tool_call_response(
1374 calls: &[(&str, &str, serde_json::Value)],
1375 input_tokens: u32,
1376 output_tokens: u32,
1377 ) -> Vec<StreamEvent> {
1378 let mut events = vec![StreamEvent::MessageStart {
1379 model: "test-model".to_string(),
1380 }];
1381
1382 let mut content = Vec::new();
1383 for (id, name, args) in calls {
1384 events.push(StreamEvent::ToolCall {
1385 id: id.to_string(),
1386 name: name.to_string(),
1387 arguments: args.clone(),
1388 });
1389 content.push(ContentBlock::ToolCall {
1390 id: id.to_string(),
1391 name: name.to_string(),
1392 arguments: args.clone(),
1393 });
1394 }
1395
1396 events.push(StreamEvent::MessageEnd {
1397 message: AssistantMessage {
1398 content,
1399 usage: Some(Usage {
1400 input_tokens,
1401 output_tokens,
1402 cache_read_tokens: 0,
1403 cache_write_tokens: 0,
1404 }),
1405 stop_reason: LlmStopReason::ToolUse,
1406 timestamp: 1000,
1407 },
1408 });
1409
1410 events
1411 }
1412
1413 fn make_assistant_tool_call(
1414 call_id: &str,
1415 tool_name: &str,
1416 args: serde_json::Value,
1417 ) -> Message {
1418 Message::Assistant(AssistantMessage {
1419 content: vec![ContentBlock::ToolCall {
1420 id: call_id.to_string(),
1421 name: tool_name.to_string(),
1422 arguments: args,
1423 }],
1424 usage: None,
1425 stop_reason: LlmStopReason::ToolUse,
1426 timestamp: imp_llm::now(),
1427 })
1428 }
1429
1430 fn make_tool_result(call_id: &str, tool_name: &str, output: &str) -> Message {
1431 Message::ToolResult(imp_llm::ToolResultMessage {
1432 tool_call_id: call_id.to_string(),
1433 tool_name: tool_name.to_string(),
1434 content: vec![ContentBlock::Text {
1435 text: output.to_string(),
1436 }],
1437 is_error: false,
1438 details: serde_json::Value::Null,
1439 timestamp: imp_llm::now(),
1440 })
1441 }
1442
1443 fn tool_result_text(message: &Message) -> Option<&str> {
1444 match message {
1445 Message::ToolResult(result) => result.content.iter().find_map(|block| match block {
1446 ContentBlock::Text { text } => Some(text.as_str()),
1447 _ => None,
1448 }),
1449 _ => None,
1450 }
1451 }
1452
1453 struct EchoTool;
1455
1456 #[async_trait]
1457 impl crate::tools::Tool for EchoTool {
1458 fn name(&self) -> &str {
1459 "echo"
1460 }
1461 fn label(&self) -> &str {
1462 "Echo"
1463 }
1464 fn description(&self) -> &str {
1465 "Echoes back the input"
1466 }
1467 fn parameters(&self) -> serde_json::Value {
1468 serde_json::json!({
1469 "type": "object",
1470 "properties": {
1471 "text": { "type": "string" }
1472 },
1473 "required": ["text"]
1474 })
1475 }
1476 fn is_readonly(&self) -> bool {
1477 true
1478 }
1479 async fn execute(
1480 &self,
1481 _call_id: &str,
1482 params: serde_json::Value,
1483 _ctx: crate::tools::ToolContext,
1484 ) -> crate::error::Result<crate::tools::ToolOutput> {
1485 let text = params["text"].as_str().unwrap_or("no text");
1486 Ok(crate::tools::ToolOutput::text(format!("echo: {text}")))
1487 }
1488 }
1489
1490 #[allow(dead_code)]
1492 struct WriteTool;
1493
1494 #[async_trait]
1495 impl crate::tools::Tool for WriteTool {
1496 fn name(&self) -> &str {
1497 "write"
1498 }
1499 fn label(&self) -> &str {
1500 "Write"
1501 }
1502 fn description(&self) -> &str {
1503 "Writes data"
1504 }
1505 fn parameters(&self) -> serde_json::Value {
1506 serde_json::json!({
1507 "type": "object",
1508 "properties": {
1509 "data": { "type": "string" }
1510 },
1511 "required": ["data"]
1512 })
1513 }
1514 fn is_readonly(&self) -> bool {
1515 false
1516 }
1517 async fn execute(
1518 &self,
1519 _call_id: &str,
1520 params: serde_json::Value,
1521 _ctx: crate::tools::ToolContext,
1522 ) -> crate::error::Result<crate::tools::ToolOutput> {
1523 let data = params["data"].as_str().unwrap_or("no data");
1524 Ok(crate::tools::ToolOutput::text(format!("wrote: {data}")))
1525 }
1526 }
1527
1528 struct ConcurrentReadonlyState {
1529 readonly_expected: usize,
1530 readonly_started: AtomicUsize,
1531 readonly_finished: AtomicUsize,
1532 mutable_observed_finished: AtomicUsize,
1533 log: StdMutex<Vec<String>>,
1534 notify: Notify,
1535 }
1536
1537 impl ConcurrentReadonlyState {
1538 fn new(readonly_expected: usize) -> Self {
1539 Self {
1540 readonly_expected,
1541 readonly_started: AtomicUsize::new(0),
1542 readonly_finished: AtomicUsize::new(0),
1543 mutable_observed_finished: AtomicUsize::new(0),
1544 log: StdMutex::new(Vec::new()),
1545 notify: Notify::new(),
1546 }
1547 }
1548
1549 fn record(&self, entry: impl Into<String>) {
1550 self.log
1551 .lock()
1552 .expect("concurrent log lock")
1553 .push(entry.into());
1554 }
1555
1556 async fn wait_for_all_readonly_to_start(&self) {
1557 while self.readonly_started.load(Ordering::SeqCst) < self.readonly_expected {
1558 self.notify.notified().await;
1559 }
1560 }
1561 }
1562
1563 struct CoordinatedReadonlyTool {
1564 name: &'static str,
1565 shared: Arc<ConcurrentReadonlyState>,
1566 }
1567
1568 #[async_trait]
1569 impl crate::tools::Tool for CoordinatedReadonlyTool {
1570 fn name(&self) -> &str {
1571 self.name
1572 }
1573 fn label(&self) -> &str {
1574 self.name
1575 }
1576 fn description(&self) -> &str {
1577 "Read-only tool used to verify concurrent execution"
1578 }
1579 fn parameters(&self) -> serde_json::Value {
1580 serde_json::json!({
1581 "type": "object",
1582 "properties": {
1583 "text": { "type": "string" }
1584 },
1585 "required": ["text"]
1586 })
1587 }
1588 fn is_readonly(&self) -> bool {
1589 true
1590 }
1591 async fn execute(
1592 &self,
1593 _call_id: &str,
1594 params: serde_json::Value,
1595 _ctx: crate::tools::ToolContext,
1596 ) -> crate::error::Result<crate::tools::ToolOutput> {
1597 self.shared.record(format!("{}:start", self.name));
1598 self.shared.readonly_started.fetch_add(1, Ordering::SeqCst);
1599 self.shared.notify.notify_waiters();
1600 self.shared.wait_for_all_readonly_to_start().await;
1601 self.shared.record(format!("{}:end", self.name));
1602 self.shared.readonly_finished.fetch_add(1, Ordering::SeqCst);
1603
1604 let text = params["text"].as_str().unwrap_or(self.name);
1605 Ok(crate::tools::ToolOutput::text(format!(
1606 "{}: {text}",
1607 self.name
1608 )))
1609 }
1610 }
1611
1612 struct CoordinatedMutableTool {
1613 shared: Arc<ConcurrentReadonlyState>,
1614 }
1615
1616 #[async_trait]
1617 impl crate::tools::Tool for CoordinatedMutableTool {
1618 fn name(&self) -> &str {
1619 "write_after_reads"
1620 }
1621 fn label(&self) -> &str {
1622 "Write After Reads"
1623 }
1624 fn description(&self) -> &str {
1625 "Mutable tool used to verify read-only tools finish first"
1626 }
1627 fn parameters(&self) -> serde_json::Value {
1628 serde_json::json!({
1629 "type": "object",
1630 "properties": {
1631 "data": { "type": "string" }
1632 },
1633 "required": ["data"]
1634 })
1635 }
1636 fn is_readonly(&self) -> bool {
1637 false
1638 }
1639 async fn execute(
1640 &self,
1641 _call_id: &str,
1642 params: serde_json::Value,
1643 _ctx: crate::tools::ToolContext,
1644 ) -> crate::error::Result<crate::tools::ToolOutput> {
1645 let finished = self.shared.readonly_finished.load(Ordering::SeqCst);
1646 self.shared
1647 .mutable_observed_finished
1648 .store(finished, Ordering::SeqCst);
1649 self.shared.record("write_after_reads:start");
1650
1651 let data = params["data"].as_str().unwrap_or("no data");
1652 Ok(crate::tools::ToolOutput::text(format!(
1653 "wrote after reads: {data}"
1654 )))
1655 }
1656 }
1657
1658 async fn collect_events(mut handle: AgentHandle) -> Vec<AgentEvent> {
1660 let mut events = Vec::new();
1661 while let Some(event) = handle.event_rx.recv().await {
1662 events.push(event);
1663 }
1664 events
1665 }
1666
/// Runs a planner-mode agent on a decomposition-style prompt and checks that the
/// conversation ends up with exactly one user message: the original prompt.
#[test]
fn agent_queues_mana_hint_for_planner_requests() {
    let scripted = vec![
        text_response("Loaded mana skill", 100, 20),
        text_response("Done", 120, 25),
    ];
    let model = test_model(Arc::new(MockProvider::new(scripted)));

    let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
    agent.mode = AgentMode::Planner;
    agent.has_mana_skill = true;

    let runtime = tokio::runtime::Runtime::new().unwrap();
    runtime
        .block_on(agent.run("Please split this into units for workers".to_string()))
        .unwrap();

    // First text block of every user message, in conversation order.
    let mut user_texts: Vec<String> = Vec::new();
    for message in agent.messages.iter() {
        if let Message::User(user) = message {
            let first_text = user.content.iter().find_map(|block| match block {
                ContentBlock::Text { text } => Some(text.clone()),
                _ => None,
            });
            user_texts.extend(first_text);
        }
    }

    assert_eq!(user_texts.len(), 1);
    assert_eq!(user_texts[0], "Please split this into units for workers");
}
1702
1703 #[tokio::test]
1704 async fn agent_queues_mana_externalization_follow_up_after_planning_turn() {
1705 let provider = Arc::new(MockProvider::new(vec![
1706 text_response("Here is the plan: split this into phases, add dependencies, and define verify steps.", 100, 20),
1707 text_response("Externalized into mana.", 120, 25),
1708 ]));
1709
1710 let model = test_model(provider);
1711 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
1712 agent.has_mana_skill = true;
1713 agent.mode = AgentMode::Planner;
1714
1715 agent.run("Plan the rollout".to_string()).await.unwrap();
1716
1717 let user_texts: Vec<String> = agent
1718 .messages
1719 .iter()
1720 .filter_map(|message| match message {
1721 Message::User(user) => user.content.iter().find_map(|block| match block {
1722 ContentBlock::Text { text } => Some(text.clone()),
1723 _ => None,
1724 }),
1725 _ => None,
1726 })
1727 .collect();
1728
1729 assert_eq!(user_texts.len(), 2);
1730 assert_eq!(user_texts[0], "Plan the rollout");
1731 assert!(user_texts[1].contains("externalize the durable plan"));
1732 }
1733
1734 #[tokio::test]
1735 async fn turn_assessment_debug_view_reports_execution_blocker() {
1736 let (agent, _handle) = Agent::new(
1737 test_model(Arc::new(MockProvider::new(vec![]))),
1738 PathBuf::from("/tmp"),
1739 );
1740 let assessment = agent.assess_post_turn(
1741 &AssistantMessage {
1742 content: vec![ContentBlock::Text {
1743 text: "Verify failed.".to_string(),
1744 }],
1745 usage: None,
1746 stop_reason: LlmStopReason::EndTurn,
1747 timestamp: 0,
1748 },
1749 &[imp_llm::ToolResultMessage {
1750 tool_call_id: "call_verify".to_string(),
1751 tool_name: "mana".to_string(),
1752 content: vec![ContentBlock::Text {
1753 text: "Verify failed".to_string(),
1754 }],
1755 is_error: true,
1756 details: serde_json::json!({
1757 "action": "verify",
1758 "passed": false,
1759 "exit_code": 1
1760 }),
1761 timestamp: 0,
1762 }],
1763 true,
1764 &TurnManaReview::no_change(0),
1765 );
1766
1767 let debug = assessment.debug_view();
1768 assert_eq!(
1769 debug.runtime.execution_stop_reason.as_deref(),
1770 Some("execution_blocked")
1771 );
1772 assert_eq!(
1773 debug.chosen_action,
1774 NextActionDebugView::Stop {
1775 reason: "execution_blocked".to_string(),
1776 }
1777 );
1778 }
1779
/// With no stop evidence at all, the debug view must report the continue
/// recommendation and choose a `Continue` action.
#[test]
fn turn_assessment_debug_view_reports_continue_recommendation() {
    let runtime = RuntimeEvidence {
        repeated_action: false,
        execution_stop_reason: None,
        work_completed: false,
        execution_debt: false,
        execution_evidence: false,
        planning_only_progress: false,
    };
    let text_fallback = TextFallbackEvidence {
        planner_stop_reason: None,
        execution_stop_reason: None,
    };
    let continue_recommendation = Some(ContinueRecommendation {
        prompt: "continue".to_string(),
        reason: ContinueReason::HighConfidenceVisibleNextStep,
    });
    let assessment = PostTurnAssessment {
        runtime,
        mana: ManaEvidence { stop_reason: None },
        text_fallback,
        continue_recommendation,
    };

    let debug = assessment.debug_view();

    let recommendation = debug
        .continue_recommendation
        .expect("continue recommendation present");
    assert_eq!(recommendation.reason, "high_confidence_visible_next_step");
    assert!(matches!(
        debug.chosen_action,
        NextActionDebugView::Continue { .. }
    ));
}
1812
1813 #[tokio::test]
1814 async fn agent_run_artifacts_writes_trace_and_evidence_packet() {
1815 let temp = tempfile::TempDir::new().unwrap();
1816 let provider = Arc::new(MockProvider::new(vec![text_response("done", 10, 5)]));
1817 let model = test_model(provider);
1818 let (mut agent, _handle) = AgentBuilder::new(
1819 Config::default(),
1820 temp.path().to_path_buf(),
1821 model,
1822 String::new(),
1823 )
1824 .verify_command("printf verify-ok", true)
1825 .build()
1826 .unwrap();
1827 agent.workflow_contract.autonomy_mode = crate::workflow::AutonomyMode::AllowAll;
1828
1829 agent.run("Do the work".to_string()).await.unwrap();
1830
1831 let runs_dir = temp.path().join(".imp").join("runs");
1832 let mut runs = std::fs::read_dir(&runs_dir)
1833 .unwrap()
1834 .collect::<Result<Vec<_>, _>>()
1835 .unwrap();
1836 assert_eq!(runs.len(), 1);
1837 let run_dir = runs.pop().unwrap().path();
1838 let trace = std::fs::read_to_string(run_dir.join("trace.jsonl")).unwrap();
1839 assert!(trace.contains("agent.start"));
1840 assert!(trace.contains("agent.end"));
1841 let evidence = std::fs::read_to_string(run_dir.join("evidence.md")).unwrap();
1842 assert!(evidence.contains("# Evidence Packet"));
1843 assert!(evidence.contains("Do the work"));
1844 assert!(evidence.contains("**Autonomy:** allow-all"));
1845 assert!(evidence.contains("allow-all mode was active"));
1846 assert!(evidence.contains("hard-rail bypass: none recorded"));
1847 assert!(evidence.contains("policy.checked trace events"));
1848 assert!(evidence.contains("trace.jsonl"));
1849 assert!(evidence.contains("verify-ok"));
1850 assert!(evidence.contains("passed"));
1851 assert!(run_dir.join("verification/verify-1/status.json").exists());
1852 assert!(run_dir.join("workflow-contract.json").exists());
1853 }
1854
1855 #[tokio::test]
1856 async fn default_safe_compatibility_allows_readonly_tool() {
1857 let provider = Arc::new(MockProvider::new(vec![
1858 tool_call_response(
1859 "call_read",
1860 "echo",
1861 serde_json::json!({"text": "hello"}),
1862 100,
1863 30,
1864 ),
1865 text_response("done", 100, 10),
1866 ]));
1867 let model = test_model(provider);
1868 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
1869 agent.tools.register(Arc::new(EchoTool));
1870
1871 let events_task = tokio::spawn(collect_events(handle));
1872 agent.run("Echo hello".to_string()).await.unwrap();
1873 drop(agent);
1874 let events = events_task.await.unwrap();
1875
1876 let policy = first_policy_record(&events).expect("policy checked");
1877 assert_eq!(policy.autonomy_mode, crate::workflow::AutonomyMode::Safe);
1878 assert!(policy.decision.is_allowed());
1879 let result = first_tool_result(&events).expect("tool end event");
1880 assert!(!result.is_error);
1881 assert_eq!(
1882 tool_result_text(&Message::ToolResult(result.clone())),
1883 Some("echo: hello")
1884 );
1885 }
1886
1887 #[tokio::test]
1888 async fn default_safe_compatibility_allows_write_tool() {
1889 let provider = Arc::new(MockProvider::new(vec![
1890 tool_call_response(
1891 "call_write",
1892 "write",
1893 serde_json::json!({"data": "hello"}),
1894 100,
1895 30,
1896 ),
1897 text_response("done", 100, 10),
1898 ]));
1899 let model = test_model(provider);
1900 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
1901 agent.tools.register(Arc::new(WriteTool));
1902
1903 let events_task = tokio::spawn(collect_events(handle));
1904 agent.run("Write hello".to_string()).await.unwrap();
1905 drop(agent);
1906 let events = events_task.await.unwrap();
1907
1908 let policy = first_policy_record(&events).expect("policy checked");
1909 assert_eq!(policy.autonomy_mode, crate::workflow::AutonomyMode::Safe);
1910 assert!(policy.decision.is_allowed());
1911 let result = first_tool_result(&events).expect("tool end event");
1912 assert!(!result.is_error);
1913 assert_eq!(
1914 tool_result_text(&Message::ToolResult(result.clone())),
1915 Some("wrote: hello")
1916 );
1917 }
1918
1919 #[tokio::test]
1920 async fn default_safe_compatibility_preserves_run_policy_tool_deny() {
1921 let provider = Arc::new(MockProvider::new(vec![
1922 tool_call_response(
1923 "call_echo",
1924 "echo",
1925 serde_json::json!({"text": "hello"}),
1926 100,
1927 30,
1928 ),
1929 text_response("done", 100, 10),
1930 ]));
1931 let model = test_model(provider);
1932 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
1933 agent.tools.register(Arc::new(EchoTool));
1934 agent.run_policy = crate::policy::RunPolicy::new().deny_tool("echo");
1935
1936 let events_task = tokio::spawn(collect_events(handle));
1937 agent.run("Echo hello".to_string()).await.unwrap();
1938 drop(agent);
1939 let events = events_task.await.unwrap();
1940
1941 let policy = first_policy_record(&events).expect("policy checked");
1942 assert_eq!(policy.autonomy_mode, crate::workflow::AutonomyMode::Safe);
1943 assert!(matches!(
1944 policy.decision,
1945 crate::reference_monitor::ToolPolicyDecision::Deny { .. }
1946 ));
1947 let result = first_tool_result(&events).expect("tool end event");
1948 assert!(result.is_error);
1949 assert_eq!(
1950 tool_result_text(&Message::ToolResult(result.clone())),
1951 Some("Tool `echo` denied by run policy.")
1952 );
1953 }
1954
1955 #[tokio::test]
1956 async fn default_safe_compatibility_preserves_agent_mode_tool_deny() {
1957 let provider = Arc::new(MockProvider::new(vec![
1958 tool_call_response(
1959 "call_write",
1960 "write",
1961 serde_json::json!({"data": "hello"}),
1962 100,
1963 30,
1964 ),
1965 text_response("done", 100, 10),
1966 ]));
1967 let model = test_model(provider);
1968 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
1969 agent.mode = AgentMode::Reviewer;
1970 agent.tools.register(Arc::new(WriteTool));
1971
1972 let events_task = tokio::spawn(collect_events(handle));
1973 agent.run("Write hello".to_string()).await.unwrap();
1974 drop(agent);
1975 let events = events_task.await.unwrap();
1976
1977 let policy = first_policy_record(&events).expect("policy checked");
1978 assert_eq!(policy.autonomy_mode, crate::workflow::AutonomyMode::Safe);
1979 assert!(matches!(
1980 policy.decision,
1981 crate::reference_monitor::ToolPolicyDecision::Deny { .. }
1982 ));
1983 let result = first_tool_result(&events).expect("tool end event");
1984 assert!(result.is_error);
1985 assert_eq!(
1986 tool_result_text(&Message::ToolResult(result.clone())),
1987 Some("Tool 'write' is not available in reviewer mode")
1988 );
1989 }
1990
1991 fn first_policy_record(
1992 events: &[AgentEvent],
1993 ) -> Option<&crate::reference_monitor::PolicyTraceRecord> {
1994 events.iter().find_map(|event| match event {
1995 AgentEvent::PolicyChecked { record } => Some(record),
1996 _ => None,
1997 })
1998 }
1999
2000 fn first_tool_result(events: &[AgentEvent]) -> Option<&ToolResultMessage> {
2001 events.iter().find_map(|event| match event {
2002 AgentEvent::ToolExecutionEnd { result, .. } => Some(result),
2003 _ => None,
2004 })
2005 }
2006
2007 #[tokio::test]
2008 async fn tool_execution_policy_routes_run_policy_deny_through_reference_monitor() {
2009 let provider = Arc::new(MockProvider::new(vec![
2010 tool_call_response(
2011 "call_1",
2012 "echo",
2013 serde_json::json!({"text": "hello"}),
2014 100,
2015 30,
2016 ),
2017 text_response("done", 100, 10),
2018 ]));
2019 let model = test_model(provider);
2020 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2021 agent.tools.register(Arc::new(EchoTool));
2022 agent.run_policy = crate::policy::RunPolicy::new().deny_tool("echo");
2023
2024 let events_task = tokio::spawn(collect_events(handle));
2025 agent.run("Echo hello".to_string()).await.unwrap();
2026 drop(agent);
2027 let events = events_task.await.unwrap();
2028
2029 let policy_event = events
2030 .iter()
2031 .find_map(|event| match event {
2032 AgentEvent::PolicyChecked { record } => Some(record),
2033 _ => None,
2034 })
2035 .expect("policy checked event");
2036 assert_eq!(policy_event.tool_name, "echo");
2037 assert!(policy_event.args_hash.is_some());
2038 assert!(matches!(
2039 policy_event.decision,
2040 crate::reference_monitor::ToolPolicyDecision::Deny { .. }
2041 ));
2042
2043 let result = events
2044 .iter()
2045 .find_map(|event| match event {
2046 AgentEvent::ToolExecutionEnd { result, .. } => Some(result),
2047 _ => None,
2048 })
2049 .expect("tool end event");
2050 assert!(result.is_error);
2051 assert_eq!(
2052 tool_result_text(&Message::ToolResult(result.clone())),
2053 Some("Tool `echo` denied by run policy.")
2054 );
2055
2056 let checkpoint = events.iter().rev().find_map(|event| match event {
2057 AgentEvent::RecoveryCheckpoint { checkpoint } => checkpoint.error_class.as_deref(),
2058 _ => None,
2059 });
2060 assert_eq!(checkpoint, Some("run_policy_blocked"));
2061 }
2062
2063 #[tokio::test]
2064 async fn tool_execution_policy_routes_agent_mode_deny_through_reference_monitor() {
2065 let provider = Arc::new(MockProvider::new(vec![
2066 tool_call_response(
2067 "call_1",
2068 "write",
2069 serde_json::json!({"data": "hello"}),
2070 100,
2071 30,
2072 ),
2073 text_response("done", 100, 10),
2074 ]));
2075 let model = test_model(provider);
2076 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2077 agent.mode = AgentMode::Reviewer;
2078 agent.tools.register(Arc::new(WriteTool));
2079
2080 let events_task = tokio::spawn(collect_events(handle));
2081 agent.run("Write hello".to_string()).await.unwrap();
2082 drop(agent);
2083 let events = events_task.await.unwrap();
2084
2085 let policy_event = events
2086 .iter()
2087 .find_map(|event| match event {
2088 AgentEvent::PolicyChecked { record } => Some(record),
2089 _ => None,
2090 })
2091 .expect("policy checked event");
2092 assert_eq!(policy_event.tool_name, "write");
2093 assert_eq!(
2094 policy_event.autonomy_mode,
2095 crate::workflow::AutonomyMode::default()
2096 );
2097 assert!(matches!(
2098 policy_event.decision,
2099 crate::reference_monitor::ToolPolicyDecision::Deny { .. }
2100 ));
2101
2102 let result = events
2103 .iter()
2104 .find_map(|event| match event {
2105 AgentEvent::ToolExecutionEnd { result, .. } => Some(result),
2106 _ => None,
2107 })
2108 .expect("tool end event");
2109 assert!(result.is_error);
2110 assert_eq!(
2111 tool_result_text(&Message::ToolResult(result.clone())),
2112 Some("Tool 'write' is not available in reviewer mode")
2113 );
2114
2115 let checkpoint = events.iter().rev().find_map(|event| match event {
2116 AgentEvent::RecoveryCheckpoint { checkpoint } => checkpoint.error_class.as_deref(),
2117 _ => None,
2118 });
2119 assert_eq!(checkpoint, Some("mode_blocked"));
2120 }
2121
2122 #[tokio::test]
2123 async fn emits_turn_assessment_event_for_execution_blocker() {
2124 let provider = Arc::new(MockProvider::new(vec![
2125 tool_call_response(
2126 "call_check",
2127 "bash",
2128 serde_json::json!({"command": "cargo check -p definitely_missing_crate", "timeout": 1}),
2129 100,
2130 20,
2131 ),
2132 text_response("The check failed.", 120, 20),
2133 ]));
2134
2135 let model = test_model(provider);
2136 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2137 agent.mode = AgentMode::Full;
2138 agent.tools.register(Arc::new(crate::tools::bash::BashTool));
2139
2140 let events_task = tokio::spawn(collect_events(handle));
2141 agent.run("Run the check".to_string()).await.unwrap();
2142 drop(agent);
2143 let events = events_task.await.unwrap();
2144
2145 let assessment = events.iter().find_map(|event| match event {
2146 AgentEvent::TurnAssessment { assessment, .. } => Some(assessment),
2147 _ => None,
2148 });
2149
2150 let assessment = assessment.expect("turn assessment emitted");
2151 assert_eq!(
2152 assessment.runtime.execution_stop_reason.as_deref(),
2153 Some("execution_blocked")
2154 );
2155 assert_eq!(
2156 assessment.chosen_action,
2157 NextActionDebugView::Stop {
2158 reason: "execution_blocked".to_string(),
2159 }
2160 );
2161 }
2162
2163 #[tokio::test]
2164 async fn emits_turn_assessment_event_for_continue_recommendation() {
2165 let provider = Arc::new(MockProvider::new(vec![
2166 vec![
2167 StreamEvent::MessageStart {
2168 model: "test-model".to_string(),
2169 },
2170 StreamEvent::ToolCall {
2171 id: "call_1".to_string(),
2172 name: "mana".to_string(),
2173 arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2174 },
2175 StreamEvent::TextDelta {
2176 text: "Done. Updated mana and next step is ready to continue.".to_string(),
2177 },
2178 StreamEvent::MessageEnd {
2179 message: AssistantMessage {
2180 content: vec![
2181 ContentBlock::ToolCall {
2182 id: "call_1".to_string(),
2183 name: "mana".to_string(),
2184 arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2185 },
2186 ContentBlock::Text {
2187 text: "Done. Updated mana and next step is ready to continue."
2188 .to_string(),
2189 },
2190 ],
2191 usage: Some(Usage {
2192 input_tokens: 100,
2193 output_tokens: 20,
2194 cache_read_tokens: 0,
2195 cache_write_tokens: 0,
2196 }),
2197 stop_reason: LlmStopReason::ToolUse,
2198 timestamp: 1000,
2199 },
2200 },
2201 ],
2202 text_response("Stopped after visible mana turn.", 120, 25),
2203 ]));
2204
2205 let model = test_model(provider);
2206 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2207 agent.mode = AgentMode::Planner;
2208 agent.continue_policy = ContinuePolicy::Balanced;
2209 agent
2210 .tools
2211 .register(Arc::new(crate::tools::mana::ManaTool::default()));
2212
2213 let events_task = tokio::spawn(collect_events(handle));
2214 agent.run("Do the next thing".to_string()).await.unwrap();
2215 drop(agent);
2216 let events = events_task.await.unwrap();
2217
2218 let assessment = events.iter().find_map(|event| match event {
2219 AgentEvent::TurnAssessment { assessment, .. } => Some(assessment),
2220 _ => None,
2221 });
2222
2223 let assessment = assessment.expect("turn assessment emitted");
2224 let recommendation = assessment
2225 .continue_recommendation
2226 .as_ref()
2227 .expect("continue recommendation present");
2228 assert_eq!(recommendation.reason, "high_confidence_visible_next_step");
2229 assert!(matches!(
2230 assessment.chosen_action,
2231 NextActionDebugView::Continue { .. }
2232 ));
2233 }
2234
/// When every kind of evidence is present at once, the runtime execution blocker
/// must win over completion signals and over the continue recommendation.
#[test]
fn post_turn_assessment_prefers_execution_blocker_over_completion() {
    let runtime = RuntimeEvidence {
        repeated_action: false,
        execution_stop_reason: Some(StopReason::ExecutionBlocked),
        work_completed: true,
        execution_debt: false,
        execution_evidence: false,
        planning_only_progress: false,
    };
    let mana = ManaEvidence {
        stop_reason: Some(StopReason::DecompositionCompleted),
    };
    let text_fallback = TextFallbackEvidence {
        planner_stop_reason: Some(StopReason::DecompositionCompleted),
        execution_stop_reason: Some(StopReason::WorkCompleted),
    };
    let continue_recommendation = Some(ContinueRecommendation {
        prompt: "continue".to_string(),
        reason: ContinueReason::HighConfidenceVisibleNextStep,
    });
    let assessment = PostTurnAssessment {
        runtime,
        mana,
        text_fallback,
        continue_recommendation,
    };

    let expected = NextAction::Stop {
        reason: StopReason::ExecutionBlocked,
    };
    assert_eq!(assessment.into_next_action(), expected);
}
2266
/// With no stop reason in any evidence source and a continue recommendation
/// present, the assessment resolves to `Continue` carrying the recommended
/// prompt and reason.
#[test]
fn post_turn_assessment_emits_continue_when_no_stop_reason_exists() {
    let runtime = RuntimeEvidence {
        repeated_action: false,
        execution_stop_reason: None,
        work_completed: false,
        execution_debt: false,
        execution_evidence: false,
        planning_only_progress: false,
    };
    let text_fallback = TextFallbackEvidence {
        planner_stop_reason: None,
        execution_stop_reason: None,
    };
    let continue_recommendation = Some(ContinueRecommendation {
        prompt: String::from("continue"),
        reason: ContinueReason::HighConfidenceVisibleNextStep,
    });

    let assessment = PostTurnAssessment {
        runtime,
        mana: ManaEvidence { stop_reason: None },
        text_fallback,
        continue_recommendation,
    };

    let expected = NextAction::Continue {
        prompt: String::from("continue"),
        reason: ContinueReason::HighConfidenceVisibleNextStep,
    };
    assert_eq!(assessment.into_next_action(), expected);
}
2297
/// With `execution_debt` set, no stop reasons anywhere, and an
/// `ExecutionDebt` continue recommendation, the assessment resolves to a
/// `Continue` that carries the execution-debt follow-up text.
#[test]
fn execution_debt_follow_up_is_preferred_before_stopping_for_planning_only_progress() {
    let runtime = RuntimeEvidence {
        repeated_action: false,
        execution_stop_reason: None,
        work_completed: false,
        execution_debt: true,
        execution_evidence: false,
        planning_only_progress: false,
    };
    let text_fallback = TextFallbackEvidence {
        planner_stop_reason: None,
        execution_stop_reason: None,
    };
    let continue_recommendation = Some(ContinueRecommendation {
        prompt: execution_debt_follow_up_text().to_string(),
        reason: ContinueReason::ExecutionDebt,
    });

    let assessment = PostTurnAssessment {
        runtime,
        mana: ManaEvidence { stop_reason: None },
        text_fallback,
        continue_recommendation,
    };

    let expected = NextAction::Continue {
        prompt: execution_debt_follow_up_text().to_string(),
        reason: ContinueReason::ExecutionDebt,
    };
    assert_eq!(assessment.into_next_action(), expected);
}
2328
/// A successful `mana` tool result with action `create`, evaluated in
/// `AgentMode::Full`, must register as execution debt but not as execution
/// evidence, and the follow-up predicate must fire for the
/// `(true, false, false, true)` input.
#[test]
fn mana_planning_without_execution_creates_execution_debt_follow_up() {
    let mana_create = imp_llm::ToolResultMessage {
        tool_call_id: String::from("call_mana"),
        tool_name: String::from("mana"),
        content: vec![ContentBlock::Text {
            text: String::from("Created task"),
        }],
        is_error: false,
        details: serde_json::json!({ "action": "create" }),
        timestamp: 0,
    };
    // Both predicates inspect the same single-element slice.
    let results = std::slice::from_ref(&mana_create);

    let has_debt = tool_results_indicate_execution_debt(results, AgentMode::Full);
    assert!(has_debt);

    let has_evidence = tool_results_indicate_execution_evidence(results, AgentMode::Full);
    assert!(!has_evidence);

    assert!(should_queue_execution_debt_follow_up(
        true, false, false, true
    ));
}
2354
/// A successful `edit` tool result counts as execution evidence in
/// `AgentMode::Full`, and the follow-up predicate does not fire for the
/// `(true, true, false, true)` input.
#[test]
fn mutating_tool_call_satisfies_execution_evidence() {
    let edit_result = imp_llm::ToolResultMessage {
        tool_call_id: String::from("call_edit"),
        tool_name: String::from("edit"),
        content: vec![ContentBlock::Text {
            text: String::from("diff"),
        }],
        is_error: false,
        details: serde_json::json!({ "path": "src/lib.rs" }),
        timestamp: 0,
    };

    let results = [edit_result];
    let has_evidence = tool_results_indicate_execution_evidence(&results, AgentMode::Full);
    assert!(has_evidence);

    let queues_follow_up = should_queue_execution_debt_follow_up(true, true, false, true);
    assert!(!queues_follow_up);
}
2376
/// An errored `mana` result whose details report a failed `verify`
/// (`passed: false`, `exit_code: 1`) is classified as `ExecutionBlocked`.
#[test]
fn tool_results_indicate_execution_blocker_detects_failed_verify() {
    let verify_details = serde_json::json!({
        "action": "verify",
        "passed": false,
        "exit_code": 1
    });
    let failed_verify = imp_llm::ToolResultMessage {
        tool_call_id: String::from("call_verify"),
        tool_name: String::from("mana"),
        content: vec![ContentBlock::Text {
            text: String::from("Verify failed"),
        }],
        is_error: true,
        details: verify_details,
        timestamp: 0,
    };

    let blocker = tool_results_indicate_execution_blocker(&[failed_verify], AgentMode::Full);
    assert_eq!(blocker, Some(StopReason::ExecutionBlocked));
}
2399
/// A non-error result from the `ask_user` tool, with null details, is
/// classified as a `UserBlocker`.
#[test]
fn tool_results_indicate_execution_blocker_detects_ask_tool_as_user_blocker() {
    let ask_result = imp_llm::ToolResultMessage {
        tool_call_id: String::from("call_ask"),
        tool_name: String::from("ask_user"),
        content: vec![ContentBlock::Text {
            text: String::from("blue"),
        }],
        is_error: false,
        details: serde_json::Value::Null,
        timestamp: 0,
    };

    let blocker = tool_results_indicate_execution_blocker(&[ask_result], AgentMode::Full);
    assert_eq!(blocker, Some(StopReason::UserBlocker));
}
2418
/// A successful `edit` result followed by a `bash` result reporting
/// `exit_code: 0` for a `cargo check` command is treated as completed work
/// in `AgentMode::Full`.
#[test]
fn tool_results_indicate_work_completed_detects_edit_plus_successful_check() {
    let results = [
        imp_llm::ToolResultMessage {
            tool_call_id: String::from("call_edit"),
            tool_name: String::from("edit"),
            content: vec![ContentBlock::Text {
                text: String::from("diff output"),
            }],
            is_error: false,
            details: serde_json::json!({
                "path": "/tmp/file.rs"
            }),
            timestamp: 0,
        },
        imp_llm::ToolResultMessage {
            tool_call_id: String::from("call_check"),
            tool_name: String::from("bash"),
            content: vec![ContentBlock::Text {
                text: String::from("ok"),
            }],
            is_error: false,
            details: serde_json::json!({
                "exit_code": 0,
                "command": "cargo check -p imp-core"
            }),
            timestamp: 0,
        },
    ];

    let completed = tool_results_indicate_work_completed(&results, AgentMode::Full);
    assert!(completed);
}
2452
/// A successful `mana` result whose details describe a `close` action on a
/// unit with status `closed` is treated as completed work in
/// `AgentMode::Full`.
#[test]
fn tool_results_indicate_work_completed_detects_closed_unit_details() {
    let close_details = serde_json::json!({
        "action": "close",
        "unit": {
            "id": "1",
            "title": "Test unit",
            "status": "closed"
        }
    });
    let close_result = imp_llm::ToolResultMessage {
        tool_call_id: String::from("call_close"),
        tool_name: String::from("mana"),
        content: vec![ContentBlock::Text {
            text: String::from("Closed unit 1"),
        }],
        is_error: false,
        details: close_details,
        timestamp: 0,
    };

    let completed = tool_results_indicate_work_completed(&[close_result], AgentMode::Full);
    assert!(completed);
}
2478
/// A turn review in the `NeedsDecision` state (with a pending question and
/// otherwise empty collections) maps to `UserBlocker` in planner mode.
#[test]
fn mana_review_needs_decision_maps_to_user_blocker() {
    let pending_question = Some(String::from("Which path should we take?"));
    let review = TurnManaReview {
        turn_index: 0,
        state: ManaReviewState::NeedsDecision,
        scope: crate::mana_review::ManaReviewScope::default(),
        anchor_unit: None,
        touched_units: vec![],
        proposed_children: vec![],
        material_field_changes: vec![],
        notes_appended: vec![],
        decision_events: vec![],
        unresolved_consequential_choices: vec![],
        next_question: pending_question,
    };

    let stop_reason = mana_review_stop_reason(&review, AgentMode::Planner);
    assert_eq!(stop_reason, Some(StopReason::UserBlocker));
}
2500
/// A `Changed` turn review that proposes one child job created during the
/// turn maps to `DecompositionCompleted` in planner mode.
#[test]
fn mana_review_changed_with_planner_children_maps_to_decomposition_completed() {
    let child_unit =
        crate::mana_review::ManaUnitRef::new("28.6.1", "child", Some(String::from("job")));
    let parent_unit =
        crate::mana_review::ManaUnitRef::new("28.6", "parent", Some(String::from("epic")));
    let proposed_child = crate::mana_review::TurnManaProposedChild {
        unit: child_unit,
        parent: parent_unit,
        child_kind: crate::mana_review::ManaReviewUnitKind::Job,
        child_origin: crate::mana_review::ManaUnitOrigin::CreatedInTurn,
    };

    let review = TurnManaReview {
        turn_index: 0,
        state: ManaReviewState::Changed,
        scope: crate::mana_review::ManaReviewScope::default(),
        anchor_unit: None,
        touched_units: vec![],
        proposed_children: vec![proposed_child],
        material_field_changes: vec![],
        notes_appended: vec![],
        decision_events: vec![],
        unresolved_consequential_choices: vec![],
        next_question: None,
    };

    let stop_reason = mana_review_stop_reason(&review, AgentMode::Planner);
    assert_eq!(stop_reason, Some(StopReason::DecompositionCompleted));
}
2535
2536 #[tokio::test]
2537 async fn planner_stops_after_decomposition_is_externalized() {
2538 let provider = Arc::new(MockProvider::new(vec![text_response(
2539 "Externalized into mana. Plan is complete and ready for handoff.",
2540 100,
2541 20,
2542 )]));
2543
2544 let model = test_model(provider);
2545 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2546 agent.mode = AgentMode::Planner;
2547 agent.has_mana_skill = true;
2548
2549 agent.run("Plan the rollout".to_string()).await.unwrap();
2550
2551 let user_texts: Vec<String> = agent
2552 .messages
2553 .iter()
2554 .filter_map(|message| match message {
2555 Message::User(user) => user.content.iter().find_map(|block| match block {
2556 ContentBlock::Text { text } => Some(text.clone()),
2557 _ => None,
2558 }),
2559 _ => None,
2560 })
2561 .collect();
2562
2563 assert_eq!(user_texts, vec!["Plan the rollout".to_string()]);
2564 }
2565
2566 #[tokio::test]
2567 async fn planner_stops_for_user_blocker_instead_of_auto_follow_up() {
2568 let provider = Arc::new(MockProvider::new(vec![text_response(
2569 "Blocked: I need your input on which auth direction we should choose before continuing.",
2570 100,
2571 20,
2572 )]));
2573
2574 let model = test_model(provider);
2575 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2576 agent.mode = AgentMode::Planner;
2577 agent.has_mana_skill = true;
2578
2579 agent.run("Plan the rollout".to_string()).await.unwrap();
2580
2581 let user_texts: Vec<String> = agent
2582 .messages
2583 .iter()
2584 .filter_map(|message| match message {
2585 Message::User(user) => user.content.iter().find_map(|block| match block {
2586 ContentBlock::Text { text } => Some(text.clone()),
2587 _ => None,
2588 }),
2589 _ => None,
2590 })
2591 .collect();
2592
2593 assert_eq!(user_texts, vec!["Plan the rollout".to_string()]);
2594 }
2595
2596 #[tokio::test]
2597 async fn agent_queues_confidence_continue_follow_up_after_visible_mana_turn() {
2598 let provider = Arc::new(MockProvider::new(vec![
2599 vec![
2600 StreamEvent::MessageStart {
2601 model: "test-model".to_string(),
2602 },
2603 StreamEvent::ToolCall {
2604 id: "call_1".to_string(),
2605 name: "mana".to_string(),
2606 arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2607 },
2608 StreamEvent::TextDelta {
2609 text: "Done. Updated mana and next step is ready to continue.".to_string(),
2610 },
2611 StreamEvent::MessageEnd {
2612 message: AssistantMessage {
2613 content: vec![
2614 ContentBlock::ToolCall {
2615 id: "call_1".to_string(),
2616 name: "mana".to_string(),
2617 arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2618 },
2619 ContentBlock::Text {
2620 text: "Done. Updated mana and next step is ready to continue."
2621 .to_string(),
2622 },
2623 ],
2624 usage: Some(Usage {
2625 input_tokens: 100,
2626 output_tokens: 20,
2627 cache_read_tokens: 0,
2628 cache_write_tokens: 0,
2629 }),
2630 stop_reason: LlmStopReason::ToolUse,
2631 timestamp: 1000,
2632 },
2633 },
2634 ],
2635 text_response("Continuing.", 120, 25),
2636 ]));
2637
2638 let model = test_model(provider);
2639 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2640 agent.mode = AgentMode::Planner;
2641 agent.continue_policy = ContinuePolicy::Balanced;
2642 agent
2643 .tools
2644 .register(Arc::new(crate::tools::mana::ManaTool::default()));
2645
2646 agent.run("Do the next thing".to_string()).await.unwrap();
2647
2648 let user_texts: Vec<String> = agent
2649 .messages
2650 .iter()
2651 .filter_map(|message| match message {
2652 Message::User(user) => user.content.iter().find_map(|block| match block {
2653 ContentBlock::Text { text } => Some(text.clone()),
2654 _ => None,
2655 }),
2656 _ => None,
2657 })
2658 .collect();
2659
2660 assert_eq!(user_texts.len(), 2);
2661 assert!(user_texts[1].contains("Confidence is high"));
2662 }
2663
2664 #[tokio::test]
2665 async fn agent_does_not_queue_confidence_continue_when_policy_disabled() {
2666 let provider = Arc::new(MockProvider::new(vec![
2667 vec![
2668 StreamEvent::MessageStart {
2669 model: "test-model".to_string(),
2670 },
2671 StreamEvent::ToolCall {
2672 id: "call_1".to_string(),
2673 name: "mana".to_string(),
2674 arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2675 },
2676 StreamEvent::TextDelta {
2677 text: "Done. Updated mana and next step is ready to continue.".to_string(),
2678 },
2679 StreamEvent::MessageEnd {
2680 message: AssistantMessage {
2681 content: vec![
2682 ContentBlock::ToolCall {
2683 id: "call_1".to_string(),
2684 name: "mana".to_string(),
2685 arguments: serde_json::json!({"action": "update", "id": "1", "notes": "done"}),
2686 },
2687 ContentBlock::Text {
2688 text: "Done. Updated mana and next step is ready to continue."
2689 .to_string(),
2690 },
2691 ],
2692 usage: Some(Usage {
2693 input_tokens: 100,
2694 output_tokens: 20,
2695 cache_read_tokens: 0,
2696 cache_write_tokens: 0,
2697 }),
2698 stop_reason: LlmStopReason::ToolUse,
2699 timestamp: 1000,
2700 },
2701 },
2702 ],
2703 text_response("Stopped after visible mana turn.", 120, 25),
2704 ]));
2705
2706 let model = test_model(provider);
2707 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2708 agent.mode = AgentMode::Planner;
2709 agent.continue_policy = ContinuePolicy::Disabled;
2710 agent
2711 .tools
2712 .register(Arc::new(crate::tools::mana::ManaTool::default()));
2713
2714 agent.run("Do the next thing".to_string()).await.unwrap();
2715
2716 let user_texts: Vec<String> = agent
2717 .messages
2718 .iter()
2719 .filter_map(|message| match message {
2720 Message::User(user) => user.content.iter().find_map(|block| match block {
2721 ContentBlock::Text { text } => Some(text.clone()),
2722 _ => None,
2723 }),
2724 _ => None,
2725 })
2726 .collect();
2727
2728 assert_eq!(user_texts, vec!["Do the next thing".to_string()]);
2729 }
2730
2731 #[tokio::test]
2732 async fn agent_does_not_queue_externalization_follow_up_after_mana_tool_turn() {
2733 let provider = Arc::new(MockProvider::new(vec![
2734 tool_call_response(
2735 "call_1",
2736 "mana",
2737 serde_json::json!({"action": "status"}),
2738 100,
2739 20,
2740 ),
2741 text_response("Done after mana", 120, 25),
2742 ]));
2743
2744 let model = test_model(provider);
2745 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2746 agent.has_mana_skill = true;
2747 agent.mode = AgentMode::Planner;
2748 agent
2749 .tools
2750 .register(Arc::new(crate::tools::mana::ManaTool::default()));
2751
2752 agent.run("Plan the rollout".to_string()).await.unwrap();
2753
2754 let user_texts: Vec<String> = agent
2755 .messages
2756 .iter()
2757 .filter_map(|message| match message {
2758 Message::User(user) => user.content.iter().find_map(|block| match block {
2759 ContentBlock::Text { text } => Some(text.clone()),
2760 _ => None,
2761 }),
2762 _ => None,
2763 })
2764 .collect();
2765
2766 assert_eq!(user_texts, vec!["Plan the rollout".to_string()]);
2767 }
2768
2769 #[tokio::test]
2770 async fn agent_queues_mana_basics_hint_for_worker_mana_requests() {
2771 let provider = Arc::new(MockProvider::new(vec![
2772 text_response("Loaded basics skill", 100, 20),
2773 text_response("Done", 120, 25),
2774 ]));
2775
2776 let model = test_model(provider);
2777 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2778 agent.has_mana_basics_skill = true;
2779 agent.mode = AgentMode::Worker;
2780
2781 agent
2782 .run("Check mana status and logs for my unit".to_string())
2783 .await
2784 .unwrap();
2785
2786 let user_texts: Vec<String> = agent
2787 .messages
2788 .iter()
2789 .filter_map(|message| match message {
2790 Message::User(user) => user.content.iter().find_map(|block| match block {
2791 ContentBlock::Text { text } => Some(text.clone()),
2792 _ => None,
2793 }),
2794 _ => None,
2795 })
2796 .collect();
2797
2798 assert_eq!(user_texts.len(), 1);
2799 assert_eq!(user_texts[0], "Check mana status and logs for my unit");
2800 }
2801
2802 #[tokio::test]
2803 async fn agent_does_not_queue_mana_hint_without_matching_signal() {
2804 let provider = Arc::new(MockProvider::new(vec![text_response("No nudge", 100, 20)]));
2805
2806 let model = test_model(provider);
2807 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2808 agent.has_mana_skill = true;
2809 agent.mode = AgentMode::Planner;
2810
2811 agent
2812 .run("Explain how this parser works".to_string())
2813 .await
2814 .unwrap();
2815
2816 let user_texts: Vec<String> = agent
2817 .messages
2818 .iter()
2819 .filter_map(|message| match message {
2820 Message::User(user) => user.content.iter().find_map(|block| match block {
2821 ContentBlock::Text { text } => Some(text.clone()),
2822 _ => None,
2823 }),
2824 _ => None,
2825 })
2826 .collect();
2827
2828 assert_eq!(
2829 user_texts,
2830 vec!["Explain how this parser works".to_string()]
2831 );
2832 }
2833
2834 #[tokio::test]
2835 async fn agent_does_not_queue_mana_basics_hint_when_no_tools_available() {
2836 let provider = Arc::new(MockProvider::new(vec![text_response(
2837 "Loaded basics skill",
2838 100,
2839 20,
2840 )]));
2841
2842 let model = test_model(provider);
2843 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
2844 agent.has_mana_basics_skill = true;
2845 agent.mode = AgentMode::Worker;
2846 agent.tools.retain(|_| false);
2847
2848 agent
2849 .run("Check mana status and logs for my unit".to_string())
2850 .await
2851 .unwrap();
2852
2853 let user_texts: Vec<String> = agent
2854 .messages
2855 .iter()
2856 .filter_map(|message| match message {
2857 Message::User(user) => user.content.iter().find_map(|block| match block {
2858 ContentBlock::Text { text } => Some(text.clone()),
2859 _ => None,
2860 }),
2861 _ => None,
2862 })
2863 .collect();
2864
2865 assert_eq!(
2866 user_texts,
2867 vec!["Check mana status and logs for my unit".to_string()]
2868 );
2869 }
2870
2871 #[tokio::test]
2872 async fn single_text_turn_with_no_tools_exits_cleanly() {
2873 let provider = Arc::new(MockProvider::new(vec![text_response("SMOKE_OK", 50, 10)]));
2874 let model = test_model(provider);
2875 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2876 agent.mode = AgentMode::Worker;
2877 agent.has_mana_basics_skill = true;
2878 agent.tools.retain(|_| false);
2879
2880 let events_task = tokio::spawn(collect_events(handle));
2881 let result = agent.run("Check mana status and finish".to_string()).await;
2882 drop(agent);
2883
2884 assert!(result.is_ok());
2885
2886 let events = events_task.await.unwrap();
2887 assert!(events
2888 .iter()
2889 .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
2890 assert!(!events.iter().any(|e| matches!(
2891 e,
2892 AgentEvent::Error { error } if error.contains("Max turns exceeded")
2893 )));
2894 }
2895
2896 #[tokio::test]
2897 async fn agent_emits_timing_events_in_order() {
2898 let provider = Arc::new(MockProvider::new(vec![text_response("timed", 10, 5)]));
2899 let model = test_model(provider);
2900 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2901
2902 let events_task = tokio::spawn(collect_events(handle));
2903 agent.run("time this".to_string()).await.unwrap();
2904 drop(agent);
2905
2906 let events = events_task.await.unwrap();
2907 let timings: Vec<_> = events
2908 .iter()
2909 .filter_map(|event| match event {
2910 AgentEvent::Timing { timing } => Some(timing.clone()),
2911 _ => None,
2912 })
2913 .collect();
2914
2915 assert!(timings.len() >= 7);
2916 assert_eq!(timings[0].stage, TimingStage::ContextAssemblyStart);
2917 assert_eq!(timings[1].stage, TimingStage::ContextAssemblyEnd);
2918 assert_eq!(timings[2].stage, TimingStage::LlmRequestStart);
2919 assert_eq!(timings[3].stage, TimingStage::FirstStreamEvent);
2920 assert_eq!(timings[4].stage, TimingStage::FirstTextDelta);
2921 assert!(timings
2922 .iter()
2923 .any(|timing| timing.stage == TimingStage::MessageEnd));
2924 assert!(timings
2925 .iter()
2926 .any(|timing| timing.stage == TimingStage::PostTurnAssessmentEnd));
2927
2928 for timing in timings {
2929 assert_eq!(timing.turn, 0);
2930 if let Some(since_llm_request_start_ms) = timing.since_llm_request_start_ms {
2931 assert!(timing.since_turn_start_ms >= since_llm_request_start_ms);
2932 }
2933 }
2934 }
2935
2936 #[tokio::test]
2937 async fn agent_streams_message_delta_before_message_end() {
2938 let provider = Arc::new(MockProvider::new_results(vec![vec![
2939 Ok(StreamEvent::MessageStart {
2940 model: "test-model".to_string(),
2941 }),
2942 Ok(StreamEvent::TextDelta {
2943 text: "streaming".to_string(),
2944 }),
2945 Ok(StreamEvent::MessageEnd {
2946 message: AssistantMessage {
2947 content: vec![ContentBlock::Text {
2948 text: "streaming".to_string(),
2949 }],
2950 usage: Some(Usage {
2951 input_tokens: 10,
2952 output_tokens: 5,
2953 cache_read_tokens: 0,
2954 cache_write_tokens: 0,
2955 }),
2956 stop_reason: LlmStopReason::EndTurn,
2957 timestamp: 1000,
2958 },
2959 }),
2960 ]]));
2961
2962 let model = test_model(provider);
2963 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
2964
2965 let events_task = tokio::spawn(collect_events(handle));
2966 agent.run("Say hi".to_string()).await.unwrap();
2967 drop(agent);
2968
2969 let events = events_task.await.unwrap();
2970 let text_delta_idx = events.iter().position(|event| {
2971 matches!(
2972 event,
2973 AgentEvent::MessageDelta {
2974 delta: StreamEvent::TextDelta { text }
2975 } if text == "streaming"
2976 )
2977 });
2978 let turn_end_idx = events
2979 .iter()
2980 .position(|event| matches!(event, AgentEvent::TurnEnd { .. }));
2981
2982 assert!(text_delta_idx.is_some());
2983 assert!(turn_end_idx.is_some());
2984 assert!(text_delta_idx.unwrap() < turn_end_idx.unwrap());
2985 }
2986
2987 #[tokio::test]
2988 async fn agent_retries_before_first_meaningful_event_but_not_after() {
2989 let provider = Arc::new(MockProvider::new_results(vec![
2990 vec![
2991 Ok(StreamEvent::MessageStart {
2992 model: "test-model".to_string(),
2993 }),
2994 Err(imp_llm::Error::Stream("startup failure".into())),
2995 ],
2996 vec![
2997 Ok(StreamEvent::MessageStart {
2998 model: "test-model".to_string(),
2999 }),
3000 Ok(StreamEvent::TextDelta {
3001 text: "recovered".to_string(),
3002 }),
3003 Ok(StreamEvent::MessageEnd {
3004 message: AssistantMessage {
3005 content: vec![ContentBlock::Text {
3006 text: "recovered".to_string(),
3007 }],
3008 usage: Some(Usage {
3009 input_tokens: 10,
3010 output_tokens: 5,
3011 cache_read_tokens: 0,
3012 cache_write_tokens: 0,
3013 }),
3014 stop_reason: LlmStopReason::EndTurn,
3015 timestamp: 1000,
3016 },
3017 }),
3018 ],
3019 ]));
3020
3021 let model = test_model(provider);
3022 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3023
3024 let events_task = tokio::spawn(collect_events(handle));
3025 agent.run("Recover".to_string()).await.unwrap();
3026 drop(agent);
3027
3028 let events = events_task.await.unwrap();
3029 let text_delta = events.iter().position(|e| {
3030 matches!(
3031 e,
3032 AgentEvent::MessageDelta {
3033 delta: StreamEvent::TextDelta { text }
3034 } if text == "recovered"
3035 )
3036 });
3037 let turn_end = events
3038 .iter()
3039 .position(|e| matches!(e, AgentEvent::TurnEnd { .. }));
3040
3041 assert!(text_delta.is_some());
3042 assert!(turn_end.is_some());
3043 assert!(text_delta.unwrap() < turn_end.unwrap());
3044 }
3045
3046 #[tokio::test]
3047 async fn agent_surfaces_error_after_partial_stream_without_retrying() {
3048 let provider = Arc::new(MockProvider::new_results(vec![vec![
3049 Ok(StreamEvent::TextDelta {
3050 text: "partial".to_string(),
3051 }),
3052 Err(imp_llm::Error::Stream("mid-stream failure".into())),
3053 ]]));
3054
3055 let model = test_model(provider);
3056 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3057
3058 let events_task = tokio::spawn(collect_events(handle));
3059 let result = agent.run("Fail midway".to_string()).await;
3060 drop(agent);
3061
3062 assert!(result.is_err());
3063
3064 let events = events_task.await.unwrap();
3065 let text_delta = events.iter().position(|e| {
3066 matches!(
3067 e,
3068 AgentEvent::MessageDelta {
3069 delta: StreamEvent::TextDelta { text }
3070 } if text == "partial"
3071 )
3072 });
3073 let error_idx = events.iter().position(|e| {
3074 matches!(
3075 e,
3076 AgentEvent::Error { error }
3077 if error.contains("Provider stream failed after partial output")
3078 && error.contains("mid-stream failure")
3079 )
3080 });
3081
3082 assert!(text_delta.is_some());
3083 assert!(error_idx.is_some());
3084 assert!(text_delta.unwrap() < error_idx.unwrap());
3085 }
3086
3087 #[tokio::test]
3088 async fn agent_treats_silent_eof_without_message_end_as_error() {
3089 let provider = Arc::new(MockProvider::new_results(vec![vec![Ok(
3090 StreamEvent::TextDelta {
3091 text: "partial".to_string(),
3092 },
3093 )]]));
3094
3095 let model = test_model(provider);
3096 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3097
3098 let events_task = tokio::spawn(collect_events(handle));
3099 let result = agent.run("Fail with silent eof".to_string()).await;
3100 drop(agent);
3101
3102 assert!(result.is_err());
3103
3104 let events = events_task.await.unwrap();
3105 let text_delta = events.iter().position(|e| {
3106 matches!(
3107 e,
3108 AgentEvent::MessageDelta {
3109 delta: StreamEvent::TextDelta { text }
3110 } if text == "partial"
3111 )
3112 });
3113 let error_idx = events.iter().position(|e| {
3114 matches!(
3115 e,
3116 AgentEvent::Error { error }
3117 if error.contains("missing terminal completion event")
3118 )
3119 });
3120 let turn_end_idx = events
3121 .iter()
3122 .position(|e| matches!(e, AgentEvent::TurnEnd { .. }));
3123
3124 assert!(text_delta.is_some());
3125 assert!(error_idx.is_some());
3126 assert!(turn_end_idx.is_none());
3127 assert!(text_delta.unwrap() < error_idx.unwrap());
3128 }
3129
3130 #[tokio::test]
3133 async fn agent_simple_text_response() {
3134 let provider = Arc::new(MockProvider::new(vec![text_response(
3135 "Hello, world!",
3136 100,
3137 20,
3138 )]));
3139
3140 let model = test_model(provider);
3141 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3142
3143 let events_task = tokio::spawn(collect_events(handle));
3144 agent.run("Say hello".to_string()).await.unwrap();
3145 drop(agent); let events = events_task.await.unwrap();
3148
3149 assert!(matches!(events[0], AgentEvent::AgentStart { .. }));
3151
3152 let turn_start = events
3153 .iter()
3154 .position(|e| matches!(e, AgentEvent::TurnStart { index: 0 }));
3155 assert!(turn_start.is_some());
3156
3157 let turn_end = events
3158 .iter()
3159 .position(|e| matches!(e, AgentEvent::TurnEnd { index: 0, .. }));
3160 assert!(turn_end.is_some());
3161 assert!(turn_end.unwrap() > turn_start.unwrap());
3162
3163 let agent_end = events
3164 .iter()
3165 .position(|e| matches!(e, AgentEvent::AgentEnd { .. }));
3166 assert!(agent_end.is_some());
3167 assert!(agent_end.unwrap() > turn_end.unwrap());
3168
3169 if let AgentEvent::AgentEnd { usage, cost, .. } = &events[agent_end.unwrap()] {
3171 assert_eq!(usage.input_tokens, 100);
3172 assert_eq!(usage.output_tokens, 20);
3173 assert!(cost.total > 0.0);
3174 } else {
3175 panic!("Expected AgentEnd");
3176 }
3177
3178 let turn_starts: Vec<_> = events
3180 .iter()
3181 .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3182 .collect();
3183 assert_eq!(turn_starts.len(), 1);
3184 }
3185
3186 #[tokio::test]
3189 async fn agent_single_tool_call() {
3190 let provider = Arc::new(MockProvider::new(vec![
3191 tool_call_response(
3193 "call_1",
3194 "echo",
3195 serde_json::json!({"text": "hello"}),
3196 100,
3197 30,
3198 ),
3199 text_response("The echo said: hello", 200, 25),
3201 ]));
3202
3203 let model = test_model(provider);
3204 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3205 agent.tools.register(Arc::new(EchoTool));
3206
3207 let events_task = tokio::spawn(collect_events(handle));
3208 agent.run("Echo hello".to_string()).await.unwrap();
3209 drop(agent);
3210
3211 let events = events_task.await.unwrap();
3212
3213 let turn_starts: Vec<_> = events
3215 .iter()
3216 .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3217 .collect();
3218 assert_eq!(turn_starts.len(), 2);
3219
3220 let tool_starts: Vec<_> = events
3222 .iter()
3223 .filter(|e| matches!(e, AgentEvent::ToolExecutionStart { .. }))
3224 .collect();
3225 assert_eq!(tool_starts.len(), 1);
3226
3227 let tool_ends: Vec<_> = events
3228 .iter()
3229 .filter(|e| matches!(e, AgentEvent::ToolExecutionEnd { .. }))
3230 .collect();
3231 assert_eq!(tool_ends.len(), 1);
3232
3233 if let Some(AgentEvent::AgentEnd { usage, .. }) = events
3235 .iter()
3236 .find(|e| matches!(e, AgentEvent::AgentEnd { .. }))
3237 {
3238 assert_eq!(usage.input_tokens, 300);
3239 assert_eq!(usage.output_tokens, 55);
3240 } else {
3241 panic!("Expected AgentEnd");
3242 }
3243 }
3244
3245 #[tokio::test]
3248 async fn agent_multiple_tool_calls() {
3249 let provider = Arc::new(MockProvider::new(vec![
3250 multi_tool_call_response(
3252 &[
3253 ("call_1", "echo", serde_json::json!({"text": "first"})),
3254 ("call_2", "echo", serde_json::json!({"text": "second"})),
3255 ],
3256 100,
3257 40,
3258 ),
3259 tool_call_response(
3261 "call_3",
3262 "echo",
3263 serde_json::json!({"text": "third"}),
3264 200,
3265 20,
3266 ),
3267 text_response("All done!", 300, 10),
3269 ]));
3270
3271 let model = test_model(provider);
3272 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3273 agent.tools.register(Arc::new(EchoTool));
3274
3275 let events_task = tokio::spawn(collect_events(handle));
3276 agent.run("Echo three things".to_string()).await.unwrap();
3277 drop(agent);
3278
3279 let events = events_task.await.unwrap();
3280
3281 let turn_starts: Vec<_> = events
3283 .iter()
3284 .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3285 .collect();
3286 assert_eq!(turn_starts.len(), 3);
3287
3288 let tool_starts: Vec<_> = events
3290 .iter()
3291 .filter(|e| matches!(e, AgentEvent::ToolExecutionStart { .. }))
3292 .collect();
3293 assert_eq!(tool_starts.len(), 3);
3294
3295 if let Some(AgentEvent::AgentEnd { usage, .. }) = events
3297 .iter()
3298 .find(|e| matches!(e, AgentEvent::AgentEnd { .. }))
3299 {
3300 assert_eq!(usage.input_tokens, 600);
3301 assert_eq!(usage.output_tokens, 70);
3302 } else {
3303 panic!("Expected AgentEnd");
3304 }
3305 }
3306
3307 #[tokio::test]
3310 async fn execution_stops_after_failed_verify_tool_result_without_blocked_text() {
3311 let provider = Arc::new(MockProvider::new(vec![
3312 tool_call_response(
3313 "call_verify",
3314 "mana",
3315 serde_json::json!({"action": "verify", "id": "1"}),
3316 100,
3317 20,
3318 ),
3319 text_response("Verify failed.", 120, 20),
3320 ]));
3321
3322 let model = test_model(provider);
3323 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3324 agent.mode = AgentMode::Full;
3325 agent
3326 .tools
3327 .register(Arc::new(crate::tools::mana::ManaTool::default()));
3328
3329 agent.run("Verify the unit".to_string()).await.unwrap();
3330
3331 let user_texts: Vec<String> = agent
3332 .messages
3333 .iter()
3334 .filter_map(|message| match message {
3335 Message::User(user) => user.content.iter().find_map(|block| match block {
3336 ContentBlock::Text { text } => Some(text.clone()),
3337 _ => None,
3338 }),
3339 _ => None,
3340 })
3341 .collect();
3342
3343 assert_eq!(user_texts, vec!["Verify the unit".to_string()]);
3344 }
3345
3346 #[tokio::test]
3347 async fn execution_stops_after_mana_close_tool_result_without_done_text() {
3348 let provider = Arc::new(MockProvider::new(vec![
3349 tool_call_response(
3350 "call_close",
3351 "mana",
3352 serde_json::json!({"action": "close", "id": "1"}),
3353 100,
3354 20,
3355 ),
3356 text_response("Unit closed.", 120, 20),
3357 ]));
3358
3359 let model = test_model(provider);
3360 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3361 agent.mode = AgentMode::Full;
3362 agent
3363 .tools
3364 .register(Arc::new(crate::tools::mana::ManaTool::default()));
3365
3366 agent.run("Close the unit".to_string()).await.unwrap();
3367
3368 let user_texts: Vec<String> = agent
3369 .messages
3370 .iter()
3371 .filter_map(|message| match message {
3372 Message::User(user) => user.content.iter().find_map(|block| match block {
3373 ContentBlock::Text { text } => Some(text.clone()),
3374 _ => None,
3375 }),
3376 _ => None,
3377 })
3378 .collect();
3379
3380 assert_eq!(user_texts, vec!["Close the unit".to_string()]);
3381 }
3382
3383 #[tokio::test]
3384 async fn execution_stops_after_work_completed_text() {
3385 let provider = Arc::new(MockProvider::new(vec![text_response(
3386 "All done! Implemented the change and finished the task.",
3387 100,
3388 20,
3389 )]));
3390
3391 let model = test_model(provider);
3392 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3393 agent.mode = AgentMode::Full;
3394
3395 agent.run("Implement the change".to_string()).await.unwrap();
3396
3397 let user_texts: Vec<String> = agent
3398 .messages
3399 .iter()
3400 .filter_map(|message| match message {
3401 Message::User(user) => user.content.iter().find_map(|block| match block {
3402 ContentBlock::Text { text } => Some(text.clone()),
3403 _ => None,
3404 }),
3405 _ => None,
3406 })
3407 .collect();
3408
3409 assert_eq!(user_texts, vec!["Implement the change".to_string()]);
3410 }
3411
3412 #[tokio::test]
3413 async fn execution_stops_for_user_blocker_text() {
3414 let provider = Arc::new(MockProvider::new(vec![text_response(
3415 "Blocked: I need your input on which path to take before continuing.",
3416 100,
3417 20,
3418 )]));
3419
3420 let model = test_model(provider);
3421 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3422 agent.mode = AgentMode::Full;
3423
3424 agent.run("Implement the change".to_string()).await.unwrap();
3425
3426 let user_texts: Vec<String> = agent
3427 .messages
3428 .iter()
3429 .filter_map(|message| match message {
3430 Message::User(user) => user.content.iter().find_map(|block| match block {
3431 ContentBlock::Text { text } => Some(text.clone()),
3432 _ => None,
3433 }),
3434 _ => None,
3435 })
3436 .collect();
3437
3438 assert_eq!(user_texts, vec!["Implement the change".to_string()]);
3439 }
3440
3441 #[tokio::test]
3442 async fn agent_follow_up_runs_after_current_work_finishes() {
3443 let provider = Arc::new(MockProvider::new(vec![
3444 tool_call_response(
3445 "call_1",
3446 "echo",
3447 serde_json::json!({"text": "hello"}),
3448 100,
3449 20,
3450 ),
3451 text_response("Handled follow-up", 120, 25),
3452 ]));
3453
3454 let model = test_model(provider);
3455 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3456 agent.tools.register(Arc::new(EchoTool));
3457
3458 handle
3459 .command_tx
3460 .send(AgentCommand::FollowUp("What next?".into()))
3461 .await
3462 .unwrap();
3463
3464 let events_task = tokio::spawn(collect_events(handle));
3465 agent.run("Do the first thing".to_string()).await.unwrap();
3466 drop(agent);
3467
3468 let events = events_task.await.unwrap();
3469 let turn_starts: Vec<_> = events
3470 .iter()
3471 .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3472 .collect();
3473 assert_eq!(turn_starts.len(), 2);
3474 }
3475
3476 #[tokio::test]
3477 async fn agent_follow_up_preserves_order_with_multiple_messages() {
3478 let provider = Arc::new(MockProvider::new(vec![
3479 tool_call_response(
3480 "call_1",
3481 "echo",
3482 serde_json::json!({"text": "hello"}),
3483 100,
3484 20,
3485 ),
3486 text_response("First follow-up handled", 120, 25),
3487 text_response("Second follow-up handled", 130, 30),
3488 ]));
3489
3490 let model = test_model(provider);
3491 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3492 agent.tools.register(Arc::new(EchoTool));
3493
3494 handle
3495 .command_tx
3496 .send(AgentCommand::FollowUp("follow up one".into()))
3497 .await
3498 .unwrap();
3499 handle
3500 .command_tx
3501 .send(AgentCommand::FollowUp("follow up two".into()))
3502 .await
3503 .unwrap();
3504
3505 agent.run("Do the first thing".to_string()).await.unwrap();
3506
3507 let user_texts: Vec<String> = agent
3508 .messages
3509 .iter()
3510 .filter_map(|message| match message {
3511 Message::User(user) => user.content.iter().find_map(|block| match block {
3512 ContentBlock::Text { text } => Some(text.clone()),
3513 _ => None,
3514 }),
3515 _ => None,
3516 })
3517 .collect();
3518
3519 assert_eq!(
3520 user_texts,
3521 vec![
3522 "Do the first thing".to_string(),
3523 "follow up one".to_string(),
3524 "follow up two".to_string()
3525 ]
3526 );
3527 }
3528
3529 #[tokio::test]
3530 async fn agent_cancel_still_wins_over_follow_up_queue() {
3531 let provider = Arc::new(MockProvider::new(vec![tool_call_response(
3532 "call_1",
3533 "echo",
3534 serde_json::json!({"text": "hello"}),
3535 100,
3536 20,
3537 )]));
3538
3539 let model = test_model(provider);
3540 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3541 agent.tools.register(Arc::new(EchoTool));
3542
3543 handle
3544 .command_tx
3545 .send(AgentCommand::FollowUp("queued later".into()))
3546 .await
3547 .unwrap();
3548 handle.command_tx.send(AgentCommand::Cancel).await.unwrap();
3549
3550 let result = agent.run("Do something".to_string()).await;
3551 assert!(matches!(result, Err(crate::error::Error::Cancelled)));
3552 }
3553
/// `mana release …` and `mana tree` shell commands must both yield a hint.
#[test]
fn mana_bash_equivalent_hint_handles_release_and_tree() {
    for command in ["mana release 1", "mana tree"] {
        assert!(
            mana_bash_equivalent_hint(command).is_some(),
            "expected a hint for {command:?}"
        );
    }
}
3559
/// Commands that merely start with the letters `mana` (`manatee`) or invoke a
/// path-qualified binary (`./mana`) must not yield a hint.
#[test]
fn mana_bash_equivalent_hint_ignores_non_mana_prefixes() {
    for command in ["manatee status", "./mana status"] {
        assert!(
            mana_bash_equivalent_hint(command).is_none(),
            "expected no hint for {command:?}"
        );
    }
}
3565
3566 #[tokio::test]
3567 async fn agent_blocks_bash_mana_when_native_action_exists() {
3568 let provider = Arc::new(MockProvider::new(vec![
3569 tool_call_response(
3570 "call_1",
3571 "bash",
3572 serde_json::json!({"command": "mana status", "timeout": 5}),
3573 100,
3574 20,
3575 ),
3576 text_response("Recovered after native-mana hint", 120, 25),
3577 ]));
3578
3579 let model = test_model(provider);
3580 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3581 agent.tools.register(Arc::new(crate::tools::bash::BashTool));
3582
3583 let events_task = tokio::spawn(collect_events(handle));
3584 agent.run("Check mana state".to_string()).await.unwrap();
3585 drop(agent);
3586
3587 let events = events_task.await.unwrap();
3588 let tool_end = events.iter().find_map(|e| match e {
3589 AgentEvent::ToolExecutionEnd { result, .. } => Some(result),
3590 _ => None,
3591 });
3592 let tool_end = tool_end.expect("expected ToolExecutionEnd");
3593 assert!(tool_end.is_error);
3594 let text = tool_end
3595 .content
3596 .iter()
3597 .find_map(|b| match b {
3598 ContentBlock::Text { text } => Some(text.as_str()),
3599 _ => None,
3600 })
3601 .unwrap_or("");
3602 assert!(text.contains("Use the native mana tool"));
3603 }
3604
3605 #[tokio::test]
3606 async fn agent_allows_non_mana_bash_commands() {
3607 let provider = Arc::new(MockProvider::new(vec![
3608 tool_call_response(
3609 "call_1",
3610 "bash",
3611 serde_json::json!({"command": "printf 'ok'", "timeout": 5}),
3612 100,
3613 20,
3614 ),
3615 text_response("done", 120, 25),
3616 ]));
3617
3618 let model = test_model(provider);
3619 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
3620 agent.tools.register(Arc::new(crate::tools::bash::BashTool));
3621
3622 agent.run("Run a shell command".to_string()).await.unwrap();
3623
3624 let tool_result = agent
3625 .messages
3626 .iter()
3627 .find_map(|message| match message {
3628 Message::ToolResult(result) => Some(result),
3629 _ => None,
3630 })
3631 .expect("expected tool result");
3632 assert!(!tool_result.is_error);
3633 }
3634
3635 #[tokio::test]
3636 async fn agent_cancel_mid_run() {
3637 let provider = Arc::new(MockProvider::new(vec![
3638 tool_call_response(
3640 "call_1",
3641 "echo",
3642 serde_json::json!({"text": "hello"}),
3643 100,
3644 20,
3645 ),
3646 text_response("Should not see this", 100, 20),
3648 ]));
3649
3650 let model = test_model(provider);
3651 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3652 agent.tools.register(Arc::new(EchoTool));
3653
3654 handle.command_tx.send(AgentCommand::Cancel).await.unwrap();
3656
3657 let events_task = tokio::spawn(collect_events(handle));
3658 let result = agent.run("Do something".to_string()).await;
3659 drop(agent);
3660
3661 assert!(matches!(result, Err(crate::error::Error::Cancelled)));
3663
3664 let events = events_task.await.unwrap();
3665
3666 assert!(events
3668 .iter()
3669 .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
3670
3671 let turn_starts: Vec<_> = events
3673 .iter()
3674 .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3675 .collect();
3676 assert!(turn_starts.len() <= 1);
3677 }
3678
3679 #[tokio::test]
3680 async fn single_text_turn_exits_cleanly() {
3681 let provider = Arc::new(MockProvider::new(vec![text_response("SMOKE_OK", 50, 10)]));
3682 let model = test_model(provider);
3683 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3684
3685 let events_task = tokio::spawn(collect_events(handle));
3686 let result = agent.run("Reply once and stop".to_string()).await;
3687 drop(agent);
3688
3689 assert!(result.is_ok());
3690
3691 let events = events_task.await.unwrap();
3692 assert!(events
3693 .iter()
3694 .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
3695 assert!(!events.iter().any(|e| matches!(
3696 e,
3697 AgentEvent::Error { error } if error.contains("Max turns exceeded")
3698 )));
3699 }
3700
3701 #[tokio::test]
3704 async fn agent_unknown_tool_self_corrects() {
3705 let provider = Arc::new(MockProvider::new(vec![
3706 tool_call_response(
3708 "call_1",
3709 "nonexistent",
3710 serde_json::json!({"foo": "bar"}),
3711 100,
3712 20,
3713 ),
3714 text_response("Sorry, I used the wrong tool. Here's the answer.", 200, 30),
3716 ]));
3717
3718 let model = test_model(provider);
3719 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3720 let events_task = tokio::spawn(collect_events(handle));
3723 agent.run("Do something".to_string()).await.unwrap();
3724 drop(agent);
3725
3726 let events = events_task.await.unwrap();
3727
3728 let tool_end = events
3730 .iter()
3731 .find(|e| matches!(e, AgentEvent::ToolExecutionEnd { .. }));
3732 assert!(tool_end.is_some());
3733 if let Some(AgentEvent::ToolExecutionEnd { result, .. }) = tool_end {
3734 assert!(result.is_error);
3735 let text = result.content.iter().find_map(|c| {
3736 if let ContentBlock::Text { text } = c {
3737 Some(text.as_str())
3738 } else {
3739 None
3740 }
3741 });
3742 assert!(text.unwrap().contains("Unknown tool"));
3743 }
3744
3745 let turn_starts: Vec<_> = events
3747 .iter()
3748 .filter(|e| matches!(e, AgentEvent::TurnStart { .. }))
3749 .collect();
3750 assert_eq!(turn_starts.len(), 2);
3751
3752 assert!(events
3754 .iter()
3755 .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
3756 }
3757
3758 #[tokio::test]
3759 async fn agent_concurrent_readonly() {
3760 let shared = Arc::new(ConcurrentReadonlyState::new(3));
3761 let provider = Arc::new(MockProvider::new(vec![
3762 multi_tool_call_response(
3763 &[
3764 ("call_ro_1", "echo_a", serde_json::json!({"text": "first"})),
3765 (
3766 "call_write",
3767 "write_after_reads",
3768 serde_json::json!({"data": "mutate"}),
3769 ),
3770 ("call_ro_2", "echo_b", serde_json::json!({"text": "second"})),
3771 ("call_ro_3", "echo_c", serde_json::json!({"text": "third"})),
3772 ],
3773 100,
3774 40,
3775 ),
3776 text_response("All tools finished", 150, 20),
3777 ]));
3778
3779 let model = test_model(provider);
3780 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3781 drop(handle);
3782
3783 agent.tools.register(Arc::new(CoordinatedReadonlyTool {
3784 name: "echo_a",
3785 shared: shared.clone(),
3786 }));
3787 agent.tools.register(Arc::new(CoordinatedReadonlyTool {
3788 name: "echo_b",
3789 shared: shared.clone(),
3790 }));
3791 agent.tools.register(Arc::new(CoordinatedReadonlyTool {
3792 name: "echo_c",
3793 shared: shared.clone(),
3794 }));
3795 agent.tools.register(Arc::new(CoordinatedMutableTool {
3796 shared: shared.clone(),
3797 }));
3798
3799 tokio::time::timeout(
3800 Duration::from_millis(250),
3801 agent.run("Run all tools".to_string()),
3802 )
3803 .await
3804 .expect("read-only tools should not block each other")
3805 .expect("agent should complete successfully");
3806
3807 let tool_result_ids: Vec<_> = agent
3808 .messages
3809 .iter()
3810 .filter_map(|message| match message {
3811 Message::ToolResult(result) => Some(result.tool_call_id.as_str()),
3812 _ => None,
3813 })
3814 .collect();
3815 assert_eq!(
3816 tool_result_ids,
3817 vec!["call_ro_1", "call_write", "call_ro_2", "call_ro_3"]
3818 );
3819
3820 assert_eq!(shared.readonly_started.load(Ordering::SeqCst), 3);
3821 assert_eq!(shared.readonly_finished.load(Ordering::SeqCst), 3);
3822 assert_eq!(shared.mutable_observed_finished.load(Ordering::SeqCst), 3);
3823
3824 let log = shared.log.lock().expect("concurrent log lock").clone();
3825 assert_eq!(
3826 log.last().map(String::as_str),
3827 Some("write_after_reads:start")
3828 );
3829 }
3830
3831 #[tokio::test]
3834 async fn agent_event_ordering() {
3835 let provider = Arc::new(MockProvider::new(vec![
3836 tool_call_response(
3837 "call_1",
3838 "echo",
3839 serde_json::json!({"text": "hello"}),
3840 50,
3841 10,
3842 ),
3843 text_response("Done", 50, 10),
3844 ]));
3845
3846 let model = test_model(provider);
3847 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3848 agent.tools.register(Arc::new(EchoTool));
3849
3850 let events_task = tokio::spawn(collect_events(handle));
3851 agent.run("test".to_string()).await.unwrap();
3852 drop(agent);
3853
3854 let events = events_task.await.unwrap();
3855
3856 let types: Vec<&str> = events
3858 .iter()
3859 .map(|e| match e {
3860 AgentEvent::AgentStart { .. } => "AgentStart",
3861 AgentEvent::AgentEnd { .. } => "AgentEnd",
3862 AgentEvent::TurnStart { .. } => "TurnStart",
3863 AgentEvent::TurnEnd { .. } => "TurnEnd",
3864 AgentEvent::MessageDelta { .. } => "MessageDelta",
3865 AgentEvent::ToolExecutionStart { .. } => "ToolExecStart",
3866 AgentEvent::ToolExecutionEnd { .. } => "ToolExecEnd",
3867 AgentEvent::Warning { .. } => "Warning",
3868 AgentEvent::EvidenceWritten { .. } => "EvidenceWritten",
3869 AgentEvent::VerificationStarted { .. } => "VerificationStarted",
3870 AgentEvent::VerificationCompleted { .. } => "VerificationCompleted",
3871 AgentEvent::PolicyChecked { .. } => "PolicyChecked",
3872 AgentEvent::Error { .. } => "Error",
3873 _ => "Other",
3874 })
3875 .collect();
3876
3877 assert_eq!(types[0], "AgentStart");
3879
3880 assert_eq!(types[types.len() - 1], "AgentEnd");
3882
3883 let mut turn_start_indices: Vec<usize> = Vec::new();
3885 let mut turn_end_indices: Vec<usize> = Vec::new();
3886 for (i, t) in types.iter().enumerate() {
3887 if *t == "TurnStart" {
3888 turn_start_indices.push(i);
3889 }
3890 if *t == "TurnEnd" {
3891 turn_end_indices.push(i);
3892 }
3893 }
3894 assert_eq!(turn_start_indices.len(), 2);
3895 assert_eq!(turn_end_indices.len(), 2);
3896 for i in 0..turn_start_indices.len() {
3897 assert!(turn_start_indices[i] < turn_end_indices[i]);
3898 }
3899
3900 let tool_start = types.iter().position(|t| *t == "ToolExecStart");
3902 let tool_end = types.iter().position(|t| *t == "ToolExecEnd");
3903 assert!(tool_start.is_some());
3904 assert!(tool_end.is_some());
3905 assert!(tool_start.unwrap() < tool_end.unwrap());
3906 }
3907
3908 #[tokio::test]
3909 async fn agent_fires_hooks() {
3910 let provider = Arc::new(MockProvider::new(vec![text_response("hooked", 100, 20)]));
3911 let model = test_model(provider);
3912 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3913 drop(handle);
3914
3915 let hook_calls = Arc::new(AtomicUsize::new(0));
3916 let hook_calls_for_callback = hook_calls.clone();
3917 agent.hooks.register(crate::hooks::HookDefinition {
3918 event: "before_llm_call".to_string(),
3919 match_pattern: None,
3920 action: crate::hooks::HookAction::Callback(Arc::new(move |_event| {
3921 hook_calls_for_callback.fetch_add(1, Ordering::SeqCst);
3922 crate::hooks::HookResult::default()
3923 })),
3924 blocking: true,
3925 threshold: None,
3926 });
3927
3928 agent.run("Run once".to_string()).await.unwrap();
3929
3930 assert_eq!(hook_calls.load(Ordering::SeqCst), 1);
3931 }
3932
3933 #[tokio::test]
3934 async fn agent_context_masking() {
3935 let provider = Arc::new(MockProvider::new(vec![text_response("done", 100, 20)]));
3936
3937 let mut seeded_messages = Vec::new();
3938 for index in 0..12 {
3939 let call_id = format!("call_{index}");
3940 seeded_messages.push(make_assistant_tool_call(
3941 &call_id,
3942 "read",
3943 serde_json::json!({"path": format!("src/file_{index}.rs")}),
3944 ));
3945 seeded_messages.push(make_tool_result(&call_id, "read", &"x".repeat(400)));
3946 }
3947
3948 let mut usage_messages = seeded_messages.clone();
3949 usage_messages.push(Message::user("trigger masking"));
3950 let provisional_model = test_model(provider.clone());
3951 let usage = crate::context::context_usage(&usage_messages, &provisional_model);
3952 let context_window = ((usage.used as f64) / 0.7).ceil() as u32;
3953
3954 let model = test_model_with_context_window(provider, context_window.max(1));
3955 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
3956 drop(handle);
3957 agent.messages = seeded_messages;
3958
3959 agent.run("trigger masking".to_string()).await.unwrap();
3960
3961 let masked = tool_result_text(&agent.messages[1]).expect("first tool result text");
3962 assert!(masked.starts_with("[Output omitted"));
3963
3964 let recent_index = (10 * 2) + 1;
3965 let recent =
3966 tool_result_text(&agent.messages[recent_index]).expect("recent tool result text");
3967 let expected_recent = "x".repeat(400);
3968 assert_eq!(recent, expected_recent.as_str());
3969 }
3970
3971 #[tokio::test]
3972 async fn agent_masks_observations_when_context_is_tight() {
3973 let provider = Arc::new(MockProvider::new(vec![text_response("done", 100, 20)]));
3974
3975 let mut seeded_messages = Vec::new();
3976 for index in 0..12 {
3977 let call_id = format!("call_{index}");
3978 seeded_messages.push(make_assistant_tool_call(
3979 &call_id,
3980 "read",
3981 serde_json::json!({"path": format!("src/file_{index}.rs")}),
3982 ));
3983 seeded_messages.push(make_tool_result(&call_id, "read", &"x".repeat(400)));
3984 }
3985
3986 let mut usage_messages = seeded_messages.clone();
3987 usage_messages.push(Message::user("trigger masking"));
3988 let provisional_model = test_model(provider.clone());
3989 let usage_before = crate::context::context_usage(&usage_messages, &provisional_model);
3990
3991 let mut masked_messages = usage_messages.clone();
3992 crate::context::mask_observations(&mut masked_messages, 10);
3993 let usage_after = crate::context::context_usage(&masked_messages, &provisional_model);
3994
3995 assert!(usage_before.used > usage_after.used);
3996
3997 let context_window = ((usage_before.used as f64) / 0.7).ceil() as u32;
3999
4000 let model = test_model_with_context_window(provider, context_window.max(1));
4001 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4002 let events_task = tokio::spawn(collect_events(handle));
4003 agent.messages = seeded_messages;
4004
4005 agent.run("trigger masking".to_string()).await.unwrap();
4006 drop(agent);
4007
4008 let events = events_task.await.unwrap();
4009
4010 assert!(
4011 events
4012 .iter()
4013 .any(|e| matches!(e, AgentEvent::TurnStart { index: 0 })),
4014 "agent should still run normally"
4015 );
4016 }
4017
4018 #[tokio::test]
4021 async fn agent_usage_cost_accumulation() {
4022 let provider = Arc::new(MockProvider::new(vec![
4023 tool_call_response(
4024 "call_1",
4025 "echo",
4026 serde_json::json!({"text": "a"}),
4027 1_000_000, 500_000, ),
4030 text_response("done", 1_000_000, 500_000),
4031 ]));
4032
4033 let model = test_model(provider);
4034 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4035 agent.tools.register(Arc::new(EchoTool));
4036
4037 let events_task = tokio::spawn(collect_events(handle));
4038 agent.run("test".to_string()).await.unwrap();
4039 drop(agent);
4040
4041 let events = events_task.await.unwrap();
4042
4043 if let Some(AgentEvent::AgentEnd { usage, cost, .. }) = events
4044 .iter()
4045 .find(|e| matches!(e, AgentEvent::AgentEnd { .. }))
4046 {
4047 assert_eq!(usage.input_tokens, 2_000_000);
4049 assert_eq!(usage.output_tokens, 1_000_000);
4050
4051 assert!((cost.input - 6.0).abs() < 1e-10);
4053 assert!((cost.output - 15.0).abs() < 1e-10);
4054 assert!((cost.total - 21.0).abs() < 1e-10);
4055 } else {
4056 panic!("Expected AgentEnd");
4057 }
4058 }
4059
4060 struct RetryMockProvider {
4066 calls: Mutex<Vec<std::result::Result<Vec<StreamEvent>, imp_llm::Error>>>,
4067 }
4068
4069 impl RetryMockProvider {
4070 fn new(calls: Vec<std::result::Result<Vec<StreamEvent>, imp_llm::Error>>) -> Self {
4071 Self {
4072 calls: Mutex::new(calls),
4073 }
4074 }
4075 }
4076
4077 #[async_trait]
4078 impl Provider for RetryMockProvider {
4079 fn stream(
4080 &self,
4081 _model: &Model,
4082 _context: Context,
4083 _options: RequestOptions,
4084 _api_key: &str,
4085 ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<StreamEvent>> + Send>> {
4086 let mut calls = self.calls.try_lock().expect("RetryMockProvider lock");
4087 let outcome = if calls.is_empty() {
4088 Ok(vec![StreamEvent::Error {
4089 error: "No more mock responses".to_string(),
4090 }])
4091 } else {
4092 calls.remove(0)
4093 };
4094 match outcome {
4095 Ok(events) => Box::pin(futures::stream::iter(
4096 events.into_iter().map(imp_llm::Result::Ok),
4097 )),
4098 Err(e) => Box::pin(futures::stream::once(async move {
4099 imp_llm::Result::<StreamEvent>::Err(e)
4100 })),
4101 }
4102 }
4103
4104 async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
4105 Ok("mock-key".to_string())
4106 }
4107
4108 fn id(&self) -> &str {
4109 "retry-mock"
4110 }
4111
4112 fn models(&self) -> &[ModelMeta] {
4113 &[]
4114 }
4115 }
4116
4117 #[tokio::test]
4119 async fn retry_succeeds_after_transient_failures() {
4120 use imp_llm::provider::RetryPolicy;
4121
4122 let provider = Arc::new(RetryMockProvider::new(vec![
4123 Err(imp_llm::Error::RateLimited {
4125 retry_after_secs: Some(0),
4126 }),
4127 Err(imp_llm::Error::RateLimited {
4128 retry_after_secs: Some(0),
4129 }),
4130 Ok(text_response("Hello after retries", 100, 20)),
4132 ]));
4133
4134 let model = test_model(provider);
4135 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4136 agent.retry_policy = RetryPolicy {
4138 max_retries: 3,
4139 base_delay: std::time::Duration::from_millis(0),
4140 max_delay: std::time::Duration::from_secs(30),
4141 retry_on: vec![],
4142 };
4143
4144 let events_task = tokio::spawn(collect_events(handle));
4145 agent.run("Say hello".to_string()).await.unwrap();
4146 drop(agent);
4147
4148 let events = events_task.await.unwrap();
4149
4150 assert!(events
4152 .iter()
4153 .any(|e| matches!(e, AgentEvent::AgentEnd { .. })));
4154
4155 let turn_end = events.iter().find_map(|e| match e {
4157 AgentEvent::TurnEnd { message, .. } => Some(message),
4158 _ => None,
4159 });
4160 assert!(turn_end.is_some());
4161 let content_text = turn_end
4162 .unwrap()
4163 .content
4164 .iter()
4165 .find_map(|b| match b {
4166 ContentBlock::Text { text } => Some(text.as_str()),
4167 _ => None,
4168 })
4169 .unwrap_or("");
4170 assert!(
4171 content_text.contains("Hello after retries"),
4172 "expected final text, got: {content_text}"
4173 );
4174 }
4175
4176 #[tokio::test]
4178 async fn retry_fails_when_max_retries_exhausted() {
4179 use imp_llm::provider::RetryPolicy;
4180
4181 let provider = Arc::new(RetryMockProvider::new(vec![
4182 Err(imp_llm::Error::RateLimited {
4183 retry_after_secs: Some(0),
4184 }),
4185 Err(imp_llm::Error::RateLimited {
4186 retry_after_secs: Some(0),
4187 }),
4188 Err(imp_llm::Error::RateLimited {
4189 retry_after_secs: Some(0),
4190 }),
4191 Err(imp_llm::Error::RateLimited {
4192 retry_after_secs: Some(0),
4193 }),
4194 ]));
4195
4196 let model = test_model(provider);
4197 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4198 agent.retry_policy = RetryPolicy {
4199 max_retries: 2, base_delay: std::time::Duration::from_millis(0),
4201 max_delay: std::time::Duration::from_secs(30),
4202 retry_on: vec![],
4203 };
4204 drop(handle);
4205
4206 let result = agent.run("Fail".to_string()).await;
4207 assert!(
4208 result.is_err(),
4209 "should have failed after exhausting retries"
4210 );
4211 }
4212
4213 #[tokio::test]
4215 async fn retry_does_not_retry_auth_errors() {
4216 use imp_llm::provider::RetryPolicy;
4217 use std::sync::atomic::{AtomicUsize, Ordering};
4218
4219 let call_count = Arc::new(AtomicUsize::new(0));
4220 let call_count_clone = call_count.clone();
4221
4222 struct CountingAuthFailProvider {
4223 calls: AtomicUsize,
4224 success_after: usize,
4225 }
4226
4227 #[async_trait]
4228 impl Provider for CountingAuthFailProvider {
4229 fn stream(
4230 &self,
4231 _model: &Model,
4232 _context: Context,
4233 _options: RequestOptions,
4234 _api_key: &str,
4235 ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<StreamEvent>> + Send>> {
4236 let n = self.calls.fetch_add(1, Ordering::SeqCst);
4237 if n < self.success_after {
4238 Box::pin(futures::stream::once(async {
4239 Err(imp_llm::Error::Auth("Invalid API key".to_string()))
4240 }))
4241 } else {
4242 Box::pin(futures::stream::iter(
4243 text_response("ok", 10, 5).into_iter().map(Ok),
4244 ))
4245 }
4246 }
4247
4248 async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
4249 Ok("mock-key".to_string())
4250 }
4251
4252 fn id(&self) -> &str {
4253 "auth-fail-mock"
4254 }
4255
4256 fn models(&self) -> &[ModelMeta] {
4257 &[]
4258 }
4259 }
4260
4261 let _ = call_count_clone; let provider = Arc::new(CountingAuthFailProvider {
4264 calls: AtomicUsize::new(0),
4265 success_after: 999, });
4267 let call_ref = &provider.calls;
4268
4269 let model = test_model(provider.clone());
4270 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
4271 agent.retry_policy = RetryPolicy {
4272 max_retries: 5, base_delay: std::time::Duration::from_millis(0),
4274 max_delay: std::time::Duration::from_secs(30),
4275 retry_on: vec![],
4276 };
4277 drop(handle);
4278
4279 let result = agent.run("Auth test".to_string()).await;
4280 assert!(result.is_err(), "should fail on auth error");
4281
4282 assert_eq!(
4284 call_ref.load(std::sync::atomic::Ordering::SeqCst),
4285 1,
4286 "auth errors should not be retried"
4287 );
4288 }
4289}
4290
4291#[cfg(test)]
4294mod integration {
4295 use super::*;
4296 use std::path::PathBuf;
4297 use std::pin::Pin;
4298 use std::sync::Arc;
4299
4300 use async_trait::async_trait;
4301 use futures_core::Stream;
4302 use imp_llm::auth::{ApiKey, AuthStore};
4303 use imp_llm::model::{Capabilities, ModelMeta, ModelPricing};
4304 use imp_llm::provider::Provider;
4305 use tokio::sync::Mutex;
4306
4307 use crate::tools::{bash::BashTool, edit::EditTool, read::ReadTool, write::WriteTool};
4308
4309 struct MockProvider {
4312 responses: Mutex<Vec<Vec<StreamEvent>>>,
4313 }
4314
4315 impl MockProvider {
4316 fn new(responses: Vec<Vec<StreamEvent>>) -> Self {
4317 Self {
4318 responses: Mutex::new(responses),
4319 }
4320 }
4321 }
4322
4323 #[async_trait]
4324 impl Provider for MockProvider {
4325 fn stream(
4326 &self,
4327 _model: &Model,
4328 _context: Context,
4329 _options: RequestOptions,
4330 _api_key: &str,
4331 ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<StreamEvent>> + Send>> {
4332 let mut responses = self.responses.try_lock().expect("MockProvider lock");
4333 let events = if responses.is_empty() {
4334 vec![StreamEvent::Error {
4335 error: "No more mock responses".to_string(),
4336 }]
4337 } else {
4338 responses.remove(0)
4339 };
4340 Box::pin(futures::stream::iter(events.into_iter().map(Ok)))
4341 }
4342
4343 async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
4344 Ok("mock-key".to_string())
4345 }
4346
4347 fn id(&self) -> &str {
4348 "mock"
4349 }
4350
4351 fn models(&self) -> &[ModelMeta] {
4352 &[]
4353 }
4354 }
4355
4356 fn test_model(provider: Arc<dyn Provider>) -> Model {
4357 Model {
4358 meta: ModelMeta {
4359 id: "test-model".to_string(),
4360 provider: "mock".to_string(),
4361 name: "Test Model".to_string(),
4362 context_window: 200_000,
4363 max_output_tokens: 16_384,
4364 pricing: ModelPricing {
4365 input_per_mtok: 3.0,
4366 output_per_mtok: 15.0,
4367 cache_read_per_mtok: 0.3,
4368 cache_write_per_mtok: 3.75,
4369 },
4370 capabilities: Capabilities {
4371 reasoning: true,
4372 images: false,
4373 tool_use: true,
4374 },
4375 },
4376 provider,
4377 }
4378 }
4379
4380 fn text_response(text: &str, input_tokens: u32, output_tokens: u32) -> Vec<StreamEvent> {
4381 vec![
4382 StreamEvent::MessageStart {
4383 model: "test-model".to_string(),
4384 },
4385 StreamEvent::TextDelta {
4386 text: text.to_string(),
4387 },
4388 StreamEvent::MessageEnd {
4389 message: AssistantMessage {
4390 content: vec![ContentBlock::Text {
4391 text: text.to_string(),
4392 }],
4393 usage: Some(Usage {
4394 input_tokens,
4395 output_tokens,
4396 cache_read_tokens: 0,
4397 cache_write_tokens: 0,
4398 }),
4399 stop_reason: LlmStopReason::EndTurn,
4400 timestamp: 1000,
4401 },
4402 },
4403 ]
4404 }
4405
4406 fn tool_call_response(
4407 call_id: &str,
4408 tool_name: &str,
4409 args: serde_json::Value,
4410 input_tokens: u32,
4411 output_tokens: u32,
4412 ) -> Vec<StreamEvent> {
4413 vec![
4414 StreamEvent::MessageStart {
4415 model: "test-model".to_string(),
4416 },
4417 StreamEvent::ToolCall {
4418 id: call_id.to_string(),
4419 name: tool_name.to_string(),
4420 arguments: args.clone(),
4421 },
4422 StreamEvent::MessageEnd {
4423 message: AssistantMessage {
4424 content: vec![ContentBlock::ToolCall {
4425 id: call_id.to_string(),
4426 name: tool_name.to_string(),
4427 arguments: args,
4428 }],
4429 usage: Some(Usage {
4430 input_tokens,
4431 output_tokens,
4432 cache_read_tokens: 0,
4433 cache_write_tokens: 0,
4434 }),
4435 stop_reason: LlmStopReason::ToolUse,
4436 timestamp: 1000,
4437 },
4438 },
4439 ]
4440 }
4441
4442 fn create_agent_with_tools(provider: Arc<dyn Provider>, cwd: PathBuf) -> (Agent, AgentHandle) {
4444 let model = test_model(provider);
4445 let (mut agent, handle) = Agent::new(model, cwd);
4446 agent.tools.register(Arc::new(WriteTool));
4447 agent.tools.register(Arc::new(ReadTool));
4448 agent.tools.register(Arc::new(EditTool));
4449 agent.tools.register(Arc::new(BashTool));
4450 (agent, handle)
4451 }
4452
4453 fn create_agent_with_reduced_tools(
4455 provider: Arc<dyn Provider>,
4456 cwd: PathBuf,
4457 ) -> (Agent, AgentHandle) {
4458 let model = test_model(provider);
4459 let (mut agent, handle) = Agent::new(model, cwd);
4460 agent.tools.register(Arc::new(WriteTool));
4461 agent.tools.register(Arc::new(ReadTool));
4462 agent.tools.register(Arc::new(EditTool));
4463 agent.tools.register(Arc::new(BashTool));
4464 (agent, handle)
4465 }
4466
4467 #[tokio::test]
4470 async fn agent_reads_and_writes_file() {
4471 let tmp = tempfile::tempdir().unwrap();
4472 let provider = Arc::new(MockProvider::new(vec![
4473 tool_call_response(
4474 "call_write",
4475 "write",
4476 serde_json::json!({"path": "test.txt", "content": "hello world"}),
4477 100,
4478 20,
4479 ),
4480 tool_call_response(
4481 "call_read",
4482 "read",
4483 serde_json::json!({"path": "test.txt"}),
4484 100,
4485 20,
4486 ),
4487 text_response("The file contains: hello world", 100, 20),
4488 ]));
4489
4490 let (mut agent, handle) = create_agent_with_tools(provider, tmp.path().to_path_buf());
4491 drop(handle);
4492
4493 agent
4494 .run("Write and read a file".to_string())
4495 .await
4496 .unwrap();
4497
4498 let on_disk = std::fs::read_to_string(tmp.path().join("test.txt")).unwrap();
4500 assert_eq!(on_disk, "hello world");
4501
4502 let read_result = agent
4504 .messages
4505 .iter()
4506 .find_map(|m| match m {
4507 Message::ToolResult(r) if r.tool_call_id == "call_read" => Some(r),
4508 _ => None,
4509 })
4510 .expect("should have a read tool result");
4511 let read_text = read_result
4512 .content
4513 .iter()
4514 .find_map(|b| match b {
4515 ContentBlock::Text { text } => Some(text.as_str()),
4516 _ => None,
4517 })
4518 .unwrap();
4519 assert!(
4520 read_text.contains("hello world"),
4521 "read result should contain file content, got: {read_text}"
4522 );
4523
4524 let assistant_count = agent
4526 .messages
4527 .iter()
4528 .filter(|m| matches!(m, Message::Assistant(_)))
4529 .count();
4530 assert_eq!(assistant_count, 3);
4531 }
4532
4533 #[tokio::test]
4536 async fn agent_edit_tool_modifies_file() {
4537 let tmp = tempfile::tempdir().unwrap();
4538 let provider = Arc::new(MockProvider::new(vec![
4539 tool_call_response(
4540 "call_write",
4541 "write",
4542 serde_json::json!({
4543 "path": "src/main.rs",
4544 "content": "fn main() {\n println!(\"old\");\n}"
4545 }),
4546 100,
4547 20,
4548 ),
4549 tool_call_response(
4550 "call_edit",
4551 "edit",
4552 serde_json::json!({
4553 "path": "src/main.rs",
4554 "oldText": "old",
4555 "newText": "new"
4556 }),
4557 100,
4558 20,
4559 ),
4560 tool_call_response(
4561 "call_read",
4562 "read",
4563 serde_json::json!({"path": "src/main.rs"}),
4564 100,
4565 20,
4566 ),
4567 text_response("Done", 100, 20),
4568 ]));
4569
4570 let (mut agent, handle) = create_agent_with_tools(provider, tmp.path().to_path_buf());
4571 drop(handle);
4572
4573 agent.run("Edit a file".to_string()).await.unwrap();
4574
4575 let on_disk = std::fs::read_to_string(tmp.path().join("src/main.rs")).unwrap();
4577 assert!(on_disk.contains("new"), "file should contain 'new'");
4578 assert!(!on_disk.contains("old"), "file should not contain 'old'");
4579
4580 let edit_result = agent
4582 .messages
4583 .iter()
4584 .find_map(|m| match m {
4585 Message::ToolResult(r) if r.tool_call_id == "call_edit" => Some(r),
4586 _ => None,
4587 })
4588 .expect("should have an edit tool result");
4589 let edit_text = edit_result
4590 .content
4591 .iter()
4592 .find_map(|b| match b {
4593 ContentBlock::Text { text } => Some(text.as_str()),
4594 _ => None,
4595 })
4596 .unwrap();
4597 assert!(
4598 edit_text.contains("---") || edit_text.contains("+++"),
4599 "edit result should include a diff, got: {edit_text}"
4600 );
4601 }
4602
4603 #[tokio::test]
4606 async fn agent_bash_search_finds_pattern() {
4607 let tmp = tempfile::tempdir().unwrap();
4608 std::fs::write(
4609 tmp.path().join("search_me.txt"),
4610 "line one\nunique_pattern_xyz here\nline three\n",
4611 )
4612 .unwrap();
4613 let provider = Arc::new(MockProvider::new(vec![
4614 tool_call_response(
4615 "call_bash",
4616 "bash",
4617 serde_json::json!({"command": "grep --no-color -rn 'unique_pattern_xyz' ."}),
4618 100,
4619 20,
4620 ),
4621 text_response("Found it!", 100, 20),
4622 ]));
4623
4624 let (mut agent, handle) =
4625 create_agent_with_reduced_tools(provider, tmp.path().to_path_buf());
4626 drop(handle);
4627
4628 agent.run("Search for a pattern".to_string()).await.unwrap();
4629
4630 let bash_result = agent
4631 .messages
4632 .iter()
4633 .find_map(|m| match m {
4634 Message::ToolResult(r) if r.tool_call_id == "call_bash" => Some(r),
4635 _ => None,
4636 })
4637 .expect("should have a bash tool result");
4638 let bash_text = bash_result
4639 .content
4640 .iter()
4641 .find_map(|b| match b {
4642 ContentBlock::Text { text } => Some(text.as_str()),
4643 _ => None,
4644 })
4645 .unwrap();
4646 assert!(
4647 !bash_text.trim().is_empty(),
4648 "bash grep output should not be empty"
4649 );
4650 }
4651
4652 #[tokio::test]
4655 async fn agent_repeated_tool_calls_warn_then_block() {
4656 let tmp = tempfile::tempdir().unwrap();
4657 std::fs::write(tmp.path().join("repeat.txt"), "same content\n").unwrap();
4658
4659 let provider = Arc::new(MockProvider::new(vec![
4660 tool_call_response(
4661 "call_1",
4662 "read",
4663 serde_json::json!({"path": "repeat.txt"}),
4664 100,
4665 20,
4666 ),
4667 tool_call_response(
4668 "call_2",
4669 "read",
4670 serde_json::json!({"path": "repeat.txt"}),
4671 100,
4672 20,
4673 ),
4674 tool_call_response(
4675 "call_3",
4676 "read",
4677 serde_json::json!({"path": "repeat.txt"}),
4678 100,
4679 20,
4680 ),
4681 tool_call_response(
4682 "call_4",
4683 "read",
4684 serde_json::json!({"path": "repeat.txt"}),
4685 100,
4686 20,
4687 ),
4688 text_response("Done", 100, 20),
4689 ]));
4690
4691 let (mut agent, handle) =
4692 create_agent_with_reduced_tools(provider, tmp.path().to_path_buf());
4693 drop(handle);
4694
4695 agent
4696 .run("Read the same file repeatedly".to_string())
4697 .await
4698 .unwrap();
4699
4700 let third = agent
4701 .messages
4702 .iter()
4703 .find_map(|m| match m {
4704 Message::ToolResult(r) if r.tool_call_id == "call_3" => Some(r),
4705 _ => None,
4706 })
4707 .expect("third tool result");
4708 let fourth = agent
4709 .messages
4710 .iter()
4711 .find_map(|m| match m {
4712 Message::ToolResult(r) if r.tool_call_id == "call_4" => Some(r),
4713 _ => None,
4714 })
4715 .expect("fourth tool result");
4716
4717 let third_text = third
4718 .content
4719 .iter()
4720 .filter_map(|b| match b {
4721 ContentBlock::Text { text } => Some(text.as_str()),
4722 _ => None,
4723 })
4724 .collect::<Vec<_>>()
4725 .join("\n");
4726 let fourth_text = fourth
4727 .content
4728 .iter()
4729 .filter_map(|b| match b {
4730 ContentBlock::Text { text } => Some(text.as_str()),
4731 _ => None,
4732 })
4733 .collect::<Vec<_>>()
4734 .join("\n");
4735
4736 assert!(third_text.contains("Warning: identical tool call repeated 3 times"));
4737 assert!(fourth.is_error);
4738 assert!(fourth_text.contains("Blocked: identical tool call repeated 4 times"));
4739 assert_eq!(
4740 agent
4741 .messages
4742 .iter()
4743 .filter(|message| matches!(message, Message::User(_)))
4744 .count(),
4745 1,
4746 "agent should stop after repeated-action block rather than enqueueing more follow-ups"
4747 );
4748 }
4749
4750 #[test]
4751 fn tool_results_indicate_repeated_action_detects_blocked_repeat_message() {
4752 let result = imp_llm::ToolResultMessage {
4753 tool_call_id: "call_repeat".to_string(),
4754 tool_name: "read".to_string(),
4755 content: vec![ContentBlock::Text {
4756 text: "Blocked: identical tool call repeated 4 times in a row for 'read'."
4757 .to_string(),
4758 }],
4759 is_error: true,
4760 details: serde_json::Value::Null,
4761 timestamp: 0,
4762 };
4763
4764 assert!(tool_results_indicate_repeated_action(&[result]));
4765 }
4766
4767 #[tokio::test]
4770 async fn agent_bash_runs_command() {
4771 let tmp = tempfile::tempdir().unwrap();
4772 let provider = Arc::new(MockProvider::new(vec![
4773 tool_call_response(
4774 "call_bash",
4775 "bash",
4776 serde_json::json!({"command": "echo hello && echo world"}),
4777 100,
4778 20,
4779 ),
4780 text_response("Done", 100, 20),
4781 ]));
4782
4783 let (mut agent, handle) = create_agent_with_tools(provider, tmp.path().to_path_buf());
4784 drop(handle);
4785
4786 agent.run("Run a command".to_string()).await.unwrap();
4787
4788 let bash_result = agent
4790 .messages
4791 .iter()
4792 .find_map(|m| match m {
4793 Message::ToolResult(r) if r.tool_call_id == "call_bash" => Some(r),
4794 _ => None,
4795 })
4796 .expect("should have a bash tool result");
4797 let bash_text = bash_result
4798 .content
4799 .iter()
4800 .find_map(|b| match b {
4801 ContentBlock::Text { text } => Some(text.as_str()),
4802 _ => None,
4803 })
4804 .unwrap();
4805 assert!(
4806 bash_text.contains("hello"),
4807 "bash output should contain 'hello', got: {bash_text}"
4808 );
4809 assert!(
4810 bash_text.contains("world"),
4811 "bash output should contain 'world', got: {bash_text}"
4812 );
4813
4814 assert_eq!(bash_result.details["exit_code"], 0);
4816 }
4817
4818 #[tokio::test]
4821 async fn agent_handles_tool_error_gracefully() {
4822 let tmp = tempfile::tempdir().unwrap();
4823 let provider = Arc::new(MockProvider::new(vec![
4824 tool_call_response(
4825 "call_read",
4826 "read",
4827 serde_json::json!({"path": "nonexistent.txt"}),
4828 100,
4829 20,
4830 ),
4831 text_response("File not found, let me try something else", 100, 20),
4832 ]));
4833
4834 let (mut agent, handle) = create_agent_with_tools(provider, tmp.path().to_path_buf());
4835 drop(handle);
4836
4837 agent.run("Read a file".to_string()).await.unwrap();
4838
4839 let read_result = agent
4841 .messages
4842 .iter()
4843 .find_map(|m| match m {
4844 Message::ToolResult(r) if r.tool_call_id == "call_read" => Some(r),
4845 _ => None,
4846 })
4847 .expect("should have a read tool result");
4848 assert!(
4849 read_result.is_error,
4850 "reading nonexistent file should produce an error result"
4851 );
4852
4853 let assistant_count = agent
4855 .messages
4856 .iter()
4857 .filter(|m| matches!(m, Message::Assistant(_)))
4858 .count();
4859 assert_eq!(
4860 assistant_count, 2,
4861 "agent should have 2 turns: error + recovery"
4862 );
4863
4864 }
4866}
4867
4868#[cfg(test)]
4871mod mode_tests {
4872 use super::*;
4873 use std::path::PathBuf;
4874 use std::pin::Pin;
4875 use std::sync::Arc;
4876
4877 use async_trait::async_trait;
4878 use futures_core::Stream;
4879 use imp_llm::auth::{ApiKey, AuthStore};
4880 use imp_llm::model::ModelMeta;
4881 use imp_llm::provider::Provider;
4882 use tokio::sync::Mutex;
4883
4884 struct MockProvider {
4887 responses: Mutex<Vec<Vec<imp_llm::StreamEvent>>>,
4888 }
4889
4890 impl MockProvider {
4891 fn new(responses: Vec<Vec<imp_llm::StreamEvent>>) -> Self {
4892 Self {
4893 responses: Mutex::new(responses),
4894 }
4895 }
4896 }
4897
4898 #[async_trait]
4899 impl Provider for MockProvider {
4900 fn stream(
4901 &self,
4902 _model: &imp_llm::Model,
4903 _context: imp_llm::Context,
4904 _options: imp_llm::RequestOptions,
4905 _api_key: &str,
4906 ) -> Pin<Box<dyn Stream<Item = imp_llm::Result<imp_llm::StreamEvent>> + Send>> {
4907 let mut responses = self.responses.try_lock().expect("MockProvider lock");
4908 let events = if responses.is_empty() {
4909 vec![imp_llm::StreamEvent::Error {
4910 error: "No more mock responses".to_string(),
4911 }]
4912 } else {
4913 responses.remove(0)
4914 };
4915 Box::pin(futures::stream::iter(events.into_iter().map(Ok)))
4916 }
4917
4918 async fn resolve_auth(&self, _auth: &AuthStore) -> imp_llm::Result<ApiKey> {
4919 Ok("mock-key".to_string())
4920 }
4921
4922 fn id(&self) -> &str {
4923 "mock"
4924 }
4925
4926 fn models(&self) -> &[imp_llm::model::ModelMeta] {
4927 &[]
4928 }
4929 }
4930
4931 fn test_model(provider: Arc<dyn Provider>) -> imp_llm::Model {
4932 imp_llm::Model {
4933 meta: ModelMeta {
4934 id: "test-model".to_string(),
4935 provider: "mock".to_string(),
4936 name: "Test Model".to_string(),
4937 context_window: 200_000,
4938 max_output_tokens: 16_384,
4939 pricing: imp_llm::model::ModelPricing {
4940 input_per_mtok: 3.0,
4941 output_per_mtok: 15.0,
4942 cache_read_per_mtok: 0.3,
4943 cache_write_per_mtok: 3.75,
4944 },
4945 capabilities: imp_llm::model::Capabilities {
4946 reasoning: true,
4947 images: false,
4948 tool_use: true,
4949 },
4950 },
4951 provider,
4952 }
4953 }
4954
4955 fn text_response(text: &str, input: u32, output: u32) -> Vec<imp_llm::StreamEvent> {
4956 vec![
4957 imp_llm::StreamEvent::MessageStart {
4958 model: "test-model".to_string(),
4959 },
4960 imp_llm::StreamEvent::TextDelta {
4961 text: text.to_string(),
4962 },
4963 imp_llm::StreamEvent::MessageEnd {
4964 message: imp_llm::AssistantMessage {
4965 content: vec![imp_llm::ContentBlock::Text {
4966 text: text.to_string(),
4967 }],
4968 usage: Some(imp_llm::Usage {
4969 input_tokens: input,
4970 output_tokens: output,
4971 cache_read_tokens: 0,
4972 cache_write_tokens: 0,
4973 }),
4974 stop_reason: imp_llm::StopReason::EndTurn,
4975 timestamp: 1000,
4976 },
4977 },
4978 ]
4979 }
4980
4981 fn tool_call_response(
4982 call_id: &str,
4983 tool_name: &str,
4984 args: serde_json::Value,
4985 input: u32,
4986 output: u32,
4987 ) -> Vec<imp_llm::StreamEvent> {
4988 vec![
4989 imp_llm::StreamEvent::MessageStart {
4990 model: "test-model".to_string(),
4991 },
4992 imp_llm::StreamEvent::ToolCall {
4993 id: call_id.to_string(),
4994 name: tool_name.to_string(),
4995 arguments: args.clone(),
4996 },
4997 imp_llm::StreamEvent::MessageEnd {
4998 message: imp_llm::AssistantMessage {
4999 content: vec![imp_llm::ContentBlock::ToolCall {
5000 id: call_id.to_string(),
5001 name: tool_name.to_string(),
5002 arguments: args,
5003 }],
5004 usage: Some(imp_llm::Usage {
5005 input_tokens: input,
5006 output_tokens: output,
5007 cache_read_tokens: 0,
5008 cache_write_tokens: 0,
5009 }),
5010 stop_reason: imp_llm::StopReason::ToolUse,
5011 timestamp: 1000,
5012 },
5013 },
5014 ]
5015 }
5016
5017 async fn collect_events(mut handle: AgentHandle) -> Vec<AgentEvent> {
5018 let mut events = Vec::new();
5019 while let Some(event) = handle.event_rx.recv().await {
5020 events.push(event);
5021 }
5022 events
5023 }
5024
5025 struct EchoTool;
5028
5029 #[async_trait]
5030 impl crate::tools::Tool for EchoTool {
5031 fn name(&self) -> &str {
5032 "echo"
5033 }
5034 fn label(&self) -> &str {
5035 "Echo"
5036 }
5037 fn description(&self) -> &str {
5038 "Echoes back the input"
5039 }
5040 fn parameters(&self) -> serde_json::Value {
5041 serde_json::json!({
5042 "type": "object",
5043 "properties": { "text": { "type": "string" } },
5044 "required": ["text"]
5045 })
5046 }
5047 fn is_readonly(&self) -> bool {
5048 true
5049 }
5050 async fn execute(
5051 &self,
5052 _call_id: &str,
5053 params: serde_json::Value,
5054 _ctx: crate::tools::ToolContext,
5055 ) -> crate::error::Result<crate::tools::ToolOutput> {
5056 let text = params["text"].as_str().unwrap_or("no text");
5057 Ok(crate::tools::ToolOutput::text(format!("echo: {text}")))
5058 }
5059 }
5060
5061 struct NamedWriteTool(&'static str);
5062
5063 #[async_trait]
5064 impl crate::tools::Tool for NamedWriteTool {
5065 fn name(&self) -> &str {
5066 self.0
5067 }
5068 fn label(&self) -> &str {
5069 self.0
5070 }
5071 fn description(&self) -> &str {
5072 "A write tool"
5073 }
5074 fn parameters(&self) -> serde_json::Value {
5075 serde_json::json!({"type": "object", "properties": {"data": {"type": "string"}}})
5076 }
5077 fn is_readonly(&self) -> bool {
5078 false
5079 }
5080 async fn execute(
5081 &self,
5082 _call_id: &str,
5083 _params: serde_json::Value,
5084 _ctx: crate::tools::ToolContext,
5085 ) -> crate::error::Result<crate::tools::ToolOutput> {
5086 Ok(crate::tools::ToolOutput::text("written"))
5087 }
5088 }
5089
5090 fn single_text_model(text: &str) -> Arc<MockProvider> {
5091 Arc::new(MockProvider::new(vec![text_response(text, 50, 10)]))
5092 }
5093
5094 #[tokio::test]
5096 async fn agent_mode_enforcement_full_registers_all_tools() {
5097 use crate::config::AgentMode;
5098
5099 let provider = single_text_model("ok");
5100 let model = test_model(provider);
5101 let (mut agent, _handle) = Agent::new(model, PathBuf::from("/tmp"));
5102 agent.mode = AgentMode::Full;
5103
5104 agent.tools.register(Arc::new(EchoTool)); agent.tools.register(Arc::new(NamedWriteTool("write")));
5107
5108 assert!(
5110 agent.tools.get("echo").is_some(),
5111 "echo should be registered"
5112 );
5113 assert!(
5114 agent.tools.get("write").is_some(),
5115 "write should be registered"
5116 );
5117 assert!(agent.mode.allows_tool("echo"));
5118 assert!(agent.mode.allows_tool("write"));
5119 assert!(agent.mode.allows_tool("any_future_tool"));
5120 }
5121
5122 #[test]
5124 fn agent_mode_enforcement_orchestrator_excludes_write_tools() {
5125 use crate::config::AgentMode;
5126 use crate::tools::ToolRegistry;
5127
5128 let mut registry = ToolRegistry::new();
5129 registry.register(Arc::new(EchoTool)); registry.register(Arc::new(NamedWriteTool("write")));
5131 registry.register(Arc::new(NamedWriteTool("edit")));
5132 registry.register(Arc::new(NamedWriteTool("bash")));
5133
5134 let mode = AgentMode::Orchestrator;
5136 registry.retain(|name| mode.allows_tool(name));
5137
5138 assert!(
5140 registry.get("write").is_none(),
5141 "write must be filtered out"
5142 );
5143 assert!(registry.get("edit").is_none(), "edit must be filtered out");
5144 assert!(registry.get("bash").is_none(), "bash must be filtered out");
5145 assert!(registry.get("echo").is_none(), "echo must be filtered out");
5147 }
5148
5149 #[tokio::test]
5151 async fn agent_mode_enforcement_guard_blocks_disallowed() {
5152 use crate::config::AgentMode;
5153
5154 let provider = Arc::new(MockProvider::new(vec![
5155 tool_call_response(
5157 "call_1",
5158 "write",
5159 serde_json::json!({"data": "content"}),
5160 50,
5161 10,
5162 ),
5163 text_response("Understood, I cannot write directly.", 50, 10),
5165 ]));
5166
5167 let model = test_model(provider);
5168 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
5169 agent.mode = AgentMode::Orchestrator;
5170 agent.tools.register(Arc::new(NamedWriteTool("write")));
5172
5173 let events_task = tokio::spawn(collect_events(handle));
5174 agent.run("Write something".to_string()).await.unwrap();
5175 drop(agent);
5176
5177 let events = events_task.await.unwrap();
5178
5179 let tool_end = events
5181 .iter()
5182 .find(|e| matches!(e, AgentEvent::ToolExecutionEnd { .. }));
5183 assert!(tool_end.is_some(), "should have a ToolExecutionEnd event");
5184
5185 if let Some(AgentEvent::ToolExecutionEnd { result, .. }) = tool_end {
5186 assert!(result.is_error, "mode guard should produce an error result");
5187 let text = result.content.iter().find_map(|c| {
5188 if let ContentBlock::Text { text } = c {
5189 Some(text.as_str())
5190 } else {
5191 None
5192 }
5193 });
5194 let text = text.expect("error result should have text");
5195 assert!(
5196 text.contains("write") && text.contains("mode"),
5197 "error should name the tool and mention mode, got: {text}"
5198 );
5199 }
5200 }
5201
5202 #[tokio::test]
5204 async fn agent_mode_enforcement_guard_allows_permitted() {
5205 use crate::config::AgentMode;
5206
5207 let provider = Arc::new(MockProvider::new(vec![
5208 tool_call_response(
5210 "call_1",
5211 "echo",
5212 serde_json::json!({"text": "hello"}),
5213 50,
5214 10,
5215 ),
5216 text_response("Echo succeeded", 50, 10),
5217 ]));
5218
5219 let model = test_model(provider);
5220 let (mut agent, handle) = Agent::new(model, PathBuf::from("/tmp"));
5221 agent.mode = AgentMode::Full;
5223 agent.tools.register(Arc::new(EchoTool));
5224
5225 let events_task = tokio::spawn(collect_events(handle));
5226 agent.run("Echo something".to_string()).await.unwrap();
5227 drop(agent);
5228
5229 let events = events_task.await.unwrap();
5230
5231 let tool_end = events
5233 .iter()
5234 .find(|e| matches!(e, AgentEvent::ToolExecutionEnd { .. }));
5235 assert!(tool_end.is_some());
5236
5237 if let Some(AgentEvent::ToolExecutionEnd { result, .. }) = tool_end {
5238 assert!(!result.is_error, "permitted tool should succeed");
5239 }
5240 }
5241
5242 #[test]
5244 fn agent_mode_enforcement_system_prompt_filters() {
5245 use crate::config::AgentMode;
5246 use crate::system_prompt::{assemble, AssembleParams};
5247 use crate::tools::ToolRegistry;
5248
5249 let mut registry = ToolRegistry::new();
5250 registry.register(Arc::new(NamedWriteTool("write")));
5251 registry.register(Arc::new(NamedWriteTool("edit")));
5252 registry.register(Arc::new(NamedWriteTool("bash")));
5253
5254 struct ReadTool;
5256 #[async_trait]
5257 impl crate::tools::Tool for ReadTool {
5258 fn name(&self) -> &str {
5259 "read"
5260 }
5261 fn label(&self) -> &str {
5262 "Read"
5263 }
5264 fn description(&self) -> &str {
5265 "Read a file"
5266 }
5267 fn parameters(&self) -> serde_json::Value {
5268 serde_json::json!({"type": "object"})
5269 }
5270 fn is_readonly(&self) -> bool {
5271 true
5272 }
5273 async fn execute(
5274 &self,
5275 _: &str,
5276 _: serde_json::Value,
5277 _: crate::tools::ToolContext,
5278 ) -> crate::error::Result<crate::tools::ToolOutput> {
5279 Ok(crate::tools::ToolOutput::text(""))
5280 }
5281 }
5282 registry.register(Arc::new(ReadTool));
5283
5284 let mode = AgentMode::Orchestrator;
5285 let result = assemble(&AssembleParams {
5286 tools: ®istry,
5287 agents_md: &[],
5288 skills: &[],
5289 facts: &[],
5290 project_memory_status: None,
5291 personality: None,
5292 soul: None,
5293 task: None,
5294 role: None,
5295 mode: &mode,
5296 memory: None,
5297 user_profile: None,
5298 cwd: None,
5299 learning_enabled: false,
5300 guardrail_profile: None,
5301 });
5302
5303 assert!(
5305 result.text.contains("- read:"),
5306 "read should be in orchestrator prompt"
5307 );
5308
5309 assert!(
5311 !result.text.contains("- write:"),
5312 "write must not appear in orchestrator prompt"
5313 );
5314 assert!(
5315 !result.text.contains("- edit:"),
5316 "edit must not appear in orchestrator prompt"
5317 );
5318 assert!(
5319 !result.text.contains("- bash:"),
5320 "bash must not appear in orchestrator prompt"
5321 );
5322 }
5323
5324 #[test]
5326 fn agent_mode_enforcement_system_prompt_instructions() {
5327 use crate::config::AgentMode;
5328 use crate::system_prompt::{assemble, AssembleParams};
5329 use crate::tools::ToolRegistry;
5330
5331 let registry = ToolRegistry::new();
5332
5333 let full_result = assemble(&AssembleParams {
5335 tools: ®istry,
5336 agents_md: &[],
5337 skills: &[],
5338 facts: &[],
5339 project_memory_status: None,
5340 personality: None,
5341 soul: None,
5342 task: None,
5343 role: None,
5344 mode: &AgentMode::Full,
5345 memory: None,
5346 user_profile: None,
5347 cwd: None,
5348 learning_enabled: false,
5349 guardrail_profile: None,
5350 });
5351 assert!(
5353 !full_result.text.contains("orchestrator"),
5354 "Full mode should not mention orchestrator"
5355 );
5356 assert!(
5357 !full_result.text.contains("You are a worker agent."),
5358 "Full mode should not include worker mode instructions"
5359 );
5360
5361 let orch_result = assemble(&AssembleParams {
5363 tools: ®istry,
5364 agents_md: &[],
5365 skills: &[],
5366 facts: &[],
5367 project_memory_status: None,
5368 personality: None,
5369 soul: None,
5370 task: None,
5371 role: None,
5372 mode: &AgentMode::Orchestrator,
5373 memory: None,
5374 user_profile: None,
5375 cwd: None,
5376 learning_enabled: false,
5377 guardrail_profile: None,
5378 });
5379 assert!(
5380 orch_result.text.contains("orchestrator"),
5381 "orchestrator prompt should contain mode instructions, got: {}",
5382 orch_result.text
5383 );
5384
5385 let worker_result = assemble(&AssembleParams {
5387 tools: ®istry,
5388 agents_md: &[],
5389 skills: &[],
5390 facts: &[],
5391 project_memory_status: None,
5392 personality: None,
5393 soul: None,
5394 task: None,
5395 role: None,
5396 mode: &AgentMode::Worker,
5397 memory: None,
5398 user_profile: None,
5399 cwd: None,
5400 learning_enabled: false,
5401 guardrail_profile: None,
5402 });
5403 assert!(
5404 worker_result.text.contains("worker"),
5405 "worker prompt should contain mode instructions"
5406 );
5407
5408 let reviewer_result = assemble(&AssembleParams {
5410 tools: ®istry,
5411 agents_md: &[],
5412 skills: &[],
5413 facts: &[],
5414 project_memory_status: None,
5415 personality: None,
5416 soul: None,
5417 task: None,
5418 role: None,
5419 mode: &AgentMode::Reviewer,
5420 memory: None,
5421 user_profile: None,
5422 cwd: None,
5423 learning_enabled: false,
5424 guardrail_profile: None,
5425 });
5426 assert!(
5427 reviewer_result.text.contains("reviewer") || reviewer_result.text.contains("read"),
5428 "reviewer prompt should contain mode instructions"
5429 );
5430 }
5431}