use crate::agent::policy_metrics_snapshot;
use crate::testing::{
setup_full_stack_test_agent_with_extra_tools, setup_test_agent,
setup_test_agent_root_with_extra_tools_and_llm_timeout, setup_test_agent_with_models,
MockProvider, MockTool,
};
use crate::traits::{
ChatOptions, ProviderResponse, ResponseMode, TokenUsage, Tool, ToolCall, ToolCallMetadata,
ToolCallOutcome, ToolCallSemantics, ToolChoiceMode, ToolTargetHintKind, ToolVerificationMode,
};
use crate::types::{ChannelContext, StatusUpdate, UserRole};
use async_trait::async_trait;
use serde_json::{json, Value};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use tokio::sync::Mutex;
/// Checks that the policy metrics distinguish the two response paths.
///
/// The first message is expected to be answered with a schedule confirmation without any
/// provider call; the second runs a scripted tool call followed by a text reply. Both
/// counters are compared against a snapshot taken at the start, and only a lower bound is
/// asserted on each delta.
#[tokio::test]
async fn response_metrics_capture_direct_return_and_fallthrough_paths() {
    let before = policy_metrics_snapshot();

    // Direct-return leg: nothing is scripted because the provider must not be consulted.
    let direct_harness = setup_test_agent_with_models(
        MockProvider::with_responses(Vec::new()),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();
    let direct_reply = direct_harness
        .agent
        .handle_message(
            "metrics_direct",
            "Check deployment tomorrow at 9am",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        direct_reply.contains("Reply **confirm** to proceed"),
        "expected schedule confirmation direct-return, got: {direct_reply}"
    );
    let direct_provider_calls = direct_harness.provider.call_count().await;
    assert_eq!(
        direct_provider_calls, 0,
        "expected deterministic pre-routing to avoid first LLM call"
    );

    // Fallthrough leg: one tool call, then the final text.
    let fallthrough_script = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("System inspected."),
    ];
    let fallthrough_harness = setup_test_agent_with_models(
        MockProvider::with_responses(fallthrough_script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();
    let fallthrough_reply = fallthrough_harness
        .agent
        .handle_message(
            "metrics_fallthrough",
            "Check my system status",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(fallthrough_reply, "System inspected.");

    let after = policy_metrics_snapshot();

    let direct_before = before.response_direct_return_total;
    let direct_after = after.response_direct_return_total;
    assert!(
        direct_after.saturating_sub(direct_before) >= 1,
        "expected response_direct_return_total to increase by at least 1; before={} after={}",
        direct_before,
        direct_after
    );

    let fallthrough_before = before.response_fallthrough_total;
    let fallthrough_after = after.response_fallthrough_total;
    assert!(
        fallthrough_after.saturating_sub(fallthrough_before) >= 1,
        "expected response_fallthrough_total to increase by at least 1; before={} after={}",
        fallthrough_before,
        fallthrough_after
    );
}
/// Scripts one call to a tool name that is not `system_info` followed by eight identical
/// `system_info` calls, then checks that the failed-task token counter and the no-progress
/// iteration counter both moved relative to a snapshot taken beforehand.
///
/// Currently ignored; see the reason on the attribute.
#[tokio::test]
#[ignore = "tokens_failed_tasks_total / no_progress_iterations_total not yet wired to agent loop"]
async fn failed_task_and_no_progress_metrics_are_observable() {
    let before = policy_metrics_snapshot();

    let script: Vec<_> = std::iter::once(MockProvider::tool_call_response("no_such_tool", "{}"))
        .chain(
            std::iter::repeat_with(|| MockProvider::tool_call_response("system_info", "{}"))
                .take(8),
        )
        .collect();
    let harness = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();

    // The reply text is irrelevant here; only the metric side effects are inspected.
    let _ = harness
        .agent
        .handle_message(
            "metrics_failure_no_progress",
            "Run system checks repeatedly",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let after = policy_metrics_snapshot();

    let failed_before = before.tokens_failed_tasks_total;
    let failed_after = after.tokens_failed_tasks_total;
    assert!(
        failed_after.saturating_sub(failed_before) > 0,
        "expected tokens_failed_tasks_total to increase; before={} after={}",
        failed_before,
        failed_after
    );

    let stalled_before = before.no_progress_iterations_total;
    let stalled_after = after.no_progress_iterations_total;
    assert!(
        stalled_after.saturating_sub(stalled_before) >= 1,
        "expected no_progress_iterations_total to increase by at least 1; before={} after={}",
        stalled_before,
        stalled_after
    );
}
/// Test double registered under the `search_files` tool name. It stores the raw argument
/// string of every invocation and always reports zero matches.
struct RecordingSearchFilesTool {
    /// Raw argument strings, appended in call order.
    calls: Arc<Mutex<Vec<String>>>,
}

#[async_trait]
impl Tool for RecordingSearchFilesTool {
    fn name(&self) -> &str {
        "search_files"
    }

    fn description(&self) -> &str {
        "Mock search_files tool for regression testing"
    }

    fn schema(&self) -> Value {
        json!({
            "name": "search_files",
            "description": "Mock search",
            "parameters": {
                "type": "object",
                "properties": {
                    "glob": {"type": "string"},
                    "path": {"type": "string"}
                },
                "additionalProperties": true
            }
        })
    }

    /// Records `arguments` verbatim, then returns a "no matches" message naming the `path`
    /// argument, or `.` when the arguments are not valid JSON or carry no string `path`.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        self.calls.lock().await.push(arguments.to_string());

        let parsed = serde_json::from_str::<Value>(arguments).ok();
        let scanned_dir = parsed
            .as_ref()
            .and_then(|value| value.get("path"))
            .and_then(|value| value.as_str())
            .unwrap_or(".");
        Ok(format!(
            "No matches found (0 files scanned in {})",
            scanned_dir
        ))
    }
}
/// Test double registered under the `project_inspect` tool name. It stores the raw argument
/// string of every invocation and returns a fixed two-file project listing.
struct RecordingProjectInspectTool {
    /// Raw argument strings, appended in call order.
    calls: Arc<Mutex<Vec<String>>>,
}

#[async_trait]
impl Tool for RecordingProjectInspectTool {
    fn name(&self) -> &str {
        "project_inspect"
    }

    fn description(&self) -> &str {
        "Recording project_inspect tool for regression testing"
    }

    fn schema(&self) -> Value {
        json!({
            "name": "project_inspect",
            "description": "Record project_inspect args",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {"type": "string"},
                    "paths": {"type": "array", "items": {"type": "string"}}
                },
                "additionalProperties": true
            }
        })
    }

    /// Records `arguments` verbatim and renders the canned listing for the primary path:
    /// the string `path` if present, otherwise the first entry of `paths` when that entry
    /// is a string, otherwise `.`.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        self.calls.lock().await.push(arguments.to_string());

        let parsed = serde_json::from_str::<Value>(arguments).ok();
        let explicit_path = parsed
            .as_ref()
            .and_then(|value| value.get("path"))
            .and_then(|value| value.as_str());
        let primary = match explicit_path {
            Some(path) => path,
            None => parsed
                .as_ref()
                .and_then(|value| value.get("paths"))
                .and_then(|value| value.as_array())
                .and_then(|entries| entries.first())
                .and_then(|entry| entry.as_str())
                .unwrap_or("."),
        };
        Ok(format!(
            "# Project: {}\n\n## Structure\n```\nindex.html\nstyles.css\n```\n",
            primary
        ))
    }
}
/// Stateless test double registered under the `project_inspect` tool name; it returns a
/// fixed two-file project listing and keeps no record of its calls.
struct MockProjectInspectTool;

#[async_trait]
impl Tool for MockProjectInspectTool {
    fn name(&self) -> &str {
        "project_inspect"
    }

    fn description(&self) -> &str {
        "Mock project_inspect tool for regression testing"
    }

    fn schema(&self) -> Value {
        json!({
            "name": "project_inspect",
            "description": "Mock inspect",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {"type": "string"},
                    "paths": {"type": "array", "items": {"type": "string"}}
                },
                "additionalProperties": true
            }
        })
    }

    /// Renders the canned listing for the string `path` argument, or `.` when the arguments
    /// are not valid JSON or carry no string `path`. The `paths` argument is not consulted.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        let parsed = serde_json::from_str::<Value>(arguments).ok();
        let project_root = parsed
            .as_ref()
            .and_then(|value| value.get("path"))
            .and_then(|value| value.as_str())
            .unwrap_or(".");
        Ok(format!(
            "# Project: {}\n\n## Structure\n```\nindex.html\nstyles.css\n```\n",
            project_root
        ))
    }
}
/// Test double registered under the `send_file` tool name that counts how many times it is
/// actually executed.
struct CountingSendFileTool {
    /// Number of executed calls; shared with the test so it can be read afterwards.
    calls: Arc<AtomicUsize>,
}

#[async_trait]
impl Tool for CountingSendFileTool {
    fn name(&self) -> &str {
        "send_file"
    }

    fn description(&self) -> &str {
        "Mock send_file tool for force-text characterization"
    }

    fn schema(&self) -> Value {
        json!({
            "name": "send_file",
            "description": "Mock send file",
            "parameters": {
                "type": "object",
                "properties": {
                    "file_path": {"type": "string"},
                    "caption": {"type": "string"}
                },
                "required": ["file_path"],
                "additionalProperties": false
            }
        })
    }

    /// Increments the shared counter and reports success; the arguments are ignored.
    async fn call(&self, _arguments: &str) -> anyhow::Result<String> {
        let _previous_count = self.calls.fetch_add(1, Ordering::SeqCst);
        Ok(String::from("File sent successfully."))
    }
}
/// Test double registered under the `background_task` tool name. Its status-aware entry
/// point returns metadata flagging the call as started in the background, detached, and
/// with completion notifications enabled.
struct BackgroundDetachTool;

#[async_trait]
impl Tool for BackgroundDetachTool {
    fn name(&self) -> &str {
        "background_task"
    }

    fn description(&self) -> &str {
        "Mock tool that detaches work to the background"
    }

    fn schema(&self) -> Value {
        json!({
            "name": "background_task",
            "description": "Mock background detach",
            "parameters": {
                "type": "object",
                "properties": {
                    "job": {"type": "string"}
                },
                "additionalProperties": false
            }
        })
    }

    /// Plain entry point: returns the fixed acknowledgement and ignores the arguments.
    async fn call(&self, _arguments: &str) -> anyhow::Result<String> {
        Ok(String::from("Background job started."))
    }

    /// Status-aware entry point: returns the same acknowledgement together with the
    /// background/detached metadata. Neither the arguments nor the status channel is used.
    async fn call_with_status_outcome(
        &self,
        _arguments: &str,
        status_tx: Option<tokio::sync::mpsc::Sender<StatusUpdate>>,
    ) -> anyhow::Result<ToolCallOutcome> {
        // Release the sender right away; this mock never emits status updates.
        drop(status_tx);

        let metadata = ToolCallMetadata {
            background_started: true,
            detached: true,
            completion_notifications_enabled: true,
            ..ToolCallMetadata::default()
        };
        let output = String::from("Background job started.");
        Ok(ToolCallOutcome { output, metadata })
    }
}
/// Test double registered under the `update_remote` tool name; it declares mutation
/// semantics with a URL target hint.
struct MockRemoteMutationTool;

impl MockRemoteMutationTool {
    /// Extracts the string `url` argument, falling back to a fixed example URL when the
    /// arguments are not valid JSON or carry no string `url`.
    fn target_url(arguments: &str) -> String {
        serde_json::from_str::<Value>(arguments)
            .ok()
            .as_ref()
            .and_then(|value| value.get("url"))
            .and_then(|value| value.as_str())
            .unwrap_or("https://example.com/status")
            .to_string()
    }
}

#[async_trait]
impl Tool for MockRemoteMutationTool {
    fn name(&self) -> &str {
        "update_remote"
    }

    fn description(&self) -> &str {
        "Mock tool that updates a remote URL"
    }

    fn schema(&self) -> Value {
        json!({
            "name": "update_remote",
            "description": "Mock remote update",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {"type": "string"}
                },
                "required": ["url"],
                "additionalProperties": false
            }
        })
    }

    /// Returns a fixed "Updated <url>" message; nothing is actually contacted.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        let url = MockRemoteMutationTool::target_url(arguments);
        Ok(format!("Updated {}", url))
    }

    /// Declares the call a mutation whose target hint is the resolved URL.
    fn call_semantics(&self, arguments: &str) -> ToolCallSemantics {
        let url = MockRemoteMutationTool::target_url(arguments);
        ToolCallSemantics::mutation().with_target_hint(ToolTargetHintKind::Url, url.as_str())
    }
}
/// Test double registered under the `check_remote` tool name; it declares observation
/// semantics with result-content verification and a URL target hint.
struct MockRemoteObservationTool;

impl MockRemoteObservationTool {
    /// Extracts the string `url` argument, falling back to a fixed example URL when the
    /// arguments are not valid JSON or carry no string `url`.
    fn target_url(arguments: &str) -> String {
        serde_json::from_str::<Value>(arguments)
            .ok()
            .as_ref()
            .and_then(|value| value.get("url"))
            .and_then(|value| value.as_str())
            .unwrap_or("https://example.com/status")
            .to_string()
    }
}

#[async_trait]
impl Tool for MockRemoteObservationTool {
    fn name(&self) -> &str {
        "check_remote"
    }

    fn description(&self) -> &str {
        "Mock tool that checks a remote URL"
    }

    fn schema(&self) -> Value {
        json!({
            "name": "check_remote",
            "description": "Mock remote check",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {"type": "string"}
                },
                "required": ["url"],
                "additionalProperties": false
            }
        })
    }

    /// Returns a fixed "Verified <url> ..." message; nothing is actually contacted.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        let url = MockRemoteObservationTool::target_url(arguments);
        Ok(format!("Verified {} shows the updated status.", url))
    }

    /// Declares the call an observation verified by result content, targeting the
    /// resolved URL.
    fn call_semantics(&self, arguments: &str) -> ToolCallSemantics {
        let url = MockRemoteObservationTool::target_url(arguments);
        ToolCallSemantics::observation()
            .with_verification_mode(ToolVerificationMode::ResultContent)
            .with_target_hint(ToolTargetHintKind::Url, url.as_str())
    }
}
#[tokio::test]
async fn force_text_characterization_strips_tools_after_duplicate_send_file() {
let send_file_args =
r#"{"file_path":"/tmp/aidaemon-characterization.pdf","caption":"Characterization"}"#;
let provider = MockProvider::with_responses(vec![
MockProvider::tool_call_response("send_file", send_file_args),
MockProvider::tool_call_response("send_file", send_file_args),
MockProvider::tool_call_response("send_file", send_file_args),
MockProvider::text_response("Done. I already sent the file."),
]);
let send_file_calls = Arc::new(AtomicUsize::new(0));
let harness = setup_full_stack_test_agent_with_extra_tools(
provider,
vec![Arc::new(CountingSendFileTool {
calls: send_file_calls.clone(),
}) as Arc<dyn Tool>],
)
.await
.unwrap();
let reply = harness
.agent
.handle_message(
"force_text_characterization",
"Send me the characterization PDF",
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
assert_eq!(reply, "Done. I already sent the file.");
assert_eq!(
send_file_calls.load(Ordering::SeqCst),
1,
"duplicate send_file calls should be suppressed before force-text closeout"
);
let call_log = harness.provider.call_log.lock().await.clone();
assert!(
call_log.last().is_some_and(|call| !call.tools.is_empty()
&& call.options.tool_choice == crate::traits::ToolChoiceMode::None),
"force-text closeout retains tool defs (prompt-prefix stability) and \
disables calling via tool_choice=none: {:?}",
call_log.last().map(|call| &call.options.tool_choice)
);
}
/// Scripts a mutation tool call, a premature text reply, an observation tool call on the
/// same URL, and a final text reply. Asserts that the final text is returned, that all four
/// scripted responses were consumed, and that some provider call carried a system message
/// mentioning verification.
#[tokio::test]
async fn verification_characterization_blocks_completion_until_matching_observation() {
    let url = "https://example.com/status";
    let url_args = json!({"url": url}).to_string();

    let script = vec![
        MockProvider::tool_call_response("update_remote", &url_args),
        MockProvider::text_response("Updated it."),
        MockProvider::tool_call_response("check_remote", &url_args),
        MockProvider::text_response("Updated and verified the status page."),
    ];
    let mutation_tool: Arc<dyn Tool> = Arc::new(MockRemoteMutationTool);
    let observation_tool: Arc<dyn Tool> = Arc::new(MockRemoteObservationTool);
    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(script),
        vec![mutation_tool, observation_tool],
    )
    .await
    .unwrap();

    let request = format!("Update {} and verify it.", url);
    let reply = harness
        .agent
        .handle_message(
            "verification_characterization",
            &request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, "Updated and verified the status page.");

    let provider_calls = harness.provider.call_count().await;
    assert_eq!(
        provider_calls, 4,
        "the premature final text should be blocked so the verification tool can run"
    );

    let call_log = harness.provider.call_log.lock().await.clone();
    // Any system message mentioning "verification" satisfies this check; the longer phrase
    // is a special case of the shorter one.
    let directive_seen = call_log
        .iter()
        .flat_map(|call| call.messages.iter())
        .filter(|message| message.get("role").and_then(|v| v.as_str()) == Some("system"))
        .filter_map(|message| message.get("content").and_then(|v| v.as_str()))
        .any(|content| {
            content.contains("final verification step") || content.contains("verification")
        });
    assert!(
        directive_seen,
        "verification guard should inject a verification-required system directive"
    );
}
#[tokio::test]
async fn stall_characterization_stops_repeated_unknown_tool_before_final_text() {
let mut responses = Vec::new();
for attempt in 1..=7 {
responses.push({
let mut resp = MockProvider::tool_call_response("unknown_stall_tool", "{}");
resp.content = Some(format!("I'll retry the same tool, attempt {}.", attempt));
resp
});
}
responses.push(MockProvider::text_response("This should not be reached."));
let provider = MockProvider::with_responses(responses);
let harness = setup_test_agent(provider).await.unwrap();
let reply = harness
.agent
.handle_message(
"stall_characterization",
"Use the unavailable stall tool",
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
assert!(
!reply.contains("This should not be reached"),
"stall detection should stop repeated unknown-tool attempts before the scripted final text"
);
assert!(
harness.provider.call_count().await < 8,
"stall detection should stop early; provider calls: {}",
harness.provider.call_count().await
);
}
/// Scripts a long first text response (205 repeated words, no closing punctuation) and a
/// short continuation. Asserts that the reply is the exact concatenation of the two, that
/// exactly two provider calls were made, and that the second call carried the truncation
/// recovery system directive.
#[tokio::test]
async fn truncation_characterization_reassembles_mid_sentence_text_continuation() {
    // 205 words separated by single spaces, plus one trailing space.
    let mut prefix = vec!["partial"; 205].join(" ");
    prefix.push(' ');
    let continuation = "and the final sentence is complete.";

    let script = vec![
        MockProvider::text_response(&prefix),
        MockProvider::text_response(continuation),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();
    let reply = harness
        .agent
        .handle_message(
            "truncation_characterization",
            "Draft a long status update",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let expected = [prefix.as_str(), continuation].concat();
    assert_eq!(reply, expected);

    let provider_calls = harness.provider.call_count().await;
    assert_eq!(
        provider_calls, 2,
        "truncated first response should trigger exactly one continuation pass"
    );

    let call_log = harness.provider.call_log.lock().await.clone();
    let recovery_directive_seen = call_log
        .last()
        .into_iter()
        .flat_map(|call| call.messages.iter())
        .filter(|message| message.get("role").and_then(|v| v.as_str()) == Some("system"))
        .filter_map(|message| message.get("content").and_then(|v| v.as_str()))
        .any(|content| {
            content.contains("previous text response was cut off mid-sentence")
                && content.contains("Continue your response")
        });
    assert!(
        recovery_directive_seen,
        "continuation pass should include the truncation recovery directive"
    );
}
/// Scripts a long first response that stops mid-word and a continuation whose closing
/// words repeat a phrase from the start of the first response. Asserts that the reply is
/// the plain concatenation of both, with nothing removed from the prefix.
#[tokio::test]
async fn truncation_characterization_keeps_prefix_when_short_tail_repeats_earlier_phrase() {
    let filler = vec!["detail"; 205].join(" ");
    let prefix = format!(
        "Which company or role are you targeting? {} The AI Expert resume is the ch",
        filler
    );
    let continuation = "osen one even stronger. Which company or role?";

    let script = vec![
        MockProvider::text_response(&prefix),
        MockProvider::text_response(continuation),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();
    let reply = harness
        .agent
        .handle_message(
            "truncation_short_overlapping_tail",
            "Which resume should I send?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let expected = [prefix.as_str(), continuation].concat();
    assert_eq!(reply, expected);
}
#[tokio::test]
async fn background_ack_characterization_forces_text_with_handoff_directive() {
let provider = MockProvider::with_responses(vec![
MockProvider::tool_call_response("background_task", r#"{"job":"long-running"}"#),
MockProvider::text_response("This model text should be ignored."),
]);
let harness = setup_full_stack_test_agent_with_extra_tools(
provider,
vec![Arc::new(BackgroundDetachTool) as Arc<dyn Tool>],
)
.await
.unwrap();
let reply = harness
.agent
.handle_message(
"background_ack_characterization",
"Start a long running background job",
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
assert_eq!(reply, "This model text should be ignored.");
let call_log = harness.provider.call_log.lock().await.clone();
assert_eq!(
call_log.len(),
2,
"background detach currently runs one forced text summary pass after the tool call"
);
assert!(
call_log.last().is_some_and(|call| !call.tools.is_empty()
&& call.options.tool_choice == crate::traits::ToolChoiceMode::None),
"background detach retains tool defs (prompt-prefix stability) and \
disables calling via tool_choice=none on the forced text pass"
);
assert!(
call_log.last().is_some_and(|call| {
call.messages.iter().any(|message| {
message.get("role").and_then(|v| v.as_str()) == Some("system")
&& message
.get("content")
.and_then(|v| v.as_str())
.is_some_and(|content| {
content.contains("A background task is now running")
&& content.contains("completion notifications are enabled")
})
})
}),
"background detach should carry a handoff directive into the forced text pass"
);
}
#[tokio::test]
async fn contradictory_file_evidence_forces_recheck_before_final_answer() {
let project_dir = tempfile::tempdir().unwrap();
let project_dir_str = project_dir.path().to_string_lossy().to_string();
let search_calls: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
let provider = MockProvider::with_responses(vec![
MockProvider::tool_call_response("search_files", &json!({"glob":"*.html"}).to_string()),
MockProvider::tool_call_response(
"project_inspect",
&json!({"path": project_dir_str}).to_string(),
),
MockProvider::text_response("I couldn't find any HTML files."),
MockProvider::tool_call_response(
"search_files",
&json!({"glob":"*.html", "path": project_dir_str}).to_string(),
),
MockProvider::text_response(
"After re-checking with an explicit path, I still have no HTML matches.",
),
]);
let harness = setup_full_stack_test_agent_with_extra_tools(
provider,
vec![
Arc::new(RecordingSearchFilesTool {
calls: search_calls.clone(),
}) as Arc<dyn Tool>,
Arc::new(MockProjectInspectTool) as Arc<dyn Tool>,
],
)
.await
.unwrap();
let reply = harness
.agent
.handle_message(
"contradictory_file_recheck",
&format!("Find HTML files under {}", project_dir_str),
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
assert_eq!(
reply,
"After re-checking with an explicit path, I still have no HTML matches."
);
assert_eq!(harness.provider.call_count().await, 5);
let calls = search_calls.lock().await.clone();
assert_eq!(calls.len(), 2, "expected initial search + forced re-check");
assert!(
calls[0].contains("\"path\"") && calls[0].contains(&project_dir_str),
"expected first search_files call to receive injected project path, got: {}",
calls[0]
);
let call_log = harness.provider.call_log.lock().await.clone();
let contradiction_nudge_seen = call_log.iter().any(|entry| {
entry.messages.iter().any(|m| {
m.get("role").and_then(|v| v.as_str()) == Some("system")
&& m.get("content")
.and_then(|v| v.as_str())
.is_some_and(|c| c.contains("Contradictory file evidence was detected"))
})
});
assert!(
contradiction_nudge_seen,
"expected contradiction re-check system nudge in provider context"
);
}
/// Scripts a single provider response carrying twenty `project_inspect` calls, each with a
/// distinct path, followed by a text response, and asserts that the scripted text is what
/// the agent returns.
///
/// NOTE(review): per the test name, part of the burst is presumably rejected by a tool-call
/// budget; that limit is not visible in this file.
#[tokio::test]
async fn budget_blocked_same_tool_calls_do_not_trigger_false_consecutive_loop_stop() {
    let mut burst_calls: Vec<ToolCall> = Vec::with_capacity(20);
    for idx in 0..20 {
        burst_calls.push(ToolCall {
            id: format!("call_{}", idx),
            name: String::from("project_inspect"),
            arguments: json!({"path": format!("/tmp/project_{}", idx)}).to_string(),
            extra_content: None,
        });
    }

    let burst_usage = TokenUsage {
        input_tokens: 10,
        output_tokens: 10,
        cached_input_tokens: None,
        cache_creation_input_tokens: None,
        model: String::from("mock"),
    };
    let burst_response = ProviderResponse {
        content: None,
        tool_calls: burst_calls,
        usage: Some(burst_usage),
        thinking: None,
        response_note: None,
    };

    let script = vec![
        burst_response,
        MockProvider::text_response("Summarized project status."),
    ];
    let inspect_tool: Arc<dyn Tool> = Arc::new(MockProjectInspectTool);
    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(script),
        vec![inspect_tool],
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "budget_vs_loop_ordering",
            "Inspect all these project folders and summarize",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, "Summarized project status.");
}
#[tokio::test]
#[ignore = "project directory scope constraints not yet fully wired"]
async fn mixed_project_inspect_path_and_paths_preserves_primary_path_for_follow_up_tools() {
let primary_dir = tempfile::tempdir().unwrap();
let secondary_dir = tempfile::tempdir().unwrap();
let primary_dir_str = primary_dir.path().to_string_lossy().to_string();
let secondary_dir_str = secondary_dir.path().to_string_lossy().to_string();
let search_calls: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
let inspect_calls: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
let provider = MockProvider::with_responses(vec![
MockProvider::tool_call_response(
"project_inspect",
&json!({
"path": primary_dir_str,
"paths": [primary_dir_str, secondary_dir_str]
})
.to_string(),
),
MockProvider::tool_call_response("search_files", &json!({"glob":"*.html"}).to_string()),
MockProvider::tool_call_response(
"search_files",
&json!({"glob":"*.html", "path": primary_dir.path().to_string_lossy()}).to_string(),
),
MockProvider::text_response("Inspection complete."),
]);
let harness = setup_full_stack_test_agent_with_extra_tools(
provider,
vec![
Arc::new(RecordingSearchFilesTool {
calls: search_calls.clone(),
}) as Arc<dyn Tool>,
Arc::new(RecordingProjectInspectTool {
calls: inspect_calls.clone(),
}) as Arc<dyn Tool>,
],
)
.await
.unwrap();
let reply = harness
.agent
.handle_message(
"mixed_project_inspect_path_paths",
"Inspect both project folders and find HTML files",
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
assert_eq!(reply, "Inspection complete.");
let inspect_args = inspect_calls.lock().await.clone();
assert_eq!(inspect_args.len(), 1, "expected one project_inspect call");
assert!(
inspect_args[0].contains("\"path\"") && inspect_args[0].contains("\"paths\""),
"expected mixed path+paths args in project_inspect call, got: {}",
inspect_args[0]
);
let search_args = search_calls.lock().await.clone();
assert_eq!(
search_args.len(),
2,
"expected one follow-up search_files call plus required explicit re-check"
);
assert!(
search_args[0].contains(&format!("\"path\":\"{}\"", primary_dir.path().display())),
"expected first search_files call to inherit primary path from project_inspect(path), got: {}",
search_args[0]
);
}
/// Replays a trace where the first provider response is plain text containing tool-like
/// lines and an `[INTENT_GATE]` JSON block, followed by a real tool call and a final text.
/// Asserts that the final text is returned and that at least three provider calls were made.
#[tokio::test]
async fn replay_trace_yes_do_it_with_sanitized_response_analysis_falls_through_to_tools() {
    let gated_text = MockProvider::text_response(
        "arguments:\nname: terminal\ncommand: ls\n\
         [INTENT_GATE]\n\
         {\"complexity\":\"simple\",\"can_answer_now\":true,\"needs_tools\":true,\"is_acknowledgment\":true}",
    );
    let script = vec![
        gated_text,
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Applied the requested changes."),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "replay_yes_do_it",
            "Yes, do it.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, "Applied the requested changes.");

    let provider_calls = harness.provider.call_count().await;
    assert!(
        provider_calls >= 3,
        "expected initial routing call + tool-call + final response path"
    );
}
/// Replays a trace of three "I'll do X" planning texts before the first tool call and a
/// final summary. Asserts a non-empty reply, at least three provider calls, and that no
/// provider call requested `ResponseMode::JsonSchema`.
#[tokio::test]
async fn replay_trace_deferred_planning_text_does_not_stall_before_first_tool_call() {
    let script = vec![
        MockProvider::text_response("I'll search for all Rust files with async fn first."),
        MockProvider::text_response("Next I'll inspect each file and count async functions."),
        MockProvider::text_response("I'm going to run the search now."),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Found the files and compiled the async summary."),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "replay_pre_tool_deferral",
            "Find all Rust files that contain async fn and give me the top 3 files.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        !reply.is_empty(),
        "Agent should return a non-empty response"
    );

    let provider_calls = harness.provider.call_count().await;
    assert!(
        provider_calls >= 3,
        "expected at least a few retries before deferred/no-tool recovery"
    );

    let call_log = harness.provider.call_log.lock().await.clone();
    let schema_mode_never_used = call_log
        .iter()
        .all(|entry| !matches!(entry.options.response_mode, ResponseMode::JsonSchema { .. }));
    assert!(
        schema_mode_never_used,
        "text-only schema pass should be disabled"
    );
}
/// Scripts an `[INTENT_GATE]` text, a planning text, one tool call, another planning text,
/// and a final summary. Asserts that the final summary is returned, that provider calls
/// were recorded, and that none of them used `ToolChoiceMode::Required`.
#[tokio::test]
async fn deferred_no_tool_forced_required_resets_after_first_successful_tool_call() {
    let gated_text = MockProvider::text_response(
        "Need to inspect first.\n\
         [INTENT_GATE]\n\
         {\"complexity\":\"simple\",\"can_answer_now\":false,\"needs_tools\":true}",
    );
    let script = vec![
        gated_text,
        MockProvider::text_response("I'll inspect the machine first."),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("I'll format the final summary next."),
        MockProvider::text_response("Final summary: system inspection completed."),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "deferred_no_tool_reset_after_success",
            "Inspect my system and summarize it.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, "Final summary: system inspection completed.");

    let call_log = harness.provider.call_log.lock().await.clone();
    assert!(
        !call_log.is_empty(),
        "expected provider calls to be recorded"
    );

    let required_choice_used = call_log
        .iter()
        .any(|entry| matches!(entry.options.tool_choice, ToolChoiceMode::Required));
    assert!(
        !required_choice_used,
        "expected no Required tool-choice for a non-tool-classified user text"
    );
}
/// Scripts a `spawn_agent` call (served by a mock that returns a timeout error string), the
/// same "still planning" text twice, a direct tool call, and a final briefing.
///
/// Asserts that the briefing is returned rather than the planning text, that at least five
/// provider calls were made, and that some provider call carried a message with the
/// direct-tool recovery guidance.
#[tokio::test]
async fn failed_specialist_plan_reply_pivots_to_direct_tools() {
    let incomplete_plan = "I've started breaking down your goal into specific tasks. I've created \
        a plan to first research the 2026 AI job market and then synthesize that into your personalized \
        morning briefing.\n\nI attempted to launch a research specialist, but the request timed out. I'm \
        monitoring the system and will retry the research task as soon as the connection is stable.\n\n\
        Current Plan:\n1. Research Phase: Deep dive into trends, roles, and skills.\n\
        2. Synthesis Phase: Organize findings into a morning briefing.";

    let final_briefing = MockProvider::text_response(
        "Market Snapshot: applied AI engineering remains the strongest target. \
         Target Roles: GenAI engineer and AI product manager. Interview Edge: prepare concrete \
         examples of evaluation, deployment, and agent reliability work.",
    );
    let script = vec![
        MockProvider::tool_call_response(
            "spawn_agent",
            r#"{"mission":"Research AI jobs","task":"Produce current findings"}"#,
        ),
        MockProvider::text_response(incomplete_plan),
        MockProvider::text_response(incomplete_plan),
        MockProvider::tool_call_response("system_info", "{}"),
        final_briefing,
    ];

    let spawn_tool: Arc<dyn Tool> = Arc::new(MockTool::new(
        "spawn_agent",
        "Mock failed specialist delegation",
        "Error: specialist timed out after 300 seconds",
    ));
    let harness = setup_test_agent_root_with_extra_tools_and_llm_timeout(
        MockProvider::with_responses(script),
        vec![spawn_tool],
        None,
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "failed_specialist_plan_pivot",
            "Research the 2026 AI job market and produce my morning briefing.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        reply.contains("Market Snapshot:"),
        "unexpected reply: {reply}"
    );
    assert!(!reply.contains("monitoring the system"));

    let provider_calls = harness.provider.call_count().await;
    assert!(
        provider_calls >= 5,
        "repeated incomplete plans should trigger another tool-backed iteration"
    );

    // The guard is held while scanning; messages of any role are considered.
    let calls = harness.provider.call_log.lock().await;
    let recovery_guidance_seen = calls
        .iter()
        .flat_map(|call| call.messages.iter())
        .filter_map(|message| message.get("content").and_then(Value::as_str))
        .any(|content| {
            content.contains("Specialist delegation failed")
                && content.contains("available direct tools")
        });
    assert!(
        recovery_guidance_seen,
        "failed delegation should inject direct-tool recovery guidance"
    );
}
/// Uses a provider built with `rejecting_non_default_options()` and a single scripted text
/// response. Asserts that the text is returned and that every recorded provider call used
/// `ChatOptions::default()`.
#[tokio::test]
async fn provider_option_rejection_falls_back_to_default_chat() {
    let script = vec![MockProvider::text_response("Got it.")];
    let provider = MockProvider::with_responses(script).rejecting_non_default_options();
    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "provider_option_rejection_fallback",
            "Yes",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, "Got it.");

    let call_log = harness.provider.call_log.lock().await.clone();
    assert!(!call_log.is_empty(), "expected at least one provider call");

    let default_options = ChatOptions::default();
    let non_default_seen = call_log
        .iter()
        .any(|entry| !(entry.options == default_options));
    assert!(
        !non_default_seen,
        "expected default chat options when the text-only pass is disabled"
    );
}