/// Scripts twelve distinct `terminal` tool calls followed by a plain-text
/// closeout and runs them through the full-stack test agent.
///
/// The reply must be non-empty and must not contain the "stuck in a loop"
/// text, i.e. a run of different commands should not be reported as a stall.
#[tokio::test]
async fn test_full_stack_website_exploration_no_stall() {
    // (narration, terminal arguments) pairs; no command appears twice.
    let exploration_steps = [
        ("Let me explore the project.", r#"{"command": "ls -la"}"#),
        ("Checking system.", r#"{"command": "pwd"}"#),
        ("Git status.", r#"{"command": "git status"}"#),
        ("OS info.", r#"{"command": "uname -a"}"#),
        ("Who am I.", r#"{"command": "whoami"}"#),
        ("Current date.", r#"{"command": "date"}"#),
        ("Disk space.", r#"{"command": "df -h ."}"#),
        ("Environment.", r#"{"command": "env | head -5"}"#),
        ("Shell.", r#"{"command": "echo $SHELL"}"#),
        ("Hostname.", r#"{"command": "hostname"}"#),
        ("Uptime.", r#"{"command": "uptime"}"#),
        ("Process list.", r#"{"command": "ps aux | head -3"}"#),
    ];

    // One tool-call response per step, carrying the narration as its content,
    // then a final text-only response that ends the conversation.
    let mut scripted: Vec<ProviderResponse> = exploration_steps
        .iter()
        .map(|(narration, args)| {
            let mut step = MockProvider::tool_call_response("terminal", args);
            step.content = Some(narration.to_string());
            step
        })
        .collect();
    scripted.push(MockProvider::text_response(
        "Done! Here's the complete summary of the system exploration.",
    ));

    let harness = setup_full_stack_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let owner_dm = ChannelContext {
        visibility: ChannelVisibility::Private,
        platform: "telegram".to_string(),
        channel_name: None,
        channel_id: None,
        sender_name: Some("Alice".to_string()),
        sender_id: Some("telegram:12345".to_string()),
        channel_member_names: vec![],
        user_id_map: std::collections::HashMap::new(),
        trusted: false,
    };

    let reply = harness
        .agent
        .handle_message(
            "telegram_test",
            "Explore the current system thoroughly — check files, git, OS, user, disk, and processes.",
            None,
            UserRole::Owner,
            owner_dm,
            None,
        )
        .await
        .unwrap();

    assert!(
        !reply.is_empty(),
        "Agent should return a non-empty response"
    );
    assert!(
        !reply.contains("stuck in a loop"),
        "Should not trigger stall detection for diverse terminal commands"
    );
}
/// Scripts ten `terminal` tool calls in which a few commands repeat
/// (`ls -la`, `git remote -v`, `hostname`) among otherwise different ones,
/// followed by a plain-text closeout.
///
/// The reply must be non-empty and must not contain "stuck in a loop".
#[tokio::test]
async fn test_full_stack_duplicate_commands_no_stall() {
    // (narration, terminal arguments) pairs; some commands are intentionally
    // issued more than once.
    let steps = [
        ("Checking project.", r#"{"command": "ls -la"}"#),
        ("Git info.", r#"{"command": "git status"}"#),
        ("Let me re-check.", r#"{"command": "ls -la"}"#),
        ("Remote.", r#"{"command": "git remote -v"}"#),
        ("Verify remote.", r#"{"command": "git remote -v"}"#),
        ("Date check.", r#"{"command": "date"}"#),
        ("Hostname.", r#"{"command": "hostname"}"#),
        ("Check again.", r#"{"command": "hostname"}"#),
        ("User.", r#"{"command": "whoami"}"#),
        ("Shell.", r#"{"command": "echo $SHELL"}"#),
    ];

    let mut scripted: Vec<ProviderResponse> = Vec::new();
    scripted.extend(steps.iter().map(|(narration, args)| {
        let mut step = MockProvider::tool_call_response("terminal", args);
        step.content = Some(narration.to_string());
        step
    }));
    // Final text-only response that closes out the run.
    scripted.push(MockProvider::text_response(
        "Done! Here's what I found about the system.",
    ));

    let harness = setup_full_stack_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let owner_dm = ChannelContext {
        visibility: ChannelVisibility::Private,
        platform: "telegram".to_string(),
        channel_name: None,
        channel_id: None,
        sender_name: Some("Alice".to_string()),
        sender_id: Some("telegram:12345".to_string()),
        channel_member_names: vec![],
        user_id_map: std::collections::HashMap::new(),
        trusted: false,
    };

    let reply = harness
        .agent
        .handle_message(
            "telegram_test",
            "Check the project — files, git status, remote, hostname, user.",
            None,
            UserRole::Owner,
            owner_dm,
            None,
        )
        .await
        .unwrap();

    assert!(
        !reply.is_empty(),
        "Agent should return a non-empty response"
    );
    assert!(
        !reply.contains("stuck in a loop"),
        "Duplicate commands with diverse patterns should not trigger stall"
    );
}
/// Scripts one `cli_agent` tool call followed by eight `terminal` follow-up
/// calls and a plain-text closeout, with `cli_agent` supplied as a mock tool.
///
/// Only asserts that the agent returns a non-empty reply.
#[tokio::test]
async fn test_full_stack_cli_agent_then_terminal_followup() {
    // Opening move: delegate to the (mocked) CLI agent.
    let mut delegate_call = MockProvider::tool_call_response(
        "cli_agent",
        r#"{"action":"run","tool":"claude","prompt":"build website"}"#,
    );
    delegate_call.content = Some("I'll delegate the website build to the CLI agent.".to_string());

    // Terminal commands issued after the delegation returns.
    let verification_steps = [
        ("CLI agent done. Let me verify.", r#"{"command": "ls -la"}"#),
        ("Git status.", r#"{"command": "git status"}"#),
        ("Check remote.", r#"{"command": "git remote -v"}"#),
        ("Who.", r#"{"command": "whoami"}"#),
        ("Date.", r#"{"command": "date"}"#),
        ("Pwd.", r#"{"command": "pwd"}"#),
        ("Uptime.", r#"{"command": "uptime"}"#),
        ("Host.", r#"{"command": "hostname"}"#),
    ];

    let mut scripted: Vec<ProviderResponse> = vec![delegate_call];
    scripted.extend(verification_steps.iter().map(|(narration, args)| {
        let mut step = MockProvider::tool_call_response("terminal", args);
        step.content = Some(narration.to_string());
        step
    }));
    scripted.push(MockProvider::text_response(
        "Done! Website deployed successfully.",
    ));

    // Mock registered under the name "cli_agent" with a canned result string.
    let cli_agent_mock: Arc<dyn crate::traits::Tool> = Arc::new(MockTool::new(
        "cli_agent",
        "Delegates tasks to CLI agents",
        "Website built successfully. Files in /tmp/my-website",
    ));

    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(scripted),
        vec![cli_agent_mock],
    )
    .await
    .unwrap();

    let owner_dm = ChannelContext {
        visibility: ChannelVisibility::Private,
        platform: "telegram".to_string(),
        channel_name: None,
        channel_id: None,
        sender_name: Some("Alice".to_string()),
        sender_id: Some("telegram:12345".to_string()),
        channel_member_names: vec![],
        user_id_map: std::collections::HashMap::new(),
        trusted: false,
    };

    let reply = harness
        .agent
        .handle_message(
            "telegram_test",
            "Build a website about cars then verify everything is set up correctly.",
            None,
            UserRole::Owner,
            owner_dm,
            None,
        )
        .await
        .unwrap();

    assert!(
        !reply.is_empty(),
        "Agent should return a non-empty response"
    );
}
/// Runs a single `terminal` tool call plus a text closeout while listening on
/// a status channel, then checks which `StatusUpdate`s were emitted.
///
/// Expects the reply to contain "All good" and the channel to have carried a
/// `ToolStart` and a `ToolComplete` whose name is "running a command", plus at
/// least one `Thinking` update.
#[tokio::test]
async fn test_full_stack_status_updates_received() {
    let mut echo_call =
        MockProvider::tool_call_response("terminal", r#"{"command": "echo hello"}"#);
    echo_call.content = Some("Let me check something.".to_string());
    let scripted = vec![echo_call, MockProvider::text_response("Done! All good.")];

    let harness = setup_full_stack_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let (status_tx, mut status_rx) = tokio::sync::mpsc::channel::<StatusUpdate>(64);
    let reply = harness
        .agent
        .handle_message(
            "telegram_test",
            "Run echo hello",
            Some(status_tx),
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();
    assert!(
        reply.contains("All good"),
        "Agent should complete normally. Got: {}",
        reply
    );

    // Drain everything currently buffered; stops at the first `try_recv` error.
    let updates: Vec<StatusUpdate> = std::iter::from_fn(|| status_rx.try_recv().ok()).collect();

    let saw_tool_start = updates.iter().any(|update| {
        matches!(update, StatusUpdate::ToolStart { name, .. } if name == "running a command")
    });
    let saw_tool_complete = updates.iter().any(|update| {
        matches!(update, StatusUpdate::ToolComplete { name, .. } if name == "running a command")
    });
    let saw_thinking = updates
        .iter()
        .any(|update| matches!(update, StatusUpdate::Thinking(_)));

    assert!(
        saw_tool_start,
        "Should have received ToolStart for terminal. Updates: {:?}",
        updates
    );
    assert!(
        saw_thinking,
        "Should have received at least one Thinking update. Updates: {:?}",
        updates
    );
    assert!(
        saw_tool_complete,
        "Should have received ToolComplete for terminal. Updates: {:?}",
        updates
    );
}
/// Test tool named `external_action` that takes no parameters, declares an
/// external side effect, and always succeeds with a fixed result string.
struct ExternalActionTool;

#[async_trait::async_trait]
impl crate::traits::Tool for ExternalActionTool {
    fn name(&self) -> &str {
        "external_action"
    }

    fn description(&self) -> &str {
        "Writes to an external service for testing."
    }

    /// Schema with an empty `properties` object.
    fn schema(&self) -> serde_json::Value {
        json!({
            "name": self.name(),
            "description": self.description(),
            "parameters": {
                "type": "object",
                "properties": {}
            }
        })
    }

    /// Not read-only, has an external side effect, idempotent; needs no
    /// approval and is not flagged as a high-impact write.
    fn capabilities(&self) -> crate::traits::ToolCapabilities {
        crate::traits::ToolCapabilities {
            external_side_effect: true,
            idempotent: true,
            read_only: false,
            needs_approval: false,
            high_impact_write: false,
        }
    }

    /// Ignores its arguments and returns a canned success message.
    async fn call(&self, _args: &str) -> anyhow::Result<String> {
        Ok(String::from("Created remote record id=abc123"))
    }
}
/// Test tool named `prewrapped_external_action` whose result has already been
/// passed through `wrap_untrusted_output` before it is returned.
struct PreWrappedExternalActionTool;

#[async_trait::async_trait]
impl crate::traits::Tool for PreWrappedExternalActionTool {
    fn name(&self) -> &str {
        "prewrapped_external_action"
    }

    fn description(&self) -> &str {
        "Writes to an external service and returns already-wrapped output."
    }

    /// Schema with an empty `properties` object.
    fn schema(&self) -> serde_json::Value {
        json!({
            "name": self.name(),
            "description": self.description(),
            "parameters": {
                "type": "object",
                "properties": {}
            }
        })
    }

    /// Not read-only, has an external side effect, idempotent; needs no
    /// approval and is not flagged as a high-impact write.
    fn capabilities(&self) -> crate::traits::ToolCapabilities {
        crate::traits::ToolCapabilities {
            external_side_effect: true,
            idempotent: true,
            read_only: false,
            needs_approval: false,
            high_impact_write: false,
        }
    }

    /// Ignores its arguments and returns a canned message wrapped by
    /// `crate::tools::sanitize::wrap_untrusted_output`.
    async fn call(&self, _args: &str) -> anyhow::Result<String> {
        let wrapped = crate::tools::sanitize::wrap_untrusted_output(
            "prewrapped_external_action",
            "Created remote record id=wrapped123",
        );
        Ok(wrapped)
    }
}
/// Read-only test tool named `url_probe` that echoes the `url` argument back
/// inside a fixed confirmation sentence.
struct UrlProbeTool;

#[async_trait::async_trait]
impl crate::traits::Tool for UrlProbeTool {
    fn name(&self) -> &str {
        "url_probe"
    }

    fn description(&self) -> &str {
        "Reads a URL for verification testing."
    }

    /// Schema requiring a single string property `url` and disallowing
    /// additional properties.
    fn schema(&self) -> serde_json::Value {
        json!({
            "name": self.name(),
            "description": self.description(),
            "parameters": {
                "type": "object",
                "properties": {
                    "url": { "type": "string" }
                },
                "required": ["url"],
                "additionalProperties": false
            }
        })
    }

    /// Read-only and idempotent, with no external side effect, no approval
    /// requirement, and no high-impact write.
    fn capabilities(&self) -> crate::traits::ToolCapabilities {
        crate::traits::ToolCapabilities {
            read_only: true,
            idempotent: true,
            external_side_effect: false,
            needs_approval: false,
            high_impact_write: false,
        }
    }

    /// Parses `arguments` as JSON and reports on the string at key `url`.
    ///
    /// # Errors
    /// Fails if `arguments` is not valid JSON, or with "missing url" when the
    /// `url` key is absent or not a string.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        let parsed: serde_json::Value = serde_json::from_str(arguments)?;
        let Some(url) = parsed.get("url").and_then(serde_json::Value::as_str) else {
            return Err(anyhow::anyhow!("missing url"));
        };
        Ok(format!("Checked {url} and confirmed the posts are visible."))
    }
}
/// After a successful `external_action` call, the follow-up provider response
/// is delayed by two seconds while the harness is built with an LLM timeout
/// argument of `Some(1)` (presumably seconds — confirm against the helper).
///
/// The whole request must finish within three seconds, the reply must contain
/// the fixed completion acknowledgement together with the tool's result, and
/// a `ToolComplete` status update for `external_action` must have been sent.
#[tokio::test]
async fn test_successful_external_action_timeout_returns_deterministic_completion() {
    let mut action_call = MockProvider::tool_call_response("external_action", "{}");
    action_call.content = Some("I'll handle that.".to_string());
    let scripted = vec![
        action_call,
        MockProvider::text_response("This reply should time out before it is used."),
    ];

    // First response is immediate; the second is held back for two seconds.
    let delays = vec![std::time::Duration::ZERO, std::time::Duration::from_secs(2)];

    let harness = crate::testing::setup_test_agent_with_extra_tools_and_llm_timeout(
        MockProvider::with_delayed_responses(scripted, delays),
        vec![Arc::new(ExternalActionTool)],
        Some(1),
    )
    .await
    .unwrap();

    let (status_tx, mut status_rx) = tokio::sync::mpsc::channel::<StatusUpdate>(64);
    let pending_reply = harness.agent.handle_message(
        "external_action_timeout",
        "Create the remote record.",
        Some(status_tx),
        UserRole::Owner,
        ChannelContext::private("test"),
        None,
    );
    // Outer guard so a hang fails the test instead of blocking it.
    let reply = tokio::time::timeout(std::time::Duration::from_secs(3), pending_reply)
        .await
        .expect("request should not hang")
        .unwrap();

    assert!(
        reply.contains("The requested action completed successfully."),
        "response should use deterministic completion ack: {}",
        reply
    );
    assert!(
        reply.contains("Created remote record id=abc123"),
        "response should include the latest external action result: {}",
        reply
    );

    // Drain everything currently buffered on the status channel.
    let updates: Vec<StatusUpdate> = std::iter::from_fn(|| status_rx.try_recv().ok()).collect();
    let saw_completion = updates.iter().any(|update| {
        matches!(
            update,
            StatusUpdate::ToolComplete { name, summary }
                if name == "external_action" && summary.contains("Created remote record id=abc123")
        )
    });
    assert!(
        saw_completion,
        "expected ToolComplete status update for external_action, got: {:?}",
        updates
    );
}
/// Same delayed-follow-up scenario as the plain external-action timeout test,
/// but using a tool whose output is already wrapped by `wrap_untrusted_output`.
///
/// The reply must contain the fixed completion acknowledgement and the tool's
/// result text, and must not contain the "UNTRUSTED EXTERNAL DATA" marker.
#[tokio::test]
async fn test_prewrapped_external_action_timeout_keeps_latest_result() {
    let mut action_call = MockProvider::tool_call_response("prewrapped_external_action", "{}");
    action_call.content = Some("I'll handle that.".to_string());
    let scripted = vec![
        action_call,
        MockProvider::text_response("This reply should time out before it is used."),
    ];

    // First response is immediate; the second is held back for two seconds.
    let delays = vec![std::time::Duration::ZERO, std::time::Duration::from_secs(2)];

    let harness = crate::testing::setup_test_agent_with_extra_tools_and_llm_timeout(
        MockProvider::with_delayed_responses(scripted, delays),
        vec![Arc::new(PreWrappedExternalActionTool)],
        Some(1),
    )
    .await
    .unwrap();

    let pending_reply = harness.agent.handle_message(
        "prewrapped_external_action_timeout",
        "Create the remote record.",
        None,
        UserRole::Owner,
        ChannelContext::private("test"),
        None,
    );
    // Outer guard so a hang fails the test instead of blocking it.
    let reply = tokio::time::timeout(std::time::Duration::from_secs(3), pending_reply)
        .await
        .expect("request should not hang")
        .unwrap();

    assert!(
        reply.contains("The requested action completed successfully."),
        "response should use deterministic completion ack: {}",
        reply
    );
    assert!(
        reply.contains("Created remote record id=wrapped123"),
        "response should preserve the latest external action result: {}",
        reply
    );
    assert!(
        !reply.contains("UNTRUSTED EXTERNAL DATA"),
        "response should not leak wrapper markers into the final reply: {}",
        reply
    );
}
/// Scripts four provider responses: an `external_action` call, a bare "Done.",
/// a `url_probe` call against the URL from the user message, and a final text
/// reply stating the posts are visible.
///
/// The final reply must mention that the posts are now visible, must not be
/// the generic completion acknowledgement, and the provider must have been
/// called exactly four times (so the agent did not stop at "Done.").
#[tokio::test]
async fn test_visible_outcome_request_requires_matching_verification_before_completion() {
    let mut fix_call = MockProvider::tool_call_response("external_action", "{}");
    fix_call.content = Some("I'll fix that.".to_string());

    let mut scripted = Vec::with_capacity(4);
    scripted.push(fix_call);
    // Low-signal completion that the agent is expected to continue past.
    scripted.push(MockProvider::text_response("Done."));
    scripted.push(MockProvider::tool_call_response(
        "url_probe",
        r#"{"url":"https://blog.aidaemon.ai"}"#,
    ));
    scripted.push(MockProvider::text_response(
        "I checked https://blog.aidaemon.ai and the posts are now visible.",
    ));

    let harness = crate::testing::setup_test_agent_with_extra_tools_and_llm_timeout(
        MockProvider::with_responses(scripted),
        vec![Arc::new(ExternalActionTool), Arc::new(UrlProbeTool)],
        None,
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "visible_outcome_verification",
            "I still don't see the posts here: https://blog.aidaemon.ai",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        reply.contains("posts are now visible"),
        "final response should reflect a verified outcome: {}",
        reply
    );
    assert!(
        !reply.contains("The requested action completed successfully."),
        "generic success ack should be blocked until verification completes: {}",
        reply
    );
    assert_eq!(
        harness.provider.call_count().await,
        4,
        "agent should continue past the low-signal completion and perform verification"
    );
}
/// Scripts two identical `send_file` tool calls followed by a text closeout,
/// with `send_file` replaced by a tool that counts its executions.
///
/// Expects the reply to contain "Done.", the tool to have run exactly once,
/// and the stored history for the session to hold exactly one `send_file`
/// tool message containing "Duplicate send_file suppressed".
#[tokio::test]
async fn test_full_stack_duplicate_send_file_suppressed() {
    /// `send_file` stand-in that bumps a shared counter on every call.
    struct CountingSendFileTool {
        calls: Arc<AtomicUsize>,
    }

    #[async_trait::async_trait]
    impl crate::traits::Tool for CountingSendFileTool {
        fn name(&self) -> &str {
            "send_file"
        }

        fn description(&self) -> &str {
            "Test send_file tool that counts executions."
        }

        fn schema(&self) -> serde_json::Value {
            json!({
                "name": self.name(),
                "description": self.description(),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "file_path": { "type": "string" },
                        "caption": { "type": "string" }
                    },
                    "required": ["file_path"]
                }
            })
        }

        async fn call(&self, _args: &str) -> anyhow::Result<String> {
            self.calls.fetch_add(1, Ordering::SeqCst);
            Ok(String::from("File sent by counting send_file tool"))
        }
    }

    let session = "telegram_test";
    let send_file_args = r#"{"file_path":"/Users/testuser/projects/acme-corp/proposal/sow-project-plan.pdf","caption":"Here is the SOW PDF from the Acme project."}"#;

    // The same call twice in a row, then a plain-text closeout.
    let scripted = vec![
        MockProvider::tool_call_response("send_file", send_file_args),
        MockProvider::tool_call_response("send_file", send_file_args),
        MockProvider::text_response("Done. I sent the file."),
    ];

    let executions = Arc::new(AtomicUsize::new(0));
    let counting_tool: Arc<dyn crate::traits::Tool> = Arc::new(CountingSendFileTool {
        calls: Arc::clone(&executions),
    });

    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(scripted),
        vec![counting_tool],
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            session,
            "Send me the SOW PDF from the Lodestar project",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    assert!(
        reply.contains("Done."),
        "Agent should complete normally. Got: {}",
        reply
    );
    assert_eq!(
        executions.load(Ordering::SeqCst),
        1,
        "send_file should execute only once for duplicate identical calls"
    );

    // Count the suppression notices recorded as `send_file` tool messages.
    let history = harness.state.get_history(session, 200).await.unwrap();
    let suppression_notices = history
        .iter()
        .filter(|m| m.role == "tool")
        .filter(|m| m.tool_name.as_deref() == Some("send_file"))
        .filter(|m| {
            m.content
                .as_deref()
                .is_some_and(|c| c.contains("Duplicate send_file suppressed"))
        })
        .count();
    assert_eq!(
        suppression_notices, 1,
        "Expected one dedupe tool message for suppressed duplicate send_file"
    );
}
/// Scripts three identical `send_file` tool calls followed by a text closeout,
/// with `send_file` replaced by a tool that counts its executions.
///
/// Expects the reply to contain the scripted closeout text, the tool to have
/// run exactly once, and the stored history for the session to hold exactly
/// one `send_file` tool message containing "Duplicate send_file suppressed".
#[tokio::test]
async fn test_duplicate_send_file_forces_text_closeout() {
    /// `send_file` stand-in that bumps a shared counter on every call.
    struct CountingSendFileTool {
        calls: Arc<AtomicUsize>,
    }

    #[async_trait::async_trait]
    impl crate::traits::Tool for CountingSendFileTool {
        fn name(&self) -> &str {
            "send_file"
        }

        fn description(&self) -> &str {
            "Test send_file tool that counts executions."
        }

        fn schema(&self) -> serde_json::Value {
            json!({
                "name": self.name(),
                "description": self.description(),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "file_path": { "type": "string" },
                        "caption": { "type": "string" }
                    },
                    "required": ["file_path"]
                }
            })
        }

        async fn call(&self, _args: &str) -> anyhow::Result<String> {
            self.calls.fetch_add(1, Ordering::SeqCst);
            Ok(String::from("File sent by counting send_file tool"))
        }
    }

    let session = "telegram_test_force_text";
    let send_file_args = r#"{"file_path":"/Users/testuser/projects/acme-corp/proposal/sow-project-plan.pdf","caption":"Here is the SOW PDF from the Acme project."}"#;

    // The same call three times in a row, then a plain-text closeout.
    let scripted = vec![
        MockProvider::tool_call_response("send_file", send_file_args),
        MockProvider::tool_call_response("send_file", send_file_args),
        MockProvider::tool_call_response("send_file", send_file_args),
        MockProvider::text_response("Done. I already sent the file."),
    ];

    let executions = Arc::new(AtomicUsize::new(0));
    let counting_tool: Arc<dyn crate::traits::Tool> = Arc::new(CountingSendFileTool {
        calls: Arc::clone(&executions),
    });

    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(scripted),
        vec![counting_tool],
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            session,
            "Send me the SOW PDF from the Lodestar project",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    assert!(
        reply.contains("Done. I already sent the file."),
        "Agent should close out with plain text after duplicate send_file. Got: {}",
        reply
    );
    assert_eq!(
        executions.load(Ordering::SeqCst),
        1,
        "Only the first send_file call should execute"
    );

    // Count the suppression notices recorded as `send_file` tool messages.
    let history = harness.state.get_history(session, 200).await.unwrap();
    let suppression_notices = history
        .iter()
        .filter(|m| m.role == "tool")
        .filter(|m| m.tool_name.as_deref() == Some("send_file"))
        .filter(|m| {
            m.content
                .as_deref()
                .is_some_and(|c| c.contains("Duplicate send_file suppressed"))
        })
        .count();
    assert_eq!(
        suppression_notices, 1,
        "Expected exactly one duplicate suppression message"
    );
}
/// Scripts eleven different `terminal` commands that hunt for a deployment URL
/// (git remote, config files, env vars, git log, ...) followed by a text reply
/// asking the user which project they mean.
///
/// The reply must not contain "stuck in a loop" and must be non-empty.
#[tokio::test]
async fn test_full_stack_deployed_site_url_lookup_no_stall() {
    // (narration, terminal arguments) pairs; every command is different.
    let lookup_steps = [
        (
            "Let me check the git remote to find the deployment URL.",
            r#"{"command": "git remote -v"}"#,
        ),
        (
            "Let me look for deployment configuration files.",
            r#"{"command": "ls -la"}"#,
        ),
        (
            "Checking for a CNAME or deployment config.",
            r#"{"command": "ls public/ 2>/dev/null || echo 'no public dir'"}"#,
        ),
        (
            "Let me check package.json for deployment scripts.",
            r#"{"command": "cat package.json 2>/dev/null || echo 'no package.json'"}"#,
        ),
        (
            "Looking for Vercel or Netlify config.",
            r#"{"command": "ls vercel.json netlify.toml .vercel 2>/dev/null || echo 'none found'"}"#,
        ),
        (
            "Checking environment variables for URLs.",
            r#"{"command": "env | grep -i url || echo 'no URL env vars'"}"#,
        ),
        (
            "Let me check git log for deployment commits.",
            r#"{"command": "git log --oneline -5 2>/dev/null || echo 'not a git repo'"}"#,
        ),
        (
            "Checking for GitHub Pages or similar config.",
            r#"{"command": "cat CNAME 2>/dev/null || echo 'no CNAME'"}"#,
        ),
        (
            "Looking for docker or CI deployment files.",
            r#"{"command": "ls Dockerfile docker-compose.yml .github/workflows/ 2>/dev/null || echo 'none'"}"#,
        ),
        (
            "Checking the git config for any deploy URLs.",
            r#"{"command": "git config --list 2>/dev/null | grep -i url || echo 'no url in git config'"}"#,
        ),
        (
            "One more check — looking at recent branches.",
            r#"{"command": "git branch -a 2>/dev/null | head -10 || echo 'no branches'"}"#,
        ),
    ];

    // One tool-call response per step, then the final text-only answer.
    let mut scripted: Vec<ProviderResponse> = lookup_steps
        .iter()
        .map(|(narration, args)| {
            let mut step = MockProvider::tool_call_response("terminal", args);
            step.content = Some(narration.to_string());
            step
        })
        .collect();
    scripted.push(MockProvider::text_response(
        "I couldn't find a specific deployment URL in the current project. \
         The git remote points to github.com but I don't see a CNAME, \
         Vercel config, or Netlify config. Could you tell me which project \
         you're referring to? I may have that info stored from a previous session.",
    ));

    let harness = setup_full_stack_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let owner_dm = ChannelContext {
        visibility: ChannelVisibility::Private,
        platform: "telegram".to_string(),
        channel_name: None,
        channel_id: None,
        sender_name: Some("Alice".to_string()),
        sender_id: Some("telegram:12345".to_string()),
        channel_member_names: vec![],
        user_id_map: std::collections::HashMap::new(),
        trusted: false,
    };

    let reply = harness
        .agent
        .handle_message(
            "telegram_test",
            "What's the url of the site that you deployed?",
            None,
            UserRole::Owner,
            owner_dm,
            None,
        )
        .await
        .unwrap();

    assert!(
        !reply.contains("stuck in a loop"),
        "Should not trigger stuck-in-a-loop message. Got: {}",
        reply.chars().take(400).collect::<String>()
    );
    assert!(
        !reply.is_empty(),
        "Agent should return a non-empty response"
    );
}
/// Scripts seven `system_info` tool calls (one with empty arguments, then six
/// with `deploy_0` .. `deploy_5` checks) followed by a text closeout.
///
/// NOTE: despite the `triggers_stall` suffix in the name, the assertion checks
/// that the reply does NOT contain "stuck" or "not making progress"; per the
/// assertion message, these calls are treated as blocked non-exempt tool calls
/// that should not count toward stall detection.
#[tokio::test]
async fn test_full_stack_blocked_tool_triggers_stall() {
    let mut scripted: Vec<ProviderResponse> = Vec::new();

    // Opening call with empty arguments.
    let mut opening = MockProvider::tool_call_response("system_info", "{}");
    opening.content = Some(
        "Let me look up the deployment URL by checking the system configuration.".to_string(),
    );
    scripted.push(opening);

    // First batch of numbered checks.
    scripted.extend((0..3).map(|i| {
        let mut probe = MockProvider::tool_call_response(
            "system_info",
            &format!(r#"{{"check":"deploy_{}"}}"#, i),
        );
        probe.content = Some(format!("Checking deployment config {}.", i));
        probe
    }));

    // Second batch, narrated as retries.
    scripted.extend((3..6).map(|i| {
        let mut probe = MockProvider::tool_call_response(
            "system_info",
            &format!(r#"{{"check":"deploy_{}"}}"#, i),
        );
        probe.content = Some(format!("Let me try checking config {} again.", i));
        probe
    }));

    scripted.push(MockProvider::text_response(
        "I couldn't find the deployment URL. Which project are you referring to?",
    ));

    let harness = setup_full_stack_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let owner_dm = ChannelContext {
        visibility: ChannelVisibility::Private,
        platform: "telegram".to_string(),
        channel_name: None,
        channel_id: None,
        sender_name: Some("Alice".to_string()),
        sender_id: Some("telegram:12345".to_string()),
        channel_member_names: vec![],
        user_id_map: std::collections::HashMap::new(),
        trusted: false,
    };

    let reply = harness
        .agent
        .handle_message(
            "telegram_test",
            "What's the url of the site that you deployed?",
            None,
            UserRole::Owner,
            owner_dm,
            None,
        )
        .await
        .unwrap();

    assert!(
        !reply.contains("stuck") && !reply.contains("not making progress"),
        "Blocked non-exempt tool calls should NOT trigger stall detection. Got: {}",
        reply.chars().take(400).collect::<String>()
    );
}