#[tokio::test]
async fn test_task_boundary_injected_between_turns() {
let provider = MockProvider::with_responses(vec![
MockProvider::text_response(
"Because the previous step required it — happy to elaborate if useful.",
),
MockProvider::tool_call_response("system_info", "{}"),
MockProvider::text_response("Found the Spanish resume."),
]);
let harness = setup_test_agent(provider).await.unwrap();
let _r1 = harness
.agent
.handle_message(
"boundary_test",
"Why?",
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
let _r2 = harness
.agent
.handle_message(
"boundary_test",
"Send me the resume in Spanish now.",
None,
UserRole::Owner,
ChannelContext::private("test"),
None,
)
.await
.unwrap();
let call_log = harness.provider.call_log.lock().await;
assert!(
call_log.len() >= 2,
"Expected at least 2 LLM calls (one per turn), got {}",
call_log.len()
);
let turn2_calls: Vec<_> = call_log
.iter()
.filter(|call| {
call.messages.iter().any(|m| {
m.get("role").and_then(|r| r.as_str()) == Some("user")
&& m.get("content")
.and_then(|c| c.as_str())
.is_some_and(|s| s.contains("Send me the resume in Spanish now."))
})
})
.collect();
assert!(
!turn2_calls.is_empty(),
"Expected at least one Turn 2 LLM call containing the current user request"
);
let turn2_calls_ok = turn2_calls.iter().all(|call| {
let has_old_user = call.messages.iter().any(|m| {
m.get("role").and_then(|r| r.as_str()) == Some("user")
&& m.get("content")
.and_then(|c| c.as_str())
.is_some_and(|s| s.contains("Why?"))
});
let has_boundary = call.messages.iter().any(|m| {
m.get("role").and_then(|r| r.as_str()) == Some("system")
&& m.get("content")
.and_then(|c| c.as_str())
.is_some_and(|s| s.contains("[Current Task]"))
});
!has_old_user || has_boundary
});
assert!(
turn2_calls_ok,
"All Turn 2 LLM calls must have [Current Task] when old user context is present"
);
}
/// After an unrelated first turn, a second turn whose text starts with a
/// `[File received: ...]` stub must reach the provider with both the stub and a
/// `[Current Task]` marker in the last recorded LLM call.
#[tokio::test]
async fn test_uploaded_artifact_request_has_task_boundary() {
    let scripted = vec![
        MockProvider::text_response(
            "Would you like me to get more detailed information for any specific trial(s)?",
        ),
        MockProvider::text_response("Summary of prior conversation."),
        MockProvider::text_response("I reviewed the uploaded document and identified the issue."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn 1: unrelated prior context.
    let _ = harness
        .agent
        .handle_message(
            "artifact_bleed_test",
            "These are the NCT trial numbers: NCT06737964 and NCT06737965.",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: the inbound text already embeds the file-received stub.
    let artifact_request = "[File received: 68235.png (413 KB, image/png)\nSaved to: /Users/davidloor/projects/aidaemon/.aidaemon/files/inbox/694c3943_68235.png]\nCheck the doc and fix the issue.";
    let _ = harness
        .agent
        .handle_message(
            "artifact_bleed_test",
            artifact_request,
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    let log = harness.provider.call_log.lock().await;
    let final_call = log.last().expect("turn 2 call");

    let has_upload_stub = final_call.messages.iter().any(|m| {
        let from_user = m["role"].as_str() == Some("user");
        from_user
            && m["content"]
                .as_str()
                .is_some_and(|s| s.contains("[File received: 68235.png"))
    });
    assert!(
        has_upload_stub,
        "Turn 2 should include the uploaded-file context"
    );

    // The marker may appear under any role, so only the content is inspected.
    let has_marker = final_call.messages.iter().any(|m| {
        m["content"]
            .as_str()
            .is_some_and(|s| s.contains("[Current Task]"))
    });
    assert!(
        has_marker,
        "Turn 2 should have a task boundary marker: {:?}",
        final_call.messages
    );
}
/// When the model answers with a generic "what would you like" reply after a
/// tool call, the agent's final response must not be that reply, and the
/// second LLM call must contain an execution checkpoint naming the request.
#[tokio::test]
async fn test_idle_reengagement_reply_after_tool_progress_is_recovered() {
    let scripted = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("I'm here. What would you like me to help you with?"),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "idle_reengagement_recovery",
            "Check the system details and tell me what machine this is.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The idle re-engagement text must have been replaced...
    let echoed_idle_reply = reply.contains("What would you like me to help you with");
    assert!(
        !echoed_idle_reply,
        "generic idle re-engagement reply should not be returned after tool progress: {}",
        reply
    );
    // ...by something built from the tool output.
    let grounded_in_tool_output = reply.contains("latest tool output") || reply.contains("Date:");
    assert!(
        grounded_in_tool_output,
        "final reply should recover from concrete tool evidence: {}",
        reply
    );

    let call_log = harness.provider.call_log.lock().await;
    assert!(
        call_log.len() >= 2,
        "expected at least two LLM calls, got {}",
        call_log.len()
    );

    let checkpoint_in_second_call = call_log[1].messages.iter().any(|message| {
        let is_system = message["role"].as_str() == Some("system");
        is_system
            && message["content"].as_str().is_some_and(|text| {
                text.contains("EXECUTION CHECKPOINT") && text.contains("Check the system details")
            })
    });
    assert!(
        checkpoint_in_second_call,
        "second LLM call should include the execution checkpoint"
    );
}
/// With the orchestrator harness, the first LLM call recorded by the mock
/// provider must have a non-empty tool list.
#[tokio::test]
async fn test_orchestrator_first_call_has_tools() {
    let scripted = vec![
        MockProvider::text_response("I'll check that for you."),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("System is running macOS."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let _reply = harness
        .agent
        .handle_message(
            "test_session",
            "Show me the system information",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let recorded = harness.provider.call_log.lock().await;
    assert!(!recorded.is_empty(), "Expected at least 1 LLM call");

    let first_call = &recorded[0];
    assert!(
        !first_call.tools.is_empty(),
        "First LLM call must have tools present, got 0 tools"
    );
}
/// With the orchestrator harness, a first response that contains both text
/// and a tool call must lead to a follow-up LLM call, and the final reply must
/// be the second scripted response.
#[tokio::test]
async fn test_orchestrator_executes_tool_calls_in_first_iteration() {
    use crate::traits::ToolCall;

    // First response: narration plus a `system_info` tool call in one message.
    let narrated_tool_call = ProviderResponse {
        content: Some("I'll look into the system details.".to_string()),
        tool_calls: vec![ToolCall {
            id: "call_system_info".to_string(),
            name: "system_info".to_string(),
            arguments: "{}".to_string(),
            extra_content: None,
        }],
        usage: Some(crate::traits::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cached_input_tokens: None,
            cache_creation_input_tokens: None,
            model: "mock".to_string(),
        }),
        thinking: None,
        response_note: None,
    };
    let provider = MockProvider::with_responses(vec![
        narrated_tool_call,
        MockProvider::text_response("System is running macOS."),
    ]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "Check the system information now",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(response, "System is running macOS.");

    let calls = harness.provider.call_log.lock().await;
    // Check the call count before indexing, so an unexpectedly short log fails
    // with this message rather than an out-of-bounds panic on `calls[0]`.
    assert!(
        calls.len() >= 2,
        "Expected at least 2 LLM calls (tool call + final), got {}",
        calls.len()
    );
    assert!(
        !calls[0].tools.is_empty(),
        "First LLM call must have tools present"
    );
}
/// With the orchestrator harness, a deferral-style first response followed by
/// a direct answer must yield that answer in exactly two LLM calls, the first
/// of which has tools attached.
#[tokio::test]
async fn test_orchestrator_knowledge_flow() {
    let scripted = vec![
        MockProvider::text_response("Let me check my memory first."),
        MockProvider::text_response("The capital of France is Paris."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let answer = harness
        .agent
        .handle_message(
            "test_session",
            "What is the capital of France?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(answer, "The capital of France is Paris.");

    // Query the count before taking the call-log guard below.
    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2, "Expected bounced deferral + executor answer");

    let recorded = harness.provider.call_log.lock().await;
    let first_call = &recorded[0];
    assert!(
        !first_call.tools.is_empty(),
        "First LLM call should have tools present (default+fallback routing)"
    );
}
/// With the default (non-orchestrator) harness, the first recorded LLM call
/// must have a non-empty tool list.
#[tokio::test]
async fn test_executor_mode_retains_tools() {
    let provider = MockProvider::with_responses(vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("System info retrieved."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();

    let _response = harness
        .agent
        .handle_message(
            "test_session",
            "Show me the system information",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let calls = harness.provider.call_log.lock().await;
    // Guard the index below so an empty log fails with a readable message,
    // matching `test_orchestrator_first_call_has_tools`.
    assert!(!calls.is_empty(), "Expected at least 1 LLM call");
    assert!(
        !calls[0].tools.is_empty(),
        "Executor mode must have tools available in LLM calls"
    );
}
/// After a tool-using first turn, the last LLM call of the second turn must
/// only contain short (< 200 bytes) tool messages and must still include at
/// least two user messages.
#[tokio::test]
async fn test_old_tool_intermediates_collapsed_in_follow_up() {
    let scripted = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Your system has 16GB RAM and an M1 chip."),
        MockProvider::text_response("Mia is your cat."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn 1: triggers the tool call and a final answer.
    let first_reply = harness
        .agent
        .handle_message(
            "collapse_test",
            "What system info do I have?",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(first_reply, "Your system has 16GB RAM and an M1 chip.");

    // Turn 2: unrelated follow-up in the same session.
    let second_reply = harness
        .agent
        .handle_message(
            "collapse_test",
            "Who is mia?",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(second_reply, "Mia is your cat.");

    let call_log = harness.provider.call_log.lock().await;
    let follow_up_msgs = &call_log.last().unwrap().messages;

    // Every tool message still present must be compact.
    let tool_messages = follow_up_msgs
        .iter()
        .filter(|m| m["role"].as_str() == Some("tool"));
    for tool_msg in tool_messages {
        let content = tool_msg["content"].as_str().unwrap_or("");
        assert!(
            content.len() < 200,
            "Prior 1 tool result should be summarized (compact), got: {}",
            content
        );
    }

    let user_count = follow_up_msgs
        .iter()
        .filter(|m| m["role"].as_str() == Some("user"))
        .count();
    assert!(
        user_count >= 2,
        "Turn 2 should include user messages from both turns, found {}",
        user_count
    );
}
/// When Turn 1 ends with an empty model reply after tool calls, the agent's
/// synthesized completion text must be returned and must appear as an
/// assistant message in the history sent to the model during Turn 2.
#[tokio::test]
async fn test_synthesized_done_persisted() {
    // Prefixes accepted for the synthesized / recovered Turn 1 reply.
    const COMPLETION_PREFIXES: [&str; 4] = [
        "Done",
        "Here is the latest tool output",
        "Here's the command output",
        "Here are the results",
    ];

    let provider = MockProvider::with_responses(vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(""),
        MockProvider::text_response("Let me check my memory first."),
        MockProvider::text_response("Weather is sunny."),
    ]);
    let mut harness = setup_test_agent(provider).await.unwrap();
    harness.agent.set_test_orchestrator_mode();

    let r1 = harness
        .agent
        .handle_message(
            "done_persist_test",
            "Check my system info",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        COMPLETION_PREFIXES.iter().any(|prefix| r1.starts_with(*prefix)),
        "Expected Done synthesis or tool output recovery, got: {}",
        r1
    );

    let r2 = harness
        .agent
        .handle_message(
            "done_persist_test",
            "Tell me the weather",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(!r2.is_empty(), "Turn 2 should produce a non-empty response");

    let call_log = harness.provider.call_log.lock().await;
    // The test treats index 3 as a Turn 2 call (presumably because the first
    // three scripted responses are consumed by Turn 1 — confirm if the script
    // changes). Use a checked lookup so a shorter log fails with a diagnostic
    // instead of an out-of-bounds panic.
    let turn2_call = call_log.get(3).unwrap_or_else(|| {
        panic!(
            "Expected at least 4 LLM calls so call index 3 exists, got {}",
            call_log.len()
        )
    });

    let user_msg_count = turn2_call
        .messages
        .iter()
        .filter(|m| m.get("role").and_then(|r| r.as_str()) == Some("user"))
        .count();
    assert!(
        user_msg_count >= 2,
        "Turn 2 should have at least 2 separate user messages (not merged), found {}",
        user_msg_count
    );

    // The history check is slightly more permissive than the reply check: any
    // assistant message starting with "Here's" is accepted as well.
    let completion_assistant = turn2_call.messages.iter().any(|m| {
        m.get("role").and_then(|r| r.as_str()) == Some("assistant")
            && m.get("content")
                .and_then(|c| c.as_str())
                .is_some_and(|s| {
                    COMPLETION_PREFIXES.iter().any(|prefix| s.starts_with(*prefix))
                        || s.starts_with("Here's")
                })
    });
    assert!(
        completion_assistant,
        "Turn 2's history should contain the persisted completion assistant message from Turn 1"
    );
}
/// After three turns, the last LLM call must contain the two earlier 500-byte
/// assistant replies in shortened form: same leading letter, trailing `…`,
/// and at most 210 bytes.
#[tokio::test]
async fn test_old_interaction_assistant_content_truncated() {
    // True when `msg` has string content that starts with `lead`, ends with the
    // ellipsis character and is shorter than the original 500-byte reply.
    fn is_shortened_run(msg: &serde_json::Value, lead: char) -> bool {
        msg.get("content")
            .and_then(|c| c.as_str())
            .is_some_and(|s| s.starts_with(lead) && s.ends_with('…') && s.len() < 500)
    }

    let long_response_1 = "B".repeat(500);
    let long_response_2 = "A".repeat(500);
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(&long_response_1),
        MockProvider::text_response(&long_response_2),
        MockProvider::text_response("Short answer."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();

    let r1 = harness
        .agent
        .handle_message(
            "truncate_test",
            "First question?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(r1, long_response_1);

    let r2 = harness
        .agent
        .handle_message(
            "truncate_test",
            "Second question?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(r2, long_response_2);

    let r3 = harness
        .agent
        .handle_message(
            "truncate_test",
            "Also third question?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(r3, "Short answer.");

    let call_log = harness.provider.call_log.lock().await;
    let turn3_call = call_log.last().unwrap();
    let assistant_msgs: Vec<&serde_json::Value> = turn3_call
        .messages
        .iter()
        .filter(|m| m.get("role").and_then(|r| r.as_str()) == Some("assistant"))
        .collect();

    let has_truncated_b = assistant_msgs.iter().any(|m| is_shortened_run(m, 'B'));
    assert!(
        has_truncated_b,
        "Turn 1's long assistant response should be truncated in Turn 3's context"
    );
    let has_truncated_a = assistant_msgs.iter().any(|m| is_shortened_run(m, 'A'));
    assert!(
        has_truncated_a,
        "Turn 2's assistant response should also be truncated (archived)"
    );

    // Upper bound on the shortened form. `len()` is a byte count and the
    // trailing `…` alone is three bytes.
    for m in &assistant_msgs {
        let Some(content) = m.get("content").and_then(|c| c.as_str()) else {
            continue;
        };
        let looks_truncated =
            (content.starts_with('B') || content.starts_with('A')) && content.ends_with('…');
        if looks_truncated {
            assert!(
                content.len() <= 210,
                "Truncated content should be ~203 chars max, got {} chars: {}...",
                content.len(),
                // Build the preview per character: byte-slicing a string that
                // contains multi-byte characters can panic on a char boundary
                // and hide the real assertion failure.
                content.chars().take(50).collect::<String>()
            );
        }
    }
}
/// A short assistant reply from Turn 1 must appear verbatim (no
/// `[prior turn]` marker, no rewriting) in the last LLM call of Turn 2.
#[tokio::test]
async fn test_old_short_assistant_response_preserved_unmodified() {
    const SESSION: &str = "prior_turn_no_marker";

    let scripted = vec![
        MockProvider::text_response("It is 4."),
        MockProvider::text_response("Rust 1.82.0"),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "What is 2 + 2?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "What version of Rust is installed?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let call_log = harness.provider.call_log.lock().await;
    let last_call = call_log.last().unwrap();

    // Look for an assistant message whose content is exactly the Turn 1 reply.
    let prior_reply = last_call.messages.iter().find(|m| {
        m["role"].as_str() == Some("assistant") && m["content"].as_str() == Some("It is 4.")
    });
    assert!(
        prior_reply.is_some(),
        "Turn 1's assistant response should be present in Turn 2's context"
    );

    let content = prior_reply
        .and_then(|m| m["content"].as_str())
        .unwrap();
    assert!(
        !content.contains("[prior turn]"),
        "Old assistant responses should NOT have [prior turn] marker (causes LLM echoing). Got: {}",
        content
    );
    assert_eq!(
        content, "It is 4.",
        "Short old assistant content should be preserved unmodified"
    );
}
/// Sends seven turns, waits, and expects a non-empty conversation summary in
/// state; then sends an eighth turn and expects `[Session Summary]` and
/// `[Current Task]` to appear somewhere in the last four recorded LLM calls.
#[tokio::test]
async fn test_compaction_fires_on_window_overflow() {
    const SESSION: &str = "compaction_test";

    // Script: six numbered replies, then numbered replies 7 and 8 each
    // preceded by a generic one, then four generic spares.
    let mut scripted: Vec<_> = (1..=6)
        .map(|i| MockProvider::text_response(&format!("Response {}", i)))
        .collect();
    for text in [
        "Mock response",
        "Response 7",
        "Mock response",
        "Response 8",
    ] {
        scripted.push(MockProvider::text_response(text));
    }
    scripted.extend((0..4).map(|_| MockProvider::text_response("Mock response")));

    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    for turn in 1..=7 {
        let prompt = format!("Question {} about topic {}", turn, turn);
        let _ = harness
            .agent
            .handle_message(
                SESSION,
                &prompt,
                None,
                UserRole::Owner,
                ChannelContext::private("test"),
                None,
            )
            .await
            .unwrap();
    }

    // Fixed wait before reading the stored summary.
    tokio::time::sleep(std::time::Duration::from_millis(1000)).await;

    let stored = harness
        .state
        .get_conversation_summary(SESSION)
        .await
        .unwrap();
    assert!(
        stored.is_some(),
        "Compaction summary should exist in DB after window overflow"
    );
    let stored = stored.unwrap();
    assert!(
        !stored.summary.is_empty(),
        "Compaction summary should not be empty"
    );

    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "Question 8 about topic 8",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Fixed wait before inspecting the call log.
    tokio::time::sleep(std::time::Duration::from_millis(1000)).await;

    let call_log = harness.provider.call_log.lock().await;
    assert!(
        call_log.len() >= 8,
        "expected at least one call per turn; got {}",
        call_log.len()
    );

    // Only the last (up to) four calls are inspected.
    let recent_calls = &call_log[call_log.len().saturating_sub(4)..];

    let has_summary = recent_calls.iter().any(|call| {
        call.messages.iter().any(|m| {
            m["role"].as_str() == Some("system")
                && m["content"]
                    .as_str()
                    .is_some_and(|s| s.contains("[Session Summary]"))
        })
    });
    assert!(
        has_summary,
        "Turn 8's LLM context should include [Session Summary] from compaction"
    );

    let has_boundary = recent_calls.iter().any(|call| {
        call.messages.iter().any(|m| {
            m["content"]
                .as_str()
                .is_some_and(|s| s.contains("[Current Task]"))
        })
    });
    assert!(
        has_boundary,
        "Turn 8's LLM context should include [Current Task] boundary marker"
    );
}
/// Persisted history for two tool-using turns must carry turn ids: each user
/// message's `turn_id` equals its own `id`, the two turns have different ids,
/// and each turn has at least one non-user message with the same `turn_id`.
#[tokio::test]
async fn test_turn_id_groups_messages_within_a_turn() {
    const SESSION: &str = "turn_id_test";

    let scripted = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Done turn 1"),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Done turn 2"),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "First request",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "Second request",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let history = harness.state.get_history(SESSION, 100).await.unwrap();

    let any_stamped = history.iter().any(|m| m.turn_id.is_some());
    assert!(
        any_stamped,
        "expected some messages to carry a turn_id, got {} messages with none",
        history.len()
    );

    let user_messages: Vec<_> = history.iter().filter(|m| m.role == "user").collect();
    assert_eq!(
        user_messages.len(),
        2,
        "expected 2 user messages, got {}",
        user_messages.len()
    );

    // A user message anchors its turn: its turn_id is its own id.
    for um in &user_messages {
        assert_eq!(
            um.turn_id.as_deref(),
            Some(um.id.as_str()),
            "user message turn_id should equal its own id; got msg id={} turn_id={:?}",
            um.id,
            um.turn_id
        );
    }
    assert_ne!(
        user_messages[0].turn_id, user_messages[1].turn_id,
        "two distinct user turns must have distinct turn_ids"
    );

    // Count the non-user messages stamped with each turn's id.
    let first_turn_id = user_messages[0].turn_id.as_deref().unwrap();
    let second_turn_id = user_messages[1].turn_id.as_deref().unwrap();
    let count_followers = |turn_id: &str| {
        history
            .iter()
            .filter(|m| m.role != "user" && m.turn_id.as_deref() == Some(turn_id))
            .count()
    };
    let turn1_followers = count_followers(first_turn_id);
    let turn2_followers = count_followers(second_turn_id);

    assert!(
        turn1_followers > 0,
        "Turn 1 should have at least one non-user message stamped with its turn_id"
    );
    assert!(
        turn2_followers > 0,
        "Turn 2 should have at least one non-user message stamped with its turn_id"
    );
}
/// Returns `true` when any `user` message has array content that includes a
/// block whose `type` is `"input_audio"`.
fn llm_messages_contain_input_audio(messages: &[serde_json::Value]) -> bool {
    messages
        .iter()
        .filter(|msg| msg["role"].as_str() == Some("user"))
        // String-valued content is skipped; only block arrays are inspected.
        .filter_map(|msg| msg["content"].as_array())
        .flatten()
        .any(|block| block["type"].as_str() == Some("input_audio"))
}
/// Returns `true` when any `user` message has array content that includes a
/// block whose `type` is `"image_url"`.
fn llm_messages_contain_image_url(messages: &[serde_json::Value]) -> bool {
    messages
        .iter()
        .filter(|msg| msg["role"].as_str() == Some("user"))
        // String-valued content is skipped; only block arrays are inspected.
        .filter_map(|msg| msg["content"].as_array())
        .flatten()
        .any(|block| block["type"].as_str() == Some("image_url"))
}
/// A PNG attachment sent after a plain first turn must reach the provider as
/// an `image_url` content block in at least one LLM call that does not
/// mention "conversation summarizer".
#[tokio::test]
async fn test_vision_image_attachment_reaches_provider_as_multimodal() {
    use crate::channels::attachments::{build_inbound_text, message_attachment};
    use std::io::Write;

    const SESSION: &str = "vision_multimodal_test";

    // Temp file holding the 8-byte PNG signature plus one padding byte.
    let mut png_file = tempfile::NamedTempFile::new().unwrap();
    png_file
        .write_all(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00])
        .unwrap();
    let attachment = message_attachment(
        png_file.path().to_path_buf(),
        "test.png".to_string(),
        "image/png".to_string(),
        9,
    );
    let attachments = vec![attachment.clone()];
    let inbound_text = build_inbound_text("what is this?", &attachments);

    let scripted = vec![
        MockProvider::text_response("Prior context acknowledged."),
        MockProvider::text_response("Summary of prior conversation."),
        MockProvider::text_response("That looks like a PNG image."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn 1: plain text, no attachment.
    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "Hello before the image.",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: the image attachment.
    let _ = harness
        .agent
        .handle_message_with_attachments(
            SESSION,
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    let call_log = harness.provider.call_log.lock().await;

    // Drop calls whose messages mention the summarizer prompt text.
    let agent_calls: Vec<_> = call_log
        .iter()
        .filter(|call| {
            let is_summarizer_call = call.messages.iter().any(|m| {
                m["content"]
                    .as_str()
                    .is_some_and(|s| s.contains("conversation summarizer"))
            });
            !is_summarizer_call
        })
        .collect();

    let saw_image_block = agent_calls
        .iter()
        .any(|call| llm_messages_contain_image_url(&call.messages));
    assert!(
        saw_image_block,
        "expected an agent LLM call with image_url content block, got: {:?}",
        agent_calls
            .iter()
            .map(|c| &c.messages)
            .collect::<Vec<_>>()
    );
}
/// With `vision_enabled = false`, a PNG attachment must not produce any
/// `image_url` block, while the `[File received: ...]` text stub must still be
/// present in a user message.
#[tokio::test]
async fn test_vision_disabled_sends_text_stub_only() {
    use crate::channels::attachments::{build_inbound_text, message_attachment};
    use crate::config::{FilesConfig, VisionConfig};
    use std::io::Write;

    let mut png_file = tempfile::NamedTempFile::new().unwrap();
    png_file
        .write_all(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00])
        .unwrap();
    let attachment = message_attachment(
        png_file.path().to_path_buf(),
        "test.png".to_string(),
        "image/png".to_string(),
        9,
    );
    let attachments = vec![attachment.clone()];
    let inbound_text = build_inbound_text("describe this", &attachments);

    let scripted = vec![MockProvider::text_response(
        "I received your file but cannot view images while vision is disabled.",
    )];
    let mut harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn vision off through the files config.
    let mut files = FilesConfig::default();
    files.vision_enabled = false;
    let vision_off = VisionConfig::from_files(&files);
    harness.agent.set_test_vision_config(vision_off);

    let _ = harness
        .agent
        .handle_message_with_attachments(
            "vision_disabled_test",
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    let call_log = harness.provider.call_log.lock().await;

    let sent_image_block = call_log
        .iter()
        .any(|call| llm_messages_contain_image_url(&call.messages));
    assert!(
        !sent_image_block,
        "vision disabled must not send image_url blocks"
    );

    let stub_present = call_log
        .iter()
        .flat_map(|call| call.messages.iter())
        .any(|m| {
            m["role"].as_str() == Some("user")
                && m["content"]
                    .as_str()
                    .is_some_and(|s| s.contains("[File received: test.png"))
        });
    assert!(
        stub_present,
        "user message should still include the text stub"
    );
}
/// With the test model set to `gemini-2.0-flash`, an `audio/ogg` attachment
/// must reach the provider as an `input_audio` content block.
#[tokio::test]
async fn test_audio_attachment_reaches_provider_as_input_audio() {
    use crate::channels::attachments::{build_inbound_text, message_attachment};
    use std::io::Write;

    // Five arbitrary bytes stand in for the audio payload.
    let mut ogg_file = tempfile::NamedTempFile::new().unwrap();
    ogg_file.write_all(&[1, 2, 3, 4, 5]).unwrap();
    let attachment = message_attachment(
        ogg_file.path().to_path_buf(),
        "voice.ogg".to_string(),
        "audio/ogg".to_string(),
        5,
    );
    let attachments = vec![attachment.clone()];
    let inbound_text = build_inbound_text("what did they say?", &attachments);

    let scripted = vec![MockProvider::text_response(
        "They asked about the weather.",
    )];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();
    harness.agent.set_test_model("gemini-2.0-flash").await;

    let _ = harness
        .agent
        .handle_message_with_attachments(
            "audio_multimodal_test",
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    let call_log = harness.provider.call_log.lock().await;
    let sent_audio_block = call_log
        .iter()
        .any(|call| llm_messages_contain_input_audio(&call.messages));
    assert!(
        sent_audio_block,
        "expected input_audio block in LLM payload"
    );
}
/// With `audio_enabled = false` and the test model set to
/// `gemini-2.0-flash`, an `audio/ogg` attachment must not produce any
/// `input_audio` block.
#[tokio::test]
async fn test_audio_disabled_sends_text_stub_only() {
    use crate::channels::attachments::{build_inbound_text, message_attachment};
    use crate::config::{AudioConfig, FilesConfig};
    use std::io::Write;

    let mut ogg_file = tempfile::NamedTempFile::new().unwrap();
    ogg_file.write_all(&[1, 2, 3, 4]).unwrap();
    let attachment = message_attachment(
        ogg_file.path().to_path_buf(),
        "voice.ogg".to_string(),
        "audio/ogg".to_string(),
        4,
    );
    let attachments = vec![attachment.clone()];
    let inbound_text = build_inbound_text("listen", &attachments);

    let scripted = vec![MockProvider::text_response(
        "I received your audio but cannot process it while audio is disabled.",
    )];
    let mut harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn audio off through the files config, then select the model.
    let mut files = FilesConfig::default();
    files.audio_enabled = false;
    let audio_off = AudioConfig::from_files(&files);
    harness.agent.set_test_audio_config(audio_off);
    harness.agent.set_test_model("gemini-2.0-flash").await;

    let _ = harness
        .agent
        .handle_message_with_attachments(
            "audio_disabled_test",
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    let call_log = harness.provider.call_log.lock().await;
    let sent_audio_block = call_log
        .iter()
        .any(|call| llm_messages_contain_input_audio(&call.messages));
    assert!(
        !sent_audio_block,
        "audio disabled must not send input_audio blocks"
    );
}
/// With the harness left on its default model (no `set_test_model` call), an
/// `audio/ogg` attachment must not produce any `input_audio` block.
#[tokio::test]
async fn test_audio_ineligible_model_sends_text_stub_only() {
    use crate::channels::attachments::{build_inbound_text, message_attachment};
    use std::io::Write;

    let mut ogg_file = tempfile::NamedTempFile::new().unwrap();
    ogg_file.write_all(&[1, 2, 3, 4]).unwrap();
    let attachment = message_attachment(
        ogg_file.path().to_path_buf(),
        "voice.ogg".to_string(),
        "audio/ogg".to_string(),
        4,
    );
    let attachments = vec![attachment.clone()];
    let inbound_text = build_inbound_text("listen", &attachments);

    let scripted = vec![MockProvider::text_response(
        "I saved the audio but this model cannot hear it.",
    )];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let _ = harness
        .agent
        .handle_message_with_attachments(
            "audio_ineligible_model_test",
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    let call_log = harness.provider.call_log.lock().await;
    let sent_audio_block = call_log
        .iter()
        .any(|call| llm_messages_contain_input_audio(&call.messages));
    assert!(
        !sent_audio_block,
        "mock-model must not send input_audio blocks"
    );
}
/// With STT configured to use a mock CLI, a WAV attachment must end up as a
/// transcription line in both the persisted user message and the text sent to
/// the provider, and no `input_audio` block may be sent.
///
/// Gated to Unix: the mock CLI is a `/bin/sh` script made executable through
/// `std::os::unix::fs::PermissionsExt`, which does not exist on other targets
/// and would otherwise break compilation of the whole test module there.
#[cfg(unix)]
#[tokio::test]
async fn test_stt_fallback_appends_transcription_when_native_audio_skipped() {
    use std::io::Write;
    use std::os::unix::fs::PermissionsExt;
    use crate::agent::stt::{content_has_transcription, format_transcription_line};
    use crate::channels::attachments::{build_inbound_text, message_attachment};
    use crate::config::SttConfig;

    // Mock whisper CLI: finds the `-of <prefix>` argument and writes a fixed
    // transcript to `<prefix>.txt`.
    let tmp = tempfile::tempdir().unwrap();
    let mock_cli = tmp.path().join("mock-whisper-cli.sh");
    std::fs::write(
        &mock_cli,
        r#"#!/bin/sh
out=""
while [ $# -gt 0 ]; do
case "$1" in
-of) out="$2"; shift 2 ;;
*) shift ;;
esac
done
printf '%s' 'Who is my dad?' > "${out}.txt"
"#,
    )
    .unwrap();
    let mut perms = std::fs::metadata(&mock_cli).unwrap().permissions();
    perms.set_mode(0o755);
    std::fs::set_permissions(&mock_cli, perms).unwrap();

    // Placeholder model file; the mock CLI never reads it.
    let model_path = tmp.path().join("model.bin");
    std::fs::write(&model_path, b"mock").unwrap();

    // 16 bytes that look like the start of a WAV header.
    let mut wav = tempfile::NamedTempFile::new().unwrap();
    wav.write_all(b"RIFF....WAVEfmt ").unwrap();
    let attachment = message_attachment(
        wav.path().to_path_buf(),
        "voice.wav".to_string(),
        "audio/wav".to_string(),
        16,
    );
    let attachments = vec![attachment.clone()];
    let inbound_text = build_inbound_text("what did they say?", &attachments);

    let provider = MockProvider::with_responses(vec![MockProvider::text_response(
        "They asked who their dad is.",
    )]);
    let mut harness = setup_test_agent(provider).await.unwrap();
    harness.agent.set_test_stt_config(SttConfig {
        enabled: true,
        cli_path: mock_cli,
        model_path,
        ffmpeg_path: std::path::PathBuf::from("ffmpeg"),
        language: "en".to_string(),
        max_audio_bytes: 25 * 1_048_576,
        timeout_secs: 30,
        mime_types: vec!["audio/wav".to_string(), "audio/ogg".to_string()],
    });

    let _ = harness
        .agent
        .handle_message_with_attachments(
            "stt_fallback_test",
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    // 1) The persisted user message carries the transcription.
    let history = harness
        .state
        .get_history("stt_fallback_test", 5)
        .await
        .unwrap();
    let user_msg = history
        .iter()
        .find(|m| m.role == "user")
        .expect("user message persisted");
    let content = user_msg.content.as_deref().unwrap_or("");
    assert!(
        content_has_transcription(content),
        "user message should include STT transcription, got: {content}"
    );
    assert!(content.contains(&format_transcription_line("voice.wav", "Who is my dad?")));

    // 2) The provider saw the transcription as plain user text...
    let call_log = harness.provider.call_log.lock().await;
    let llm_user_text = call_log
        .iter()
        .flat_map(|call| call.messages.iter())
        .filter(|m| m.get("role").and_then(|r| r.as_str()) == Some("user"))
        .filter_map(|m| m.get("content").and_then(|c| c.as_str()))
        .collect::<Vec<_>>();
    assert!(
        llm_user_text
            .iter()
            .any(|text| content_has_transcription(text)),
        "LLM payload should include transcription text"
    );
    // 3) ...and never as a native audio block.
    assert!(
        !call_log
            .iter()
            .any(|call| llm_messages_contain_input_audio(&call.messages)),
        "STT fallback must not send input_audio blocks"
    );
}
/// An `audio/ogg` attachment must never be turned into an `image_url` block.
#[tokio::test]
async fn test_non_image_attachment_is_text_stub_only() {
    use crate::channels::attachments::{build_inbound_text, message_attachment};

    // The path is only metadata here; the test does not create the file.
    let audio_path = std::path::PathBuf::from("/tmp/voice.ogg");
    let attachment = message_attachment(
        audio_path,
        "voice.ogg".to_string(),
        "audio/ogg".to_string(),
        1200,
    );
    let attachments = vec![attachment.clone()];
    let inbound_text = build_inbound_text("transcribe this", &attachments);

    let scripted = vec![MockProvider::text_response(
        "I saved the audio file but cannot transcribe it in this test.",
    )];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let _ = harness
        .agent
        .handle_message_with_attachments(
            "vision_audio_stub_test",
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    let call_log = harness.provider.call_log.lock().await;
    let sent_image_block = call_log
        .iter()
        .any(|call| llm_messages_contain_image_url(&call.messages));
    assert!(
        !sent_image_block,
        "audio attachments must not produce image_url blocks"
    );
}
/// A second turn that carries an image attachment must still result in at
/// least three LLM calls, one of which contains the scripted summary text in
/// a message.
#[tokio::test]
async fn test_vision_attachment_still_triggers_compaction() {
    use crate::channels::attachments::{build_inbound_text, message_attachment};
    use std::io::Write;

    const SESSION: &str = "vision_compaction_test";

    let mut png_file = tempfile::NamedTempFile::new().unwrap();
    png_file
        .write_all(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00])
        .unwrap();
    let attachment = message_attachment(
        png_file.path().to_path_buf(),
        "doc.png".to_string(),
        "image/png".to_string(),
        9,
    );
    let attachments = vec![attachment.clone()];
    let inbound_text = build_inbound_text("Check the doc and fix the issue.", &attachments);

    let scripted = vec![
        MockProvider::text_response(
            "Would you like me to get more detailed information for any specific trial(s)?",
        ),
        MockProvider::text_response("Summary of prior conversation."),
        MockProvider::text_response("I reviewed the uploaded document and identified the issue."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn 1: unrelated text-only context.
    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "These are the NCT trial numbers: NCT06737964 and NCT06737965.",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: the attachment upload.
    let _ = harness
        .agent
        .handle_message_with_attachments(
            SESSION,
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();

    let call_log = harness.provider.call_log.lock().await;
    assert!(
        call_log.len() >= 3,
        "file upload with attachment should trigger compaction LLM call; got {} calls",
        call_log.len()
    );

    // The scripted summary text must have been fed into some call.
    let summary_fed_back = call_log
        .iter()
        .flat_map(|call| call.messages.iter())
        .any(|m| {
            m["content"]
                .as_str()
                .is_some_and(|s| s.contains("Summary of prior conversation."))
        });
    assert!(
        summary_fed_back,
        "expected compaction summary in an LLM call"
    );
}
/// An attachment-only message (empty user text) must get a normal text reply
/// and must not cause any LLM call with `tool_choice == Required`.
#[tokio::test]
async fn test_attachment_stub_metadata_does_not_force_tool_required_loop() {
    use crate::channels::attachments::{build_inbound_text, message_attachment};
    use crate::traits::ToolChoiceMode;
    use std::io::Write;

    // The file holds PNG signature bytes but is labelled as a JPEG photo.
    let mut image_file = tempfile::NamedTempFile::new().unwrap();
    image_file
        .write_all(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00])
        .unwrap();
    let attachment = message_attachment(
        image_file.path().to_path_buf(),
        "photo.jpg".to_string(),
        "image/jpeg".to_string(),
        9,
    );
    // No user text: the inbound message is built from the attachment alone.
    let inbound_text = build_inbound_text("", &[attachment.clone()]);

    // The same reply is scripted twice.
    let vision_reply = MockProvider::text_response("This image looks like a small PNG file.");
    let scripted = vec![vision_reply.clone(), vision_reply];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message_with_attachments(
            "attachment_stub_intent_test",
            &inbound_text,
            &[attachment],
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();
    let looks_like_vision_reply = reply.contains("PNG") || reply.contains("image");
    assert!(
        looks_like_vision_reply,
        "expected vision/text reply, got: {reply}"
    );

    let call_log = harness.provider.call_log.lock().await;
    let forced_tool_calls = call_log
        .iter()
        .filter(|call| call.options.tool_choice == ToolChoiceMode::Required)
        .count();
    assert_eq!(
        forced_tool_calls, 0,
        "stub inbox path must not trigger tool_choice=Required recovery loop"
    );
}