/// Sends two user messages on the same session and inspects the recorded LLM
/// calls: every call that carries the second request must also carry a
/// `[Current Task]` system message whenever the first request ("Why?") is
/// still present in its message list.
#[tokio::test]
async fn test_task_boundary_injected_between_turns() {
    let scripted = vec![
        MockProvider::text_response(
            "[INTENT_GATE] {\"complexity\":\"knowledge\",\"can_answer_now\":true,\"needs_tools\":false}",
        ),
        MockProvider::text_response(
            "[INTENT_GATE] {\"complexity\":\"simple\",\"can_answer_now\":false,\"needs_tools\":true}",
        ),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Found the Spanish resume."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn 1 followed by Turn 2, both on the "boundary_test" session.
    for prompt in ["Why?", "Send me the resume in Spanish now."] {
        harness
            .agent
            .handle_message(
                "boundary_test",
                prompt,
                None,
                UserRole::Owner,
                ChannelContext::private("test"),
                None,
            )
            .await
            .unwrap();
    }

    let call_log = harness.provider.call_log.lock().await;
    let total_calls = call_log.len();
    assert!(
        total_calls >= 2,
        "Expected at least 2 LLM calls (one per turn), got {}",
        total_calls
    );

    // Keep only the calls whose user messages contain the Turn 2 request.
    let mut turn2_calls = Vec::new();
    for call in call_log.iter() {
        let carries_request = call.messages.iter().any(|msg| {
            matches!(msg.get("role").and_then(|v| v.as_str()), Some("user"))
                && matches!(
                    msg.get("content").and_then(|v| v.as_str()),
                    Some(text) if text.contains("Send me the resume in Spanish now.")
                )
        });
        if carries_request {
            turn2_calls.push(call);
        }
    }
    assert!(
        !turn2_calls.is_empty(),
        "Expected at least one Turn 2 LLM call containing the current user request"
    );

    // A call violates the rule when it still shows the old user message but
    // has no system message containing the boundary marker.
    let any_violation = turn2_calls.iter().any(|call| {
        let shows_old_user = call.messages.iter().any(|msg| {
            matches!(msg.get("role").and_then(|v| v.as_str()), Some("user"))
                && matches!(
                    msg.get("content").and_then(|v| v.as_str()),
                    Some(text) if text.contains("Why?")
                )
        });
        let shows_boundary = call.messages.iter().any(|msg| {
            matches!(msg.get("role").and_then(|v| v.as_str()), Some("system"))
                && matches!(
                    msg.get("content").and_then(|v| v.as_str()),
                    Some(text) if text.contains("[Current Task]")
                )
        });
        shows_old_user && !shows_boundary
    });
    assert!(
        !any_violation,
        "All Turn 2 LLM calls must have [Current Task] when old user context is present"
    );
}
/// Sends a plain message followed by a "file received" message on the same
/// session, then checks that the last recorded LLM call contains both the
/// uploaded-file text (in a user message) and a `[Current Task]` marker (in
/// any message).
#[tokio::test]
async fn test_uploaded_artifact_request_has_task_boundary() {
    let scripted = vec![
        MockProvider::text_response(
            "Would you like me to get more detailed information for any specific trial(s)?",
        ),
        MockProvider::text_response("Summary of prior conversation."),
        MockProvider::text_response("I reviewed the uploaded document and identified the issue."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let artifact_request = "[File received: 68235.png (413 KB, image/png)\nSaved to: /Users/davidloor/projects/aidaemon/.aidaemon/files/inbox/694c3943_68235.png]\nCheck the doc and fix the issue.";
    let prompts = [
        "These are the NCT trial numbers: NCT06737964 and NCT06737965.",
        artifact_request,
    ];
    for prompt in prompts {
        harness
            .agent
            .handle_message(
                "artifact_bleed_test",
                prompt,
                None,
                UserRole::Owner,
                ChannelContext::private("telegram"),
                None,
            )
            .await
            .unwrap();
    }

    let call_log = harness.provider.call_log.lock().await;
    let turn2_call = call_log.last().expect("turn 2 call");

    let includes_upload = turn2_call.messages.iter().any(|msg| {
        matches!(msg.get("role").and_then(|v| v.as_str()), Some("user"))
            && matches!(
                msg.get("content").and_then(|v| v.as_str()),
                Some(text) if text.contains("[File received: 68235.png")
            )
    });
    assert!(
        includes_upload,
        "Turn 2 should include the uploaded-file context"
    );

    // The marker is accepted on a message of any role here.
    let includes_boundary = turn2_call.messages.iter().any(|msg| {
        matches!(
            msg.get("content").and_then(|v| v.as_str()),
            Some(text) if text.contains("[Current Task]")
        )
    });
    assert!(
        includes_boundary,
        "Turn 2 should have a task boundary marker: {:?}",
        turn2_call.messages
    );
}
/// The mock provider answers with a tool call and then a generic "what would
/// you like" reply. The test asserts that the generic reply is not what the
/// agent returns, that the returned text mentions tool evidence, and that the
/// second LLM call carried an "EXECUTION CHECKPOINT" system message quoting
/// the user's request.
#[tokio::test]
async fn test_idle_reengagement_reply_after_tool_progress_is_recovered() {
    let scripted = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("I'm here. What would you like me to help you with?"),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            "idle_reengagement_recovery",
            "Check the system details and tell me what machine this is.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let echoes_idle_prompt = response.contains("What would you like me to help you with");
    assert!(
        !echoes_idle_prompt,
        "generic idle re-engagement reply should not be returned after tool progress: {}",
        response
    );

    let cites_tool_evidence = ["latest tool output", "Date:"]
        .iter()
        .any(|needle| response.contains(*needle));
    assert!(
        cites_tool_evidence,
        "final reply should recover from concrete tool evidence: {}",
        response
    );

    let call_log = harness.provider.call_log.lock().await;
    let recorded = call_log.len();
    assert!(
        recorded >= 2,
        "expected at least two LLM calls, got {}",
        recorded
    );

    // Index 1 is safe: the length was asserted just above.
    let second_call_has_checkpoint = call_log[1].messages.iter().any(|msg| {
        let is_system = matches!(msg.get("role").and_then(|v| v.as_str()), Some("system"));
        let mentions_checkpoint = msg
            .get("content")
            .and_then(|v| v.as_str())
            .map_or(false, |text| {
                text.contains("EXECUTION CHECKPOINT") && text.contains("Check the system details")
            });
        is_system && mentions_checkpoint
    });
    assert!(
        second_call_has_checkpoint,
        "second LLM call should include the execution checkpoint"
    );
}
/// Runs one message through an agent built with
/// `setup_test_agent_orchestrator` and asserts that the first recorded LLM
/// call has a non-empty `tools` list.
#[tokio::test]
async fn test_orchestrator_first_call_has_tools() {
    let scripted = vec![
        MockProvider::text_response("I'll check that for you."),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("System is running macOS."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Only the recorded calls matter here; the reply text is not inspected.
    let _reply = harness
        .agent
        .handle_message(
            "test_session",
            "Show me the system information",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let calls = harness.provider.call_log.lock().await;
    let first_call = calls.first();
    assert!(first_call.is_some(), "Expected at least 1 LLM call");
    assert!(
        first_call.is_some_and(|call| !call.tools.is_empty()),
        "First LLM call must have tools present, got 0 tools"
    );
}
/// The first scripted provider response carries both narration text and a
/// `system_info` tool call. The test asserts that the agent's final reply is
/// the second scripted response, that at least two LLM calls were recorded,
/// and that the first call had tools available.
#[tokio::test]
async fn test_orchestrator_executes_tool_calls_in_first_iteration() {
    use crate::traits::ToolCall;
    let provider = MockProvider::with_responses(vec![
        ProviderResponse {
            content: Some("I'll look into the system details.".to_string()),
            tool_calls: vec![ToolCall {
                id: "call_system_info".to_string(),
                name: "system_info".to_string(),
                arguments: "{}".to_string(),
                extra_content: None,
            }],
            usage: Some(crate::traits::TokenUsage {
                input_tokens: 100,
                output_tokens: 50,
                model: "mock".to_string(),
            }),
            thinking: None,
            response_note: None,
        },
        MockProvider::text_response("System is running macOS."),
    ]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();
    let response = harness
        .agent
        .handle_message(
            "test_session",
            "Check the system information now",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(response, "System is running macOS.");

    let calls = harness.provider.call_log.lock().await;
    // Check the length first so an empty log fails with this message rather
    // than an index-out-of-bounds panic on `calls[0]`.
    assert!(
        calls.len() >= 2,
        "Expected at least 2 LLM calls (tool call + final), got {}",
        calls.len()
    );
    assert!(
        !calls[0].tools.is_empty(),
        "First LLM call must have tools present"
    );
}
/// Scripts an `[INTENT_GATE]` classification reply followed by a direct
/// answer, then asserts the returned text, that exactly two provider calls
/// were made, and that the first call had a non-empty `tools` list.
#[tokio::test]
async fn test_orchestrator_knowledge_flow() {
    let scripted = vec![
        MockProvider::text_response(
            "I can answer this from memory.\n[INTENT_GATE]\n{\"complexity\": \"knowledge\", \"can_answer_now\": true, \"needs_tools\": false}",
        ),
        MockProvider::text_response("The capital of France is Paris."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let answer = harness
        .agent
        .handle_message(
            "test_session",
            "What is the capital of France?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(answer, "The capital of France is Paris.");

    let observed_calls = harness.provider.call_count().await;
    assert_eq!(
        observed_calls, 2,
        "Expected intent gate classifier + executor answer"
    );

    let calls = harness.provider.call_log.lock().await;
    let first_call_has_tools = !calls[0].tools.is_empty();
    assert!(
        first_call_has_tools,
        "First LLM call should have tools present (default+fallback routing)"
    );
}
/// Runs one message through an agent built with `setup_test_agent` and
/// asserts that the first recorded LLM call has a non-empty `tools` list.
#[tokio::test]
async fn test_executor_mode_retains_tools() {
    let provider = MockProvider::with_responses(vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("System info retrieved."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();
    let _response = harness
        .agent
        .handle_message(
            "test_session",
            "Show me the system information",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let calls = harness.provider.call_log.lock().await;
    // Guard the index below so an empty log fails with a readable message
    // instead of an out-of-bounds panic.
    assert!(!calls.is_empty(), "Expected at least 1 LLM call");
    assert!(
        !calls[0].tools.is_empty(),
        "Executor mode must have tools available in LLM calls"
    );
}
/// Turn 1 involves a tool call; Turn 2 is an unrelated question on the same
/// session. The test inspects the last recorded LLM call and asserts that
/// every `tool`-role message in it is shorter than 200 bytes and that at
/// least two `user`-role messages are present.
#[tokio::test]
async fn test_old_tool_intermediates_collapsed_in_follow_up() {
    let scripted = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Your system has 16GB RAM and an M1 chip."),
        MockProvider::text_response("Bella is your cat."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let first_reply = harness
        .agent
        .handle_message(
            "collapse_test",
            "What system info do I have?",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(first_reply, "Your system has 16GB RAM and an M1 chip.");

    let second_reply = harness
        .agent
        .handle_message(
            "collapse_test",
            "Who is bella?",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(second_reply, "Bella is your cat.");

    let call_log = harness.provider.call_log.lock().await;
    let turn2_msgs = &call_log.last().unwrap().messages;

    // Each tool message's content (empty string when absent or non-string)
    // must be compact.
    let tool_contents = turn2_msgs
        .iter()
        .filter(|m| matches!(m.get("role").and_then(|r| r.as_str()), Some("tool")))
        .map(|m| m.get("content").and_then(|c| c.as_str()).unwrap_or(""));
    for content in tool_contents {
        assert!(
            content.len() < 200,
            "Prior 1 tool result should be summarized (compact), got: {}",
            content
        );
    }

    let user_msg_count = turn2_msgs
        .iter()
        .filter(|m| matches!(m.get("role").and_then(|r| r.as_str()), Some("user")))
        .count();
    assert!(
        user_msg_count >= 2,
        "Turn 2 should include user messages from both turns, found {}",
        user_msg_count
    );
}
/// Turn 1 ends with an empty text response from the provider after two tool
/// calls; the test expects the agent's reply to start with one of the known
/// completion prefixes. Turn 2 then runs on the same session, and the LLM
/// call at index 3 of the log must contain at least two user messages and an
/// assistant message that starts with a completion prefix.
#[tokio::test]
async fn test_synthesized_done_persisted() {
    // Prefixes accepted as a synthesized completion / tool-output recovery.
    const COMPLETION_PREFIXES: [&str; 4] = [
        "Done",
        "Here is the latest tool output",
        "Here's the command output",
        "Here are the results",
    ];

    let provider = MockProvider::with_responses(vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(""),
        MockProvider::text_response(
            "I can answer this from memory.\n[INTENT_GATE] {\"complexity\":\"knowledge\",\"can_answer_now\":true,\"needs_tools\":false}",
        ),
        MockProvider::text_response("Weather is sunny."),
    ]);
    let mut harness = setup_test_agent(provider).await.unwrap();
    harness.agent.set_test_orchestrator_mode();

    let r1 = harness
        .agent
        .handle_message(
            "done_persist_test",
            "Check my system info",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        COMPLETION_PREFIXES
            .iter()
            .any(|prefix| r1.starts_with(*prefix)),
        "Expected Done synthesis or tool output recovery, got: {}",
        r1
    );

    let r2 = harness
        .agent
        .handle_message(
            "done_persist_test",
            "Tell me the weather",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(!r2.is_empty(), "Turn 2 should produce a non-empty response");

    let call_log = harness.provider.call_log.lock().await;
    // Guard the fixed index so a shorter log fails with a readable message
    // instead of an out-of-bounds panic.
    assert!(
        call_log.len() > 3,
        "expected at least 4 LLM calls (Turn 2 is read from index 3), got {}",
        call_log.len()
    );
    let turn2_call = &call_log[3];

    let user_msg_count = turn2_call
        .messages
        .iter()
        .filter(|m| m.get("role").and_then(|r| r.as_str()) == Some("user"))
        .count();
    assert!(
        user_msg_count >= 2,
        "Turn 2 should have at least 2 separate user messages (not merged), found {}",
        user_msg_count
    );

    // The history check is deliberately looser than the reply check: any
    // assistant message starting with "Here's" also counts.
    let completion_assistant = turn2_call.messages.iter().any(|m| {
        m.get("role").and_then(|r| r.as_str()) == Some("assistant")
            && m.get("content")
                .and_then(|c| c.as_str())
                .is_some_and(|s| {
                    s.starts_with("Here's")
                        || COMPLETION_PREFIXES
                            .iter()
                            .any(|prefix| s.starts_with(*prefix))
                })
    });
    assert!(
        completion_assistant,
        "Turn 2's history should contain the persisted completion assistant message from Turn 1"
    );
}
/// Runs three turns where the first two replies are 500-character strings
/// ('B' × 500, then 'A' × 500). In the last recorded LLM call the test expects
/// a shortened 'B…' assistant message (at most 210 bytes) alongside the full
/// 500-byte 'A' message.
#[tokio::test]
async fn test_old_interaction_assistant_content_truncated() {
    let long_response_1 = "B".repeat(500);
    let long_response_2 = "A".repeat(500);
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(&long_response_1),
        MockProvider::text_response(&long_response_2),
        MockProvider::text_response("Short answer."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();

    let r1 = harness
        .agent
        .handle_message(
            "truncate_test",
            "First question?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(r1, long_response_1);

    let r2 = harness
        .agent
        .handle_message(
            "truncate_test",
            "Second question?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(r2, long_response_2);

    let r3 = harness
        .agent
        .handle_message(
            "truncate_test",
            "Also third question?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(r3, "Short answer.");

    let call_log = harness.provider.call_log.lock().await;
    let turn3_call = call_log.last().unwrap();
    let assistant_msgs: Vec<&serde_json::Value> = turn3_call
        .messages
        .iter()
        .filter(|m| m.get("role").and_then(|r| r.as_str()) == Some("assistant"))
        .collect();

    // Note: `len()` counts bytes, and the trailing '…' is a multi-byte char.
    let has_truncated = assistant_msgs.iter().any(|m| {
        m.get("content")
            .and_then(|c| c.as_str())
            .is_some_and(|s| s.starts_with('B') && s.ends_with('…') && s.len() < 500)
    });
    assert!(
        has_truncated,
        "Turn 1's long assistant response should be truncated in Turn 3's context"
    );

    let has_preserved = assistant_msgs.iter().any(|m| {
        m.get("content")
            .and_then(|c| c.as_str())
            .is_some_and(|s| s.starts_with('A') && s.len() == 500)
    });
    assert!(
        has_preserved,
        "Turn 2's assistant response (immediately prior) should be preserved untruncated"
    );

    for m in &assistant_msgs {
        if let Some(content) = m.get("content").and_then(|c| c.as_str()) {
            if content.starts_with('B') && content.ends_with('…') {
                assert!(
                    content.len() <= 210,
                    "Truncated content should be ~203 chars max, got {} chars: {}...",
                    content.len(),
                    // Take the preview by chars: a byte-offset slice could
                    // land inside a multi-byte character and panic.
                    content.chars().take(50).collect::<String>()
                );
            }
        }
    }
}
/// Two turns on one session, each answered with a short reply. The last
/// recorded LLM call must contain an assistant message whose content is
/// exactly "It is 4." with no "[prior turn]" marker.
#[tokio::test]
async fn test_old_short_assistant_response_preserved_unmodified() {
    let scripted = vec![
        MockProvider::text_response("It is 4."),
        MockProvider::text_response("Rust 1.82.0"),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    for prompt in ["What is 2 + 2?", "What version of Rust is installed?"] {
        harness
            .agent
            .handle_message(
                "prior_turn_no_marker",
                prompt,
                None,
                UserRole::Owner,
                ChannelContext::private("test"),
                None,
            )
            .await
            .unwrap();
    }

    let call_log = harness.provider.call_log.lock().await;
    let turn2_call = call_log.last().unwrap();

    // First assistant message whose content equals the Turn 1 reply verbatim.
    let carried_over = turn2_call.messages.iter().find(|msg| {
        matches!(msg.get("role").and_then(|v| v.as_str()), Some("assistant"))
            && matches!(
                msg.get("content").and_then(|v| v.as_str()),
                Some("It is 4.")
            )
    });
    assert!(
        carried_over.is_some(),
        "Turn 1's assistant response should be present in Turn 2's context"
    );

    let content = carried_over
        .and_then(|msg| msg.get("content"))
        .and_then(|v| v.as_str())
        .unwrap();
    let has_marker = content.contains("[prior turn]");
    assert!(
        !has_marker,
        "Old assistant responses should NOT have [prior turn] marker (causes LLM echoing). Got: {}",
        content
    );
    assert_eq!(
        content, "It is 4.",
        "Short old assistant content should be preserved unmodified"
    );
}
/// Sends seven messages on one session, waits one second, and expects a
/// non-empty conversation summary to be stored for that session. After an
/// eighth message and another one-second wait, one of the last four recorded
/// LLM calls must carry a `[Session Summary]` system message and the final
/// call must carry a `[Current Task]` marker.
#[tokio::test]
async fn test_compaction_fires_on_window_overflow() {
    // Scripted replies: "Response 1".."Response 6", then an interleaved tail,
    // then four more generic replies.
    let mut responses: Vec<_> = (1..=6)
        .map(|n| MockProvider::text_response(&format!("Response {}", n)))
        .collect();
    for text in [
        "Mock response",
        "Response 7",
        "Mock response",
        "Response 8",
    ] {
        responses.push(MockProvider::text_response(text));
    }
    responses.extend((0..4).map(|_| MockProvider::text_response("Mock response")));

    let harness = setup_test_agent(MockProvider::with_responses(responses))
        .await
        .unwrap();

    for turn in 1..=7 {
        let question = format!("Question {} about topic {}", turn, turn);
        harness
            .agent
            .handle_message(
                "compaction_test",
                &question,
                None,
                UserRole::Owner,
                ChannelContext::private("test"),
                None,
            )
            .await
            .unwrap();
    }
    tokio::time::sleep(std::time::Duration::from_millis(1000)).await;

    let stored = harness
        .state
        .get_conversation_summary("compaction_test")
        .await
        .unwrap();
    assert!(
        stored.is_some(),
        "Compaction summary should exist in DB after window overflow"
    );
    let stored = stored.unwrap();
    assert!(
        !stored.summary.is_empty(),
        "Compaction summary should not be empty"
    );

    harness
        .agent
        .handle_message(
            "compaction_test",
            "Question 8 about topic 8",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    tokio::time::sleep(std::time::Duration::from_millis(1000)).await;

    let call_log = harness.provider.call_log.lock().await;
    let recorded = call_log.len();
    assert!(
        recorded >= 8,
        "expected at least one call per turn; got {}",
        recorded
    );

    // The summary marker is searched for in the last four calls only.
    let tail = &call_log[recorded.saturating_sub(4)..];
    let has_summary = tail.iter().any(|call| {
        call.messages.iter().any(|msg| {
            matches!(msg.get("role").and_then(|v| v.as_str()), Some("system"))
                && matches!(
                    msg.get("content").and_then(|v| v.as_str()),
                    Some(text) if text.contains("[Session Summary]")
                )
        })
    });
    let turn8_call = call_log.last().expect("should have Turn 8 call");
    assert!(
        has_summary,
        "Turn 8's LLM context should include [Session Summary] from compaction"
    );

    let has_boundary = turn8_call.messages.iter().any(|msg| {
        matches!(
            msg.get("content").and_then(|v| v.as_str()),
            Some(text) if text.contains("[Current Task]")
        )
    });
    assert!(
        has_boundary,
        "Turn 8's LLM context should include [Current Task] boundary marker"
    );
}
/// Runs two tool-using turns on one session and reads back up to 100 history
/// entries. Asserts that some entries carry a `turn_id`, that there are
/// exactly two user entries whose `turn_id` equals their own `id`, that the
/// two ids differ, and that each turn has at least one non-user entry stamped
/// with its id.
#[tokio::test]
async fn test_turn_id_groups_messages_within_a_turn() {
    let scripted = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Done turn 1"),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Done turn 2"),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    for prompt in ["First request", "Second request"] {
        harness
            .agent
            .handle_message(
                "turn_id_test",
                prompt,
                None,
                UserRole::Owner,
                ChannelContext::private("test"),
                None,
            )
            .await
            .unwrap();
    }

    let history = harness
        .state
        .get_history("turn_id_test", 100)
        .await
        .unwrap();

    let any_stamped = history.iter().any(|entry| entry.turn_id.is_some());
    assert!(
        any_stamped,
        "expected some messages to carry a turn_id, got {} messages with none",
        history.len()
    );

    let user_messages: Vec<_> = history
        .iter()
        .filter(|entry| entry.role == "user")
        .collect();
    assert_eq!(
        user_messages.len(),
        2,
        "expected 2 user messages, got {}",
        user_messages.len()
    );
    for entry in user_messages.iter() {
        assert_eq!(
            entry.turn_id.as_deref(),
            Some(entry.id.as_str()),
            "user message turn_id should equal its own id; got msg id={} turn_id={:?}",
            entry.id,
            entry.turn_id
        );
    }
    assert_ne!(
        user_messages[0].turn_id, user_messages[1].turn_id,
        "two distinct user turns must have distinct turn_ids"
    );

    let turn1_id = user_messages[0].turn_id.as_deref().unwrap();
    let turn2_id = user_messages[1].turn_id.as_deref().unwrap();

    // Number of non-user history entries stamped with the given turn id.
    let stamped_nonuser = |turn_id: &str| {
        history
            .iter()
            .filter(|entry| entry.role != "user" && entry.turn_id.as_deref() == Some(turn_id))
            .count()
    };
    let turn1_nonuser_count = stamped_nonuser(turn1_id);
    let turn2_nonuser_count = stamped_nonuser(turn2_id);
    assert!(
        turn1_nonuser_count > 0,
        "Turn 1 should have at least one non-user message stamped with its turn_id"
    );
    assert!(
        turn2_nonuser_count > 0,
        "Turn 2 should have at least one non-user message stamped with its turn_id"
    );
}