/// Knowledge question backed by a stored fact.
///
/// The provider is scripted with an `[INTENT_GATE]` classification reply
/// followed by the final answer text. The test expects the reply to equal the
/// second scripted response, exactly two provider calls, and tools to be
/// present on the very first call.
#[tokio::test]
async fn test_initial_routing_call_classifies_then_executor_answers_questions() {
    const FINAL_ANSWER: &str =
        "Your website is deployed to Cloudflare Workers at your-site.workers.dev.";

    let routing_reply = MockProvider::text_response(
        "I can answer this from memory.\n[INTENT_GATE]\n\
         {\"complexity\": \"knowledge\", \"can_answer_now\": true, \"needs_tools\": false}",
    );
    let scripted = vec![routing_reply, MockProvider::text_response(FINAL_ANSWER)];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    // Seed the fact the question is about.
    let stored = harness
        .state
        .upsert_fact(
            "project",
            "my website",
            "deployed to cloudflare workers at your-site.workers.dev",
            "user",
            None,
            crate::types::FactPrivacy::Global,
        )
        .await;
    stored.unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Can you tell me the deployment URL for my website?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2);

    let recorded = harness.provider.call_log.lock().await;
    let first_call_has_tools = !recorded[0].tools.is_empty();
    assert!(
        first_call_has_tools,
        "First call should have tools (no separate tool-free text-only pre-pass)"
    );
}
/// "What's my name?" with a stored `user`/`name` fact.
///
/// The provider has no scripted responses; the test expects the fixed reply
/// `"Your name is Test Owner."` and zero recorded provider calls.
#[tokio::test]
async fn test_critical_owner_name_query_is_deterministic() {
    let provider = MockProvider::new();
    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    // Seed the owner's name so the query has something to resolve against.
    let stored = harness
        .state
        .upsert_fact(
            "user",
            "name",
            "Test Owner",
            "owner",
            None,
            crate::types::FactPrivacy::Global,
        )
        .await;
    stored.unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "What's my name?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, "Your name is Test Owner.");

    let llm_calls = harness.provider.call_count().await;
    assert_eq!(
        llm_calls, 0,
        "Critical identity query should resolve without an LLM call"
    );
}
/// Personal-recall question ("What about pets?") on the orchestrator harness.
///
/// Expects the scripted reply text to come back, at least one provider call to
/// be recorded, and the first recorded call to target `"primary-model"`.
#[tokio::test]
async fn test_personal_recall_turn_routes_at_least_primary_model() {
    let scripted = vec![MockProvider::text_response(
        "I don't have pet information saved.",
    )];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "What about pets?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let response = outcome.unwrap();
    let mentions_missing_pets = response.contains("don't have pet information");
    assert!(
        mentions_missing_pets,
        "Expected pet information response, got: {response}"
    );

    let calls = harness.provider.call_log.lock().await;
    let Some(first_call) = calls.first() else {
        panic!("Execution loop should make at least one LLM call");
    };
    assert_eq!(
        first_call.model, "primary-model",
        "Personal recall should not route to the cheapest profile/model"
    );
}
/// An empty first provider response must not end the turn.
///
/// The provider is scripted with an empty text reply, then a `system_info`
/// tool call, then the final text. The test expects the final text as the
/// reply, at least two recorded provider calls, and tools to be present on the
/// second call.
#[tokio::test]
async fn test_empty_answerable_routing_output_falls_through_to_tool_path() {
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(""),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Recovered after memory/tool retry."),
    ]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();
    let response = harness
        .agent
        .handle_message(
            "test_session",
            "What timezone am I in?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(response, "Recovered after memory/tool retry.");
    let calls = harness.provider.call_log.lock().await;
    // Guard the `calls[1]` index below and report the observed count on
    // failure instead of a bare "assertion failed" message.
    assert!(
        calls.len() >= 2,
        "Expected at least 2 LLM calls (empty first response + retry), got {}",
        calls.len()
    );
    assert!(
        !calls[1].tools.is_empty(),
        "Empty first response should trigger tool-enabled retry path"
    );
}
/// Two-turn conversation: the first turn stores the user's name through a
/// `remember_fact` tool call, the second turn is an unrelated follow-up.
///
/// The test inspects the third recorded provider call and expects its message
/// list to still contain a `tool` message named `remember_fact` whose content
/// (lower-cased) includes `name = david`.
#[tokio::test]
async fn test_identity_tool_result_survives_context_collapse() {
    let remember_call = MockProvider::tool_call_response(
        "remember_fact",
        r#"{"category":"user","key":"name","value":"David"}"#,
    );
    let scripted = vec![
        remember_call,
        MockProvider::text_response("Saved."),
        MockProvider::text_response("Continuing with your latest request."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn 1: triggers the scripted tool call. The reply itself is not checked.
    let _ = harness
        .agent
        .handle_message(
            "test_session",
            "Remember that my name is David",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: follow-up in the same session. The reply itself is not checked.
    let _ = harness
        .agent
        .handle_message(
            "test_session",
            "What should we do next?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let recorded = harness.provider.call_log.lock().await;
    let enough_calls = recorded.len() >= 3;
    assert!(
        enough_calls,
        "Expected at least 3 model calls across both turns"
    );

    let follow_up_call = &recorded[2];
    let identity_preserved = follow_up_call.messages.iter().any(|msg| {
        if msg.get("role").and_then(|r| r.as_str()) != Some("tool") {
            return false;
        }
        if msg.get("name").and_then(|n| n.as_str()) != Some("remember_fact") {
            return false;
        }
        msg.get("content")
            .and_then(|c| c.as_str())
            .map(|c| c.to_ascii_lowercase())
            .is_some_and(|lowered| lowered.contains("name = david"))
    });
    assert!(
        identity_preserved,
        "Critical identity tool result should be preserved across context collapsing"
    );
}
/// Action request that needs a tool.
///
/// Scripted responses: a plain-text acknowledgement, a `system_info` tool
/// call, and the final answer. Expects the final answer as the reply, at least
/// two provider calls, and tools present on the first call.
#[tokio::test]
async fn test_initial_routing_call_continues_for_actions() {
    const FINAL_ANSWER: &str = "Your system is running macOS.";

    let scripted = vec![
        MockProvider::text_response("I'll check the system information for you."),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(FINAL_ANSWER),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Show me the current system information and environment details",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let total_calls = harness.provider.call_count().await;
    let made_enough_calls = total_calls >= 2;
    assert!(
        made_enough_calls,
        "Expected at least 2 LLM calls (intent classification + tool execution)"
    );

    let recorded = harness.provider.call_log.lock().await;
    let first_call_has_tools = !recorded[0].tools.is_empty();
    assert!(
        first_call_has_tools,
        "First call should have tools (no separate tool-free text-only pre-pass)"
    );
}
/// A "I'll do it" style reply without any tool call must not finish the task.
///
/// Scripted responses: an `[INTENT_GATE]` reply marking tools as required, a
/// deferred-action text reply, a `system_info` tool call, and the final text.
/// Expects the final text as the reply, exactly four provider calls, and tools
/// present on both the first and second calls.
#[tokio::test]
async fn test_deferred_action_no_tool_calls_does_not_complete_task() {
    const FINAL_ANSWER: &str = "Found it and sent it.";

    let gate_reply = MockProvider::text_response(
        "I'll check and send it over.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"complexity\":\"simple\"}",
    );
    let deferred_reply = MockProvider::text_response(
        "I'll find your resume and send it over right away.\nStarting the send-resume workflow...",
    );
    let scripted = vec![
        gate_reply,
        deferred_reply,
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(FINAL_ANSWER),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "send me my resume",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 4);

    let recorded = harness.provider.call_log.lock().await;
    let first_has_tools = !recorded[0].tools.is_empty();
    assert!(first_has_tools, "First call should have tools available");
    let second_has_tools = !recorded[1].tools.is_empty();
    assert!(
        second_has_tools,
        "Execution loop must have tools available"
    );
}
/// A deferred-action reply that arrives after a tool call must not finish the
/// task either.
///
/// Scripted responses: an `[INTENT_GATE]` reply marking tools as required, a
/// `system_info` tool call, a deferred-action text reply, and then the same
/// "couldn't find" text three times. Expects that text as the reply, between
/// four and seven provider calls, and tools present on the first two calls.
#[tokio::test]
async fn test_deferred_action_after_tool_progress_does_not_complete_task() {
    const NOT_FOUND: &str = "I couldn't find a matching SOW PDF in the project files.";

    let gate_reply = MockProvider::text_response(
        "I'll find it for you.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"complexity\":\"simple\"}",
    );
    let deferred_reply = MockProvider::text_response(
        "I'll send it over once I locate the exact file. Give me a moment.",
    );
    let scripted = vec![
        gate_reply,
        MockProvider::tool_call_response("system_info", "{}"),
        deferred_reply,
        MockProvider::text_response(NOT_FOUND),
        MockProvider::text_response(NOT_FOUND),
        MockProvider::text_response(NOT_FOUND),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Send me the SOW PDF from the Lodestar project",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, NOT_FOUND);

    let call_count = harness.provider.call_count().await;
    let within_budget = (4..=7).contains(&call_count);
    assert!(within_budget, "Expected 4-7 LLM calls, got {}", call_count);

    let recorded = harness.provider.call_log.lock().await;
    let expectations = [
        (0, "First call should have tools available"),
        (1, "Execution loop must have tools available"),
    ];
    for (index, failure_message) in expectations {
        assert!(!recorded[index].tools.is_empty(), "{}", failure_message);
    }
}
/// Default harness (`setup_test_agent`): a `system_info` tool call followed by
/// the final text.
///
/// Expects the final text as the reply, exactly two provider calls, and tools
/// present on the first call.
#[tokio::test]
async fn test_uniform_models_have_tools_on_first_call() {
    const FINAL_ANSWER: &str = "Here is your info.";

    let scripted = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(FINAL_ANSWER),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "What's my system info?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2);

    let recorded = harness.provider.call_log.lock().await;
    let first_call_has_tools = !recorded[0].tools.is_empty();
    assert!(
        first_call_has_tools,
        "Without text-only pre-pass, first call should have tools"
    );
}
/// Smoke test: an empty first provider response followed by a text response
/// must let `handle_message` return `Ok`.
///
/// NOTE(review): the reply is deliberately unused and nothing else is
/// asserted, so this only checks that the turn completes without an error or
/// panic. Confirm whether an assertion on the reply text is wanted.
#[tokio::test]
async fn test_empty_first_pass_response_not_intercepted() {
    // Empty content, no tool calls, zero output tokens.
    let empty_first_reply = ProviderResponse {
        content: Some(String::new()),
        tool_calls: vec![],
        usage: Some(crate::traits::TokenUsage {
            input_tokens: 10,
            output_tokens: 0,
            cached_input_tokens: None,
            cache_creation_input_tokens: None,
            model: "mock".to_string(),
        }),
        thinking: None,
        response_note: None,
    };
    let scripted = vec![
        empty_first_reply,
        MockProvider::text_response("Fallback response."),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Hello",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let _reply = outcome.unwrap();
}
/// Two consecutive empty provider responses.
///
/// Expects the fixed "wasn't able to process" fallback as the reply, exactly
/// two provider calls, and an assistant message with that same text in the
/// session history.
#[tokio::test]
async fn test_empty_execution_response_persists_fallback_message() {
    // Builds an empty-content response; only the reported input token count varies.
    let empty_reply = |input_tokens| ProviderResponse {
        content: Some(String::new()),
        tool_calls: vec![],
        usage: Some(crate::traits::TokenUsage {
            input_tokens,
            output_tokens: 0,
            cached_input_tokens: None,
            cache_creation_input_tokens: None,
            model: "mock".to_string(),
        }),
        thinking: None,
        response_note: None,
    };
    let scripted = vec![empty_reply(10), empty_reply(20)];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Who is becquer?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();

    let expected = "I wasn't able to process that request. Could you try rephrasing?";
    assert_eq!(reply, expected);

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2);

    let history = harness.state.get_history("test_session", 10).await.unwrap();
    let fallback_persisted = history
        .iter()
        .any(|entry| entry.role == "assistant" && entry.content.as_deref() == Some(expected));
    assert!(
        fallback_persisted,
        "Fallback response should be persisted in history. History: {:?}",
        history
    );
}
/// Two empty provider responses, the first carrying a `response_note`.
///
/// Expects the fallback reply to start with the "wasn't able to process"
/// sentence, embed the provider note, end with the rephrasing prompt, use
/// exactly two provider calls, and be stored as an assistant message in the
/// session history.
#[tokio::test]
async fn test_empty_execution_response_surfaces_provider_note() {
    // Builds an empty-content response; only the provider note varies.
    let empty_reply = |response_note| ProviderResponse {
        content: Some(String::new()),
        tool_calls: vec![],
        usage: Some(crate::traits::TokenUsage {
            input_tokens: 20,
            output_tokens: 0,
            cached_input_tokens: None,
            cache_creation_input_tokens: None,
            model: "mock".to_string(),
        }),
        thinking: None,
        response_note,
    };
    let safety_note =
        "finish reason: SAFETY; candidate safety categories: HARM_CATEGORY_HATE_SPEECH".to_string();
    let scripted = vec![empty_reply(Some(safety_note)), empty_reply(None)];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Find my resume and send it",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let response = outcome.unwrap();

    // These three assertions are kept verbatim: without a custom message the
    // panic text is derived from the asserted expression.
    assert!(response.starts_with("I wasn't able to process that request."));
    assert!(response.contains("The model returned no usable output (finish reason: SAFETY; candidate safety categories: HARM_CATEGORY_HATE_SPEECH)."));
    assert!(response.ends_with("Could you try rephrasing?"));

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2);

    let history = harness.state.get_history("test_session", 10).await.unwrap();
    let fallback_persisted = history.iter().any(|entry| {
        entry.role == "assistant" && entry.content.as_deref() == Some(response.as_str())
    });
    assert!(
        fallback_persisted,
        "Fallback response with provider note should be persisted in history. History: {:?}",
        history
    );
}
/// An empty provider response followed by a real text response.
///
/// Expects the second response's text as the reply, exactly two provider
/// calls, and no fallback wording in the reply.
#[tokio::test]
async fn test_empty_execution_response_retry_recovers_with_text() {
    // Empty content, no tool calls, zero output tokens.
    let empty_first_reply = ProviderResponse {
        content: Some(String::new()),
        tool_calls: vec![],
        usage: Some(crate::traits::TokenUsage {
            input_tokens: 10,
            output_tokens: 0,
            cached_input_tokens: None,
            cache_creation_input_tokens: None,
            model: "mock".to_string(),
        }),
        thinking: None,
        response_note: None,
    };
    let scripted = vec![
        empty_first_reply,
        MockProvider::text_response("Recovered response."),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Create a page",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let response = outcome.unwrap();
    assert_eq!(response, "Recovered response.");

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2);

    // Kept verbatim: without a custom message the panic text is derived from
    // the asserted expression.
    assert!(!response.contains("I wasn't able to process that request."));
}
/// First provider response carries both answer text and a `terminal` tool
/// call.
///
/// It is followed by a `system_info` tool call and a final text response.
/// Expects the final reply to equal the answer text and at least three
/// provider calls to have been made.
#[tokio::test]
async fn test_initial_routing_call_drops_hallucinated_tool_calls() {
    use crate::traits::ToolCall;

    const DEPLOYMENT_ANSWER: &str =
        "Your website is deployed at your-site.workers.dev on Cloudflare Workers.";

    let hallucinated_call = ToolCall {
        id: "call_hallucinated".to_string(),
        name: "terminal".to_string(),
        arguments: r#"{"command":"find ~ -name wrangler.toml"}"#.to_string(),
        extra_content: None,
    };
    let routing_reply = ProviderResponse {
        content: Some(DEPLOYMENT_ANSWER.to_string()),
        tool_calls: vec![hallucinated_call],
        usage: Some(crate::traits::TokenUsage {
            input_tokens: 100,
            output_tokens: 50,
            cached_input_tokens: None,
            cache_creation_input_tokens: None,
            model: "mock".to_string(),
        }),
        thinking: None,
        response_note: None,
    };
    let scripted = vec![
        routing_reply,
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(DEPLOYMENT_ANSWER),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Can you tell me the deployment URL for my website?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, DEPLOYMENT_ANSWER);

    let call_count = harness.provider.call_count().await;
    let made_enough_calls = call_count >= 3;
    assert!(
        made_enough_calls,
        "Expected at least 3 LLM calls — initial routing call + tool call + final response (got {})",
        call_count
    );
}
/// Acknowledgement ("Yes, do it.") where the first reply contains only a
/// textual `[tool_use: ...]` stanza and an `[INTENT_GATE]` block with
/// `needs_tools: true`.
///
/// It is followed by a `system_info` tool call and a final text response.
/// Expects the final text as the reply and exactly three provider calls.
#[tokio::test]
async fn test_acknowledgment_with_needs_tools_and_empty_analysis_falls_through() {
    const FINAL_ANSWER: &str = "Proceeding with the requested changes.";

    let gate_reply = MockProvider::text_response(
        "[tool_use: terminal]\n\
         cmd: read_file project/plan.md\n\
         args: {\"path\":\"project/plan.md\"}\n\
         \n\
         [INTENT_GATE]\n\
         {\"complexity\":\"simple\",\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"is_acknowledgment\":true}",
    );
    let scripted = vec![
        gate_reply,
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(FINAL_ANSWER),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Yes, do it.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let total_calls = harness.provider.call_count().await;
    assert_eq!(
        total_calls, 3,
        "Expected initial routing call + tool call + execution pass"
    );
}
/// Bare acknowledgement ("Yes") where the first reply is only an
/// `[INTENT_GATE]` block with `is_acknowledgment: true`.
///
/// Expects the second scripted response as the reply and exactly two provider
/// calls.
#[tokio::test]
async fn test_acknowledgment_with_empty_analysis_returns_default_reply() {
    const FINAL_ANSWER: &str = "Got it, understood.";

    let gate_reply = MockProvider::text_response(
        "[INTENT_GATE]\n\
         {\"complexity\":\"simple\",\"can_answer_now\":true,\"needs_tools\":false,\"needs_clarification\":false,\"is_acknowledgment\":true}",
    );
    let scripted = vec![gate_reply, MockProvider::text_response(FINAL_ANSWER)];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Yes",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2);
}
/// Short user correction where the first reply is only an `[INTENT_GATE]`
/// block with `is_acknowledgment: false`.
///
/// Expects the second scripted response as the reply and exactly two provider
/// calls.
#[tokio::test]
async fn test_short_correction_with_empty_analysis_returns_default_reply() {
    const FINAL_ANSWER: &str = "You're right, my apologies for the confusion.";

    let gate_reply = MockProvider::text_response(
        "[INTENT_GATE]\n\
         {\"complexity\":\"simple\",\"can_answer_now\":true,\"needs_tools\":false,\"needs_clarification\":false,\"is_acknowledgment\":false}",
    );
    let scripted = vec![gate_reply, MockProvider::text_response(FINAL_ANSWER)];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "You did send me the file",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2);
}
/// Acknowledgement turn checked against the persisted `decision_point`
/// events.
///
/// After a two-call turn, the test reads every `decision_point` event for the
/// session and expects none of them to be an `intent_gate` decision whose
/// `metadata.route_reason` is `acknowledgment_direct_reply`.
#[tokio::test]
async fn test_intent_gate_decision_metadata_includes_route_reason_for_direct_reply() {
    const FINAL_ANSWER: &str = "Got it, understood.";

    let gate_reply = MockProvider::text_response(
        "[INTENT_GATE]\n\
         {\"complexity\":\"simple\",\"can_answer_now\":true,\"needs_tools\":false,\"needs_clarification\":false,\"is_acknowledgment\":true}",
    );
    let scripted = vec![gate_reply, MockProvider::text_response(FINAL_ANSWER)];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Yes",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let total_calls = harness.provider.call_count().await;
    assert_eq!(total_calls, 2);

    let event_rows: Vec<String> = sqlx::query_scalar(
        "SELECT data FROM events WHERE session_id = ? AND event_type = 'decision_point' ORDER BY id DESC",
    )
    .bind("test_session")
    .fetch_all(&harness.state.pool())
    .await
    .unwrap();

    // Each row is parsed lazily; the scan stops at the first matching event.
    let direct_reply_emitted = event_rows
        .iter()
        .map(|raw| serde_json::from_str::<serde_json::Value>(raw).unwrap())
        .any(|event| {
            let is_intent_gate =
                event.get("decision_type").and_then(|v| v.as_str()) == Some("intent_gate");
            if !is_intent_gate {
                return false;
            }
            let route_reason = event
                .get("metadata")
                .and_then(|m| m.get("route_reason"))
                .and_then(|v| v.as_str());
            route_reason == Some("acknowledgment_direct_reply")
        });
    assert!(
        !direct_reply_emitted,
        "With text-only pre-pass disabled, acknowledgment_direct_reply intent_gate decisions should not be emitted"
    );
}
/// Tool-requiring acknowledgement turn checked against the persisted
/// `decision_point` events.
///
/// After the turn completes, the test reads every `decision_point` event for
/// the session and expects none of them to be an `intent_gate` decision whose
/// `metadata.route_reason` is `tools_required`.
#[tokio::test]
async fn test_intent_gate_decision_metadata_includes_route_reason_for_continue() {
    const FINAL_ANSWER: &str = "Proceeding with the requested changes.";

    let gate_reply = MockProvider::text_response(
        "[INTENT_GATE]\n\
         {\"complexity\":\"simple\",\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"is_acknowledgment\":true}",
    );
    let scripted = vec![
        gate_reply,
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(FINAL_ANSWER),
    ];
    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let outcome = harness
        .agent
        .handle_message(
            "test_session",
            "Yes, do it.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await;
    let reply = outcome.unwrap();
    assert_eq!(reply, FINAL_ANSWER);

    let event_rows: Vec<String> = sqlx::query_scalar(
        "SELECT data FROM events WHERE session_id = ? AND event_type = 'decision_point' ORDER BY id DESC",
    )
    .bind("test_session")
    .fetch_all(&harness.state.pool())
    .await
    .unwrap();

    // Each row is parsed lazily; the scan stops at the first matching event.
    let tools_required_emitted = event_rows
        .iter()
        .map(|raw| serde_json::from_str::<serde_json::Value>(raw).unwrap())
        .any(|event| {
            let is_intent_gate =
                event.get("decision_type").and_then(|v| v.as_str()) == Some("intent_gate");
            if !is_intent_gate {
                return false;
            }
            let route_reason = event
                .get("metadata")
                .and_then(|m| m.get("route_reason"))
                .and_then(|v| v.as_str());
            route_reason == Some("tools_required")
        });
    assert!(
        !tools_required_emitted,
        "With text-only pre-pass disabled, tools_required intent_gate decisions from the removed decision phase should not be emitted"
    );
}