/// Sends a greeting through the harness built by `setup_test_agent` with a
/// default `MockProvider`, then checks that the reply is `"Mock response"` and
/// that the state reports no active goals.
#[tokio::test]
async fn test_orchestration_uniform_models_no_routing() {
    let harness = setup_test_agent(MockProvider::new()).await.unwrap();
    let session_id = "test_session";

    let reply = harness
        .agent
        .handle_message(
            session_id,
            "Hello!",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, "Mock response");

    // A plain greeting must not leave any active goal behind.
    let active_goals = harness.state.get_active_goals().await.unwrap();
    assert!(active_goals.is_empty(), "No goals with uniform models");
}
/// Scripts three provider replies (a gate reply that asks for tools, one
/// `system_info` tool call, and a final text answer) and expects the final text
/// to be returned with no active goals recorded.
#[tokio::test]
async fn test_orchestration_simple_falls_through_to_full_loop() {
    let scripted = vec![
        MockProvider::text_response(
            "I'll help you check the system info.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[]}",
        ),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Your system is running macOS."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "test_session",
            "check system info",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, "Your system is running macOS.");

    let active_goals = harness.state.get_active_goals().await.unwrap();
    assert!(
        active_goals.is_empty(),
        "Simple tasks should not create goals"
    );
}
/// Sends a long multi-step request and expects a non-empty reply plus exactly
/// one goal for the session, with status `"active"` and a description that
/// contains the opening words of the request.
#[tokio::test]
async fn test_orchestration_complex_creates_goal() {
    let session_id = "test_session";
    let request = "Analyze the requirements and compare authentication libraries, then identify the best frameworks, find suitable database solutions, and summarize the deployment options for a full-stack website with CI/CD pipeline";

    let provider = MockProvider::with_responses(vec![MockProvider::text_response(
        "I'll start working on building your website.",
    )]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let reply = harness
        .agent
        .handle_message(
            session_id,
            request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(!reply.is_empty(), "Should return a non-empty response");

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(
        session_goals.len(),
        1,
        "Complex request should create a goal"
    );

    // The length check above guarantees index 0 exists.
    let created = &session_goals[0];
    assert_eq!(created.status, "active");
    assert!(created.description.contains("Analyze the requirements"));
}
/// Sends a multi-step request phrased around knowledge-base maintenance and
/// expects a reply containing "already runs via built-in background jobs" with
/// no goals stored for the session.
#[tokio::test]
async fn test_orchestration_complex_internal_maintenance_does_not_create_goal() {
    let session_id = "test_session";
    let maintenance_request = "Analyze the knowledge base and identify stale entries, then process embeddings, consolidate memories, find outdated data, and report on decay old facts";

    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            session_id,
            maintenance_request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        response.contains("already runs via built-in background jobs"),
        "Expected maintenance-routing response, got: {response}"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert!(
        session_goals.is_empty(),
        "Internal maintenance intent should not create a goal"
    );
}
/// Scripts a gate reply followed by six identical `system_info` tool calls and
/// a trailing text reply, then checks only that the agent still returns a
/// non-empty response.
#[tokio::test]
async fn test_orchestration_simple_stall_detection_in_full_loop() {
    // Script order: gate reply, six repeated tool calls, then a trailing text reply.
    let mut scripted = vec![MockProvider::text_response(
        "I'll run a command for you.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[]}",
    )];
    scripted.extend((0..6).map(|_| MockProvider::tool_call_response("system_info", "{}")));
    scripted.push(MockProvider::text_response("Should not reach here"));

    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "test_session",
            "run a quick check",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        !reply.is_empty(),
        "Should return a non-empty response even on stall"
    );
}
/// Scripts a gate reply, one `system_info` tool call and three copies of the
/// same final text, then expects that final text as the agent's reply.
#[tokio::test]
async fn test_orchestration_simple_uses_full_loop_with_all_tools() {
    let final_text = "Diagnostics complete. All systems normal.";
    let scripted = vec![
        MockProvider::text_response(
            "I'll help with that.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[]}",
        ),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(final_text),
        MockProvider::text_response(final_text),
        MockProvider::text_response(final_text),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "test_session",
            "run diagnostics",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, final_text);
}
/// Challenges the agent with "Are you sure I have pets?" while the scripted
/// provider tries a `browser` tool call and then a `manage_people` tool call.
///
/// Expectations:
/// - the final reply contains "do not have";
/// - the provider is called at most four times (the observed count is reported
///   on failure);
/// - at least one `browser` tool message exists in the session history, and
///   every such message carries one of the known "blocked" marker texts.
#[tokio::test]
async fn test_personal_recall_challenge_scopes_tools_and_reaffirms() {
    let session_id = "test_session";

    // Build the scripted replies in the order the provider will serve them.
    let gate_reply = MockProvider::text_response(
        "Let me verify memory first.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[]}",
    );
    let mut browser_call = MockProvider::tool_call_response(
        "browser",
        r#"{"action":"navigate","url":"https://example.com"}"#,
    );
    browser_call.content = Some("I'll check additional sources.".to_string());
    let mut people_call = MockProvider::tool_call_response(
        "manage_people",
        r#"{"action":"view","person_name":"__unknown_person_for_recall_guardrail__"}"#,
    );
    people_call.content = Some("I'll re-check your stored people data.".to_string());
    let final_reply =
        MockProvider::text_response("I still do not have that information saved in memory.");

    let provider =
        MockProvider::with_responses(vec![gate_reply, browser_call, people_call, final_reply]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            session_id,
            "Are you sure I have pets?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        response.contains("do not have"),
        "Expected no-information reaffirmation after targeted memory re-check, got: {response}"
    );

    // Bind the count first so a failure shows how far the loop actually ran.
    let llm_calls = harness.provider.call_count().await;
    assert!(
        llm_calls <= 4,
        "Challenge turn should stay bounded and not spiral (calls={llm_calls})"
    );

    let history = harness.state.get_history(session_id, 50).await.unwrap();
    let browser_tool_msgs: Vec<&crate::traits::Message> = history
        .iter()
        .filter(|m| m.role == "tool" && m.tool_name.as_deref() == Some("browser"))
        .collect();
    // The browser call must have produced a tool message, and each one must be
    // a rejection rather than a real browser result.
    let scoped_block = !browser_tool_msgs.is_empty()
        && browser_tool_msgs.iter().all(|m| {
            m.content.as_deref().is_some_and(|c| {
                c.contains("Personal-memory recall")
                    || c.contains("not a real tool")
                    || c.contains("Unknown tool")
                    || c.contains("should be answered directly in plain text")
            })
        });
    assert!(
        scoped_block,
        "Expected out-of-scope browser tool call to be blocked for personal recall turn"
    );
}
/// Runs two turns in one session: "What about pets?" followed by the bare
/// challenge "Are you sure?".
///
/// Expectations:
/// - the first reply contains "don't have information about pets";
/// - the second reply contains either "do not have" or "requires running tools";
/// - the provider is called at most three times across both turns (the observed
///   count is reported on failure).
#[tokio::test]
async fn test_personal_recall_challenge_inherits_previous_turn_context() {
    let session_id = "test_session";
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response("I don't have information about pets."),
        MockProvider::text_response("I still do not have that information saved in memory."),
    ]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let first = harness
        .agent
        .handle_message(
            session_id,
            "What about pets?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        first.contains("don't have information about pets"),
        "Expected personal-recall context, got: {first}"
    );

    let second = harness
        .agent
        .handle_message(
            session_id,
            "Are you sure?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        second.contains("do not have") || second.contains("requires running tools"),
        "Expected no-information reaffirmation or tools-unavailable message, got: {second}"
    );

    // Bind the count first so a failure shows how many calls were made.
    let llm_calls = harness.provider.call_count().await;
    assert!(
        llm_calls <= 3,
        "Follow-up challenge should stay bounded and not spiral (calls={llm_calls})"
    );
}
/// Asks a factual question, then challenges with "Are you sure?", and inspects
/// the last recorded provider call. One of its messages must contain the
/// "REAFFIRMATION CHALLENGE" marker together with the original question and
/// the original answer.
#[tokio::test]
async fn test_general_reaffirmation_challenge_injects_prior_answer_anchor() {
    let session_id = "reaffirm_anchor_test";
    let question = "How many R's in strawberry?";
    let original_answer = "There are 3 R's in strawberry.";

    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(original_answer),
        MockProvider::text_response("Yes — strawberry has 3 R's."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();

    let first_reply = harness
        .agent
        .handle_message(
            session_id,
            question,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        first_reply.contains("3 R"),
        "Expected strawberry answer, got: {first_reply}"
    );

    // The challenge reply itself is not inspected; only the prompt sent to the provider is.
    harness
        .agent
        .handle_message(
            session_id,
            "Are you sure?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let recorded_calls = harness.provider.call_log.lock().await;
    let challenge_call = recorded_calls.last().expect("challenge turn LLM call");
    let carries_anchor = |text: &str| {
        text.contains("REAFFIRMATION CHALLENGE")
            && text.contains(question)
            && text.contains(original_answer)
    };
    let has_anchor = challenge_call.messages.iter().any(|message| {
        message
            .get("content")
            .and_then(|content| content.as_str())
            .is_some_and(|text| carries_anchor(text))
    });
    assert!(
        has_anchor,
        "Challenge turn should inject reaffirmation anchor for the immediately previous exchange: {:?}",
        challenge_call.messages
    );
}
/// Asks a factual question and then sends an unrelated new task ("I really
/// need you to write a blog post about Ecuador"). No message in the last
/// recorded provider call may contain the "REAFFIRMATION CHALLENGE" marker.
#[tokio::test]
async fn test_compound_message_with_challenge_keyword_skips_reaffirmation_anchor() {
    let session_id = "reaffirm_anchor_negative_test";
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response("There are 3 R's in strawberry."),
        MockProvider::text_response("Here is the blog post about Ecuador."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();

    // Turn 1: establishes a previous exchange.
    harness
        .agent
        .handle_message(
            session_id,
            "How many R's in strawberry?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: a fresh task rather than a challenge to the previous answer.
    harness
        .agent
        .handle_message(
            session_id,
            "I really need you to write a blog post about Ecuador",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let recorded_calls = harness.provider.call_log.lock().await;
    let second_call = recorded_calls.last().expect("second turn LLM call");
    let has_anchor = second_call.messages.iter().any(|message| {
        let text = message.get("content").and_then(|content| content.as_str());
        text.is_some_and(|text| text.contains("REAFFIRMATION CHALLENGE"))
    });
    assert!(
        !has_anchor,
        "Compound new-task message must not be pinned to the previous exchange: {:?}",
        second_call.messages
    );
}
/// Sends "deploy in 2 hours" and expects a confirmation prompt, one `finite`
/// goal in `pending_confirmation` status, and a single one-shot schedule
/// attached to that goal.
#[tokio::test]
async fn test_orchestration_scheduled_one_shot_creates_pending_confirmation() {
    let session_id = "test_session";
    let gate_reply = MockProvider::text_response(
        "I'll schedule that.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"in 2h\",\"schedule_type\":\"one_shot\"}",
    );
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(vec![gate_reply]))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            session_id,
            "deploy in 2 hours",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        reply.contains("Reply **confirm**"),
        "Expected confirmation prompt for scheduled goal"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(session_goals.len(), 1);
    let pending = &session_goals[0];
    assert_eq!(pending.goal_type, "finite");
    assert_eq!(pending.status, "pending_confirmation");

    let goal_schedules = harness
        .state
        .get_schedules_for_goal(&pending.id)
        .await
        .unwrap();
    assert_eq!(goal_schedules.len(), 1);
    assert!(goal_schedules[0].is_one_shot);
}
/// The scripted gate reply carries the schedule string `2 minutes`, while the
/// user message says "in 2 minutes". The test expects a confirmation prompt,
/// one goal in `pending_confirmation` status, and a single one-shot schedule.
#[tokio::test]
async fn test_orchestration_scheduled_malformed_schedule_recovers_from_user_text() {
    let session_id = "test_session";
    let gate_reply = MockProvider::text_response(
        "Sure.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"2 minutes\",\"schedule_type\":\"one_shot\"}",
    );
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(vec![gate_reply]))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            session_id,
            "check disk space in 2 minutes",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        reply.contains("Reply **confirm**"),
        "Expected confirmation prompt for scheduled goal"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(session_goals.len(), 1);
    let pending = &session_goals[0];
    assert_eq!(pending.status, "pending_confirmation");

    let goal_schedules = harness
        .state
        .get_schedules_for_goal(&pending.id)
        .await
        .unwrap();
    assert_eq!(goal_schedules.len(), 1);
    assert!(goal_schedules[0].is_one_shot);
}
/// Sends "monitor API health every 6h" and expects a confirmation prompt, one
/// `continuous` goal in `pending_confirmation` status with budgets of 100_000
/// per check and 500_000 daily, and a single schedule that is not one-shot.
#[tokio::test]
async fn test_orchestration_scheduled_recurring_creates_pending_confirmation() {
    let session_id = "test_session";
    let gate_reply = MockProvider::text_response(
        "I'll schedule recurring monitoring.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"every 6h\",\"schedule_type\":\"recurring\"}",
    );
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(vec![gate_reply]))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            session_id,
            "monitor API health every 6h",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        reply.contains("Reply **confirm**"),
        "Expected confirmation prompt for recurring schedule"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(session_goals.len(), 1);
    let pending = &session_goals[0];
    assert_eq!(pending.goal_type, "continuous");
    assert_eq!(pending.status, "pending_confirmation");
    assert_eq!(pending.budget_per_check, Some(100_000));
    assert_eq!(pending.budget_daily, Some(500_000));

    let goal_schedules = harness
        .state
        .get_schedules_for_goal(&pending.id)
        .await
        .unwrap();
    assert_eq!(goal_schedules.len(), 1);
    assert!(!goal_schedules[0].is_one_shot);
}
/// Sends one message containing two numbered schedule requests and expects a
/// batch confirmation prompt mentioning "2 goals", two goals in
/// `pending_confirmation` status, and descriptions reduced to
/// "Check server health" and "Send status report".
#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_creates_two_pending_goals() {
    let session_id = "test_session";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            session_id,
            "1) every day at 9am remind me to check server health. 2) in 2 hours send status report",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        response.contains("Reply **confirm**"),
        "Expected confirmation prompt for multi-schedule request"
    );
    assert!(
        response.contains("2 goals"),
        "Expected batch confirmation text, got: {response}"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(session_goals.len(), 2);

    let all_pending = session_goals
        .iter()
        .all(|g| g.status == "pending_confirmation");
    assert!(all_pending, "All goals should await confirmation");

    let has_recurring = session_goals
        .iter()
        .any(|g| g.description == "Check server health");
    assert!(has_recurring, "Expected cleaned recurring description");

    let has_one_shot = session_goals
        .iter()
        .any(|g| g.description == "Send status report");
    assert!(has_one_shot, "Expected cleaned one-shot description");
}
/// Exercises both outcomes of a two-segment schedule request on one harness,
/// using a separate session for each:
/// - replying "confirm" must answer with "Scheduled 2 goals" and leave both
///   goals `active`;
/// - replying "cancel" must answer with "cancelled 2 goals" and leave both
///   goals `cancelled`.
#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_confirm_and_cancel() {
    let multi_request = "1) every day at 9am check server health. 2) in 2 hours send status report";
    let confirm_session = "test_session_confirm";
    let cancel_session = "test_session_cancel";

    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    // --- Confirm path ---
    harness
        .agent
        .handle_message(
            confirm_session,
            multi_request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    let confirm_response = harness
        .agent
        .handle_message(
            confirm_session,
            "confirm",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        confirm_response.contains("Scheduled 2 goals"),
        "Expected batch activation, got: {confirm_response}"
    );
    let confirmed = harness
        .state
        .get_goals_for_session(confirm_session)
        .await
        .unwrap();
    assert_eq!(confirmed.len(), 2);
    assert!(confirmed.iter().all(|g| g.status == "active"));

    // --- Cancel path ---
    harness
        .agent
        .handle_message(
            cancel_session,
            multi_request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    let cancel_response = harness
        .agent
        .handle_message(
            cancel_session,
            "cancel",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        cancel_response.contains("cancelled 2 goals"),
        "Expected batch cancellation, got: {cancel_response}"
    );
    let cancelled = harness
        .state
        .get_goals_for_session(cancel_session)
        .await
        .unwrap();
    assert_eq!(cancelled.len(), 2);
    assert!(cancelled.iter().all(|g| g.status == "cancelled"));
}
/// Marks the session as schedule-approved through the test hook, sends a
/// two-segment schedule request, and expects "Scheduled 2 goals", two `active`
/// goals, and zero provider calls.
#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_auto_confirms_when_session_preapproved() {
    let session_id = "test_session_preapproved_multi";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    harness
        .agent
        .set_test_schedule_approval_for_session(session_id, true)
        .await;

    let response = harness
        .agent
        .handle_message(
            session_id,
            "1) every day at 9am check server health. 2) in 2 hours send status report",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        response.contains("Scheduled 2 goals"),
        "Expected auto-confirmed batch activation, got: {response}"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(session_goals.len(), 2);
    assert!(session_goals.iter().all(|g| g.status == "active"));

    let llm_calls = harness.provider.call_count().await;
    assert_eq!(
        llm_calls, 0,
        "Auto-approved scheduling should not require LLM calls"
    );
}
/// Builds a message with eleven numbered schedule segments and expects a reply
/// containing "up to 10 goals per message" with no goals created for the session.
#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_rejects_too_many_segments() {
    let session_id = "test_session_too_many_segments";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    // Eleven segments: "1) in 1h run task 1. 2) in 2h run task 2. ..."
    let segments: Vec<String> = (1..=11)
        .map(|n| format!("{n}) in {n}h run task {n}"))
        .collect();
    let request = segments.join(". ");

    let response = harness
        .agent
        .handle_message(
            session_id,
            &request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        response.contains("up to 10 goals per message"),
        "Expected segment-limit guidance, got: {response}"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert!(
        session_goals.is_empty(),
        "Segment-limit rejection should not create any goals"
    );
}
/// Sends a two-segment schedule request whose second segment is "every 0m".
///
/// Expectations:
/// - the reply contains "couldn't parse one of the schedules";
/// - no goals are created for the session, not even for the first segment;
/// - the provider is never called. This uses `assert_eq!` so a failure prints
///   the observed call count.
#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_invalid_segment_rejected_without_partial_goal_creation(
) {
    let session_id = "test_session_invalid_multi";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            session_id,
            "1) every day at 9am check server health. 2) every 0m send status report",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        response.contains("couldn't parse one of the schedules"),
        "Expected direct invalid-segment response, got: {response}"
    );

    let goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert!(
        goals.is_empty(),
        "Invalid multi-schedule input should not create partial goals"
    );

    assert_eq!(
        harness.provider.call_count().await,
        0,
        "Invalid multi-schedule input should not enter the LLM fallback loop"
    );
}
/// Sends a two-segment schedule request whose first segment is "every 0m" and
/// expects the parse-failure reply, no goals for the session, and zero
/// provider calls.
#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_invalid_first_segment_creates_no_goals() {
    let session_id = "test_session_invalid_first_multi";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            session_id,
            "1) every 0m check server health. 2) in 2 hours send status report",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        response.contains("couldn't parse one of the schedules"),
        "Expected direct invalid-segment response, got: {response}"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert!(
        session_goals.is_empty(),
        "Invalid first segment should prevent all goal creation"
    );

    let llm_calls = harness.provider.call_count().await;
    assert_eq!(
        llm_calls, 0,
        "Invalid first segment should not trigger the LLM fallback loop"
    );
}
/// Sends a three-segment schedule request whose middle segment is "every 0m"
/// and expects the parse-failure reply, no goals for the session, and zero
/// provider calls.
#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_invalid_middle_segment_creates_no_goals() {
    let session_id = "test_session_invalid_middle_multi";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            session_id,
            "1) every day at 9am check server health. 2) every 0m send status report. 3) in 3 hours check alerts",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        response.contains("couldn't parse one of the schedules"),
        "Expected direct invalid-segment response, got: {response}"
    );

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert!(
        session_goals.is_empty(),
        "Invalid middle segment should prevent all goal creation"
    );

    let llm_calls = harness.provider.call_count().await;
    assert_eq!(
        llm_calls, 0,
        "Invalid middle segment should not trigger the LLM fallback loop"
    );
}
/// Sends an unrelated question first ("What is my daily budget?") and then a
/// reminder request in the same session. The single resulting goal must be
/// described as "Check logs" and must not mention "daily budget".
#[tokio::test]
async fn test_orchestration_scheduled_single_description_uses_current_turn_task_text() {
    let session_id = "test_session_description_contamination";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    // Earlier turn whose text must not leak into the goal description.
    harness
        .agent
        .handle_message(
            session_id,
            "What is my daily budget?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The scheduling turn under test.
    harness
        .agent
        .handle_message(
            session_id,
            "tomorrow at 11:09pm EST remind me to check logs",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(session_goals.len(), 1);
    let scheduled = &session_goals[0];
    assert_eq!(scheduled.description, "Check logs");
    assert!(
        !scheduled.description.contains("daily budget"),
        "Description should not include unrelated prior turn text"
    );
}
/// Sends "deploy in 2 hours" followed by "confirm" in the same session.
///
/// Expectations:
/// - the confirm reply contains "Scheduled:" (the actual reply is printed on
///   failure);
/// - exactly one goal exists for the session and it is `active`;
/// - the provider is never called during either turn.
#[tokio::test]
async fn test_orchestration_schedule_confirm_activates_goal() {
    let session_id = "test_session";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    // Turn 1: create the pending schedule; its reply is not inspected here.
    let _ = harness
        .agent
        .handle_message(
            session_id,
            "deploy in 2 hours",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: confirm it.
    let confirm_response = harness
        .agent
        .handle_message(
            session_id,
            "confirm",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        confirm_response.contains("Scheduled:"),
        "Expected schedule activation reply, got: {confirm_response}"
    );

    let goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(goals.len(), 1);
    assert_eq!(goals[0].status, "active");

    assert_eq!(
        harness.provider.call_count().await,
        0,
        "Both schedule creation (deterministic) and confirm (gate) should work without LLM calls"
    );
}
/// Sends "deploy in 2 hours" followed by "cancel" in the same session.
///
/// Expectations:
/// - the cancel reply contains "cancelled" (the actual reply is printed on
///   failure);
/// - the session still has exactly one goal, now with status `cancelled`.
#[tokio::test]
async fn test_orchestration_schedule_cancel_removes_goal() {
    let session_id = "test_session";
    let provider = MockProvider::with_responses(vec![MockProvider::text_response(
        "I'll schedule that.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"in 2h\",\"schedule_type\":\"one_shot\"}",
    )]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    // Turn 1: create the pending schedule; its reply is not inspected here.
    let _ = harness
        .agent
        .handle_message(
            session_id,
            "deploy in 2 hours",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: cancel it.
    let cancel_response = harness
        .agent
        .handle_message(
            session_id,
            "cancel",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        cancel_response.contains("cancelled"),
        "Expected schedule cancellation reply, got: {cancel_response}"
    );

    // The goal record is kept; only its status changes.
    let goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(goals.len(), 1);
    assert_eq!(goals[0].status, "cancelled");
}
/// Seeds two continuous goals (one in `test_session`, one in `other_session`)
/// and sends a cancel request from `test_session` that names the other
/// session's goal by text. Expects the scripted final reply, exactly three
/// provider calls, and both goals still `active`.
#[tokio::test]
async fn test_orchestration_targeted_cancel_text_does_not_auto_cancel_session_goal() {
    let session_id = "test_session";
    let final_text = "Please share the goal ID to cancel that specific goal.";

    let scripted = vec![
        MockProvider::text_response(
            "Understood.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"cancel_intent\":true,\"cancel_scope\":\"targeted\",\"complexity\":\"simple\"}",
        ),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(final_text),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Goal that belongs to the session sending the cancel request.
    let morning_goal = Goal::new_continuous(
        "Send me a slack message at 7:00 am EST tomorrow with a positive message",
        session_id,
        Some(2000),
        Some(20000),
    );
    harness.state.create_goal(&morning_goal).await.unwrap();

    // Goal named in the cancel text; it lives in a different session.
    let english_goal = Goal::new_continuous(
        "English Research: Researching English pronunciation/phonetics for Spanish speakers",
        "other_session",
        Some(2000),
        Some(20000),
    );
    harness.state.create_goal(&english_goal).await.unwrap();

    let reply = harness
        .agent
        .handle_message(
            session_id,
            "cancel this goal: English Research: Researching English",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, final_text);

    let llm_calls = harness.provider.call_count().await;
    assert_eq!(
        llm_calls, 3,
        "Targeted cancel text should not trigger session-wide auto-cancel shortcut"
    );

    let morning_after = harness
        .state
        .get_goal(&morning_goal.id)
        .await
        .unwrap()
        .unwrap();
    assert_eq!(morning_after.status, "active");

    let english_after = harness
        .state
        .get_goal(&english_goal.id)
        .await
        .unwrap()
        .unwrap();
    assert_eq!(english_after.status, "active");
}
/// Sends "deploy in 2 hours" and then, instead of confirming, an unrelated
/// question ("what is rust?"). Expects the session's single goal to end up
/// with status `cancelled`.
#[tokio::test]
async fn test_orchestration_schedule_new_message_cancels_pending() {
    let session_id = "test_session";
    let scripted = vec![
        MockProvider::text_response(
            "I'll schedule that.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"in 2h\",\"schedule_type\":\"one_shot\"}",
        ),
        MockProvider::text_response(
            "[INTENT_GATE] {\"can_answer_now\":true,\"needs_tools\":false,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"complexity\":\"knowledge\"}",
        ),
        MockProvider::text_response("Rust is a systems programming language."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn 1: the scheduling request.
    harness
        .agent
        .handle_message(
            session_id,
            "deploy in 2 hours",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: an unrelated message sent in place of confirm/cancel.
    harness
        .agent
        .handle_message(
            session_id,
            "what is rust?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let session_goals = harness
        .state
        .get_goals_for_session(session_id)
        .await
        .unwrap();
    assert_eq!(session_goals.len(), 1);
    assert_eq!(session_goals[0].status, "cancelled");
}
/// Scripts a provider that twice claims "I have deleted the folder ..." without
/// any tool call, then admits it could not verify the deletion.
///
/// Checks that:
/// - the final reply does not contain "I have deleted";
/// - at least two provider calls were made;
/// - some call after the first carries the "MUST include at least one tool
///   call" directive;
/// - a `PostExecutionValidation` decision event was recorded whose metadata
///   marks the claim as blocked with zero mutation tool calls.
#[tokio::test]
async fn test_zero_tool_fabricated_mutation_claim_is_blocked() {
    let session_id = "test_session";
    let fabrication = "I have deleted the folder /tmp/fab-test entirely.";
    let scripted = vec![
        MockProvider::text_response(fabrication),
        MockProvider::text_response(fabrication),
        MockProvider::text_response("I could not verify the deletion because no command was run."),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            session_id,
            "Delete the folder /tmp/fab-test entirely.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        !response.contains("I have deleted"),
        "fabricated zero-tool mutation claim was accepted as completion: {response}"
    );

    let calls = harness.provider.call_log.lock().await;
    let total_calls = calls.len();
    assert!(
        total_calls >= 2,
        "completion was accepted on the first iteration (calls={})",
        total_calls
    );

    // Only calls after the first can carry the corrective directive.
    let nudged = calls.iter().skip(1).any(|call| {
        call.messages.iter().any(|message| {
            message["content"]
                .as_str()
                .is_some_and(|text| text.contains("MUST include at least one tool call"))
        })
    });
    assert!(
        nudged,
        "DeferredToolCallRequired directive was not injected after the fabricated claim"
    );

    let event_store = crate::events::EventStore::new(harness.state.pool())
        .await
        .expect("event store from harness pool");
    let events = event_store
        .query_recent_events(session_id, 200)
        .await
        .expect("recent events");

    let saw_mutation_gate_warning = events.iter().any(|event| {
        // Events whose payload is not a decision point cannot match.
        let data = match event.parse_data::<crate::events::DecisionPointData>() {
            Ok(parsed) => parsed,
            Err(_) => return false,
        };

        let condition = data
            .metadata
            .get("condition")
            .and_then(serde_json::Value::as_str);
        let claimed_mutation = data
            .metadata
            .get("assistant_claimed_mutation")
            .and_then(serde_json::Value::as_bool);
        let mutation_tool_calls = data
            .metadata
            .get("mutation_tool_calls_count")
            .and_then(serde_json::Value::as_u64);
        let outcome = data
            .metadata
            .get("outcome")
            .and_then(serde_json::Value::as_str);

        data.decision_type == crate::events::DecisionType::PostExecutionValidation
            && condition == Some("expects_mutation_gate_evaluated")
            && claimed_mutation == Some(true)
            && mutation_tool_calls == Some(0)
            && outcome == Some("blocked_claimed_mutation_without_tool")
    });
    assert!(
        saw_mutation_gate_warning,
        "mutation gate did not emit explicit warning telemetry for fabricated zero-tool mutation claim"
    );
}
/// Scripts a provider that first claims to have started a specialist review
/// agent without any tool call, then admits that no such agent is running.
///
/// Expectations (each with a diagnostic message on failure):
/// - the final reply does not contain "I've initiated";
/// - at least two provider calls were made;
/// - some call after the first carries the "MUST include at least one tool
///   call" directive.
#[tokio::test]
async fn test_zero_tool_fabricated_delegation_claim_is_blocked() {
    let fabrication =
        "I've initiated a deep analysis using a specialized review agent. I'll return shortly.";
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(fabrication),
        MockProvider::text_response(
            "I could not start a specialist agent, so no delegated review is running.",
        ),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "fabricated_delegation",
            "Analyze that resume. Any flaws?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        !response.contains("I've initiated"),
        "fabricated zero-tool delegation claim was accepted: {response}"
    );

    let calls = harness.provider.call_log.lock().await;
    assert!(
        calls.len() >= 2,
        "completion was accepted on the first iteration (calls={})",
        calls.len()
    );

    // Only calls after the first can carry the corrective directive.
    let nudged = calls.iter().skip(1).any(|call| {
        call.messages.iter().any(|message| {
            message["content"]
                .as_str()
                .is_some_and(|text| text.contains("MUST include at least one tool call"))
        })
    });
    assert!(
        nudged,
        "no follow-up LLM call carried the tool-call-required directive after the fabricated delegation claim"
    );
}