aidaemon 0.11.1

// ==================== Orchestration Integration Tests ====================

#[tokio::test]
async fn test_orchestration_uniform_models_no_routing() {
    // Baseline without a router: with uniform models the first-pass
    // orchestration stays inactive, so a plain greeting is answered directly.
    // `MockProvider::new()` replies "Mock response".
    let harness = setup_test_agent(MockProvider::new()).await.unwrap();
    let agent = &harness.agent;

    let reply = agent
        .handle_message(
            "test_session",
            "Hello!",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(reply, "Mock response");

    // Orchestration routing was bypassed, so no goal may be active afterwards.
    let active_goals = harness.state.get_active_goals().await.unwrap();
    assert!(active_goals.is_empty(), "No goals with uniform models");
}

#[tokio::test]
async fn test_orchestration_simple_falls_through_to_full_loop() {
    // Orchestrator harness with non-uniform models: the first routing pass
    // runs, reports "needs tools" (not can_answer_now), and the simple task
    // is then handled by the full agent loop.
    //
    // Scripted provider replies, served in order.
    let scripted = vec![
        // Call 1 — first routing pass: analysis whose intent gate sets needs_tools.
        MockProvider::text_response(
            "I'll help you check the system info.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[]}",
        ),
        // Call 2 — full agent loop: a tool call.
        MockProvider::tool_call_response("system_info", "{}"),
        // Call 3 — full agent loop: the final text.
        MockProvider::text_response("Your system is running macOS."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();
    let agent = &harness.agent;

    let reply = agent
        .handle_message(
            "test_session",
            "check system info",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The user sees the final text produced by the full agent loop.
    assert_eq!(reply, "Your system is running macOS.");

    // Simple tasks are not tracked as goals.
    let active_goals = harness.state.get_active_goals().await.unwrap();
    assert!(
        active_goals.is_empty(),
        "Simple tasks should not create goals"
    );
}

#[tokio::test]
async fn test_orchestration_complex_creates_goal() {
    // Deterministic pre-routing (keyword heuristics: action markers plus
    // compound keywords) classifies the request as complex, creates a goal
    // and spawns a task lead synchronously (no self_ref in tests). Routing
    // itself makes no LLM call, so the only scripted reply belongs to the
    // task lead's agent loop.
    let task_lead_reply =
        MockProvider::text_response("I'll start working on building your website.");
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(vec![
        task_lead_reply,
    ]))
    .await
    .unwrap();

    // The wording must trip looks_like_complex_request_fallback():
    // >=3 action markers (analyze, compare, identify, find, summarize) and a
    // compound keyword ("and"/"then").
    let session = "test_session";
    let request = "Analyze the requirements and compare authentication libraries, then identify the best frameworks, find suitable database solutions, and summarize the deployment options for a full-stack website with CI/CD pipeline";

    let reply = harness
        .agent
        .handle_message(
            session,
            request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The exact text depends on how many LLM calls the agent loop makes, so
    // only non-emptiness is checked.
    assert!(!reply.is_empty(), "Should return a non-empty response");

    // Key assertion: exactly one goal exists for the session.
    let goals = harness.state.get_goals_for_session(session).await.unwrap();
    assert_eq!(goals.len(), 1, "Complex request should create a goal");

    let goal = &goals[0];
    // A text-only task-lead reply must not auto-complete the goal without finished tasks.
    assert_eq!(goal.status, "active");
    assert!(goal.description.contains("Analyze the requirements"));
}

#[tokio::test]
async fn test_orchestration_complex_internal_maintenance_does_not_create_goal() {
    // Deterministic routing treats this request as complex (its action markers
    // trigger looks_like_complex_request_fallback), but handle_complex_intent
    // recognises maintenance keywords and answers with a canned response
    // instead of creating a goal. Both steps are deterministic, hence the
    // unscripted provider.
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    // Complex classification: analyze, identify, find, report (4 action
    // markers) plus "and"/"then". Maintenance match
    // (is_internal_maintenance_intent): process embeddings + consolidate
    // memories + decay old facts.
    let session = "test_session";
    let request = "Analyze the knowledge base and identify stale entries, then process embeddings, consolidate memories, find outdated data, and report on decay old facts";

    let response = harness
        .agent
        .handle_message(
            session,
            request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        response.contains("already runs via built-in background jobs"),
        "Expected maintenance-routing response, got: {response}"
    );

    let session_goals = harness.state.get_goals_for_session(session).await.unwrap();
    assert!(
        session_goals.is_empty(),
        "Internal maintenance intent should not create a goal"
    );
}

#[tokio::test]
async fn test_orchestration_simple_stall_detection_in_full_loop() {
    // Simple tasks run through the full agent loop, which has its own stall
    // detection. The script is: one routing reply, six identical
    // `system_info` tool calls, then a sentinel text that is only meant to be
    // served if the repetition is never cut short.
    //
    // NOTE(review): the only assertion is that the reply is non-empty, so the
    // sentinel text itself would also satisfy it.
    let mut scripted = vec![
        // Call 1 — first routing pass.
        MockProvider::text_response(
            "I'll run a command for you.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[]}",
        ),
    ];
    // Calls 2-7 — the same tool call with the same arguments, six times over.
    scripted.extend((0..6).map(|_| MockProvider::tool_call_response("system_info", "{}")));
    // Call 8 — sentinel text placed after the repeated tool calls.
    scripted.push(MockProvider::text_response("Should not reach here"));

    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "test_session",
            "run a quick check",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Stall handling in the full loop is expected to end the turn gracefully.
    assert!(
        !reply.is_empty(),
        "Should return a non-empty response even on stall"
    );
}

#[tokio::test]
async fn test_orchestration_simple_uses_full_loop_with_all_tools() {
    // Simple tasks use the full agent loop with all tools available; this
    // checks that such a task completes through that loop.
    let final_text = "Diagnostics complete. All systems normal.";

    let scripted = vec![
        // Call 1 — first routing pass (INTENT_GATE classification).
        MockProvider::text_response(
            "I'll help with that.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[]}",
        ),
        // Call 2 — tool prelude forces tool use after the text-only INTENT_GATE.
        MockProvider::tool_call_response("system_info", "{}"),
        // Calls 3-5 — the same final text three times, so it survives the
        // mutation-contract nudges: "run" sets expects_mutation=true, which
        // allows up to 2 extra iterations before a text reply is accepted.
        MockProvider::text_response(final_text),
        MockProvider::text_response(final_text),
        MockProvider::text_response(final_text),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "test_session",
            "run diagnostics",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, final_text);
}

#[tokio::test]
async fn test_personal_recall_challenge_scopes_tools_and_reaffirms() {
    // A challenge about a personal fact must end with a "do not have"
    // reaffirmation, stay within four provider calls, and leave only
    // "blocked" results for the scripted `browser` tool call.
    //
    // Scripted replies are built one at a time, in the order they are served.
    let gate_reply = MockProvider::text_response(
        "Let me verify memory first.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[]}",
    );

    // A `browser` tool call that also carries narration text.
    let mut browser_call = MockProvider::tool_call_response(
        "browser",
        r#"{"action":"navigate","url":"https://example.com"}"#,
    );
    browser_call.content = Some("I'll check additional sources.".to_string());

    // A `manage_people` lookup that also carries narration text.
    let mut people_call = MockProvider::tool_call_response(
        "manage_people",
        r#"{"action":"view","person_name":"__unknown_person_for_recall_guardrail__"}"#,
    );
    people_call.content = Some("I'll re-check your stored people data.".to_string());

    let final_reply =
        MockProvider::text_response("I still do not have that information saved in memory.");

    let provider =
        MockProvider::with_responses(vec![gate_reply, browser_call, people_call, final_reply]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let reply = harness
        .agent
        .handle_message(
            "test_session",
            "Are you sure I have pets?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        reply.contains("do not have"),
        "Expected no-information reaffirmation after targeted memory re-check, got: {}",
        reply
    );
    assert!(
        harness.provider.call_count().await <= 4,
        "Challenge turn should stay bounded and not spiral"
    );

    // Any of these substrings in a tool result counts as "the call was blocked".
    let block_markers = [
        "Personal-memory recall",
        "not a real tool",
        "Unknown tool",
        "should be answered directly in plain text",
    ];

    let history = harness.state.get_history("test_session", 50).await.unwrap();
    let browser_results: Vec<_> = history
        .iter()
        .filter(|msg| msg.role == "tool" && msg.tool_name.as_deref() == Some("browser"))
        .collect();

    // At least one browser tool result must exist, and every one of them must
    // carry a block marker.
    let every_result_blocked = browser_results.iter().all(|msg| {
        msg.content
            .as_deref()
            .is_some_and(|text| block_markers.iter().any(|marker| text.contains(*marker)))
    });
    let scoped_block = !browser_results.is_empty() && every_result_blocked;

    // The browser tool call may be blocked by the personal-recall scope guard
    // OR by the text-only prelude check (for non-mutation turns); either path
    // prevents execution.
    assert!(
        scoped_block,
        "Expected out-of-scope browser tool call to be blocked for personal recall turn"
    );
}

#[tokio::test]
async fn test_personal_recall_challenge_inherits_previous_turn_context() {
    // With deterministic routing (no orchestration LLM pass) each turn makes a
    // single LLM call through the normal agent loop. Both messages are
    // classified as Simple (no action markers, no schedule) and fall through
    // to the execution loop.
    let scripted = vec![
        // Turn 1 — execution loop: direct answer.
        MockProvider::text_response("I don't have information about pets."),
        // Turn 2 — execution loop: reaffirmation.
        MockProvider::text_response("I still do not have that information saved in memory."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();
    let agent = &harness.agent;
    let session = "test_session";

    let first_reply = agent
        .handle_message(
            session,
            "What about pets?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        first_reply.contains("don't have information about pets"),
        "Expected personal-recall context, got: {}",
        first_reply
    );

    // Vague follow-up challenge in the same session.
    let second_reply = agent
        .handle_message(
            session,
            "Are you sure?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    let reaffirmed = second_reply.contains("do not have");
    let tools_unavailable = second_reply.contains("requires running tools");
    assert!(
        reaffirmed || tools_unavailable,
        "Expected no-information reaffirmation or tools-unavailable message, got: {}",
        second_reply
    );

    // No text-only pre-pass: 1 LLM call per turn x 2 turns = 2. The bound
    // below tolerates one additional call.
    let total_calls = harness.provider.call_count().await;
    assert!(
        total_calls <= 3,
        "Follow-up challenge should stay bounded and not spiral"
    );
}

#[tokio::test]
async fn test_general_reaffirmation_challenge_injects_prior_answer_anchor() {
    // After a question/answer exchange, a vague "Are you sure?" must reach the
    // provider with a message that contains the "REAFFIRMATION CHALLENGE"
    // marker together with the previous question and the previous answer.
    let session = "reaffirm_anchor_test";
    let question = "How many R's in strawberry?";
    let answer = "There are 3 R's in strawberry.";

    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(answer),
        MockProvider::text_response("Yes — strawberry has 3 R's."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();
    let agent = &harness.agent;

    let first_reply = agent
        .handle_message(
            session,
            question,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        first_reply.contains("3 R"),
        "Expected strawberry answer, got: {}",
        first_reply
    );

    // The reply to the challenge is irrelevant; only the outgoing request is inspected.
    let _challenge_reply = agent
        .handle_message(
            session,
            "Are you sure?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The last recorded provider call belongs to the challenge turn.
    let recorded_calls = harness.provider.call_log.lock().await;
    let challenge_call = recorded_calls.last().expect("challenge turn LLM call");
    let has_anchor = challenge_call
        .messages
        .iter()
        .filter_map(|message| message.get("content").and_then(|content| content.as_str()))
        .any(|text| {
            text.contains("REAFFIRMATION CHALLENGE")
                && text.contains(question)
                && text.contains(answer)
        });
    assert!(
        has_anchor,
        "Challenge turn should inject reaffirmation anchor for the immediately previous exchange: {:?}",
        challenge_call.messages
    );
}

#[tokio::test]
async fn test_compound_message_with_challenge_keyword_skips_reaffirmation_anchor() {
    // Negative counterpart of the reaffirmation-anchor test: a follow-up that
    // merely contains a challenge-like word must not carry the
    // "REAFFIRMATION CHALLENGE" marker in the provider request.
    let session = "reaffirm_anchor_negative_test";
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response("There are 3 R's in strawberry."),
        MockProvider::text_response("Here is the blog post about Ecuador."),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();
    let agent = &harness.agent;

    // Turn 1 — establishes a previous exchange.
    agent
        .handle_message(
            session,
            "How many R's in strawberry?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2 — contains the word "really" but is a new task, not a vague
    // challenge of the previous answer, so the anchor directive must NOT be
    // injected.
    agent
        .handle_message(
            session,
            "I really need you to write a blog post about Ecuador",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Inspect the most recent provider call, i.e. the one made for turn 2.
    let recorded_calls = harness.provider.call_log.lock().await;
    let second_call = recorded_calls.last().expect("second turn LLM call");
    let has_anchor = second_call
        .messages
        .iter()
        .filter_map(|message| message.get("content").and_then(|content| content.as_str()))
        .any(|text| text.contains("REAFFIRMATION CHALLENGE"));
    assert!(
        !has_anchor,
        "Compound new-task message must not be pinned to the previous exchange: {:?}",
        second_call.messages
    );
}

#[tokio::test]
async fn test_orchestration_scheduled_one_shot_creates_pending_confirmation() {
    // A one-shot schedule request ("deploy in 2 hours") must produce a
    // confirmation prompt plus exactly one finite goal in
    // "pending_confirmation" with a single one-shot schedule attached.
    //
    // The scripted reply carries an intent gate with schedule="in 2h" and
    // schedule_type="one_shot".
    let provider = MockProvider::with_responses(vec![MockProvider::text_response(
        "I'll schedule that.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"in 2h\",\"schedule_type\":\"one_shot\"}",
    )]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "deploy in 2 hours",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Include the actual reply in the failure output, as sibling tests do.
    assert!(
        response.contains("Reply **confirm**"),
        "Expected confirmation prompt for scheduled goal, got: {response}"
    );

    let goals = harness
        .state
        .get_goals_for_session("test_session")
        .await
        .unwrap();
    assert_eq!(goals.len(), 1);
    assert_eq!(goals[0].goal_type, "finite");
    assert_eq!(goals[0].status, "pending_confirmation");

    // Exactly one schedule belongs to the goal, and it is one-shot.
    let schedules = harness
        .state
        .get_schedules_for_goal(&goals[0].id)
        .await
        .unwrap();
    assert_eq!(schedules.len(), 1);
    assert!(schedules[0].is_one_shot);
}

#[tokio::test]
async fn test_orchestration_scheduled_malformed_schedule_recovers_from_user_text() {
    // E2E regression: the LLM sometimes emits schedule="2 minutes" while the
    // user said "in 2 minutes". The scheduler path should still be taken,
    // yielding a confirmation prompt and one pending goal with a one-shot
    // schedule.
    let provider = MockProvider::with_responses(vec![MockProvider::text_response(
        "Sure.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"2 minutes\",\"schedule_type\":\"one_shot\"}",
    )]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "check disk space in 2 minutes",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Include the actual reply in the failure output, as sibling tests do.
    assert!(
        response.contains("Reply **confirm**"),
        "Expected confirmation prompt for scheduled goal, got: {response}"
    );

    let goals = harness
        .state
        .get_goals_for_session("test_session")
        .await
        .unwrap();
    assert_eq!(goals.len(), 1);
    assert_eq!(goals[0].status, "pending_confirmation");

    // Exactly one schedule belongs to the goal, and it is one-shot.
    let schedules = harness
        .state
        .get_schedules_for_goal(&goals[0].id)
        .await
        .unwrap();
    assert_eq!(schedules.len(), 1);
    assert!(schedules[0].is_one_shot);
}

#[tokio::test]
async fn test_orchestration_scheduled_recurring_creates_pending_confirmation() {
    // A recurring schedule request ("monitor API health every 6h") must
    // produce a confirmation prompt plus exactly one continuous goal in
    // "pending_confirmation", with the expected per-check and daily budgets
    // and a single non-one-shot schedule.
    let provider = MockProvider::with_responses(vec![MockProvider::text_response(
        "I'll schedule recurring monitoring.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"every 6h\",\"schedule_type\":\"recurring\"}",
    )]);
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "monitor API health every 6h",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Include the actual reply in the failure output, as sibling tests do.
    assert!(
        response.contains("Reply **confirm**"),
        "Expected confirmation prompt for recurring schedule, got: {response}"
    );

    let goals = harness
        .state
        .get_goals_for_session("test_session")
        .await
        .unwrap();
    assert_eq!(goals.len(), 1);
    assert_eq!(goals[0].goal_type, "continuous");
    assert_eq!(goals[0].status, "pending_confirmation");
    // Budgets assigned to the recurring goal.
    assert_eq!(goals[0].budget_per_check, Some(100_000));
    assert_eq!(goals[0].budget_daily, Some(500_000));

    // Exactly one schedule belongs to the goal, and it is not one-shot.
    let schedules = harness
        .state
        .get_schedules_for_goal(&goals[0].id)
        .await
        .unwrap();
    assert_eq!(schedules.len(), 1);
    assert!(!schedules[0].is_one_shot);
}

#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_creates_two_pending_goals() {
    // A numbered two-segment request (one recurring, one one-shot) must yield
    // a batch confirmation prompt and two goals awaiting confirmation, each
    // with a cleaned-up description.
    let provider = MockProvider::new(); // Deterministic schedule heuristics path
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session",
            "1) every day at 9am remind me to check server health. 2) in 2 hours send status report",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Both prompt assertions print the actual reply on failure.
    assert!(
        response.contains("Reply **confirm**"),
        "Expected confirmation prompt for multi-schedule request, got: {response}"
    );
    assert!(
        response.contains("2 goals"),
        "Expected batch confirmation text, got: {response}"
    );

    let goals = harness
        .state
        .get_goals_for_session("test_session")
        .await
        .unwrap();
    assert_eq!(goals.len(), 2);
    assert!(
        goals.iter().all(|goal| goal.status == "pending_confirmation"),
        "All goals should await confirmation"
    );
    // Descriptions are matched exactly: schedule phrasing and the
    // "remind me to" prefix must be gone.
    assert!(
        goals.iter().any(|goal| goal.description == "Check server health"),
        "Expected cleaned recurring description"
    );
    assert!(
        goals.iter().any(|goal| goal.description == "Send status report"),
        "Expected cleaned one-shot description"
    );
}

#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_confirm_and_cancel() {
    // Two scenarios on one harness, each in its own session: the same
    // two-segment schedule request is confirmed in one and cancelled in the
    // other. The provider is unscripted (deterministic schedule heuristics path).
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();
    let agent = &harness.agent;
    let schedule_request =
        "1) every day at 9am check server health. 2) in 2 hours send status report";

    // --- Scenario 1: request, then "confirm" ---
    let confirm_session = "test_session_confirm";
    let _ = agent
        .handle_message(
            confirm_session,
            schedule_request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    let confirm_response = agent
        .handle_message(
            confirm_session,
            "confirm",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        confirm_response.contains("Scheduled 2 goals"),
        "Expected batch activation, got: {confirm_response}"
    );

    // Both goals of the confirmed batch must now be active.
    let confirmed = harness
        .state
        .get_goals_for_session(confirm_session)
        .await
        .unwrap();
    assert_eq!(confirmed.len(), 2);
    assert!(confirmed.iter().all(|goal| goal.status == "active"));

    // --- Scenario 2: request, then "cancel" ---
    let cancel_session = "test_session_cancel";
    let _ = agent
        .handle_message(
            cancel_session,
            schedule_request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    let cancel_response = agent
        .handle_message(
            cancel_session,
            "cancel",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        cancel_response.contains("cancelled 2 goals"),
        "Expected batch cancellation, got: {cancel_response}"
    );

    // The goals still exist for the session but are marked cancelled.
    let cancelled = harness
        .state
        .get_goals_for_session(cancel_session)
        .await
        .unwrap();
    assert_eq!(cancelled.len(), 2);
    assert!(cancelled.iter().all(|goal| goal.status == "cancelled"));
}

#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_auto_confirms_when_session_preapproved() {
    // When schedule approval is pre-set for the session, a two-segment
    // schedule request is activated immediately and no provider call is made.
    // The provider is unscripted (deterministic schedule heuristics path).
    let session = "test_session_preapproved_multi";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();
    let agent = &harness.agent;

    // Mark the session as pre-approved before sending the request.
    agent
        .set_test_schedule_approval_for_session(session, true)
        .await;

    let response = agent
        .handle_message(
            session,
            "1) every day at 9am check server health. 2) in 2 hours send status report",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        response.contains("Scheduled 2 goals"),
        "Expected auto-confirmed batch activation, got: {response}"
    );

    // Both goals skip "pending_confirmation" and are active right away.
    let session_goals = harness.state.get_goals_for_session(session).await.unwrap();
    assert_eq!(session_goals.len(), 2);
    assert!(session_goals.iter().all(|goal| goal.status == "active"));

    assert_eq!(
        harness.provider.call_count().await,
        0,
        "Auto-approved scheduling should not require LLM calls"
    );
}

#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_rejects_too_many_segments() {
    // Eleven numbered schedule segments in one message must be rejected with
    // the "up to 10 goals per message" guidance, and no goal may be created.
    // The provider is unscripted (deterministic schedule heuristics path).
    let session = "test_session_too_many_segments";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    // "1) in 1h run task 1. 2) in 2h run task 2. ..." up to segment 11.
    let segments: Vec<String> = (1..=11)
        .map(|n| format!("{n}) in {n}h run task {n}"))
        .collect();
    let request = segments.join(". ");

    let response = harness
        .agent
        .handle_message(
            session,
            &request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        response.contains("up to 10 goals per message"),
        "Expected segment-limit guidance, got: {response}"
    );

    let session_goals = harness.state.get_goals_for_session(session).await.unwrap();
    assert!(
        session_goals.is_empty(),
        "Segment-limit rejection should not create any goals"
    );
}

#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_invalid_segment_rejected_without_partial_goal_creation(
) {
    // A two-segment request whose second segment has an invalid schedule
    // ("every 0m") must be rejected as a whole: direct error reply, no goals
    // created for the valid first segment, and no provider calls.
    let provider = MockProvider::new(); // Deterministic scheduling path should reject directly
    let harness = setup_test_agent_orchestrator(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "test_session_invalid_multi",
            "1) every day at 9am check server health. 2) every 0m send status report",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        response.contains("couldn't parse one of the schedules"),
        "Expected direct invalid-segment response, got: {response}"
    );

    let goals = harness
        .state
        .get_goals_for_session("test_session_invalid_multi")
        .await
        .unwrap();
    assert!(
        goals.is_empty(),
        "Invalid multi-schedule input should not create partial goals"
    );
    // `assert_eq!` matches the sibling invalid-segment tests and reports the
    // actual call count when the check fails.
    assert_eq!(
        harness.provider.call_count().await,
        0,
        "Invalid multi-schedule input should not enter the LLM fallback loop"
    );
}

#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_invalid_first_segment_creates_no_goals() {
    // The invalid schedule ("every 0m") is the FIRST of two segments: the
    // whole request must be rejected directly, with no goals created and no
    // provider calls. The provider is unscripted because the deterministic
    // scheduling path is expected to answer.
    let session = "test_session_invalid_first_multi";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            session,
            "1) every 0m check server health. 2) in 2 hours send status report",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        response.contains("couldn't parse one of the schedules"),
        "Expected direct invalid-segment response, got: {response}"
    );

    let session_goals = harness.state.get_goals_for_session(session).await.unwrap();
    assert!(
        session_goals.is_empty(),
        "Invalid first segment should prevent all goal creation"
    );

    assert_eq!(
        harness.provider.call_count().await,
        0,
        "Invalid first segment should not trigger the LLM fallback loop"
    );
}

#[tokio::test]
async fn test_orchestration_scheduled_multi_segment_invalid_middle_segment_creates_no_goals() {
    // The invalid schedule ("every 0m") is the MIDDLE of three segments: the
    // whole request must be rejected directly, with no goals created for the
    // valid neighbours and no provider calls. The provider is unscripted
    // because the deterministic scheduling path is expected to answer.
    let session = "test_session_invalid_middle_multi";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            session,
            "1) every day at 9am check server health. 2) every 0m send status report. 3) in 3 hours check alerts",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        response.contains("couldn't parse one of the schedules"),
        "Expected direct invalid-segment response, got: {response}"
    );

    let session_goals = harness.state.get_goals_for_session(session).await.unwrap();
    assert!(
        session_goals.is_empty(),
        "Invalid middle segment should prevent all goal creation"
    );

    assert_eq!(
        harness.provider.call_count().await,
        0,
        "Invalid middle segment should not trigger the LLM fallback loop"
    );
}

#[tokio::test]
async fn test_orchestration_scheduled_single_description_uses_current_turn_task_text() {
    // Guards against description contamination: an unrelated question is sent
    // first, then a scheduled reminder. The resulting goal description must
    // come from the reminder turn only.
    let session = "test_session_description_contamination";
    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();
    let agent = &harness.agent;

    // Turn 1 — unrelated question whose text must not leak into the goal.
    let _ = agent
        .handle_message(
            session,
            "What is my daily budget?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2 — the scheduled reminder.
    let _ = agent
        .handle_message(
            session,
            "tomorrow at 11:09pm EST remind me to check logs",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let session_goals = harness.state.get_goals_for_session(session).await.unwrap();
    assert_eq!(session_goals.len(), 1);

    let description = &session_goals[0].description;
    assert_eq!(description, "Check logs");
    assert!(
        !description.contains("daily budget"),
        "Description should not include unrelated prior turn text"
    );
}

#[tokio::test]
async fn test_orchestration_schedule_confirm_activates_goal() {
    // Neither turn is expected to reach the LLM:
    //   - "deploy in 2 hours" is picked up by the deterministic schedule
    //     heuristic during routing;
    //   - "confirm" is consumed by the confirmation gate.
    // The provider therefore needs no scripted responses, and its call count
    // must still be zero at the end.
    const SESSION: &str = "test_session";

    let harness = setup_test_agent_orchestrator(MockProvider::new())
        .await
        .unwrap();

    // Turn 1: the scheduling request. Its reply is not inspected.
    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "deploy in 2 hours",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: the confirmation.
    let confirm_response = harness
        .agent
        .handle_message(
            SESSION,
            "confirm",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(confirm_response.contains("Scheduled:"));

    // Exactly one goal should exist for the session, and it should be active.
    let stored_goals = harness.state.get_goals_for_session(SESSION).await.unwrap();
    assert_eq!(stored_goals.len(), 1);
    assert_eq!(stored_goals[0].status, "active");

    let llm_calls = harness.provider.call_count().await;
    assert_eq!(
        llm_calls, 0,
        "Both schedule creation (deterministic) and confirm (gate) should work without LLM calls"
    );
}

#[tokio::test]
async fn test_orchestration_schedule_cancel_removes_goal() {
    // A scheduling request followed by "cancel" in the same session: the
    // cancel reply must mention "cancelled", and the session's single goal
    // must end up with status "cancelled".
    const SESSION: &str = "test_session";

    // One scripted reply is queued: an intent-gate payload describing a
    // one-shot "in 2h" schedule.
    let scripted = vec![MockProvider::text_response(
        "I'll schedule that.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"in 2h\",\"schedule_type\":\"one_shot\"}",
    )];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Turn 1: the scheduling request. Its reply is not inspected.
    let _ = harness
        .agent
        .handle_message(
            SESSION,
            "deploy in 2 hours",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Turn 2: the cancellation.
    let cancel_response = harness
        .agent
        .handle_message(
            SESSION,
            "cancel",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(cancel_response.contains("cancelled"));

    // The goal record is still present, but marked as cancelled.
    let stored_goals = harness.state.get_goals_for_session(SESSION).await.unwrap();
    assert_eq!(stored_goals.len(), 1);
    assert_eq!(stored_goals[0].status, "cancelled");
}

#[tokio::test]
async fn test_orchestration_targeted_cancel_text_does_not_auto_cancel_session_goal() {
    // Two active goals exist up front: one in the session under test and one
    // in a different session. The user then asks to cancel a goal by naming
    // the *other* session's goal. The test expects the scripted three-call
    // exchange to run in full and both goals to stay active.
    const SESSION: &str = "test_session";

    let scripted = vec![
        // Routing reply: cancel intent with a "targeted" scope.
        MockProvider::text_response(
            "Understood.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"cancel_intent\":true,\"cancel_scope\":\"targeted\",\"complexity\":\"simple\"}",
        ),
        // needs_tools=true blocks text-only responses, so executor must use a tool call
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Please share the goal ID to cancel that specific goal."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Goal owned by the session that sends the cancel message.
    let morning_goal = Goal::new_continuous(
        "Send me a slack message at 7:00 am EST tomorrow with a positive message",
        SESSION,
        Some(2000),
        Some(20000),
    );
    harness.state.create_goal(&morning_goal).await.unwrap();

    // Goal owned by another session; this is the one named in the message.
    let english_goal = Goal::new_continuous(
        "English Research: Researching English pronunciation/phonetics for Spanish speakers",
        "other_session",
        Some(2000),
        Some(20000),
    );
    harness.state.create_goal(&english_goal).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            SESSION,
            "cancel this goal: English Research: Researching English",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(
        response,
        "Please share the goal ID to cancel that specific goal."
    );

    // All three scripted replies must have been requested.
    let llm_calls = harness.provider.call_count().await;
    assert_eq!(
        llm_calls, 3,
        "Targeted cancel text should not trigger session-wide auto-cancel shortcut"
    );

    // Re-read both goals from state and confirm neither was cancelled.
    let morning_after = harness
        .state
        .get_goal(&morning_goal.id)
        .await
        .unwrap()
        .unwrap();
    let english_after = harness
        .state
        .get_goal(&english_goal.id)
        .await
        .unwrap()
        .unwrap();
    assert_eq!(morning_after.status, "active");
    assert_eq!(english_after.status, "active");
}

#[tokio::test]
async fn test_orchestration_schedule_new_message_cancels_pending() {
    // A scheduling request is followed by an unrelated question instead of a
    // confirmation. Afterwards the session must hold exactly one goal, and
    // that goal must be marked "cancelled".
    const SESSION: &str = "test_session";

    let scripted = vec![
        // Intent-gate payload describing a one-shot "in 2h" schedule.
        MockProvider::text_response(
            "I'll schedule that.\n[INTENT_GATE] {\"can_answer_now\":false,\"needs_tools\":true,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"schedule\":\"in 2h\",\"schedule_type\":\"one_shot\"}",
        ),
        // Intent-gate payload for a knowledge question answerable immediately.
        MockProvider::text_response(
            "[INTENT_GATE] {\"can_answer_now\":true,\"needs_tools\":false,\"needs_clarification\":false,\"clarifying_question\":\"\",\"missing_info\":[],\"complexity\":\"knowledge\"}",
        ),
        MockProvider::text_response("Rust is a systems programming language."),
    ];
    let harness = setup_test_agent_orchestrator(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    // Send both turns in order; the replies are not inspected.
    for text in ["deploy in 2 hours", "what is rust?"] {
        let _ = harness
            .agent
            .handle_message(
                SESSION,
                text,
                None,
                UserRole::Owner,
                ChannelContext::private("test"),
                None,
            )
            .await
            .unwrap();
    }

    let stored_goals = harness.state.get_goals_for_session(SESSION).await.unwrap();
    assert_eq!(stored_goals.len(), 1);
    assert_eq!(stored_goals[0].status, "cancelled");
}

#[tokio::test]
async fn test_zero_tool_fabricated_mutation_claim_is_blocked() {
    // Regression test for the 2026-06-06 attribution-run turn-10 bug, where
    // the model answered "I have deleted the folder" without issuing any tool
    // call and the completion phase accepted that answer. For a zero-tool task
    // with a mutation contract, a past-tense side-effect claim has to be
    // handled like a deferred action: the model is nudged with a hard tool
    // requirement and the claim is never returned as the final answer.
    const SESSION: &str = "test_session";
    let fabrication = "I have deleted the folder /tmp/fab-test entirely.";

    // The fabricated claim is scripted twice, followed by an honest reply.
    let scripted = vec![
        MockProvider::text_response(fabrication),
        MockProvider::text_response(fabrication),
        MockProvider::text_response(
            "I could not verify the deletion because no command was run.",
        ),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(scripted))
        .await
        .unwrap();

    let response = harness
        .agent
        .handle_message(
            SESSION,
            "Delete the folder /tmp/fab-test entirely.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // 1) The fabricated claim must not be what the user receives.
    assert!(
        !response.contains("I have deleted"),
        "fabricated zero-tool mutation claim was accepted as completion: {response}"
    );

    // 2) The loop must have gone beyond the first reply, and some request
    //    after the first must contain the hard tool-call directive.
    let calls = harness.provider.call_log.lock().await;
    assert!(
        calls.len() >= 2,
        "completion was accepted on the first iteration (calls={})",
        calls.len()
    );
    let nudged = calls
        .iter()
        .skip(1)
        .flat_map(|call| call.messages.iter())
        .any(|message| {
            message["content"]
                .as_str()
                .is_some_and(|text| text.contains("MUST include at least one tool call"))
        });
    assert!(
        nudged,
        "DeferredToolCallRequired directive was not injected after the fabricated claim"
    );

    // 3) A decision-point event describing the blocked claim must be among
    //    the session's recent events.
    let event_store = crate::events::EventStore::new(harness.state.pool())
        .await
        .expect("event store from harness pool");
    let events = event_store
        .query_recent_events(SESSION, 200)
        .await
        .expect("recent events");
    let saw_mutation_gate_warning = events.iter().any(|event| {
        // Events whose payload is not decision-point data are ignored.
        let Ok(data) = event.parse_data::<crate::events::DecisionPointData>() else {
            return false;
        };
        if data.decision_type != crate::events::DecisionType::PostExecutionValidation {
            return false;
        }

        let metadata = &data.metadata;
        let condition = metadata
            .get("condition")
            .and_then(serde_json::Value::as_str);
        let claimed_mutation = metadata
            .get("assistant_claimed_mutation")
            .and_then(serde_json::Value::as_bool);
        let mutation_tool_calls = metadata
            .get("mutation_tool_calls_count")
            .and_then(serde_json::Value::as_u64);
        let outcome = metadata.get("outcome").and_then(serde_json::Value::as_str);

        condition == Some("expects_mutation_gate_evaluated")
            && claimed_mutation == Some(true)
            && mutation_tool_calls == Some(0)
            && outcome == Some("blocked_claimed_mutation_without_tool")
    });
    assert!(
        saw_mutation_gate_warning,
        "mutation gate did not emit explicit warning telemetry for fabricated zero-tool mutation claim"
    );
}

#[tokio::test]
async fn test_zero_tool_fabricated_delegation_claim_is_blocked() {
    // The first scripted reply claims that a specialist review agent was
    // started, although no tool call was made. The test expects that claim to
    // be rejected as the final answer, the provider to be called again, and a
    // later request to carry the "MUST include at least one tool call"
    // directive.
    let fabrication =
        "I've initiated a deep analysis using a specialized review agent. I'll return shortly.";
    let provider = MockProvider::with_responses(vec![
        MockProvider::text_response(fabrication),
        MockProvider::text_response(
            "I could not start a specialist agent, so no delegated review is running.",
        ),
    ]);
    let harness = setup_test_agent(provider).await.unwrap();

    let response = harness
        .agent
        .handle_message(
            "fabricated_delegation",
            "Analyze that resume. Any flaws?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // The fabricated delegation claim must not survive as the final answer.
    assert!(
        !response.contains("I've initiated"),
        "fabricated zero-tool delegation claim was accepted: {response}"
    );

    // The loop must have continued past the first reply...
    let calls = harness.provider.call_log.lock().await;
    assert!(
        calls.len() >= 2,
        "completion was accepted on the first iteration (calls={})",
        calls.len()
    );

    // ...and some request after the first must contain the tool-call directive.
    let nudged = calls.iter().skip(1).any(|call| {
        call.messages.iter().any(|message| {
            message["content"]
                .as_str()
                .is_some_and(|text| text.contains("MUST include at least one tool call"))
        })
    });
    assert!(
        nudged,
        "no follow-up provider call carried the hard tool-call requirement directive \
         after the fabricated delegation claim"
    );
}