// aidaemon 0.11.3

use crate::agent::policy_metrics_snapshot;
use crate::testing::{
    setup_full_stack_test_agent_with_extra_tools, setup_test_agent,
    setup_test_agent_root_with_extra_tools_and_llm_timeout, setup_test_agent_with_models,
    MockProvider, MockTool,
};
use crate::traits::{
    ChatOptions, ProviderResponse, ResponseMode, TokenUsage, Tool, ToolCall, ToolCallMetadata,
    ToolCallOutcome, ToolCallSemantics, ToolChoiceMode, ToolTargetHintKind, ToolVerificationMode,
};
use crate::types::{ChannelContext, StatusUpdate, UserRole};
use async_trait::async_trait;
use serde_json::{json, Value};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use tokio::sync::Mutex;

/// Runs one direct-return turn and one fallthrough turn, then checks that the
/// `response_direct_return_total` and `response_fallthrough_total` counters
/// reported by `policy_metrics_snapshot` each advanced by at least one.
#[tokio::test]
async fn response_metrics_capture_direct_return_and_fallthrough_paths() {
    let baseline = policy_metrics_snapshot();

    // Direct-return case: the provider is scripted with no responses, and the
    // test asserts it is never called (deterministic schedule routing answers
    // before the first LLM call).
    let scheduling = setup_test_agent_with_models(
        MockProvider::with_responses(vec![]),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();
    let direct_reply = scheduling
        .agent
        .handle_message(
            "metrics_direct",
            "Check deployment tomorrow at 9am",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert!(
        direct_reply.contains("Reply **confirm** to proceed"),
        "expected schedule confirmation direct-return, got: {direct_reply}"
    );
    let direct_llm_calls = scheduling.provider.call_count().await;
    assert_eq!(
        direct_llm_calls, 0,
        "expected deterministic pre-routing to avoid first LLM call"
    );

    // Fallthrough case: the deterministic simple route continues into the full
    // tool loop, consuming one tool call and one closing text response.
    let tool_loop_script = vec![
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("System inspected."),
    ];
    let tool_loop = setup_test_agent_with_models(
        MockProvider::with_responses(tool_loop_script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();
    let tool_loop_reply = tool_loop
        .agent
        .handle_message(
            "metrics_fallthrough",
            "Check my system status",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();
    assert_eq!(tool_loop_reply, "System inspected.");

    // Compare against the baseline using deltas with a lower bound rather than
    // exact counter values.
    let current = policy_metrics_snapshot();
    let direct_delta = current
        .response_direct_return_total
        .saturating_sub(baseline.response_direct_return_total);
    assert!(
        direct_delta >= 1,
        "expected response_direct_return_total to increase by at least 1; before={} after={}",
        baseline.response_direct_return_total,
        current.response_direct_return_total
    );

    let fallthrough_delta = current
        .response_fallthrough_total
        .saturating_sub(baseline.response_fallthrough_total);
    assert!(
        fallthrough_delta >= 1,
        "expected response_fallthrough_total to increase by at least 1; before={} after={}",
        baseline.response_fallthrough_total,
        current.response_fallthrough_total
    );
}

/// Checks that a turn which first calls an unknown tool and then keeps
/// repeating the same valid tool call moves the `tokens_failed_tasks_total`
/// and `no_progress_iterations_total` counters.
///
/// Currently ignored; the `#[ignore]` reason below explains why.
#[tokio::test]
#[ignore = "tokens_failed_tasks_total / no_progress_iterations_total not yet wired to agent loop"]
async fn failed_task_and_no_progress_metrics_are_observable() {
    let baseline = policy_metrics_snapshot();

    // Iteration 1: unknown tool call (blocked) => no successful tools => no-progress increment.
    // Iterations 2..: repeated valid tool call => repetitive-loop failure path.
    let mut script = vec![MockProvider::tool_call_response("no_such_tool", "{}")];
    script.extend((0..8).map(|_| MockProvider::tool_call_response("system_info", "{}")));

    let harness = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();
    // The reply text is discarded; only the metric counters are inspected.
    let _ = harness
        .agent
        .handle_message(
            "metrics_failure_no_progress",
            "Run system checks repeatedly",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let current = policy_metrics_snapshot();

    let failed_tokens_delta = current
        .tokens_failed_tasks_total
        .saturating_sub(baseline.tokens_failed_tasks_total);
    assert!(
        failed_tokens_delta > 0,
        "expected tokens_failed_tasks_total to increase; before={} after={}",
        baseline.tokens_failed_tasks_total,
        current.tokens_failed_tasks_total
    );

    let no_progress_delta = current
        .no_progress_iterations_total
        .saturating_sub(baseline.no_progress_iterations_total);
    assert!(
        no_progress_delta >= 1,
        "expected no_progress_iterations_total to increase by at least 1; before={} after={}",
        baseline.no_progress_iterations_total,
        current.no_progress_iterations_total
    );
}

/// Stand-in for the `search_files` tool that records the raw argument string of
/// every call and always reports that nothing matched.
struct RecordingSearchFilesTool {
    /// Argument strings exactly as received, in call order.
    calls: Arc<Mutex<Vec<String>>>,
}

#[async_trait]
impl Tool for RecordingSearchFilesTool {
    fn name(&self) -> &str {
        "search_files"
    }

    fn description(&self) -> &str {
        "Mock search_files tool for regression testing"
    }

    /// Tool schema: optional string `glob` and `path`, extra properties allowed.
    fn schema(&self) -> Value {
        let parameters = json!({
            "type": "object",
            "properties": {
                "glob": {"type": "string"},
                "path": {"type": "string"}
            },
            "additionalProperties": true
        });
        json!({
            "name": "search_files",
            "description": "Mock search",
            "parameters": parameters
        })
    }

    /// Records `arguments`, then returns a fixed "no matches" message naming
    /// the `path` argument (or `.` when it is absent or the JSON is invalid).
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        {
            let mut recorded = self.calls.lock().await;
            recorded.push(arguments.to_string());
        }
        let parsed: Value = match serde_json::from_str(arguments) {
            Ok(value) => value,
            Err(_) => json!({}),
        };
        let searched = parsed
            .get("path")
            .and_then(|p| p.as_str())
            .unwrap_or(".");
        Ok(format!("No matches found (0 files scanned in {})", searched))
    }
}

/// Stand-in for the `project_inspect` tool that records the raw argument string
/// of every call and returns a canned project summary.
struct RecordingProjectInspectTool {
    /// Argument strings exactly as received, in call order.
    calls: Arc<Mutex<Vec<String>>>,
}

#[async_trait]
impl Tool for RecordingProjectInspectTool {
    fn name(&self) -> &str {
        "project_inspect"
    }

    fn description(&self) -> &str {
        "Recording project_inspect tool for regression testing"
    }

    /// Tool schema: optional string `path` and string array `paths`, extra
    /// properties allowed.
    fn schema(&self) -> Value {
        let parameters = json!({
            "type": "object",
            "properties": {
                "path": {"type": "string"},
                "paths": {"type": "array", "items": {"type": "string"}}
            },
            "additionalProperties": true
        });
        json!({
            "name": "project_inspect",
            "description": "Record project_inspect args",
            "parameters": parameters
        })
    }

    /// Records `arguments`, then renders a fixed two-file structure for the
    /// primary directory: `path` if present, else the first entry of `paths`,
    /// else `.`.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        {
            let mut recorded = self.calls.lock().await;
            recorded.push(arguments.to_string());
        }
        let parsed: Value = match serde_json::from_str(arguments) {
            Ok(value) => value,
            Err(_) => json!({}),
        };
        let primary = match parsed.get("path").and_then(|p| p.as_str()) {
            Some(explicit) => explicit,
            None => parsed
                .get("paths")
                .and_then(|list| list.as_array())
                .and_then(|list| list.first())
                .and_then(|head| head.as_str())
                .unwrap_or("."),
        };
        let report = format!(
            "# Project: {}\n\n## Structure\n```\nindex.html\nstyles.css\n```\n",
            primary
        );
        Ok(report)
    }
}

/// Stateless stand-in for the `project_inspect` tool that returns a canned
/// project summary. Only the `path` argument is consulted; `paths` is accepted
/// by the schema but not read.
struct MockProjectInspectTool;

#[async_trait]
impl Tool for MockProjectInspectTool {
    fn name(&self) -> &str {
        "project_inspect"
    }

    fn description(&self) -> &str {
        "Mock project_inspect tool for regression testing"
    }

    /// Tool schema: optional string `path` and string array `paths`, extra
    /// properties allowed.
    fn schema(&self) -> Value {
        let parameters = json!({
            "type": "object",
            "properties": {
                "path": {"type": "string"},
                "paths": {"type": "array", "items": {"type": "string"}}
            },
            "additionalProperties": true
        });
        json!({
            "name": "project_inspect",
            "description": "Mock inspect",
            "parameters": parameters
        })
    }

    /// Renders a fixed two-file structure for `path` (or `.` when `path` is
    /// absent or the JSON is invalid).
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        let parsed: Value = match serde_json::from_str(arguments) {
            Ok(value) => value,
            Err(_) => json!({}),
        };
        let inspected = parsed
            .get("path")
            .and_then(|p| p.as_str())
            .unwrap_or(".");
        let report = format!(
            "# Project: {}\n\n## Structure\n```\nindex.html\nstyles.css\n```\n",
            inspected
        );
        Ok(report)
    }
}

/// Stand-in for the `send_file` tool that counts how many times it is actually
/// executed and always reports success.
struct CountingSendFileTool {
    /// Number of completed `call` invocations.
    calls: Arc<AtomicUsize>,
}

#[async_trait]
impl Tool for CountingSendFileTool {
    fn name(&self) -> &str {
        "send_file"
    }

    fn description(&self) -> &str {
        "Mock send_file tool for force-text characterization"
    }

    /// Tool schema: required string `file_path`, optional string `caption`,
    /// no additional properties.
    fn schema(&self) -> Value {
        let parameters = json!({
            "type": "object",
            "properties": {
                "file_path": {"type": "string"},
                "caption": {"type": "string"}
            },
            "required": ["file_path"],
            "additionalProperties": false
        });
        json!({
            "name": "send_file",
            "description": "Mock send file",
            "parameters": parameters
        })
    }

    /// Bumps the counter and returns a fixed success message; the arguments are
    /// not inspected.
    async fn call(&self, _arguments: &str) -> anyhow::Result<String> {
        self.calls.fetch_add(1, Ordering::SeqCst);
        let confirmation = "File sent successfully.".to_string();
        Ok(confirmation)
    }
}

/// Stand-in tool named `background_task` whose status-aware entry point reports
/// that work was started in the background and detached.
struct BackgroundDetachTool;

#[async_trait]
impl Tool for BackgroundDetachTool {
    fn name(&self) -> &str {
        "background_task"
    }

    fn description(&self) -> &str {
        "Mock tool that detaches work to the background"
    }

    /// Tool schema: optional string `job`, no additional properties.
    fn schema(&self) -> Value {
        let parameters = json!({
            "type": "object",
            "properties": {
                "job": {"type": "string"}
            },
            "additionalProperties": false
        });
        json!({
            "name": "background_task",
            "description": "Mock background detach",
            "parameters": parameters
        })
    }

    /// Plain entry point: returns the fixed acknowledgement text.
    async fn call(&self, _arguments: &str) -> anyhow::Result<String> {
        let acknowledgement = "Background job started.".to_string();
        Ok(acknowledgement)
    }

    /// Status-aware entry point: returns the same acknowledgement text together
    /// with metadata flagging the call as background-started, detached, and
    /// with completion notifications enabled. Neither input is used.
    async fn call_with_status_outcome(
        &self,
        arguments: &str,
        status_tx: Option<tokio::sync::mpsc::Sender<StatusUpdate>>,
    ) -> anyhow::Result<ToolCallOutcome> {
        // The canned outcome does not depend on either input; the sender is
        // released straight away.
        let _ = arguments;
        drop(status_tx);

        let metadata = ToolCallMetadata {
            background_started: true,
            detached: true,
            completion_notifications_enabled: true,
            ..ToolCallMetadata::default()
        };
        Ok(ToolCallOutcome {
            output: "Background job started.".to_string(),
            metadata,
        })
    }
}

/// Stand-in tool named `update_remote` that claims to update a URL and declares
/// itself a mutation targeting that URL.
struct MockRemoteMutationTool;

#[async_trait]
impl Tool for MockRemoteMutationTool {
    fn name(&self) -> &str {
        "update_remote"
    }

    fn description(&self) -> &str {
        "Mock tool that updates a remote URL"
    }

    /// Tool schema: required string `url`, no additional properties.
    fn schema(&self) -> Value {
        let parameters = json!({
            "type": "object",
            "properties": {
                "url": {"type": "string"}
            },
            "required": ["url"],
            "additionalProperties": false
        });
        json!({
            "name": "update_remote",
            "description": "Mock remote update",
            "parameters": parameters
        })
    }

    /// Returns `Updated <url>`, falling back to a fixed example URL when `url`
    /// is absent or the JSON is invalid.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        let parsed: Value = match serde_json::from_str(arguments) {
            Ok(value) => value,
            Err(_) => json!({}),
        };
        let target = parsed
            .get("url")
            .and_then(|u| u.as_str())
            .unwrap_or("https://example.com/status");
        Ok(format!("Updated {}", target))
    }

    /// Describes the call as a mutation with a URL target hint, using the same
    /// URL fallback as `call`.
    fn call_semantics(&self, arguments: &str) -> ToolCallSemantics {
        let parsed: Value = match serde_json::from_str(arguments) {
            Ok(value) => value,
            Err(_) => json!({}),
        };
        let target = parsed
            .get("url")
            .and_then(|u| u.as_str())
            .unwrap_or("https://example.com/status");
        ToolCallSemantics::mutation().with_target_hint(ToolTargetHintKind::Url, target)
    }
}

/// Stand-in tool named `check_remote` that claims to verify a URL and declares
/// itself an observation of that URL.
struct MockRemoteObservationTool;

#[async_trait]
impl Tool for MockRemoteObservationTool {
    fn name(&self) -> &str {
        "check_remote"
    }

    fn description(&self) -> &str {
        "Mock tool that checks a remote URL"
    }

    /// Tool schema: required string `url`, no additional properties.
    fn schema(&self) -> Value {
        let parameters = json!({
            "type": "object",
            "properties": {
                "url": {"type": "string"}
            },
            "required": ["url"],
            "additionalProperties": false
        });
        json!({
            "name": "check_remote",
            "description": "Mock remote check",
            "parameters": parameters
        })
    }

    /// Returns a fixed "verified" sentence naming the URL, falling back to a
    /// fixed example URL when `url` is absent or the JSON is invalid.
    async fn call(&self, arguments: &str) -> anyhow::Result<String> {
        let parsed: Value = match serde_json::from_str(arguments) {
            Ok(value) => value,
            Err(_) => json!({}),
        };
        let target = parsed
            .get("url")
            .and_then(|u| u.as_str())
            .unwrap_or("https://example.com/status");
        Ok(format!("Verified {} shows the updated status.", target))
    }

    /// Describes the call as an observation using `ResultContent` verification
    /// mode and a URL target hint, with the same URL fallback as `call`.
    fn call_semantics(&self, arguments: &str) -> ToolCallSemantics {
        let parsed: Value = match serde_json::from_str(arguments) {
            Ok(value) => value,
            Err(_) => json!({}),
        };
        let target = parsed
            .get("url")
            .and_then(|u| u.as_str())
            .unwrap_or("https://example.com/status");
        ToolCallSemantics::observation()
            .with_verification_mode(ToolVerificationMode::ResultContent)
            .with_target_hint(ToolTargetHintKind::Url, target)
    }
}

/// Characterizes the closeout after the model issues the same `send_file` call
/// three times: the tool must execute only once, the reply must be the scripted
/// final text, and the last provider call must still carry tool definitions
/// while setting `tool_choice` to `None`.
#[tokio::test]
async fn force_text_characterization_strips_tools_after_duplicate_send_file() {
    let send_file_args =
        r#"{"file_path":"/tmp/aidaemon-characterization.pdf","caption":"Characterization"}"#;

    // Three identical tool calls followed by the closing text.
    let mut script: Vec<_> = (0..3)
        .map(|_| MockProvider::tool_call_response("send_file", send_file_args))
        .collect();
    script.push(MockProvider::text_response("Done. I already sent the file."));

    let deliveries = Arc::new(AtomicUsize::new(0));
    let counting_tool: Arc<dyn Tool> = Arc::new(CountingSendFileTool {
        calls: deliveries.clone(),
    });

    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(script),
        vec![counting_tool],
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "force_text_characterization",
            "Send me the characterization PDF",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "Done. I already sent the file.");
    let executed = deliveries.load(Ordering::SeqCst);
    assert_eq!(
        executed, 1,
        "duplicate send_file calls should be suppressed before force-text closeout"
    );

    let call_log = harness.provider.call_log.lock().await.clone();
    let closeout_keeps_tools_but_disables_calls = match call_log.last() {
        Some(closeout) => {
            closeout.options.tool_choice == crate::traits::ToolChoiceMode::None
                && !closeout.tools.is_empty()
        }
        None => false,
    };
    assert!(
        closeout_keeps_tools_but_disables_calls,
        "force-text closeout retains tool defs (prompt-prefix stability) and \
         disables calling via tool_choice=none: {:?}",
        call_log.last().map(|call| &call.options.tool_choice)
    );
}

/// Characterizes the verification guard. The script is: mutation tool call,
/// premature final text, observation tool call, final text. The test asserts
/// that all four scripted responses are consumed, that the reply is the last
/// one, and that some provider call carried a system message mentioning
/// verification.
#[tokio::test]
async fn verification_characterization_blocks_completion_until_matching_observation() {
    let url = "https://example.com/status";
    let url_args = json!({"url": url}).to_string();

    let script = vec![
        MockProvider::tool_call_response("update_remote", &url_args),
        MockProvider::text_response("Updated it."),
        MockProvider::tool_call_response("check_remote", &url_args),
        MockProvider::text_response("Updated and verified the status page."),
    ];
    let extra_tools = vec![
        Arc::new(MockRemoteMutationTool) as Arc<dyn Tool>,
        Arc::new(MockRemoteObservationTool) as Arc<dyn Tool>,
    ];

    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(script),
        extra_tools,
    )
    .await
    .unwrap();

    let request = format!("Update {} and verify it.", url);
    let reply = harness
        .agent
        .handle_message(
            "verification_characterization",
            &request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "Updated and verified the status page.");
    let provider_calls = harness.provider.call_count().await;
    assert_eq!(
        provider_calls, 4,
        "the premature final text should be blocked so the verification tool can run"
    );

    let call_log = harness.provider.call_log.lock().await.clone();
    let directive_seen = call_log
        .iter()
        .flat_map(|call| call.messages.iter())
        .any(|msg| {
            let from_system = msg.get("role").and_then(|r| r.as_str()) == Some("system");
            from_system
                && msg
                    .get("content")
                    .and_then(|c| c.as_str())
                    .is_some_and(|text| {
                        // Any text containing the first phrase also contains
                        // "verification", so the second check is the
                        // effective one.
                        text.contains("final verification step")
                            || text.contains("verification")
                    })
        });
    assert!(
        directive_seen,
        "verification guard should inject a verification-required system directive"
    );
}

/// Characterizes stall handling: the model is scripted to call an unknown tool
/// seven times (each with accompanying text) before a final text. The test
/// asserts that the final scripted text never becomes the reply and that fewer
/// than eight provider calls are made.
#[tokio::test]
async fn stall_characterization_stops_repeated_unknown_tool_before_final_text() {
    let mut script: Vec<_> = (1..=7)
        .map(|attempt| {
            let mut retry = MockProvider::tool_call_response("unknown_stall_tool", "{}");
            retry.content = Some(format!("I'll retry the same tool, attempt {}.", attempt));
            retry
        })
        .collect();
    script.push(MockProvider::text_response("This should not be reached."));

    let harness = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();
    let reply = harness
        .agent
        .handle_message(
            "stall_characterization",
            "Use the unavailable stall tool",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    let reached_final_text = reply.contains("This should not be reached");
    assert!(
        !reached_final_text,
        "stall detection should stop repeated unknown-tool attempts before the scripted final text"
    );

    let provider_calls = harness.provider.call_count().await;
    assert!(
        provider_calls < 8,
        "stall detection should stop early; provider calls: {}",
        provider_calls
    );
}

/// Characterizes truncation recovery: the first scripted response is a long
/// run of words with no closing punctuation, the second is the rest of the
/// sentence. The test asserts that the reply is the exact concatenation, that
/// exactly two provider calls happen, and that the second call carries the
/// truncation-recovery system directive.
#[tokio::test]
async fn truncation_characterization_reassembles_mid_sentence_text_continuation() {
    // 205 space-separated words plus a trailing space.
    let mut prefix = vec!["partial"; 205].join(" ");
    prefix.push(' ');
    let continuation = "and the final sentence is complete.";
    let expected_reply = [prefix.as_str(), continuation].concat();

    let script = vec![
        MockProvider::text_response(&prefix),
        MockProvider::text_response(continuation),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "truncation_characterization",
            "Draft a long status update",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, expected_reply);
    let provider_calls = harness.provider.call_count().await;
    assert_eq!(
        provider_calls, 2,
        "truncated first response should trigger exactly one continuation pass"
    );

    let call_log = harness.provider.call_log.lock().await.clone();
    let recovery_directive_present = match call_log.last() {
        Some(continuation_call) => continuation_call.messages.iter().any(|msg| {
            let from_system = msg.get("role").and_then(|r| r.as_str()) == Some("system");
            from_system
                && msg
                    .get("content")
                    .and_then(|c| c.as_str())
                    .is_some_and(|text| {
                        text.contains("previous text response was cut off mid-sentence")
                            && text.contains("Continue your response")
                    })
        }),
        None => false,
    };
    assert!(
        recovery_directive_present,
        "continuation pass should include the truncation recovery directive"
    );
}

/// Characterizes truncation recovery when the first response stops mid-word
/// ("the ch") and the short continuation ends with a phrase that also opens the
/// first response. The reply must be the plain concatenation of both parts.
#[tokio::test]
async fn truncation_characterization_keeps_prefix_when_short_tail_repeats_earlier_phrase() {
    let filler = vec!["detail"; 205].join(" ");
    let prefix = format!(
        "Which company or role are you targeting? {} The AI Expert resume is the ch",
        filler
    );
    let continuation = "osen one even stronger. Which company or role?";
    let expected_reply = [prefix.as_str(), continuation].concat();

    let script = vec![
        MockProvider::text_response(&prefix),
        MockProvider::text_response(continuation),
    ];
    let harness = setup_test_agent(MockProvider::with_responses(script))
        .await
        .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "truncation_short_overlapping_tail",
            "Which resume should I send?",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, expected_reply);
}

/// Characterizes what happens after a tool reports that it detached work to the
/// background. The test asserts exactly one more provider call, made with tool
/// definitions present but `tool_choice` set to `None`, carrying a system
/// message about the running background task.
///
/// The reply is asserted to equal the second scripted text, even though that
/// text is worded as "should be ignored".
#[tokio::test]
async fn background_ack_characterization_forces_text_with_handoff_directive() {
    let script = vec![
        MockProvider::tool_call_response("background_task", r#"{"job":"long-running"}"#),
        MockProvider::text_response("This model text should be ignored."),
    ];
    let detach_tool: Arc<dyn Tool> = Arc::new(BackgroundDetachTool);

    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(script),
        vec![detach_tool],
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "background_ack_characterization",
            "Start a long running background job",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "This model text should be ignored.");

    let call_log = harness.provider.call_log.lock().await.clone();
    assert_eq!(
        call_log.len(),
        2,
        "background detach currently runs one forced text summary pass after the tool call"
    );

    let forced_pass_keeps_tools_but_disables_calls = match call_log.last() {
        Some(forced_pass) => {
            forced_pass.options.tool_choice == crate::traits::ToolChoiceMode::None
                && !forced_pass.tools.is_empty()
        }
        None => false,
    };
    assert!(
        forced_pass_keeps_tools_but_disables_calls,
        "background detach retains tool defs (prompt-prefix stability) and \
         disables calling via tool_choice=none on the forced text pass"
    );

    let handoff_directive_present = match call_log.last() {
        Some(forced_pass) => forced_pass.messages.iter().any(|msg| {
            let from_system = msg.get("role").and_then(|r| r.as_str()) == Some("system");
            from_system
                && msg
                    .get("content")
                    .and_then(|c| c.as_str())
                    .is_some_and(|text| {
                        text.contains("A background task is now running")
                            && text.contains("completion notifications are enabled")
                    })
        }),
        None => false,
    };
    assert!(
        handoff_directive_present,
        "background detach should carry a handoff directive into the forced text pass"
    );
}

/// Regression test for contradictory file evidence: `search_files` reports no
/// matches while `project_inspect` lists an `index.html`. The scripted "no HTML
/// files" answer is followed by another `search_files` call and a final text.
/// The test asserts that all five scripted responses are consumed, that the
/// first recorded search already carries the project path, and that a
/// "Contradictory file evidence" system message reached the provider.
#[tokio::test]
async fn contradictory_file_evidence_forces_recheck_before_final_answer() {
    let project_dir = tempfile::tempdir().unwrap();
    let project_dir_str = project_dir.path().to_string_lossy().to_string();
    let search_calls: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));

    // The first search omits `path`; the scripted re-check supplies it explicitly.
    let pathless_search = json!({"glob":"*.html"}).to_string();
    let inspect_args = json!({"path": project_dir_str}).to_string();
    let explicit_search = json!({"glob":"*.html", "path": project_dir_str}).to_string();

    let script = vec![
        MockProvider::tool_call_response("search_files", &pathless_search),
        MockProvider::tool_call_response("project_inspect", &inspect_args),
        MockProvider::text_response("I couldn't find any HTML files."),
        MockProvider::tool_call_response("search_files", &explicit_search),
        MockProvider::text_response(
            "After re-checking with an explicit path, I still have no HTML matches.",
        ),
    ];

    let recording_search: Arc<dyn Tool> = Arc::new(RecordingSearchFilesTool {
        calls: search_calls.clone(),
    });
    let inspect_tool: Arc<dyn Tool> = Arc::new(MockProjectInspectTool);

    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(script),
        vec![recording_search, inspect_tool],
    )
    .await
    .unwrap();

    let request = format!("Find HTML files under {}", project_dir_str);
    let reply = harness
        .agent
        .handle_message(
            "contradictory_file_recheck",
            &request,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(
        reply,
        "After re-checking with an explicit path, I still have no HTML matches."
    );
    assert_eq!(harness.provider.call_count().await, 5);

    let recorded = search_calls.lock().await.clone();
    assert_eq!(recorded.len(), 2, "expected initial search + forced re-check");
    let first_search = &recorded[0];
    let path_was_injected =
        first_search.contains("\"path\"") && first_search.contains(&project_dir_str);
    assert!(
        path_was_injected,
        "expected first search_files call to receive injected project path, got: {}",
        first_search
    );

    let call_log = harness.provider.call_log.lock().await.clone();
    let contradiction_nudge_seen = call_log
        .iter()
        .flat_map(|entry| entry.messages.iter())
        .any(|msg| {
            let from_system = msg.get("role").and_then(|r| r.as_str()) == Some("system");
            from_system
                && msg
                    .get("content")
                    .and_then(|c| c.as_str())
                    .is_some_and(|text| text.contains("Contradictory file evidence was detected"))
        });
    assert!(
        contradiction_nudge_seen,
        "expected contradiction re-check system nudge in provider context"
    );
}

/// Regression test: one provider response carries 20 `project_inspect` calls,
/// each with a distinct path, followed by a closing text. The test passes when
/// the turn ends with that closing text.
// NOTE(review): per the test name, some of the burst is presumably budget-blocked
// and must not be mistaken for a consecutive same-tool loop — confirm against
// the agent loop.
#[tokio::test]
async fn budget_blocked_same_tool_calls_do_not_trigger_false_consecutive_loop_stop() {
    let mut burst_calls: Vec<ToolCall> = Vec::with_capacity(20);
    for idx in 0..20 {
        let arguments = json!({"path": format!("/tmp/project_{}", idx)}).to_string();
        burst_calls.push(ToolCall {
            id: format!("call_{}", idx),
            name: "project_inspect".to_string(),
            arguments,
            extra_content: None,
        });
    }

    let usage = TokenUsage {
        input_tokens: 10,
        output_tokens: 10,
        cached_input_tokens: None,
        cache_creation_input_tokens: None,
        model: "mock".to_string(),
    };
    let burst_response = ProviderResponse {
        content: None,
        tool_calls: burst_calls,
        usage: Some(usage),
        thinking: None,
        response_note: None,
    };

    let script = vec![
        burst_response,
        MockProvider::text_response("Summarized project status."),
    ];
    let inspect_tool: Arc<dyn Tool> = Arc::new(MockProjectInspectTool);

    let harness = setup_full_stack_test_agent_with_extra_tools(
        MockProvider::with_responses(script),
        vec![inspect_tool],
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "budget_vs_loop_ordering",
            "Inspect all these project folders and summarize",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "Summarized project status.");
}

#[tokio::test]
#[ignore = "project directory scope constraints not yet fully wired"]
async fn mixed_project_inspect_path_and_paths_preserves_primary_path_for_follow_up_tools() {
    let primary_dir = tempfile::tempdir().unwrap();
    let secondary_dir = tempfile::tempdir().unwrap();
    let primary_dir_str = primary_dir.path().to_string_lossy().to_string();
    let secondary_dir_str = secondary_dir.path().to_string_lossy().to_string();

    let search_calls: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
    let inspect_calls: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));

    let provider = MockProvider::with_responses(vec![
        MockProvider::tool_call_response(
            "project_inspect",
            &json!({
                "path": primary_dir_str,
                "paths": [primary_dir_str, secondary_dir_str]
            })
            .to_string(),
        ),
        MockProvider::tool_call_response("search_files", &json!({"glob":"*.html"}).to_string()),
        MockProvider::tool_call_response(
            "search_files",
            &json!({"glob":"*.html", "path": primary_dir.path().to_string_lossy()}).to_string(),
        ),
        MockProvider::text_response("Inspection complete."),
    ]);

    let harness = setup_full_stack_test_agent_with_extra_tools(
        provider,
        vec![
            Arc::new(RecordingSearchFilesTool {
                calls: search_calls.clone(),
            }) as Arc<dyn Tool>,
            Arc::new(RecordingProjectInspectTool {
                calls: inspect_calls.clone(),
            }) as Arc<dyn Tool>,
        ],
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "mixed_project_inspect_path_paths",
            "Inspect both project folders and find HTML files",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "Inspection complete.");

    let inspect_args = inspect_calls.lock().await.clone();
    assert_eq!(inspect_args.len(), 1, "expected one project_inspect call");
    assert!(
        inspect_args[0].contains("\"path\"") && inspect_args[0].contains("\"paths\""),
        "expected mixed path+paths args in project_inspect call, got: {}",
        inspect_args[0]
    );

    let search_args = search_calls.lock().await.clone();
    assert_eq!(
        search_args.len(),
        2,
        "expected one follow-up search_files call plus required explicit re-check"
    );
    assert!(
        search_args[0].contains(&format!("\"path\":\"{}\"", primary_dir.path().display())),
        "expected first search_files call to inherit primary path from project_inspect(path), got: {}",
        search_args[0]
    );
}

/// Replays a recorded trace for a bare "Yes, do it." message. The first
/// scripted response is plain text containing tool-call-like lines and an
/// `[INTENT_GATE]` JSON blob, followed by a real tool call and a final text.
/// The test asserts that the reply is the final scripted text and that at
/// least three provider calls were made.
#[tokio::test]
async fn replay_trace_yes_do_it_with_sanitized_response_analysis_falls_through_to_tools() {
    let leaked_gate_text = concat!(
        "arguments:\nname: terminal\ncommand: ls\n",
        "[INTENT_GATE]\n",
        "{\"complexity\":\"simple\",\"can_answer_now\":true,\"needs_tools\":true,\"is_acknowledgment\":true}",
    );
    let script = vec![
        MockProvider::text_response(leaked_gate_text),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Applied the requested changes."),
    ];

    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(script),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let reply = harness
        .agent
        .handle_message(
            "replay_yes_do_it",
            "Yes, do it.",
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "Applied the requested changes.");

    let provider_calls = harness.provider.call_count().await;
    assert!(
        provider_calls >= 3,
        "expected initial routing call + tool-call + final response path"
    );
}

/// Replays a trace in which the model emits three planning-only text replies
/// before its first tool call.
///
/// The scripted provider then yields a `system_info` tool call and a closing
/// summary. The assertions are intentionally loose: the reply must be
/// non-empty, the provider must have been called at least three times, and no
/// recorded call may have requested `ResponseMode::JsonSchema`.
#[tokio::test]
async fn replay_trace_deferred_planning_text_does_not_stall_before_first_tool_call() {
    let scripted_replies = vec![
        MockProvider::text_response("I'll search for all Rust files with async fn first."),
        MockProvider::text_response("Next I'll inspect each file and count async functions."),
        MockProvider::text_response("I'm going to run the search now."),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("Found the files and compiled the async summary."),
    ];

    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted_replies),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let session_id = "replay_pre_tool_deferral";
    let user_text = "Find all Rust files that contain async fn and give me the top 3 files.";
    let reply = harness
        .agent
        .handle_message(
            session_id,
            user_text,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    // Two outcomes are accepted here:
    //   1. all five scripted replies are consumed and the final text is returned
    //      (old behavior), or
    //   2. deferred-no-tool detection stops earlier and returns an intermediate
    //      text.
    // Either way the agent must not crash and must say something.
    assert!(
        !reply.is_empty(),
        "Agent should return a non-empty response"
    );

    // Some deferral retries are expected to fire before recovery kicks in.
    let provider_calls = harness.provider.call_count().await;
    assert!(
        provider_calls >= 3,
        "expected at least a few retries before deferred/no-tool recovery"
    );

    // None of the recorded provider calls may ask for a JSON-schema response.
    let recorded_calls = harness.provider.call_log.lock().await.clone();
    let never_requested_schema = recorded_calls
        .iter()
        .all(|entry| !matches!(entry.options.response_mode, ResponseMode::JsonSchema { .. }));
    assert!(
        never_requested_schema,
        "text-only schema pass should be disabled"
    );

    // NOTE: ToolChoiceMode::Required is only set when the user text itself is
    // deterministically classified as needing tools (infer_intent_gate returns
    // needs_tools=true). For a generic query like this one, the INTENT_GATE JSON
    // in the LLM response does not retroactively flip that flag. Deferred-action
    // retries still fire (they rely on deferred_no_tool_streak), but they do not
    // use Required mode unless tools_required_for_turn was already true.
}

/// Drives a turn where the model defers twice in text, then calls
/// `system_info`, defers once more, and finally produces a summary.
///
/// The test asserts that the final summary is returned, that provider calls
/// were recorded, and that none of those calls used
/// `ToolChoiceMode::Required`. Note that, despite the test name, the
/// assertions check that Required mode is never requested at all for this
/// user text.
#[tokio::test]
async fn deferred_no_tool_forced_required_resets_after_first_successful_tool_call() {
    let scripted_replies = vec![
        MockProvider::text_response(
            "Need to inspect first.\n\
             [INTENT_GATE]\n\
             {\"complexity\":\"simple\",\"can_answer_now\":false,\"needs_tools\":true}",
        ),
        MockProvider::text_response("I'll inspect the machine first."),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response("I'll format the final summary next."),
        MockProvider::text_response("Final summary: system inspection completed."),
    ];

    let harness = setup_test_agent_with_models(
        MockProvider::with_responses(scripted_replies),
        "primary-model",
        "smart-model",
    )
    .await
    .unwrap();

    let session_id = "deferred_no_tool_reset_after_success";
    let user_text = "Inspect my system and summarize it.";
    let reply = harness
        .agent
        .handle_message(
            session_id,
            user_text,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "Final summary: system inspection completed.");

    // Background on why Required is not expected below:
    // ToolChoiceMode::Required is only set when the user text itself triggers
    // infer_intent_gate to return needs_tools=true (e.g. explicit filesystem
    // paths, local execution requests). For a generic user text such as
    // "Inspect my system and summarize it." the deterministic intent gate does
    // not flag needs_tools, so Required mode is never activated regardless of
    // the INTENT_GATE JSON embedded in the LLM response. Deferred-action
    // retries still fire via deferred_no_tool_streak, driving the agent toward
    // the tool call, but they use the default ToolChoiceMode rather than
    // Required.
    let recorded_calls = harness.provider.call_log.lock().await.clone();

    // Sanity check: the provider was exercised and its calls were logged.
    assert!(
        !recorded_calls.is_empty(),
        "expected provider calls to be recorded"
    );

    let any_required = recorded_calls
        .iter()
        .any(|entry| matches!(entry.options.tool_choice, ToolChoiceMode::Required));
    assert!(
        !any_required,
        "expected no Required tool-choice for a non-tool-classified user text"
    );
}

/// Exercises recovery after a failed `spawn_agent` delegation.
///
/// The mocked `spawn_agent` tool returns a timeout error string, and the
/// scripted provider answers twice with the same "I'll retry later" plan
/// before issuing a `system_info` tool call and a real briefing. The test
/// requires that the briefing is returned, that the stalled-plan wording is
/// absent from the reply, that at least five provider calls happened, and
/// that some message sent to the provider contained both
/// "Specialist delegation failed" and "available direct tools".
#[tokio::test]
async fn failed_specialist_plan_reply_pivots_to_direct_tools() {
    /// True when a message body carries the direct-tool recovery guidance.
    fn has_direct_tool_guidance(content: &str) -> bool {
        content.contains("Specialist delegation failed")
            && content.contains("available direct tools")
    }

    let incomplete_plan = "I've started breaking down your goal into specific tasks. I've created \
a plan to first research the 2026 AI job market and then synthesize that into your personalized \
morning briefing.\n\nI attempted to launch a research specialist, but the request timed out. I'm \
monitoring the system and will retry the research task as soon as the connection is stable.\n\n\
Current Plan:\n1. Research Phase: Deep dive into trends, roles, and skills.\n\
2. Synthesis Phase: Organize findings into a morning briefing.";

    // Script: delegate, stall twice with the same plan, then use a direct tool
    // and deliver the actual briefing.
    let scripted_replies = vec![
        MockProvider::tool_call_response(
            "spawn_agent",
            r#"{"mission":"Research AI jobs","task":"Produce current findings"}"#,
        ),
        MockProvider::text_response(incomplete_plan),
        MockProvider::text_response(incomplete_plan),
        MockProvider::tool_call_response("system_info", "{}"),
        MockProvider::text_response(
            "Market Snapshot: applied AI engineering remains the strongest target. \
Target Roles: GenAI engineer and AI product manager. Interview Edge: prepare concrete \
examples of evaluation, deployment, and agent reliability work.",
        ),
    ];
    let provider = MockProvider::with_responses(scripted_replies);

    // A stand-in `spawn_agent` tool whose output is always a timeout error.
    let failing_spawn_agent: Arc<dyn Tool> = Arc::new(MockTool::new(
        "spawn_agent",
        "Mock failed specialist delegation",
        "Error: specialist timed out after 300 seconds",
    ));
    let extra_tools = vec![failing_spawn_agent];
    let harness = setup_test_agent_root_with_extra_tools_and_llm_timeout(provider, extra_tools, None)
        .await
        .unwrap();

    let session_id = "failed_specialist_plan_pivot";
    let user_text = "Research the 2026 AI job market and produce my morning briefing.";
    let reply = harness
        .agent
        .handle_message(
            session_id,
            user_text,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert!(
        reply.contains("Market Snapshot:"),
        "unexpected reply: {reply}"
    );
    assert!(!reply.contains("monitoring the system"));

    let provider_calls = harness.provider.call_count().await;
    assert!(
        provider_calls >= 5,
        "repeated incomplete plans should trigger another tool-backed iteration"
    );

    // Scan every message of every recorded provider call for the guidance text.
    let calls = harness.provider.call_log.lock().await;
    let guidance_injected = calls
        .iter()
        .flat_map(|call| call.messages.iter())
        .filter_map(|message| message.get("content").and_then(Value::as_str))
        .any(has_direct_tool_guidance);
    assert!(
        guidance_injected,
        "failed delegation should inject direct-tool recovery guidance"
    );
}

/// Runs a short "Yes" turn against a provider configured via
/// `rejecting_non_default_options()`.
///
/// The test expects the single scripted text reply to come back unchanged and
/// every recorded provider call to carry options equal to
/// `ChatOptions::default()`.
#[tokio::test]
async fn provider_option_rejection_falls_back_to_default_chat() {
    let provider = MockProvider::with_responses(vec![MockProvider::text_response("Got it.")]);
    let provider = provider.rejecting_non_default_options();

    let harness = setup_test_agent_with_models(provider, "primary-model", "smart-model")
        .await
        .unwrap();

    let session_id = "provider_option_rejection_fallback";
    let user_text = "Yes";
    let reply = harness
        .agent
        .handle_message(
            session_id,
            user_text,
            None,
            UserRole::Owner,
            ChannelContext::private("test"),
            None,
        )
        .await
        .unwrap();

    assert_eq!(reply, "Got it.");

    let recorded_calls = harness.provider.call_log.lock().await.clone();
    assert!(!recorded_calls.is_empty(), "expected at least one provider call");

    // Compare each call's options against one shared default instance.
    let default_options = ChatOptions::default();
    let only_default_options = recorded_calls
        .iter()
        .all(|entry| entry.options == default_options);
    assert!(
        only_default_options,
        "expected default chat options when the text-only pass is disabled"
    );
}