// aidaemon 0.11.1

//! Tests for `execution_state` (split out from the original module via `#[path]`).
//!
//! Moved verbatim — no logic changes. Included as a child test module of
//! `execution_state` so `use super::*;` continues to resolve against it.

use super::*;
use crate::agent::{CompletionContract, CompletionTaskKind, TurnContext};
use crate::traits::{ToolCallSemantics, ToolTargetHintKind};
use serde_json::json;

/// An orchestrator edit request for a file under the primary project scope must select
/// the `Small` tier, the `scoped_modification` route, and exactly three validation rounds.
#[test]
fn scoped_edit_requests_start_with_small_budget() {
    let request = "edit /tmp/demo/src/main.rs";

    // The context is only needed for the duration of the call, so build it inline.
    let (tier, route_kind, budget) = select_initial_execution_budget(
        request,
        &TurnContext {
            primary_project_scope: Some(String::from("/tmp/demo")),
            ..TurnContext::default()
        },
        0,
        AgentRole::Orchestrator,
    );

    assert_eq!(tier, BudgetTier::Small);
    assert_eq!(route_kind, "scoped_modification");
    assert_eq!(budget.max_validation_rounds, 3);
}

/// A request that pairs web research with creating a `.md` file must be budgeted as
/// `Standard` and routed as `scoped_modification_with_verification`, even though the
/// `.md` path on its own is a scoped target.
#[test]
fn research_plus_create_gets_standard_not_small() {
    let request = "Search the web for the top 3 Rust crates then create a markdown file at ~/projects/blog/drafts/rust-crates-2025.md";
    let default_context = TurnContext::default();

    let (tier, route_kind, _) =
        select_initial_execution_budget(request, &default_context, 0, AgentRole::Orchestrator);

    assert_eq!(
        tier,
        BudgetTier::Standard,
        "research+create should get Standard budget"
    );
    assert_eq!(route_kind, "scoped_modification_with_verification");
}

/// An executor-role request issued with a third argument of `1` must get the `Extended`
/// tier, the `delegated_multi_step` route, and room for at least 16 tool calls.
#[test]
fn delegated_work_starts_with_extended_budget() {
    let default_context = TurnContext::default();

    let (tier, route_kind, budget) = select_initial_execution_budget(
        "fix the deployment",
        &default_context,
        1,
        AgentRole::Executor,
    );

    assert_eq!(tier, BudgetTier::Extended);
    assert_eq!(route_kind, "delegated_multi_step");
    assert!(budget.max_tool_calls >= 16);
}

/// Compiling a plan for a mutating `edit_file` call that needs approval, with `/repo` as
/// project scope, must yield a plan that: names `edit_file` as primary tool, carries plan
/// version 3, allows exactly one target of kind `ProjectScope`, hard-fails outside that
/// scope, has an idempotency key, and requires approval.
#[test]
fn compile_step_plan_uses_scope_and_idempotency_for_mutations() {
    let capabilities = ToolCapabilities {
        read_only: false,
        external_side_effect: false,
        needs_approval: true,
        idempotent: false,
        high_impact_write: false,
    };
    let semantics =
        ToolCallSemantics::mutation().with_target_hint(ToolTargetHintKind::Path, "src/main.rs");

    let plan = compile_step_execution_plan(
        "exec-1",
        3,
        2,
        "call-1",
        "edit_file",
        r#"{"path":"src/main.rs"}"#,
        &semantics,
        capabilities,
        Some("/repo"),
    );

    assert_eq!(plan.primary_tool.as_deref(), Some("edit_file"));
    assert_eq!(plan.plan_version, 3);

    // The path hint is expected to be replaced by the enclosing project scope.
    let scope = &plan.target_scope;
    assert_eq!(scope.allowed_targets.len(), 1);
    assert_eq!(
        scope.allowed_targets[0].kind,
        ToolTargetHintKind::ProjectScope
    );
    assert!(scope.hard_fail_outside_scope);

    assert!(plan.idempotency_key.is_some());
    let approval_required = matches!(
        plan.approval_requirement,
        ApprovalRequirement::Required { .. }
    );
    assert!(approval_required);
}

/// A read-only `http_request` with a URL target hint must keep that URL as its single
/// allowed target even when a project scope (`/repo`) is supplied.
#[test]
fn compile_step_plan_preserves_url_targets_when_project_scope_exists() {
    let endpoint = "https://clinicaltrials.gov/api/v2/studies";
    let capabilities = ToolCapabilities {
        read_only: true,
        external_side_effect: true,
        needs_approval: false,
        idempotent: true,
        high_impact_write: false,
    };
    let semantics =
        ToolCallSemantics::observation().with_target_hint(ToolTargetHintKind::Url, endpoint);

    let plan = compile_step_execution_plan(
        "exec-1",
        3,
        2,
        "call-1",
        "http_request",
        r#"{"url":"https://clinicaltrials.gov/api/v2/studies"}"#,
        &semantics,
        capabilities,
        Some("/repo"),
    );

    let targets = &plan.target_scope.allowed_targets;
    assert_eq!(targets.len(), 1);
    assert_eq!(targets[0].kind, ToolTargetHintKind::Url);
    assert_eq!(targets[0].value, endpoint);
}

/// With a budget that allows a single LLM call, recording one LLM call after the budget
/// envelope has been activated must report `LlmCalls` as the exhausted limit.
#[test]
fn execution_state_reports_budget_exhaustion() {
    let single_shot_budget = ExecutionBudget {
        max_steps: 1,
        max_tokens: 100,
        max_llm_calls: 1,
        max_tool_calls: 1,
        max_validation_rounds: 1,
        max_wall_clock_ms: 1_000,
    };
    let mut state = ExecutionState::new(
        BudgetTier::Small,
        single_shot_budget,
        ExecutionPersistence::Ephemeral,
    );

    state.activate_budget_envelope(0, Duration::from_millis(0));
    state.record_llm_call();

    let exhausted = state.exhausted_limit(0, Duration::from_millis(1));
    assert_eq!(exhausted, Some(ExecutionBudgetLimit::LlmCalls));
}

/// A state whose budget envelope was never activated must report no exhausted limit, even
/// when the values handed to `exhausted_limit` (10_000 and 30 s) are far above the
/// configured `max_tokens` (10) and `max_wall_clock_ms` (1).
#[test]
fn inactive_execution_budget_ignores_plain_text_token_usage() {
    let tiny_budget = ExecutionBudget {
        max_steps: 24,
        max_tokens: 10,
        max_llm_calls: 1,
        max_tool_calls: 1,
        max_validation_rounds: 1,
        max_wall_clock_ms: 1,
    };
    let state = ExecutionState::new(
        BudgetTier::None,
        tiny_budget,
        ExecutionPersistence::Ephemeral,
    );

    let exhausted = state.exhausted_limit(10_000, Duration::from_secs(30));
    assert_eq!(exhausted, None);
}

/// A plain factual question with a default completion contract must get the `Standard`
/// tier and the `knowledge` route.
#[test]
fn knowledge_turns_use_standard_budget() {
    let completion_contract = CompletionContract::default();
    let context = TurnContext {
        completion_contract,
        ..TurnContext::default()
    };

    let (tier, route_kind, _) = select_initial_execution_budget(
        "what's the capital of france",
        &context,
        0,
        AgentRole::Orchestrator,
    );

    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(route_kind, "knowledge");
}

/// A scheduling request must get the `Standard` tier, the `scheduled_action` route, and
/// at least three validation rounds.
#[test]
fn scheduled_turns_use_standard_budget() {
    let default_context = TurnContext::default();

    let (tier, route_kind, budget) = select_initial_execution_budget(
        "schedule a daily health check",
        &default_context,
        0,
        AgentRole::Orchestrator,
    );

    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(route_kind, "scheduled_action");
    assert!(budget.max_validation_rounds >= 3);
}

/// An inspect-and-report request must get the `Standard` tier and the
/// `read_only_investigation` route.
#[test]
fn read_only_investigation_uses_standard_budget() {
    let request = "inspect the latest logs and show me the current status";
    let default_context = TurnContext::default();

    let (tier, route_kind, _) =
        select_initial_execution_budget(request, &default_context, 0, AgentRole::Orchestrator);

    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(route_kind, "read_only_investigation");
}

/// An API-backed lookup must get the `Standard` tier and the `api_lookup` route, with at
/// least 18 LLM calls and a `max_tokens` of zero.
#[test]
fn api_read_requests_use_standard_budget_for_multi_step_lookups() {
    let request = "Using the clinical trials API, give me studies near Fairfax for skin cancer.";
    let default_context = TurnContext::default();

    let (tier, route_kind, budget) =
        select_initial_execution_budget(request, &default_context, 0, AgentRole::Orchestrator);

    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(route_kind, "api_lookup");
    assert!(budget.max_llm_calls >= 18);
    assert_eq!(budget.max_tokens, 0);
}

/// A draft-then-deliver connected-content request whose contract is a mutating `Deliver`
/// task must get the `Standard` tier and the `deployment_or_external_write` route.
///
/// NOTE(review): the function name mentions the "knowledge lane", but the assertion pins
/// the `deployment_or_external_write` route — confirm which of the two is intended.
#[test]
fn connected_content_authoring_requests_stay_in_knowledge_lane() {
    let mut turn_context = TurnContext::default();
    {
        // Adjust only the contract fields this scenario cares about.
        let contract = &mut turn_context.completion_contract;
        contract.connected_content_mode =
            crate::agent::intent_routing::ConnectedContentMode::DraftThenDeliver;
        contract.task_kind = CompletionTaskKind::Deliver;
        contract.expects_mutation = true;
    }

    let request =
        "Can you post a tweet about your new stuff and make it engaging so people want to comment?";
    let (tier, route_kind, _) =
        select_initial_execution_budget(request, &turn_context, 0, AgentRole::Orchestrator);

    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(route_kind, "deployment_or_external_write");
}

/// A request to post on the agent's own account must get the `Standard` tier, the
/// `deployment_or_external_write` route, and at least 18 LLM calls.
#[test]
fn account_scoped_connected_content_delivery_uses_external_write_budget() {
    let default_context = TurnContext::default();

    let (tier, route_kind, budget) = select_initial_execution_budget(
        "Can you post a tweet on your account?",
        &default_context,
        0,
        AgentRole::Orchestrator,
    );

    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(route_kind, "deployment_or_external_write");
    assert!(budget.max_llm_calls >= 18);
}

/// An OAuth reconnect request must get the `Standard` tier and the
/// `deployment_or_external_write` route.
#[test]
fn auth_management_requests_use_standard_budget() {
    let request = "Reconnect my Twitter OAuth account so you can post for me.";
    let default_context = TurnContext::default();

    let (tier, route_kind, _) =
        select_initial_execution_budget(request, &default_context, 0, AgentRole::Orchestrator);

    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(route_kind, "deployment_or_external_write");
}

/// A follow-up question asked after an assistant reply must get the `Standard` tier and a
/// `max_tokens` of zero.
#[test]
fn contextual_followups_start_with_standard_budget() {
    let previous_reply = json!({
        "role": "assistant",
        "content": "Here are 20 matching studies with short summaries."
    });
    let context = TurnContext {
        followup_mode: Some(FollowupMode::Followup),
        recent_messages: vec![previous_reply],
        ..TurnContext::default()
    };

    let (tier, _, budget) = select_initial_execution_budget(
        "Which one is most relevant to skin cancer?",
        &context,
        0,
        AgentRole::Orchestrator,
    );

    // Tier is Standard regardless of followup context since the
    // base tier is now Standard (no longer None/Small that needed promotion).
    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(budget.max_tokens, 0);
}

/// A scoped edit that arrives as the answer to a clarification question must get the
/// `Standard` tier, the `contextual_followup` route, and at least three validation rounds.
#[test]
fn clarification_followups_promote_scoped_edits_to_standard_budget() {
    let clarifying_question = json!({
        "role": "assistant",
        "content": "Which file should I update?"
    });
    let context = TurnContext {
        primary_project_scope: Some(String::from("/tmp/demo")),
        followup_mode: Some(FollowupMode::ClarificationAnswer),
        recent_messages: vec![clarifying_question],
        ..TurnContext::default()
    };

    let (tier, route_kind, budget) = select_initial_execution_budget(
        "Update the config in src/main.rs",
        &context,
        0,
        AgentRole::Orchestrator,
    );

    assert_eq!(tier, BudgetTier::Standard);
    assert_eq!(route_kind, "contextual_followup");
    assert!(budget.max_validation_rounds >= 3);
}

/// `extend_budget_on_progress` must leave the budget untouched while the envelope is
/// inactive, raise every checked limit once it is active, and keep raising limits on
/// repeated calls.
#[test]
fn extend_budget_on_progress_increases_limits() {
    let mut state = ExecutionState::new(
        BudgetTier::None,
        default_execution_budget(BudgetTier::None),
        ExecutionPersistence::Ephemeral,
    );

    // Snapshot of the limits before any extension is attempted.
    let base_llm_calls = state.budget.max_llm_calls;
    let base_tool_calls = state.budget.max_tool_calls;
    let base_steps = state.budget.max_steps;
    let base_wall_clock = state.budget.max_wall_clock_ms;
    let base_validation = state.budget.max_validation_rounds;

    // Phase 1: envelope inactive, so the call must be a no-op.
    state.extend_budget_on_progress();
    assert_eq!(state.budget.max_llm_calls, base_llm_calls);
    assert_eq!(state.budget.max_wall_clock_ms, base_wall_clock);
    assert_eq!(state.budget.max_validation_rounds, base_validation);

    // Phase 2: envelope active, so every limit must grow.
    state.activate_budget_envelope(0, Duration::from_millis(0));
    state.extend_budget_on_progress();
    assert!(state.budget.max_llm_calls > base_llm_calls);
    assert!(state.budget.max_tool_calls > base_tool_calls);
    assert!(state.budget.max_steps > base_steps);
    assert!(state.budget.max_wall_clock_ms > base_wall_clock);
    assert!(state.budget.max_validation_rounds > base_validation);

    // Phase 3: a second extension stacks on top of the first.
    let once_llm_calls = state.budget.max_llm_calls;
    let once_wall_clock = state.budget.max_wall_clock_ms;
    let once_validation = state.budget.max_validation_rounds;
    state.extend_budget_on_progress();
    assert!(state.budget.max_llm_calls > once_llm_calls);
    assert!(state.budget.max_wall_clock_ms > once_wall_clock);
    assert!(state.budget.max_validation_rounds > once_validation);
}

/// A run that extends its budget after every iteration must not hit any limit after 30
/// iterations, even when 300 s of elapsed time is reported.
#[test]
fn productive_run_never_exhausts_budget() {
    let mut state = ExecutionState::new(
        BudgetTier::None,
        default_execution_budget(BudgetTier::None),
        ExecutionPersistence::Ephemeral,
    );
    state.activate_budget_envelope(0, Duration::from_millis(0));

    // Simulate 30 productive iterations: each records an LLM call + tool
    // call + occasional validation round, but also extends via progress.
    // Use realistic elapsed time (~10s per iteration → 300s total) to
    // verify wall-clock extension keeps pace with real-world execution.
    for iteration in 1..=30 {
        state.record_llm_call();
        state.record_tool_call();
        // Simulate a validation round every ~10 tool calls (realistic
        // for complex multi-step tasks): iterations 10, 20 and 30.
        if iteration % 10 == 0 {
            state.record_validation_round();
        }
        state.extend_budget_on_progress();
    }

    // 30 iterations × ~10s each = 300s of wall time.  The base budget
    // for None tier is 180s, but 30 progress extensions add 30 × 30s =
    // 900s, giving a total wall-clock budget of 1080s — well above 300s.
    // Validation rounds: base 3, used 3, but 30 extensions of +1 each
    // give 33 total — well above the 3 used.
    let exhausted = state.exhausted_limit(0, Duration::from_secs(300));
    assert_eq!(
        exhausted,
        None,
        "Productive run should never exhaust budget, even with realistic wall-clock time"
    );
}

/// Builds an ephemeral `ExecutionState` on `BudgetTier::None` using that tier's default
/// budget; shared fixture for the tests below.
fn test_execution_state() -> ExecutionState {
    let budget = default_execution_budget(BudgetTier::None);
    ExecutionState::new(BudgetTier::None, budget, ExecutionPersistence::Ephemeral)
}

/// A freshly built state has no recorded outcomes.
#[test]
fn outcome_ledger_starts_empty() {
    let fresh = test_execution_state();
    let ledger = &fresh.outcome_ledger;
    assert!(ledger.is_empty());
}

/// Recording one successful outcome leaves exactly one ledger entry, and that entry is
/// flagged as successful.
#[test]
fn outcome_ledger_records_success() {
    let created = OutcomeEntry {
        tool_name: String::from("http_request"),
        success: true,
        http_status: Some(201),
        is_external_mutation: true,
        error_summary: None,
        iteration: 1,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    state.record_outcome(created);

    let ledger = &state.outcome_ledger;
    assert_eq!(ledger.len(), 1);
    assert!(ledger[0].success);
}

/// A failed outcome flagged as an external mutation, with no later success, must be
/// reported as an uncorrected failed external mutation.
#[test]
fn outcome_ledger_tracks_failed_external_mutations() {
    let rejected = OutcomeEntry {
        tool_name: String::from("http_request"),
        success: false,
        http_status: Some(403),
        is_external_mutation: true,
        error_summary: Some(String::from("duplicate content")),
        iteration: 1,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    state.record_outcome(rejected);

    assert!(state.has_uncorrected_failed_external_mutations());
}

/// A failed outcome that is not flagged as an external mutation must not count as an
/// uncorrected failed external mutation.
#[test]
fn outcome_ledger_ignores_non_external_failures() {
    let missing_file = OutcomeEntry {
        tool_name: String::from("read_file"),
        success: false,
        http_status: None,
        is_external_mutation: false,
        error_summary: Some(String::from("file not found")),
        iteration: 1,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    state.record_outcome(missing_file);

    assert!(!state.has_uncorrected_failed_external_mutations());
}

/// When the only recorded external mutation succeeded, no attempt reconciliation summary
/// is produced.
#[test]
fn attempt_reconciliation_none_when_all_succeeded() {
    let created = OutcomeEntry {
        tool_name: String::from("http_request"),
        success: true,
        http_status: Some(201),
        is_external_mutation: true,
        error_summary: None,
        iteration: 1,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    state.record_outcome(created);

    let summary = state.build_attempt_reconciliation_summary();
    assert!(summary.is_none());
}

/// A success followed by a failed external mutation must produce a summary that mentions
/// "attempts", "1", "2", "failed", the failing HTTP status and the error text.
#[test]
fn attempt_reconciliation_present_when_failures_exist() {
    // Builds an unplanned external `http_request` outcome; only the varying fields are
    // parameters.
    let http_attempt = |success: bool, http_status, error_summary: Option<&str>, iteration| {
        OutcomeEntry {
            tool_name: "http_request".to_string(),
            success,
            http_status,
            is_external_mutation: true,
            error_summary: error_summary.map(str::to_string),
            iteration,
            plan_version: None,
            planned_step_id: None,
            planned_step_index: None,
            planned_step_description: None,
            expected_step_count: None,
        }
    };

    let mut state = test_execution_state();
    state.record_outcome(http_attempt(true, Some(201), None, 1));
    state.record_outcome(http_attempt(
        false,
        Some(403),
        Some("duplicate content"),
        2,
    ));

    let summary = state.build_attempt_reconciliation_summary().unwrap();
    assert!(summary.contains("attempts"));
    assert!(summary.contains("1"));
    assert!(summary.contains("2"));
    assert!(summary.contains("failed"));
    assert!(summary.contains("403"));
    assert!(summary.contains("duplicate content"));
}

/// The attempt reconciliation summary must use the word "attempt" and must not contain
/// the word "action".
#[test]
fn attempt_reconciliation_says_attempts_not_actions() {
    let rejected = OutcomeEntry {
        tool_name: String::from("http_request"),
        success: false,
        http_status: Some(403),
        is_external_mutation: true,
        error_summary: Some(String::from("dup")),
        iteration: 1,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    state.record_outcome(rejected);

    let wording = state.build_attempt_reconciliation_summary().unwrap();
    assert!(wording.contains("attempt"));
    assert!(!wording.contains("action"));
}

/// A failed `run_command` at iteration 3 followed by a successful `run_command` at
/// iteration 7 counts as corrected: no uncorrected failures and no reconciliation summary.
#[test]
fn corrected_failure_same_tool_skips_reconciliation() {
    // Builds an unplanned external `run_command` outcome without an HTTP status.
    let run_command = |success: bool, error_summary: Option<&str>, iteration| OutcomeEntry {
        tool_name: "run_command".to_string(),
        success,
        http_status: None,
        is_external_mutation: true,
        error_summary: error_summary.map(str::to_string),
        iteration,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    // Failure first, then the same tool succeeds later.
    state.record_outcome(run_command(false, Some("could not find Cargo.toml"), 3));
    state.record_outcome(run_command(true, None, 7));

    assert!(state.uncorrected_failed_mutations().is_empty());
    assert!(!state.has_uncorrected_failed_external_mutations());
    assert!(state.build_attempt_reconciliation_summary().is_none());
}

/// A failed `run_command` at iteration 9 followed by a successful `terminal` call at
/// iteration 15 counts as corrected, because every failure precedes the last success.
#[test]
fn corrected_failure_different_tool_skips_reconciliation() {
    // Builds an unplanned external mutation outcome without an HTTP status.
    let mutation = |tool_name: &str, success: bool, error_summary: Option<&str>, iteration| {
        OutcomeEntry {
            tool_name: tool_name.to_string(),
            success,
            http_status: None,
            is_external_mutation: true,
            error_summary: error_summary.map(str::to_string),
            iteration,
            plan_version: None,
            planned_step_id: None,
            planned_step_index: None,
            planned_step_description: None,
            expected_step_count: None,
        }
    };

    let mut state = test_execution_state();
    state.record_outcome(mutation(
        "run_command",
        false,
        Some("could not find Cargo.toml"),
        9,
    ));
    state.record_outcome(mutation("terminal", true, None, 15));

    assert!(state.uncorrected_failed_mutations().is_empty());
    assert!(!state.has_uncorrected_failed_external_mutations());
    assert!(state.build_attempt_reconciliation_summary().is_none());
}

/// A success at iteration 5 followed by a failure at iteration 10 leaves that failure
/// uncorrected, so a reconciliation summary must be produced.
#[test]
fn uncorrected_failure_after_last_success_triggers_reconciliation() {
    // Builds an unplanned external mutation outcome.
    let mutation = |tool_name: &str,
                    success: bool,
                    http_status,
                    error_summary: Option<&str>,
                    iteration| OutcomeEntry {
        tool_name: tool_name.to_string(),
        success,
        http_status,
        is_external_mutation: true,
        error_summary: error_summary.map(str::to_string),
        iteration,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    state.record_outcome(mutation("terminal", true, None, None, 5));
    state.record_outcome(mutation(
        "http_request",
        false,
        Some(500),
        Some("server error"),
        10,
    ));

    assert_eq!(state.uncorrected_failed_mutations().len(), 1);
    assert!(state.has_uncorrected_failed_external_mutations());
    assert!(state.build_attempt_reconciliation_summary().is_some());
}

/// With a failure before the last success and another failure after it, only the later
/// failure is reported as uncorrected and only its error text appears in the summary.
#[test]
fn mixed_corrected_and_uncorrected_failures() {
    // Builds an unplanned external mutation outcome.
    let mutation = |tool_name: &str,
                    success: bool,
                    http_status,
                    error_summary: Option<&str>,
                    iteration| OutcomeEntry {
        tool_name: tool_name.to_string(),
        success,
        http_status,
        is_external_mutation: true,
        error_summary: error_summary.map(str::to_string),
        iteration,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    // run_command FAIL at iter 3 (corrected by terminal SUCCESS at iter 15)
    state.record_outcome(mutation("run_command", false, None, Some("not found"), 3));
    state.record_outcome(mutation("terminal", true, None, None, 15));
    // http_request FAIL at iter 20 (after last success → uncorrected)
    state.record_outcome(mutation(
        "http_request",
        false,
        Some(500),
        Some("deploy failed"),
        20,
    ));

    let still_failing = state.uncorrected_failed_mutations();
    assert_eq!(still_failing.len(), 1);
    assert_eq!(still_failing[0].tool_name, "http_request");
    assert_eq!(still_failing[0].iteration, 20);

    let summary = state.build_attempt_reconciliation_summary().unwrap();
    assert!(summary.contains("deploy failed"));
    assert!(!summary.contains("not found")); // corrected failure excluded
}

/// Right after a two-step plan is installed, the current step is the first one, with its
/// id and index intact.
#[test]
fn install_linear_intent_plan_sets_current_step_identity() {
    // Builds a not-yet-started `http_request` step.
    let pending_tweet = |step_id: &str, step_index, target: &str, description: &str| {
        LinearIntentStep {
            step_id: step_id.to_string(),
            step_index,
            tool: "http_request".to_string(),
            target: target.to_string(),
            description: description.to_string(),
            tool_calls_on_step: 0,
            completed: false,
            completion_evidence: None,
            last_evaluated_at: None,
        }
    };

    let mut state = test_execution_state();
    state.install_linear_intent_plan(
        3,
        vec![
            pending_tweet("plan-v3-step-1", 1, "tweet-1", "Post tweet 1"),
            pending_tweet("plan-v3-step-2", 2, "tweet-2", "Post tweet 2"),
        ],
    );

    let active = state.current_linear_intent_step().unwrap();
    assert_eq!(active.step_id, "plan-v3-step-1");
    assert_eq!(active.step_index, 1);
}

/// Advancing after an external success moves from step 1 to step 2, then past the end of
/// the plan (no current step); further advances change nothing.
#[test]
fn advance_linear_intent_step_on_success_moves_forward() {
    // Builds a not-yet-started `http_request` step.
    let pending_tweet = |step_id: &str, step_index, target: &str, description: &str| {
        LinearIntentStep {
            step_id: step_id.to_string(),
            step_index,
            tool: "http_request".to_string(),
            target: target.to_string(),
            description: description.to_string(),
            tool_calls_on_step: 0,
            completed: false,
            completion_evidence: None,
            last_evaluated_at: None,
        }
    };

    let mut state = test_execution_state();
    state.install_linear_intent_plan(
        1,
        vec![
            pending_tweet("plan-v1-step-1", 1, "tweet-1", "Post tweet 1"),
            pending_tweet("plan-v1-step-2", 2, "tweet-2", "Post tweet 2"),
        ],
    );

    // First advance: step 1 → step 2
    state.advance_linear_intent_step_after_external_success();
    let active = state.current_linear_intent_step().unwrap();
    assert_eq!(active.step_index, 2);

    // Second advance: step 2 → past end (cursor retires)
    state.advance_linear_intent_step_after_external_success();
    assert!(
        state.current_linear_intent_step().is_none(),
        "cursor should retire past the last step"
    );

    // Further advances are no-ops
    state.advance_linear_intent_step_after_external_success();
    assert!(state.current_linear_intent_step().is_none());
}

/// A failure and a later success recorded against the same planned step must be reported
/// as one step that "succeeded after 2 attempts", with the step description and the
/// expected step count present in the summary.
#[test]
fn planned_step_reconciliation_groups_retry_under_one_step() {
    // Both attempts belong to step 2 of a five-step v1 plan; only the result differs.
    let tweet_two_attempt =
        |success: bool, http_status, error_summary: Option<&str>, iteration| OutcomeEntry {
            tool_name: "http_request".to_string(),
            success,
            http_status,
            is_external_mutation: true,
            error_summary: error_summary.map(str::to_string),
            iteration,
            plan_version: Some(1),
            planned_step_id: Some("plan-v1-step-2".to_string()),
            planned_step_index: Some(2),
            planned_step_description: Some("Post tweet 2".to_string()),
            expected_step_count: Some(5),
        };

    let mut state = test_execution_state();
    state.record_outcome(tweet_two_attempt(
        false,
        Some(403),
        Some("duplicate content"),
        1,
    ));
    state.record_outcome(tweet_two_attempt(true, Some(201), None, 2));

    let overview = state.build_reconciliation_overview().unwrap();
    let summary = overview.summary;
    assert!(summary.contains("step"));
    assert!(summary.contains("5"));
    assert!(summary.contains("Post tweet 2"));
    assert!(summary.contains("succeeded after 2 attempts"));
}

/// Outcomes recorded under an older plan version must be left out of the overview: with a
/// two-step v2 plan installed and only step 1 succeeded under v2, the overview is
/// planned-step level with total 2, succeeded 1, failed 1 (step 2), and it does not
/// mention the v1 step description.
#[test]
fn planned_step_reconciliation_uses_latest_plan_version_only() {
    // Builds a not-yet-started `http_request` step.
    let pending_tweet = |step_id: &str, step_index, target: &str, description: &str| {
        LinearIntentStep {
            step_id: step_id.to_string(),
            step_index,
            tool: "http_request".to_string(),
            target: target.to_string(),
            description: description.to_string(),
            tool_calls_on_step: 0,
            completed: false,
            completion_evidence: None,
            last_evaluated_at: None,
        }
    };
    // Builds a successful (HTTP 201) outcome for step index 1 of the given plan version.
    let posted_step_one = |iteration,
                           plan_version,
                           step_id: &str,
                           description: &str,
                           expected_step_count| OutcomeEntry {
        tool_name: "http_request".to_string(),
        success: true,
        http_status: Some(201),
        is_external_mutation: true,
        error_summary: None,
        iteration,
        plan_version: Some(plan_version),
        planned_step_id: Some(step_id.to_string()),
        planned_step_index: Some(1),
        planned_step_description: Some(description.to_string()),
        expected_step_count: Some(expected_step_count),
    };

    let mut state = test_execution_state();
    state.install_linear_intent_plan(
        2,
        vec![
            pending_tweet("plan-v2-step-1", 1, "tweet-1", "Post tweet 1"),
            pending_tweet("plan-v2-step-2", 2, "tweet-2", "Post tweet 2"),
        ],
    );
    // One outcome from the superseded v1 plan, one from the installed v2 plan.
    state.record_outcome(posted_step_one(1, 1, "plan-v1-step-1", "Old tweet 1", 3));
    state.record_outcome(posted_step_one(2, 2, "plan-v2-step-1", "Post tweet 1", 2));

    let overview = state.build_reconciliation_overview().unwrap();
    assert_eq!(overview.mode, ReconciliationMode::PlannedStepLevel);
    assert_eq!(overview.total, 2);
    assert_eq!(overview.succeeded, 1);
    assert_eq!(overview.failed, 1);
    assert_eq!(overview.failed_step_indices, vec![2]);
    assert!(!overview.summary.contains("Old tweet 1"));
    assert!(overview
        .summary
        .contains("Step 2 (Post tweet 2) was not completed."));
}

/// When the recorded outcome carries no plan or step identity, the reconciliation
/// overview's summary must still be produced and must use the word "attempt".
#[test]
fn reconciliation_falls_back_to_attempt_level_without_step_identity() {
    let unplanned_failure = OutcomeEntry {
        tool_name: String::from("http_request"),
        success: false,
        http_status: Some(403),
        is_external_mutation: true,
        error_summary: Some(String::from("duplicate content")),
        iteration: 1,
        plan_version: None,
        planned_step_id: None,
        planned_step_index: None,
        planned_step_description: None,
        expected_step_count: None,
    };

    let mut state = test_execution_state();
    state.record_outcome(unplanned_failure);

    let overview = state.build_reconciliation_overview().unwrap();
    assert!(overview.summary.contains("attempt"));
}

/// Promoting a `None`-tier state for a four-step plan must raise `max_llm_calls` to at
/// least the `Standard` default and strictly increase both `max_llm_calls` and
/// `max_wall_clock_ms`.
#[test]
fn promote_budget_for_plan_none_to_standard() {
    let none_budget = default_execution_budget(BudgetTier::None);
    let mut state =
        ExecutionState::new(BudgetTier::None, none_budget, ExecutionPersistence::Ephemeral);
    let llm_calls_before = state.budget.max_llm_calls;
    let wall_clock_before = state.budget.max_wall_clock_ms;

    state.promote_budget_for_plan(4);

    let standard_budget = default_execution_budget(BudgetTier::Standard);
    // None tier has lower llm_calls and wall_clock than Standard
    assert!(state.budget.max_llm_calls >= standard_budget.max_llm_calls);
    assert!(state.budget.max_llm_calls > llm_calls_before);
    assert!(state.budget.max_wall_clock_ms > wall_clock_before);
}

/// Promoting a `Small`-tier state for a three-step plan must raise `max_llm_calls` to at
/// least the `Standard` default.
#[test]
fn promote_budget_for_plan_small_to_standard() {
    let small_budget = default_execution_budget(BudgetTier::Small);
    let mut state = ExecutionState::new(
        BudgetTier::Small,
        small_budget,
        ExecutionPersistence::Ephemeral,
    );

    state.promote_budget_for_plan(3);

    let standard_budget = default_execution_budget(BudgetTier::Standard);
    assert!(state.budget.max_llm_calls >= standard_budget.max_llm_calls);
}

/// A two-step plan must not change `max_tool_calls` on a `None`-tier state.
#[test]
fn no_promote_for_small_plan() {
    let none_budget = default_execution_budget(BudgetTier::None);
    let mut state =
        ExecutionState::new(BudgetTier::None, none_budget, ExecutionPersistence::Ephemeral);
    let tool_calls_before = state.budget.max_tool_calls;

    state.promote_budget_for_plan(2);

    assert_eq!(state.budget.max_tool_calls, tool_calls_before);
}

/// On a state that already starts at the `Standard` tier,
/// `promote_budget_for_plan(5)` must leave the tool-call limit untouched.
#[test]
fn no_promote_for_standard_plus() {
    let initial_budget = default_execution_budget(BudgetTier::Standard);
    let mut state = ExecutionState::new(
        BudgetTier::Standard,
        initial_budget,
        ExecutionPersistence::Ephemeral,
    );
    let tool_calls_before = state.budget.max_tool_calls;

    state.promote_budget_for_plan(5);

    let tool_calls_after = state.budget.max_tool_calls;
    assert_eq!(tool_calls_after, tool_calls_before);
}

/// On a single-step plan, `current_step_needs_replan` must stay `false` after
/// one recorded tool call and become `true` after the second. Marking the
/// step as evaluated clears the flag, after which the same two-call pattern
/// must repeat.
#[test]
fn plan_step_replan_debounce() {
    /// Records two single tool calls on the current step, checking that only
    /// the second one flips `current_step_needs_replan` to `true`.
    fn second_call_requests_replan(plan: &mut LinearIntentPlan) {
        plan.record_tool_calls_on_current(1);
        assert!(!plan.current_step_needs_replan());
        plan.record_tool_calls_on_current(1);
        assert!(plan.current_step_needs_replan());
    }

    let explore_step = LinearIntentStep {
        step_id: "s1".into(),
        step_index: 1,
        tool: String::new(),
        target: String::new(),
        description: "Explore".into(),
        tool_calls_on_step: 0,
        completed: false,
        completion_evidence: None,
        last_evaluated_at: None,
    };
    let mut plan = LinearIntentPlan {
        plan_version: 1,
        steps: vec![explore_step],
        current_step_cursor: 0,
    };

    // A fresh step with no tool calls never asks for a replan.
    assert!(!plan.current_step_needs_replan());
    second_call_requests_replan(&mut plan);

    // Evaluating the step clears the request ...
    plan.mark_current_step_evaluated();
    assert!(!plan.current_step_needs_replan());

    // ... and two further calls are needed to raise it again.
    second_call_requests_replan(&mut plan);
}

/// Completing the current step with evidence must mark that step completed,
/// store the evidence on it, and move the cursor to the next step; the plan
/// only reports `all_steps_complete` once the second step is completed too.
#[test]
fn plan_complete_step_advances_cursor() {
    let explore_step = LinearIntentStep {
        step_id: "s1".into(),
        step_index: 1,
        tool: String::new(),
        target: String::new(),
        description: "Explore".into(),
        tool_calls_on_step: 3,
        completed: false,
        completion_evidence: None,
        last_evaluated_at: None,
    };
    let create_step = LinearIntentStep {
        step_id: "s2".into(),
        step_index: 2,
        tool: String::new(),
        target: String::new(),
        description: "Create".into(),
        tool_calls_on_step: 0,
        completed: false,
        completion_evidence: None,
        last_evaluated_at: None,
    };
    let mut plan = LinearIntentPlan {
        plan_version: 1,
        steps: vec![explore_step, create_step],
        current_step_cursor: 0,
    };

    // Finish the first step: cursor advances, evidence lands on step 0.
    plan.complete_current_step_with_evidence("Found 12 posts".into());
    assert_eq!(plan.current_step_cursor, 1);
    let finished_step = &plan.steps[0];
    assert!(finished_step.completed);
    assert_eq!(
        finished_step.completion_evidence.as_deref(),
        Some("Found 12 posts")
    );
    assert!(!plan.all_steps_complete());

    // Finish the second (last) step: the whole plan is now complete.
    plan.complete_current_step_with_evidence("Done".into());
    assert!(plan.all_steps_complete());
}

/// `format_with_progress` must tag the completed step with `[DONE]`, include
/// its completion evidence, and tag the step under the cursor with
/// `[CURRENT]`.
#[test]
fn plan_format_with_progress_shows_markers() {
    let finished_step = LinearIntentStep {
        step_id: "s1".into(),
        step_index: 1,
        tool: String::new(),
        target: String::new(),
        description: "Explore posts".into(),
        tool_calls_on_step: 3,
        completed: true,
        completion_evidence: Some("Found 12 posts".into()),
        last_evaluated_at: Some(2),
    };
    let active_step = LinearIntentStep {
        step_id: "s2".into(),
        step_index: 2,
        tool: String::new(),
        target: String::new(),
        description: "Create post 1".into(),
        tool_calls_on_step: 0,
        completed: false,
        completion_evidence: None,
        last_evaluated_at: None,
    };
    // The cursor points at the second step, so that one is the current step.
    let plan = LinearIntentPlan {
        plan_version: 1,
        steps: vec![finished_step, active_step],
        current_step_cursor: 1,
    };

    let rendered = plan.format_with_progress();

    assert!(rendered.contains("[DONE] Explore posts"));
    assert!(rendered.contains("Found 12 posts"));
    assert!(rendered.contains("[CURRENT] Create post 1"));
}