// aidaemon 0.9.35

use super::bootstrap_phase::{BootstrapCtx, BootstrapData, BootstrapOutcome};
use super::llm_phase::{LlmPhaseCtx, LlmPhaseOutcome};
use super::message_build_phase::{MessageBuildCtx, MessageBuildData};
use super::orchestration_phase::OrchestrationCtx;
use super::response_phase::{ResponsePhaseCtx, ResponsePhaseOutcome};
use super::stopping_phase::{StoppingPhaseCtx, StoppingPhaseOutcome};
use super::tool_execution_phase::{
    PendingReflectionRecovery, ToolErrorEntry, ToolExecutionCtx, ToolExecutionOutcome,
};
use super::tool_prelude_phase::{ToolPreludeCtx, ToolPreludeOutcome};
use super::*;

/// Check if a cancel keyword appears in text without a preceding negation.
///
/// Returns `false` when `keyword` is absent from `text` (as decided by
/// `contains_keyword_as_words`), or when any whitespace-delimited occurrence
/// of it has a negation marker among the up-to-three words before it, e.g.
/// "do not stop", "don't cancel", "never stop". Returns `true` otherwise.
///
/// Words are compared after lowercasing, trimming surrounding ASCII
/// punctuation (apostrophes are kept so contractions survive), and folding
/// typographic apostrophes (U+2018 / U+2019) to `'`. The folding matters
/// because smart-punctuation keyboards emit "don’t", which would otherwise
/// never equal the "don't" marker and the negation would be missed.
///
/// A multi-word keyword can never equal a single word, so it skips the
/// negation scan and is accepted whenever it is present.
fn cancel_keyword_not_negated(text: &str, keyword: &str) -> bool {
    if !contains_keyword_as_words(text, keyword) {
        return false;
    }

    // Canonical form used for every comparison below.
    let normalize = |raw: &str| -> String {
        raw.chars()
            .map(|c| match c {
                '\u{2018}' | '\u{2019}' => '\'',
                other => other.to_ascii_lowercase(),
            })
            .collect::<String>()
            .trim_matches(|c: char| c.is_ascii_punctuation() && c != '\'')
            .to_string()
    };

    // Normalize each word once up front instead of once per comparison.
    let words: Vec<String> = text.split_whitespace().map(normalize).collect();
    let kw_lower = keyword.to_ascii_lowercase();

    let is_negation = |word: &str| {
        matches!(
            word,
            "not" | "don't" | "dont" | "no" | "never" | "shouldn't" | "without"
        )
    };

    // A single negated occurrence is enough to reject the keyword.
    let negated = words.iter().enumerate().any(|(i, word)| {
        *word == kw_lower
            && words[i.saturating_sub(3)..i]
                .iter()
                .any(|prev| is_negation(prev.as_str()))
    });

    !negated
}

/// Derive the intent-gate decision for `user_text`, layering deterministic
/// cancel detection on top of whatever `infer_intent_gate` returns.
///
/// A message counts as a cancel request when it is the explicit `/cancel`
/// command (alone or followed by a space and more text), or when it contains a
/// non-negated cancel keyword. In that case `cancel_intent` is set to
/// `Some(true)` and `cancel_scope` to `"targeted"` if the message names a
/// goal/task/job/id-like target, otherwise `"generic"`.
fn infer_deterministic_orchestration_intent(user_text: &str) -> IntentGateDecision {
    // Bare verbs are only trusted in short messages; inside long task
    // descriptions they are almost always part of the instructions.
    const SINGLE_WORD_KEYWORDS: [&str; 3] = ["cancel", "stop", "abort"];
    // Multi-word phrases are unambiguous regardless of message length.
    const PHRASE_KEYWORDS: [&str; 4] = ["never mind", "nevermind", "forget it", "scratch that"];
    // Words that indicate the cancel is aimed at a particular item.
    const TARGET_MARKERS: [&str; 7] = [
        "goal",
        "task",
        "job",
        "this goal",
        "that goal",
        "specific",
        "id",
    ];
    // Messages shorter than this (byte length of the normalized text) are "short".
    const SHORT_MESSAGE_LIMIT: usize = 80;

    let mut decision = infer_intent_gate(user_text, "");
    let normalized = user_text.trim().to_ascii_lowercase();

    let is_slash_cancel = normalized == "/cancel" || normalized.starts_with("/cancel ");

    let keyword_hit = |kw: &str| cancel_keyword_not_negated(&normalized, kw);
    let is_short = normalized.len() < SHORT_MESSAGE_LIMIT;
    let mentions_cancel = (is_short && SINGLE_WORD_KEYWORDS.iter().any(|&kw| keyword_hit(kw)))
        || PHRASE_KEYWORDS.iter().any(|&kw| keyword_hit(kw));

    if !(is_slash_cancel || mentions_cancel) {
        return decision;
    }

    let names_target = TARGET_MARKERS
        .iter()
        .any(|&marker| contains_keyword_as_words(&normalized, marker));
    let scope = if names_target { "targeted" } else { "generic" };

    decision.cancel_intent = Some(true);
    decision.cancel_scope = Some(scope.to_string());
    decision
}

impl Agent {
    /// Run the agentic loop for a user message in the given session.
    /// Returns the final assistant text response.
    /// `heartbeat` is an optional atomic timestamp updated on each activity point.
    /// Channels pass `Some(heartbeat)` so the typing indicator can detect stalls;
    /// sub-agents, triggers, and tests pass `None`.
    pub(super) async fn handle_message_impl(
        &self,
        session_id: &str,
        user_text: &str,
        status_tx: Option<mpsc::Sender<StatusUpdate>>,
        user_role: UserRole,
        channel_ctx: ChannelContext,
        heartbeat: Option<Arc<AtomicU64>>,
    ) -> anyhow::Result<String> {
        touch_heartbeat(&heartbeat);
        info!(session_id, "handle_message_impl: starting bootstrap phase");

        let bootstrap_outcome = self
            .run_bootstrap_phase(&BootstrapCtx {
                session_id,
                user_text,
                status_tx: status_tx.clone(),
                user_role,
                channel_ctx: &channel_ctx,
            })
            .await?;
        let BootstrapData {
            task_id,
            emitter,
            mut learning_ctx,
            is_personal_memory_recall_turn,
            is_reaffirmation_challenge_turn,
            requests_external_verification,
            restrict_to_personal_memory_tools,
            active_skill_names,
            active_untrusted_external_reference_skills,
            restrict_untrusted_external_reference_tools,
            personal_memory_tool_call_cap,
            tools_allowed_for_user,
            mut available_capabilities,
            mut base_tool_defs,
            mut tool_defs,
            mut policy_bundle,
            llm_provider,
            llm_router,
            mut model,
            route_failsafe_active,
            system_prompt,
            pinned_memories,
            mut session_summary,
        } = match bootstrap_outcome {
            BootstrapOutcome::Return(result) => return result,
            BootstrapOutcome::Continue(data) => *data,
        };
        let turn_context = self
            .build_turn_context_from_recent_history(session_id, user_text)
            .await;
        let followup_mode = turn_context
            .followup_mode
            .map(|mode| mode.as_str())
            .unwrap_or("unknown");
        let turn_context_reasons: Vec<&'static str> = turn_context
            .reasons
            .iter()
            .map(|reason| reason.as_code())
            .collect();
        let mut completion_progress = CompletionProgress::new(&turn_context.completion_contract);
        let (execution_budget_tier, execution_budget_route, execution_budget) =
            select_initial_execution_budget(user_text, &turn_context, self.depth, self.role);
        #[cfg(test)]
        let execution_budget = self
            .execution_budget_override
            .clone()
            .unwrap_or(execution_budget);
        let mut execution_state = ExecutionState::new(
            execution_budget_tier,
            execution_budget.clone(),
            if self.depth > 0 || self.task_id.is_some() {
                ExecutionPersistence::Durable
            } else {
                ExecutionPersistence::Ephemeral
            },
        );
        execution_state.mark_persisted_now();
        self.emit_decision_point(
            &emitter,
            &task_id,
            0,
            DecisionType::ExecutionBudgetSelection,
            "Selected initial execution budget tier".to_string(),
            json!({
                "condition": "initial_execution_budget_selected",
                "budget_tier": execution_budget_tier,
                "route_kind": execution_budget_route,
                "budget": execution_budget,
                "persistence": execution_state.persistence,
                "execution_id": execution_state.execution_id,
            }),
        )
        .await;
        self.emit_decision_point(
            &emitter,
            &task_id,
            0,
            DecisionType::ExecutionStateSnapshot,
            "Initialized execution state snapshot".to_string(),
            json!({
                "condition": "execution_state_initialized",
                "execution_state": execution_state.clone(),
            }),
        )
        .await;
        info!(
            session_id,
            followup_mode,
            reasons = ?turn_context_reasons,
            primary_project_scope = ?turn_context.primary_project_scope,
            allow_multi_project_scope = turn_context.allow_multi_project_scope,
            "Turn context resolved"
        );
        // 3. Agentic loop — runs until natural completion or safety limits
        let task_start = Instant::now();
        let mut last_progress_summary = Instant::now();
        let mut iteration: usize = 0;
        let mut stall_count: usize = 0;
        let mut deferred_no_tool_streak: usize = 0;
        let mut deferred_no_tool_model_switches: usize = 0;
        let mut total_successful_tool_calls: usize = 0;
        let mut total_tool_calls_attempted: usize = 0;
        let mut task_tokens_used: u64 = 0;
        let mut tool_failure_count: HashMap<String, usize> = HashMap::new();
        let mut tool_failure_signatures: HashMap<(String, String), usize> = HashMap::new();
        let mut tool_transient_failure_count: HashMap<String, usize> = HashMap::new();
        let mut tool_cooldown_until_iteration: HashMap<String, usize> = HashMap::new();
        let mut tool_call_count: HashMap<String, usize> = HashMap::new();
        let mut personal_memory_tool_calls: usize = 0;
        let mut no_evidence_result_streak: usize = 0;
        let mut no_evidence_tools_seen: HashSet<String> = HashSet::new();
        let mut evidence_gain_count: usize = 0;
        let mut evidence_state = EvidenceState::default();
        let mut validation_state = ValidationState::default();

        // Task-start planning call: generate a structured plan before the main loop.
        // Skipped for conversational queries, short messages, and acknowledgments.
        // In tests, MockProvider silently intercepts planning calls (skip_planning_calls=true).
        {
            use super::bootstrap_phase::task_planning::{generate_task_plan, should_skip_planning};
            use crate::agent::execution_state::LinearIntentStep;
            if !should_skip_planning(
                &turn_context.completion_contract.task_kind,
                user_text,
                false,
            ) {
                // Build conversation context from recent messages for the planner.
                // This ensures the planner sees the same narrative as the main LLM,
                // preventing intent loss across multi-hop follow-ups.
                let planner_context = {
                    let mut ctx_parts = Vec::new();
                    if let Some(ref summary) = session_summary {
                        if !summary.summary.is_empty() {
                            ctx_parts.push(format!("[Session Summary] {}", summary.summary));
                        }
                    }
                    for msg in &turn_context.recent_messages {
                        let role = msg.get("role").and_then(|r| r.as_str()).unwrap_or("");
                        let content = msg.get("content").and_then(|c| c.as_str()).unwrap_or("");
                        if !content.is_empty() {
                            ctx_parts.push(format!(
                                "- {}: {}",
                                role.chars().next().unwrap_or('?').to_uppercase(),
                                content
                            ));
                        }
                    }
                    if ctx_parts.is_empty() {
                        None
                    } else {
                        Some(ctx_parts.join("\n"))
                    }
                };
                let plan_opt = if let Some(ref router) = llm_router {
                    generate_task_plan(
                        llm_provider.clone(),
                        router,
                        user_text,
                        planner_context.as_deref(),
                    )
                    .await
                } else {
                    None
                };
                if let Some(plan) = plan_opt {
                    let linear_steps: Vec<LinearIntentStep> = plan
                        .steps
                        .iter()
                        .enumerate()
                        .map(|(i, step)| LinearIntentStep {
                            step_id: format!("task-plan-step-{}", i + 1),
                            step_index: i + 1,
                            tool: step.tool_hint.clone().unwrap_or_default(),
                            target: String::new(),
                            description: step.description.clone(),
                            tool_calls_on_step: 0,
                            completed: false,
                            completion_evidence: None,
                            last_evaluated_at: None,
                        })
                        .collect();

                    let step_count = linear_steps.len();
                    execution_state.install_linear_intent_plan(1, linear_steps);

                    if !plan.success_criteria.is_empty() {
                        validation_state.set_plan(1, &plan.success_criteria);
                    }

                    execution_state.promote_budget_for_plan(step_count);

                    info!(
                        session_id,
                        goal = %plan.goal,
                        step_count,
                        "Task plan installed and budget evaluated"
                    );
                }
            }
        }

        // Track which error solutions were injected so we can credit them on recovery.
        let mut pending_error_solution_ids: Vec<i64> = Vec::new();
        let mut tool_error_history: HashMap<(String, String), Vec<ToolErrorEntry>> = HashMap::new();
        let mut reflection_completed: HashSet<(String, String)> = HashSet::new();
        let mut pending_reflection_recoveries: HashMap<String, PendingReflectionRecovery> =
            HashMap::new();
        // In-session error learning: track repeated failures by (tool, normalized error pattern).
        let mut tool_failure_patterns: HashMap<(String, String), usize> = HashMap::new();
        let mut last_tool_failure: Option<(String, String)> = None;
        let mut in_session_learned: HashSet<(String, String)> = HashSet::new();
        let mut unknown_tools: std::collections::HashSet<String> = std::collections::HashSet::new();
        let mut recent_tool_calls: VecDeque<u64> = VecDeque::with_capacity(RECENT_CALLS_WINDOW);
        // Tracks consecutive calls to the same tool name, plus the set of
        // unique argument hashes seen during the streak.  When every call in
        // the streak has unique args the agent is likely making progress (e.g.
        // running different terminal commands), so we only trigger the stall
        // guard when the ratio of unique args is low.
        let mut consecutive_same_tool: (String, usize) = (String::new(), 0);
        let mut consecutive_same_tool_arg_hashes: HashSet<u64> = HashSet::new();
        let mut soft_limit_warned = false;
        // Force-stop flag: when true, strip tools from next LLM call to force
        // a text response. Activated after too many tool calls without settling.
        let mut force_text_response = false;
        // Safety net: count consecutive iterations where force_text_response was
        // active.  After MAX_FORCE_TEXT_ITERATIONS the loop hard-returns the last
        // text regardless of completion/consultant analysis.
        let mut force_text_iterations: usize = 0;
        const MAX_FORCE_TEXT_ITERATIONS: usize = 3;
        let mut budget_warning_sent = false;
        let mut effective_task_budget = self.task_token_budget;
        let mut effective_daily_budget = self.daily_token_budget;
        let mut budget_extensions_count: usize = 0;
        const MAX_BUDGET_EXTENSIONS: usize = 3;
        const HARD_TOKEN_CAP: i64 = 2_000_000;
        const SCHEDULED_MAX_BUDGET_EXTENSIONS: usize = 12;
        const SCHEDULED_HARD_TOKEN_CAP: i64 = 20_000_000;
        let mut pending_system_messages: Vec<SystemDirective> = Vec::new();
        if route_failsafe_active {
            pending_system_messages.push(SystemDirective::RouteFailsafeActive);
        }
        let has_recent_tool_context = turn_context
            .recent_messages
            .iter()
            .any(|row| row.get("role").and_then(|v| v.as_str()) == Some("tool"));
        if looks_like_evidence_grounding_challenge(user_text)
            && (turn_context.followup_mode != Some(FollowupMode::NewTask)
                || has_recent_tool_context)
        {
            pending_system_messages.push(SystemDirective::EvidenceGroundingRequired);
        }
        // Track recent tool names for alternating pattern detection (A-B-A-B cycles)
        let mut recent_tool_names: VecDeque<String> = VecDeque::new();
        // Cache of last successful tool results (keyed by call hash).
        // When the repetitive redirect fires for read_file/search_files, we
        // replay the cached content so the model retains data lost to context
        // truncation instead of getting a generic "BLOCKED" message.
        let mut tool_result_cache: HashMap<u64, String> = HashMap::new();
        // Mid-loop adaptation and fallback expansion controls.
        let mut last_escalation_iteration: Option<usize> = None;
        let mut consecutive_clean_iterations: usize = 0;
        let mut fallback_expanded_once = false;
        // One-shot recovery for empty execution responses (no text + no tool calls).
        let mut empty_response_retry_used = false;
        let mut empty_response_retry_pending = false;
        let mut empty_response_retry_note: Option<String> = None;
        // Accumulated text from a truncated text response; prepended on next iteration.
        let mut truncated_text_prefix: Option<String> = None;
        // Counts consecutive LLM calls truncated with all tokens on thinking;
        // the next call uses escalating recovery (low → off → force text).
        let mut thinking_truncation_count: u8 = 0;
        // Cumulative ms lost to LLM provider timeouts (excluded from wall-clock budget).
        let mut provider_timeout_ms: u64 = 0;
        // Idempotency guard for send_file within a single task execution.
        let mut successful_send_file_keys: HashSet<String> = HashSet::new();
        // Inject cli_agent completion nudges at most once per phase
        // (consecutive cli_agent completions), then reset after a
        // successful non-cli_agent tool call.
        let mut cli_agent_boundary_injected = false;
        // Deterministic top-level acknowledgement when a tool detaches to background.
        let mut pending_background_ack: Option<String> = None;
        // Deterministic fallback acknowledgement when a successful external write
        // completes but the follow-up LLM summary stalls.
        let mut pending_external_action_ack: Option<String> = None;
        // Track identity-attack prefill so we can prepend it to the final reply.
        let mut identity_prefill_text: Option<String> = None;
        // Best-effort project directory hint (seeded from user text, refined by tool calls).
        let mut known_project_dir = turn_context.primary_project_scope.clone().or_else(|| {
            super::tool_execution_phase::extract_project_dir_hint_with_aliases(
                user_text,
                &self.path_aliases.projects,
            )
        });
        // Cross-iteration directory evidence tracking for contradiction detection.
        let mut dirs_with_project_inspect_file_evidence: HashSet<String> = HashSet::new();
        let mut dirs_with_search_no_matches: HashSet<String> = HashSet::new();
        // When true, the assistant must run at least one file re-check before finalizing text.
        let mut require_file_recheck_before_answer = false;
        // Deterministic tool-required state is driven by the request itself, not
        // by route-drift fail-safe mode. Fail-safe can force a stronger model and
        // stricter routing posture without turning plain-text tasks into
        // pseudo-execution tasks.
        let mut needs_tools_for_turn = infer_intent_gate(user_text, "")
            .needs_tools
            .unwrap_or(false);

        // Determine iteration limit behavior
        let (mut hard_cap, mut soft_threshold, mut soft_warn_at) = match &self.iteration_config {
            IterationLimitConfig::Unlimited => (Some(HARD_ITERATION_CAP), None, None),
            IterationLimitConfig::Soft { threshold, warn_at } => {
                (Some(HARD_ITERATION_CAP), Some(*threshold), Some(*warn_at))
            }
            IterationLimitConfig::Hard { initial: _, cap } => (Some(*cap), None, None),
        };

        // Resolve goal_id once for per-goal token budget enforcement.
        // Executors currently carry only task_id, so we may need to lookup goal_id via task.
        let resolved_goal_id: Option<String> = if let Some(gid) = self.goal_id.clone() {
            Some(gid)
        } else if let Some(ref tid) = self.task_id {
            match self.state.get_task(tid).await {
                Ok(Some(task)) => Some(task.goal_id),
                Ok(None) => {
                    warn!(
                        session_id,
                        task_id = %tid,
                        "Task not found while resolving goal_id; goal budget enforcement disabled for this run"
                    );
                    None
                }
                Err(e) => {
                    warn!(
                        session_id,
                        task_id = %tid,
                        error = %e,
                        "Failed to resolve goal_id from task; goal budget enforcement disabled for this run"
                    );
                    None
                }
            }
        } else {
            None
        };
        let is_scheduled_goal = if let Some(goal_id) = resolved_goal_id.as_deref() {
            goal_has_scheduled_provenance(&self.state, goal_id, self.task_id.as_deref()).await
        } else {
            false
        };
        let is_root_scheduled_run = if self.task_id.is_none() {
            is_scheduled_goal
        } else {
            task_has_scheduled_provenance(&self.state, self.task_id.as_deref()).await
        };
        let scheduled_goal_budget_per_check = if let Some(goal_id) = resolved_goal_id.as_deref() {
            self.state
                .get_goal(goal_id)
                .await
                .ok()
                .flatten()
                .and_then(|g| g.budget_per_check)
        } else {
            None
        };
        let active_scheduled_root_task_id = if let Some(goal_id) = resolved_goal_id.as_deref() {
            if is_scheduled_goal {
                active_scheduled_root_task_id(&self.state, goal_id).await
            } else {
                None
            }
        } else {
            None
        };
        if is_scheduled_goal {
            hard_cap = None;
            soft_threshold = None;
            soft_warn_at = None;
            if let Some(registry) = self.goal_token_registry.as_ref() {
                if let Some(goal_id) = resolved_goal_id.as_deref() {
                    if is_root_scheduled_run {
                        let persisted_state = self
                            .state
                            .get_scheduled_run_state(goal_id)
                            .await
                            .ok()
                            .flatten();
                        let restored = if let Some(state) = persisted_state.as_ref() {
                            if Some(state.root_task_id.as_str())
                                == active_scheduled_root_task_id.as_deref()
                            {
                                registry
                                    .restore_run_budget(
                                        goal_id,
                                        state.effective_budget_per_check,
                                        state.tokens_used,
                                        state.budget_extensions_count,
                                        state.health.clone(),
                                    )
                                    .await
                            } else {
                                None
                            }
                        } else {
                            None
                        };
                        if restored.is_none() {
                            registry
                                .start_run_budget(goal_id, scheduled_goal_budget_per_check)
                                .await;
                            if let Some(status) = registry.get_run_budget(goal_id).await {
                                persist_scheduled_run_state(
                                    &self.state,
                                    goal_id,
                                    active_scheduled_root_task_id.as_deref(),
                                    &status,
                                )
                                .await;
                            } else {
                                clear_scheduled_run_state(&self.state, goal_id).await;
                            }
                        }
                    } else if registry.get_run_budget(goal_id).await.is_none() {
                        if let Some(state) = self
                            .state
                            .get_scheduled_run_state(goal_id)
                            .await
                            .ok()
                            .flatten()
                        {
                            let _ = registry
                                .restore_run_budget(
                                    goal_id,
                                    state.effective_budget_per_check,
                                    state.tokens_used,
                                    state.budget_extensions_count,
                                    state.health.clone(),
                                )
                                .await;
                        } else {
                            registry
                                .start_run_budget(goal_id, scheduled_goal_budget_per_check)
                                .await;
                            if let Some(status) = registry.get_run_budget(goal_id).await {
                                persist_scheduled_run_state(
                                    &self.state,
                                    goal_id,
                                    active_scheduled_root_task_id.as_deref(),
                                    &status,
                                )
                                .await;
                            }
                        }
                    }
                }
            }
            if let Some(per_check_budget) =
                scheduled_goal_budget_per_check.and_then(|v| u64::try_from(v).ok())
            {
                effective_task_budget = Some(
                    effective_task_budget
                        .map(|budget| budget.max(per_check_budget))
                        .unwrap_or(per_check_budget),
                );
            }
        }
        let effective_task_timeout = if is_scheduled_goal {
            None
        } else {
            self.task_timeout
        };
        let max_budget_extensions = if is_scheduled_goal {
            SCHEDULED_MAX_BUDGET_EXTENSIONS
        } else {
            MAX_BUDGET_EXTENSIONS
        };
        let hard_token_cap = if is_scheduled_goal {
            SCHEDULED_HARD_TOKEN_CAP
        } else {
            HARD_TOKEN_CAP
        };
        // Runtime-only override for goal daily budget extensions.
        // Shared via GoalTokenRegistry so task-leads/executors for the same goal
        // can inherit the same temporary budget without persisting it to SQLite.
        let mut effective_goal_daily_budget: Option<i64> = if let (Some(goal_id), Some(registry)) = (
            resolved_goal_id.as_deref(),
            self.goal_token_registry.as_ref(),
        ) {
            registry.get_effective_daily_budget(goal_id).await
        } else {
            None
        };

        loop {
            iteration += 1;
            touch_heartbeat(&heartbeat);

            // Check for cancellation (cascades via token hierarchy)
            if let Some(ref ct) = self.cancel_token {
                if ct.is_cancelled() {
                    info!(session_id, iteration, "Task cancelled by parent");
                    self.emit_decision_point(
                        &emitter,
                        &task_id,
                        iteration,
                        DecisionType::StoppingCondition,
                        "Stopping condition fired: cancellation token set".to_string(),
                        json!({"condition":"cancelled"}),
                    )
                    .await;

                    // Mark remaining tasks as cancelled.
                    if let Some(ref gid) = self.goal_id {
                        if let Ok(tasks) = self.state.get_tasks_for_goal(gid).await {
                            for task in &tasks {
                                if task.status != "completed"
                                    && task.status != "failed"
                                    && task.status != "cancelled"
                                {
                                    let mut ct = task.clone();
                                    ct.status = "cancelled".to_string();
                                    let _ = self.state.update_task(&ct).await;
                                }
                            }
                        }
                    }

                    let cancel_reply = "Task cancelled.".to_string();
                    let assistant_msg = Message {
                        id: Uuid::new_v4().to_string(),
                        session_id: session_id.to_string(),
                        role: "assistant".to_string(),
                        content: Some(cancel_reply.clone()),
                        tool_call_id: None,
                        tool_name: None,
                        tool_calls_json: None,
                        created_at: Utc::now(),
                        importance: 0.3,
                        ..Message::runtime_defaults()
                    };
                    let _ = self
                        .append_assistant_message_with_event(
                            &emitter,
                            &assistant_msg,
                            "system",
                            None,
                            None,
                        )
                        .await;

                    self.emit_task_end(
                        &emitter,
                        &task_id,
                        TaskStatus::Cancelled,
                        task_start,
                        iteration,
                        0,
                        None,
                        Some(cancel_reply.clone()),
                    )
                    .await;
                    return Ok(cancel_reply);
                }
            }

            // Safety net: if force-text mode has been active for too many
            // consecutive iterations, hard-return whatever the LLM last produced.
            // This prevents infinite force-text loops where the response/completion
            // phase keeps deciding to continue despite having no tools.
            if force_text_response {
                force_text_iterations += 1;
                if force_text_iterations > MAX_FORCE_TEXT_ITERATIONS {
                    warn!(
                        session_id,
                        iteration,
                        force_text_iterations,
                        "Force-text safety net: exceeded max consecutive force-text iterations, hard-stopping"
                    );
                    let fallback = self
                        .latest_non_system_tool_output_excerpt(session_id, 2000)
                        .await
                        .unwrap_or_else(|| {
                            "I ran into a processing limit. Please try again or rephrase your request.".to_string()
                        });
                    let assistant_msg = Message {
                        id: Uuid::new_v4().to_string(),
                        session_id: session_id.to_string(),
                        role: "assistant".to_string(),
                        content: Some(fallback.clone()),
                        tool_call_id: None,
                        tool_name: None,
                        tool_calls_json: None,
                        created_at: Utc::now(),
                        importance: 0.3,
                        ..Message::runtime_defaults()
                    };
                    let _ = self
                        .append_assistant_message_with_event(
                            &emitter,
                            &assistant_msg,
                            "force_text_safety_net",
                            None,
                            None,
                        )
                        .await;
                    self.emit_task_end(
                        &emitter,
                        &task_id,
                        TaskStatus::Completed,
                        task_start,
                        iteration,
                        task_tokens_used as usize,
                        None,
                        Some(fallback.clone()),
                    )
                    .await;
                    return Ok(fallback);
                }
            } else {
                force_text_iterations = 0;
            }

            info!(
                iteration,
                session_id,
                model = %model,
                depth = self.depth,
                policy_profile = ?policy_bundle.policy.model_profile,
                verify_level = ?policy_bundle.policy.verify_level,
                approval_mode = ?policy_bundle.policy.approval_mode,
                context_budget = policy_bundle.policy.context_budget,
                tool_budget = policy_bundle.policy.tool_budget,
                policy_rev = policy_bundle.policy.policy_rev,
                risk_score = policy_bundle.risk_score,
                uncertainty_score = policy_bundle.uncertainty_score,
                "Agent loop iteration"
            );

            // Emit ThinkingStart event
            let _ = emitter
                .emit(
                    EventType::ThinkingStart,
                    ThinkingStartData {
                        iteration: iteration as u32,
                        task_id: task_id.clone(),
                        total_tool_calls: learning_ctx.tool_calls.len() as u32,
                    },
                )
                .await;

            let stopping_outcome = self
                .run_stopping_phase(&mut StoppingPhaseCtx {
                    emitter: &emitter,
                    task_id: &task_id,
                    session_id,
                    iteration,
                    task_start,
                    learning_ctx: &mut learning_ctx,
                    hard_cap,
                    effective_task_timeout,
                    task_tokens_used,
                    effective_task_budget: &mut effective_task_budget,
                    budget_warning_sent: &mut budget_warning_sent,
                    pending_system_messages: &mut pending_system_messages,
                    budget_extensions_count: &mut budget_extensions_count,
                    user_role,
                    evidence_gain_count,
                    stall_count,
                    deferred_no_tool_streak,
                    consecutive_same_tool: &consecutive_same_tool,
                    consecutive_same_tool_arg_hashes: &consecutive_same_tool_arg_hashes,
                    total_successful_tool_calls,
                    pending_background_ack: &mut pending_background_ack,
                    status_tx: &status_tx,
                    resolved_goal_id: &resolved_goal_id,
                    is_scheduled_goal,
                    effective_daily_budget: &mut effective_daily_budget,
                    effective_goal_daily_budget: &mut effective_goal_daily_budget,
                    successful_send_file_keys: &successful_send_file_keys,
                    model: &mut model,
                    soft_threshold,
                    soft_warn_at,
                    soft_limit_warned: &mut soft_limit_warned,
                    last_progress_summary: &mut last_progress_summary,
                    tool_failure_count: &tool_failure_count,
                    session_summary: &mut session_summary,
                    policy_bundle: &mut policy_bundle,
                    user_text,
                    available_capabilities: &available_capabilities,
                    llm_router: &llm_router,
                    last_escalation_iteration: &mut last_escalation_iteration,
                    consecutive_clean_iterations: &mut consecutive_clean_iterations,
                    max_budget_extensions,
                    hard_token_cap,
                    execution_state: &mut execution_state,
                    force_text_response: &mut force_text_response,
                    completion_progress: &mut completion_progress,
                    turn_context: &turn_context,
                    validation_state: &mut validation_state,
                })
                .await?;
            match stopping_outcome {
                StoppingPhaseOutcome::ContinueLoop => continue,
                StoppingPhaseOutcome::Return(result) => return result,
                StoppingPhaseOutcome::Proceed => {}
            }

            // Deterministic control-plane routing on iteration 1:
            // handle cancel/schedule/complex intents before the first LLM call.
            if iteration == 1
                && self.depth == 0
                && self.role == AgentRole::Orchestrator
                && !route_failsafe_active
            {
                let intent_gate = infer_deterministic_orchestration_intent(user_text);
                if let Some(outcome) = self
                    .run_orchestration_phase(&mut OrchestrationCtx {
                        emitter: &emitter,
                        task_id: &task_id,
                        session_id,
                        user_text,
                        iteration,
                        task_start,
                        task_tokens_used,
                        pending_system_messages: &mut pending_system_messages,
                        tool_defs: &mut tool_defs,
                        base_tool_defs: &mut base_tool_defs,
                        available_capabilities: &mut available_capabilities,
                        policy_bundle: &mut policy_bundle,
                        tools_allowed_for_user,
                        restrict_to_personal_memory_tools,
                        llm_provider: llm_provider.clone(),
                        llm_router: llm_router.clone(),
                        model: &model,
                        user_role,
                        channel_ctx: channel_ctx.clone(),
                        status_tx: status_tx.clone(),
                        intent_gate: &intent_gate,
                        turn_context: &turn_context,
                    })
                    .await?
                {
                    match outcome {
                        ResponsePhaseOutcome::ContinueLoop => continue,
                        ResponsePhaseOutcome::Return(result) => return result,
                        ResponsePhaseOutcome::ProceedToToolExecution => {}
                    }
                }
            }

            // Inject task plan context with progress markers into the model's context.
            if let Some(ref plan) = execution_state.active_linear_intent_plan {
                if !plan.steps.is_empty() {
                    let plan_text = plan.format_with_progress();
                    pending_system_messages.push(SystemDirective::TaskPlanContext(plan_text));
                }
            }

            // Compaction: on the first iteration, detect whether the conversation
            // history needs compacting. IdleGap and FileUpload triggers run
            // synchronously (user just arrived or uploaded a file — latency is
            // acceptable). WindowOverflow runs asynchronously in the background
            // so it doesn't add 15s latency to every message once the conversation
            // exceeds the window size. The aging pair stays in the sliding window
            // until the background compaction completes.
            if iteration == 1 && self.context_window_config.enabled {
                // Count user-message pairs and compute idle gap.
                let history = self
                    .state
                    .get_history(session_id, 100)
                    .await
                    .unwrap_or_default();
                let total_pairs = history.iter().filter(|m| m.role == "user").count();
                let idle_gap_seconds = history
                    .last()
                    .map(|m| {
                        let now = Utc::now();
                        now.signed_duration_since(m.created_at).num_seconds().max(0) as u64
                    })
                    .unwrap_or(0);

                let window_size = self.context_window_config.summary_window;
                let compaction_trigger = super::compaction::detect_compaction_trigger(
                    total_pairs,
                    window_size,
                    idle_gap_seconds,
                    user_text,
                );

                if let Some(ref trigger) = compaction_trigger {
                    info!(
                        session_id,
                        ?trigger,
                        total_pairs,
                        idle_gap_seconds,
                        window_size,
                        "Compaction trigger detected"
                    );

                    // Convert history messages to JSON Value array for the compaction prompt.
                    let messages_to_compact: Vec<Value> = history
                        .iter()
                        .map(|m| {
                            let mut msg = json!({ "role": m.role });
                            if let Some(ref content) = m.content {
                                msg["content"] = json!(content);
                            }
                            if let Some(ref name) = m.tool_name {
                                msg["name"] = json!(name);
                            }
                            msg
                        })
                        .collect();

                    let last_message_id = history.last().map(|m| m.id.as_str()).unwrap_or("");

                    match trigger {
                        super::compaction::CompactionTrigger::WindowOverflow { .. } => {
                            // Async — don't block the user's response. The aging
                            // pair is already in the sliding window (we haven't
                            // removed it). On the next turn the summary's
                            // last_message_id will cover it.
                            let provider = llm_provider.clone();
                            let compaction_model = model.clone();
                            let state = self.state.clone();
                            let sid = session_id.to_string();
                            let summary_clone = session_summary.clone();
                            let msgs = messages_to_compact.clone();
                            let msg_count = total_pairs;
                            let last_id = last_message_id.to_string();
                            tokio::spawn(async move {
                                if let Err(e) = super::compaction::run_and_store_compaction(
                                    provider,
                                    &compaction_model,
                                    state.as_ref(),
                                    &sid,
                                    summary_clone,
                                    &msgs,
                                    msg_count,
                                    &last_id,
                                )
                                .await
                                {
                                    warn!(session_id = %sid, error = %e, "Background compaction failed");
                                }
                            });
                            info!(
                                session_id,
                                "Background compaction spawned for window overflow"
                            );
                        }
                        _ => {
                            // IdleGap / FileUpload — run synchronously (15s timeout).
                            match tokio::time::timeout(
                                Duration::from_secs(15),
                                super::compaction::run_and_store_compaction(
                                    llm_provider.clone(),
                                    &model,
                                    self.state.as_ref(),
                                    session_id,
                                    session_summary.clone(),
                                    &messages_to_compact,
                                    total_pairs,
                                    last_message_id,
                                ),
                            )
                            .await
                            {
                                Ok(Ok(())) => {
                                    // Reload the summary so message build uses the fresh version.
                                    session_summary = self
                                        .state
                                        .get_conversation_summary(session_id)
                                        .await
                                        .ok()
                                        .flatten();
                                    info!(session_id, "Synchronous compaction completed");
                                }
                                Ok(Err(e)) => {
                                    warn!(session_id, error = %e, "Synchronous compaction failed");
                                }
                                Err(_) => {
                                    warn!(session_id, "Synchronous compaction timed out (15s)");
                                }
                            }
                        }
                    }
                }
            }

            let MessageBuildData { mut messages } = self
                .run_message_build_phase(&mut MessageBuildCtx {
                    session_id,
                    iteration,
                    user_text,
                    completed_tool_calls: &learning_ctx.tool_calls,
                    model: &model,
                    system_prompt: &system_prompt,
                    pinned_memories: &pinned_memories,
                    tool_defs: &tool_defs,
                    policy_bundle: &policy_bundle,
                    session_summary: &session_summary,
                    pending_system_messages: &mut pending_system_messages,
                    empty_response_retry_pending,
                    status_tx: &status_tx,
                })
                .await?;

            let llm_outcome = self
                .run_llm_phase(&mut LlmPhaseCtx {
                    messages: &mut messages,
                    emitter: &emitter,
                    task_id: &task_id,
                    session_id,
                    user_text,
                    iteration,
                    force_text_response,
                    task_start,
                    task_tokens_used: &mut task_tokens_used,
                    learning_ctx: &mut learning_ctx,
                    pending_system_messages: &mut pending_system_messages,
                    llm_provider: llm_provider.clone(),
                    llm_router: llm_router.clone(),
                    model: &model,
                    user_role,
                    tool_defs: &tool_defs,
                    status_tx: &status_tx,
                    resolved_goal_id: &resolved_goal_id,
                    is_scheduled_goal,
                    effective_goal_daily_budget: &mut effective_goal_daily_budget,
                    budget_extensions_count: &mut budget_extensions_count,
                    evidence_gain_count,
                    stall_count: &mut stall_count,
                    consecutive_same_tool: &consecutive_same_tool,
                    consecutive_same_tool_arg_hashes: &consecutive_same_tool_arg_hashes,
                    total_successful_tool_calls,
                    pending_external_action_ack: &mut pending_external_action_ack,
                    heartbeat: &heartbeat,
                    empty_response_retry_pending: &mut empty_response_retry_pending,
                    empty_response_retry_note: &mut empty_response_retry_note,
                    identity_prefill_text: &mut identity_prefill_text,
                    deferred_no_tool_streak,
                    tools_required_for_turn: needs_tools_for_turn,
                    max_budget_extensions,
                    hard_token_cap,
                    truncated_text_prefix: &mut truncated_text_prefix,
                    provider_timeout_ms: &mut provider_timeout_ms,
                    thinking_truncation_count: &mut thinking_truncation_count,
                })
                .await?;
            let mut resp = match llm_outcome {
                LlmPhaseOutcome::ContinueLoop => {
                    if execution_state.execution_budget_applies() {
                        execution_state.record_llm_call();
                    }
                    // Propagate accumulated timeout to execution state so
                    // wall-clock budget excludes provider-caused delays.
                    execution_state.provider_timeout_ms = provider_timeout_ms;
                    continue;
                }
                LlmPhaseOutcome::Return(result) => {
                    if execution_state.execution_budget_applies() {
                        execution_state.record_llm_call();
                    }
                    return result;
                }
                LlmPhaseOutcome::Proceed(resp) => resp,
            };

            let response_outcome = self
                .run_response_phase(&mut ResponsePhaseCtx {
                    resp: &mut resp,
                    emitter: &emitter,
                    task_id: &task_id,
                    session_id,
                    user_text,
                    iteration,
                    task_start,
                    task_tokens_used,
                    learning_ctx: &mut learning_ctx,
                    pending_system_messages: &mut pending_system_messages,
                    tool_defs: &mut tool_defs,
                    base_tool_defs: &mut base_tool_defs,
                    available_capabilities: &mut available_capabilities,
                    policy_bundle: &mut policy_bundle,
                    tools_allowed_for_user,
                    restrict_to_personal_memory_tools,
                    is_personal_memory_recall_turn,
                    is_reaffirmation_challenge_turn,
                    requests_external_verification,
                    llm_provider: llm_provider.clone(),
                    llm_router: llm_router.clone(),
                    model: &mut model,
                    user_role,
                    channel_ctx: channel_ctx.clone(),
                    status_tx: status_tx.clone(),
                    total_successful_tool_calls,
                    stall_count: &mut stall_count,
                    consecutive_clean_iterations: &mut consecutive_clean_iterations,
                    deferred_no_tool_streak: &mut deferred_no_tool_streak,
                    deferred_no_tool_model_switches: &mut deferred_no_tool_model_switches,
                    fallback_expanded_once: &mut fallback_expanded_once,
                    empty_response_retry_used: &mut empty_response_retry_used,
                    empty_response_retry_pending: &mut empty_response_retry_pending,
                    empty_response_retry_note: &mut empty_response_retry_note,
                    identity_prefill_text: &mut identity_prefill_text,
                    pending_background_ack: &mut pending_background_ack,
                    pending_external_action_ack: &mut pending_external_action_ack,
                    require_file_recheck_before_answer: &mut require_file_recheck_before_answer,
                    completion_progress: &mut completion_progress,
                    turn_context: &turn_context,
                    needs_tools_for_turn: &mut needs_tools_for_turn,
                    force_text_response: &mut force_text_response,
                    execution_state: &mut execution_state,
                    validation_state: &mut validation_state,
                })
                .await?;
            match response_outcome {
                ResponsePhaseOutcome::ContinueLoop => {
                    if execution_state.execution_budget_applies() {
                        execution_state.record_llm_call();
                    }
                    continue;
                }
                ResponsePhaseOutcome::Return(result) => {
                    if execution_state.execution_budget_applies() {
                        execution_state.record_llm_call();
                    }
                    return result;
                }
                ResponsePhaseOutcome::ProceedToToolExecution => {
                    if !resp.tool_calls.is_empty() && !execution_state.execution_budget_applies() {
                        execution_state
                            .activate_budget_envelope(task_tokens_used, task_start.elapsed());
                    }
                    if !resp.tool_calls.is_empty() || execution_state.execution_budget_applies() {
                        execution_state.record_llm_call();
                    }
                }
            }
            // === EXECUTE TOOL CALLS ===
            let tool_prelude_outcome = self
                .run_tool_prelude_phase(&mut ToolPreludeCtx {
                    resp: &resp,
                    emitter: &emitter,
                    task_id: &task_id,
                    session_id,
                    model: &model,
                    llm_provider: llm_provider.clone(),
                    iteration,
                    task_start,
                    learning_ctx: &mut learning_ctx,
                    evidence_state: &evidence_state,
                    user_text,
                    policy_bundle: &policy_bundle,
                    available_capabilities: &available_capabilities,
                    execution_state: &mut execution_state,
                    validation_state: &mut validation_state,
                    pending_system_messages: &mut pending_system_messages,
                    force_text_response: &mut force_text_response,
                    turn_context: &turn_context,
                })
                .await?;
            match tool_prelude_outcome {
                ToolPreludeOutcome::ContinueLoop => continue,
                ToolPreludeOutcome::Return(result) => return result,
                ToolPreludeOutcome::Proceed => {}
            }

            // Capture baseline for tracking tool calls per plan step
            let tool_calls_before_execution = learning_ctx.tool_calls.len();

            let tool_execution_outcome = self
                .run_tool_execution_phase(&mut ToolExecutionCtx {
                    resp: &resp,
                    emitter: &emitter,
                    task_id: &task_id,
                    session_id,
                    iteration,
                    task_start,
                    learning_ctx: &mut learning_ctx,
                    task_tokens_used,
                    user_text,
                    restrict_to_personal_memory_tools,
                    active_skill_names: &active_skill_names,
                    active_untrusted_external_reference_skills:
                        &active_untrusted_external_reference_skills,
                    restrict_untrusted_external_reference_tools,
                    is_reaffirmation_challenge_turn,
                    personal_memory_tool_call_cap,
                    base_tool_defs: &base_tool_defs,
                    available_capabilities: &available_capabilities,
                    policy_bundle: &policy_bundle,
                    status_tx: status_tx.clone(),
                    channel_ctx: &channel_ctx,
                    user_role,
                    heartbeat: &heartbeat,
                    tool_defs: &mut tool_defs,
                    total_tool_calls_attempted: &mut total_tool_calls_attempted,
                    total_successful_tool_calls: &mut total_successful_tool_calls,
                    tool_failure_count: &mut tool_failure_count,
                    tool_failure_signatures: &mut tool_failure_signatures,
                    tool_transient_failure_count: &mut tool_transient_failure_count,
                    tool_cooldown_until_iteration: &mut tool_cooldown_until_iteration,
                    tool_call_count: &mut tool_call_count,
                    personal_memory_tool_calls: &mut personal_memory_tool_calls,
                    no_evidence_result_streak: &mut no_evidence_result_streak,
                    no_evidence_tools_seen: &mut no_evidence_tools_seen,
                    evidence_gain_count: &mut evidence_gain_count,
                    pending_error_solution_ids: &mut pending_error_solution_ids,
                    tool_error_history: &mut tool_error_history,
                    reflection_completed: &mut reflection_completed,
                    pending_reflection_recoveries: &mut pending_reflection_recoveries,
                    tool_failure_patterns: &mut tool_failure_patterns,
                    last_tool_failure: &mut last_tool_failure,
                    in_session_learned: &mut in_session_learned,
                    unknown_tools: &mut unknown_tools,
                    recent_tool_calls: &mut recent_tool_calls,
                    consecutive_same_tool: &mut consecutive_same_tool,
                    consecutive_same_tool_arg_hashes: &mut consecutive_same_tool_arg_hashes,
                    force_text_response: &mut force_text_response,
                    pending_system_messages: &mut pending_system_messages,
                    recent_tool_names: &mut recent_tool_names,
                    successful_send_file_keys: &mut successful_send_file_keys,
                    cli_agent_boundary_injected: &mut cli_agent_boundary_injected,
                    evidence_state: &mut evidence_state,
                    pending_background_ack: &mut pending_background_ack,
                    pending_external_action_ack: &mut pending_external_action_ack,
                    stall_count: &mut stall_count,
                    deferred_no_tool_streak: &mut deferred_no_tool_streak,
                    consecutive_clean_iterations: &mut consecutive_clean_iterations,
                    fallback_expanded_once: &mut fallback_expanded_once,
                    known_project_dir: &mut known_project_dir,
                    dirs_with_project_inspect_file_evidence:
                        &mut dirs_with_project_inspect_file_evidence,
                    dirs_with_search_no_matches: &mut dirs_with_search_no_matches,
                    require_file_recheck_before_answer: &mut require_file_recheck_before_answer,
                    completion_progress: &mut completion_progress,
                    turn_context: &turn_context,
                    resolved_goal_id: resolved_goal_id.as_deref(),
                    tool_result_cache: &mut tool_result_cache,
                    execution_state: &mut execution_state,
                    validation_state: &mut validation_state,
                })
                .await?;
            match tool_execution_outcome {
                ToolExecutionOutcome::Return(result) => return result,
                ToolExecutionOutcome::NextIteration => {}
            }

            // Re-planner: track tool calls on current plan step and evaluate
            // whether the step is complete. Uses delta of learning_ctx.tool_calls
            // to count ALL calls this round (including failures).
            {
                let tool_calls_this_round = learning_ctx
                    .tool_calls
                    .len()
                    .saturating_sub(tool_calls_before_execution);
                if let Some(ref mut plan) = execution_state.active_linear_intent_plan {
                    plan.record_tool_calls_on_current(tool_calls_this_round);
                }
                if let Some(ref mut plan) = execution_state.active_linear_intent_plan {
                    if plan.current_step_needs_replan() {
                        plan.mark_current_step_evaluated();
                        if let Some(step) = plan.steps.get(plan.current_step_cursor).cloned() {
                            use super::bootstrap_phase::task_planning::{
                                evaluate_step_completion, summarize_tool_calls_for_replan,
                            };
                            let tool_summary =
                                summarize_tool_calls_for_replan(&learning_ctx.tool_calls, 8);
                            if let Some(ref router) = llm_router {
                                if let Some(evidence) = evaluate_step_completion(
                                    llm_provider.clone(),
                                    router,
                                    &step.description,
                                    &tool_summary,
                                )
                                .await
                                {
                                    if let Some(ref mut plan) =
                                        execution_state.active_linear_intent_plan
                                    {
                                        plan.complete_current_step_with_evidence(evidence);
                                        info!(
                                            session_id,
                                            completed_step = plan.current_step_cursor - 1,
                                            "Re-planner advanced plan to next step"
                                        );
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}

// Test-only module: compiled only under `cfg(test)`. Its body is not inline;
// the `#[path]` attribute points the compiler at `characterization_tests.rs`
// instead of the default file name derived from the module name.
#[cfg(test)]
#[path = "characterization_tests.rs"]
mod characterization_tests;

#[cfg(test)]
mod cancel_intent_tests {
    //! Unit tests for cancel-intent detection: the negation-aware keyword
    //! check and the deterministic orchestration intent gate built on it.

    use super::*;

    /// Runs the deterministic intent gate on `text` and reports its
    /// `cancel_intent` flag, treating a missing value as `false`.
    fn gate_flags_cancel(text: &str) -> bool {
        infer_deterministic_orchestration_intent(text)
            .cancel_intent
            .unwrap_or(false)
    }

    #[test]
    fn negated_stop_not_detected() {
        // A "do not" right before the keyword must suppress the cancel match.
        let text = "do not stop until every test passes";
        let detected = cancel_keyword_not_negated(text, "stop");
        assert!(!detected);
    }

    #[test]
    fn negated_cancel_not_detected() {
        let detected = cancel_keyword_not_negated("don't cancel anything", "cancel");
        assert!(!detected);
    }

    #[test]
    fn bare_stop_detected() {
        let detected = cancel_keyword_not_negated("stop", "stop");
        assert!(detected);
    }

    #[test]
    fn bare_cancel_detected() {
        let detected = cancel_keyword_not_negated("cancel everything", "cancel");
        assert!(detected);
    }

    #[test]
    fn never_stop_not_detected() {
        let detected = cancel_keyword_not_negated("never stop working", "stop");
        assert!(!detected);
    }

    #[test]
    fn long_message_with_stop_no_cancel() {
        // A task description that merely contains a negated "stop" is not a cancel.
        let text = "write a script that does X and Y. do not stop until every test passes.";
        assert!(!gate_flags_cancel(text));
    }

    #[test]
    fn short_stop_message_triggers_cancel() {
        assert!(gate_flags_cancel("stop"));
    }

    #[test]
    fn short_cancel_message_triggers_cancel() {
        assert!(gate_flags_cancel("cancel all"));
    }

    #[test]
    fn never_mind_in_long_message() {
        // Multi-word cancel phrases are expected to be recognised even inside
        // a longer message, since they are treated as unambiguous.
        let text =
            "actually never mind about that whole thing I asked earlier, let me think about it";
        assert!(gate_flags_cancel(text));
    }
}