// roboticus-agent 0.11.4

//! Task Operating State — introspection-driven state synthesis.
//!
//! This module defines the types and synthesis rules for building a
//! [`TaskOperatingState`] from raw subsystem outputs ([`TaskStateInput`]).
//!
//! ## Design Invariants
//!
//! - `roboticus-agent` owns all synthesis rules. `roboticus-api` supplies inputs only.
//! - No duplicated truth between crates — if both compute "fit," that's a bug.
//! - Memory has two roles:
//!   - **Conversation**: ambient enrichment (passive, automatic)
//!   - **Task**: operational input (active, introspective)
//!   - `InspectMemory` applies to task turns only. Conversational turns get
//!     passive memory via the existing context builder.

use serde::Serialize;

use crate::retrieval::RetrievalMetrics;
use crate::tool_search::ToolSearchStats;

// ── Input: raw facts from subsystems ─────────────────────────────────

/// Raw facts assembled by `roboticus-api` from pipeline subsystems.
/// This is the sole input to [`synthesize`] — the API layer collects
/// these values but does NOT interpret them.
///
/// NOTE(review): the synthesis functions in this module do not read
/// `authority`, `decomposition_proposal`, `named_tool_match`,
/// `declared_action`, `previous_turn_had_protocol_issues`, or
/// `normalization_retry_streak`; presumably those are consumed by the
/// planner elsewhere — confirm against callers.
#[derive(Debug, Clone)]
pub struct TaskStateInput {
    /// The user's message text. Scanned (ASCII-lowercased) for echoed
    /// fragments and for declared action verbs during synthesis.
    pub user_content: String,
    /// Serialized intent names (from IntentRegistry). Compared
    /// ASCII-case-insensitively against the task-intent list when the turn
    /// is classified.
    pub intents: Vec<String>,
    /// Serialized InputAuthority variant name.
    pub authority: String,

    // ── From retrieval subsystem ──
    /// Retrieval metrics for this turn; `None` yields a zeroed
    /// [`MemoryConfidence`] with no recall gap.
    pub retrieval_metrics: Option<RetrievalMetrics>,

    // ── From tool search subsystem ──
    /// Tool search statistics for this turn; `None` yields zero counts in
    /// [`ToolFit`].
    pub tool_search_stats: Option<ToolSearchStats>,
    /// Whether any MCP-sourced tools are available.
    pub mcp_tools_available: bool,

    // ── From roster query ──
    /// Enabled, non-proxy subagents.
    pub taskable_agent_count: usize,
    /// Agents whose skills overlap with the task.
    pub fit_agent_count: usize,
    /// Names of fitting agents.
    pub fit_agent_names: Vec<String>,

    // ── From skill registry ──
    /// Total enabled skills.
    pub enabled_skill_count: usize,
    /// Skills whose triggers match the user input.
    pub matching_skill_count: usize,
    /// Skills that would be useful but are not registered.
    pub missing_skills: Vec<String>,

    // ── From runtime state ──
    /// Tokens remaining after system prompt.
    pub remaining_budget_tokens: usize,
    /// Whether any LLM provider circuit breaker is open.
    pub provider_breaker_open: bool,
    /// "standard" or "streaming".
    pub inference_mode: String,

    // ── From decomposition gate (scored input, NOT authority) ──
    /// Output of the decomposition gate, if it produced one.
    pub decomposition_proposal: Option<DecompositionProposal>,
    /// Whether the user explicitly requested specialist/delegation workflow.
    /// When true the turn is always classified as a task.
    pub explicit_specialist_workflow: bool,
    /// Whether the user's message references an existing registered tool by name.
    /// When true, the named capability already exists in the tool registry and will
    /// be available during inference — the planner should NOT propose specialist
    /// creation for it.
    pub named_tool_match: bool,

    // ── Behavioral history (from recent turn memory) ──
    /// Structural fingerprints of the last N assistant responses.
    /// Each string is a compact skeleton like "narrative+question+options".
    pub recent_response_skeletons: Vec<String>,
    /// Word counts of the last N user messages (engagement signal).
    pub recent_user_message_lengths: Vec<usize>,
    /// Notable phrases extracted from recent assistant turns (see
    /// [`extract_echo_fragments`], which draws them from sentences of 8+ words).
    /// Synthesis checks whether the current user message contains any of them.
    pub self_echo_fragments: Vec<String>,
    /// A declared physical action found in the user's input
    /// (verb + target pattern, e.g. "I attack the goblin"), if any.
    ///
    /// NOTE(review): `assess_declared_action` re-derives the action from
    /// `user_content` and does not read this field.
    pub declared_action: Option<DeclaredAction>,
    /// Whether the previous assistant turn contained protocol normalization
    /// issues (malformed tool calls, narrated next steps, unexecuted streaming
    /// markers). When true, the planner may select NormalizationRetry.
    pub previous_turn_had_protocol_issues: bool,
    /// Number of consecutive turns with protocol normalization issues.
    /// Zero when `previous_turn_had_protocol_issues` is false; allows the
    /// planner to escalate retry strategy for persistent normalization failures.
    pub normalization_retry_streak: u8,
}

/// A user-declared action that must be resolved, not redirected.
///
/// When produced by this module's synthesis, both fields come from the
/// ASCII-lowercased user message.
#[derive(Debug, Clone, Serialize)]
pub struct DeclaredAction {
    /// The action verb ("attack", "stab", "grab", "throw", etc.).
    pub verb: String,
    /// The target of the action ("the goblin", "the door", "the gem").
    pub target: String,
}

/// Decomposition gate output, used as one scored signal by the planner.
///
/// NOTE(review): the gate that fills these fields is not defined in this
/// module; the field notes below are inferred from names — confirm against
/// the gate implementation.
#[derive(Debug, Clone, Serialize)]
pub struct DecompositionProposal {
    /// Presumably the gate's recommendation to delegate the task.
    pub should_delegate: bool,
    /// Presumably a human-readable explanation of the recommendation.
    pub rationale: String,
    /// Presumably the scored margin behind the recommendation (scale unknown here).
    pub utility_margin: f64,
}

// ── Operating State: synthesized assessment ──────────────────────────

/// Classification of the turn as task-oriented or conversational.
///
/// A turn is classified as [`TaskClassification::Task`] when the user
/// explicitly requested a specialist workflow or when any supplied intent
/// matches the task-intent list; otherwise it is a conversation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum TaskClassification {
    /// Casual exchange — memory is ambient enrichment.
    Conversation,
    /// Goal-directed work — memory is operational input.
    Task,
}

/// Memory retrieval confidence assessment.
///
/// All numeric fields are copied from the retrieval metrics; when no
/// metrics were supplied every field is zero/empty and `recall_gap` is false.
#[derive(Debug, Clone, Serialize)]
pub struct MemoryConfidence {
    /// Average cosine similarity across retrieved memories.
    pub avg_similarity: f64,
    /// Fraction of context budget consumed by memory tokens.
    pub budget_utilization: f64,
    /// Total memories retrieved.
    pub retrieval_count: usize,
    /// Whether any tier returned zero results despite budget availability.
    /// Set when at least one tier is empty and `budget_utilization` is below 0.8.
    pub recall_gap: bool,
    /// Names of tiers that returned no results, drawn from "working",
    /// "episodic", "semantic", "procedural", "relationship" (in that order).
    pub empty_tiers: Vec<String>,
}

/// Runtime and storage constraints affecting action selection.
#[derive(Debug, Clone, Serialize)]
pub struct RuntimeConstraints {
    /// Tokens remaining after system prompt.
    pub remaining_budget_tokens: usize,
    /// Whether the token budget is under pressure. Synthesis sets this with
    /// an absolute threshold: true when fewer than 2000 tokens remain (it is
    /// not computed as a percentage of the budget).
    pub budget_pressured: bool,
    /// Whether any LLM provider circuit breaker is open.
    pub provider_breaker_open: bool,
    /// Current inference mode.
    pub inference_mode: String,
}

/// Tool availability and relevance assessment.
///
/// When no tool search statistics were supplied, all counts are zero.
#[derive(Debug, Clone, Serialize)]
pub struct ToolFit {
    /// Total tools available after search/pruning (the number of selected
    /// candidates).
    pub available_count: usize,
    /// Estimated number of highly relevant tools. Synthesis does not inspect
    /// per-tool scores; it approximates this as
    /// `min(candidates_selected, candidates_considered / 3)`.
    pub high_relevance_count: usize,
    /// Tokens saved by pruning low-relevance tools.
    pub token_savings: usize,
    /// Whether MCP-sourced tools are available.
    pub mcp_available: bool,
}

/// Subagent roster fit assessment.
///
/// Every field is copied straight from the roster facts in the input;
/// synthesis applies no additional rules here.
#[derive(Debug, Clone, Serialize)]
pub struct RosterFit {
    /// Total enabled, non-proxy agents.
    pub taskable_count: usize,
    /// Agents with capability overlap for this task.
    pub fit_count: usize,
    /// Names of fitting agents.
    pub fit_names: Vec<String>,
    /// Whether the user explicitly requested specialist/delegation workflow.
    pub explicit_workflow: bool,
}

/// Skill fit assessment.
///
/// Every field is copied straight from the skill-registry facts in the
/// input; synthesis applies no additional rules here.
#[derive(Debug, Clone, Serialize)]
pub struct SkillFit {
    /// Total enabled skills.
    pub enabled_count: usize,
    /// Skills whose triggers match the user input.
    pub matching_count: usize,
    /// Skills that would be useful but are not registered.
    pub missing_skills: Vec<String>,
}

/// Behavioral self-assessment based on recent output history.
#[derive(Debug, Clone, Serialize)]
pub struct BehavioralHistory {
    /// Whether the agent has been producing structurally identical responses
    /// (the most recent 3 or more skeletons are identical).
    pub structural_repetition: bool,
    /// Number of consecutive trailing responses with the same skeleton when
    /// that run is 3 or longer. Otherwise a sentinel: 0 when fewer than three
    /// skeletons were supplied, 1 when three or more were supplied but the
    /// trailing run is shorter than 3.
    pub repetition_streak: usize,
    /// The repeated skeleton pattern (if any).
    pub repeated_pattern: Option<String>,
    /// Whether user engagement is declining: the last three user messages
    /// are strictly shrinking and the latest is under 30 words.
    pub engagement_declining: bool,
    /// Proportion of recent assistant fragments present in the current user message
    /// (0.0 = none, 1.0 = all fragments matched). A value above 0.3 suggests the
    /// user is echoing back the agent's own language — possible confusion signal.
    pub self_echo_risk: f32,
    /// The first matched echo fragment, if any.
    pub echo_fragment: Option<String>,
    /// A context hint to inject when structural repetition, echo, or declining
    /// engagement is detected (checked in that priority order; only one hint
    /// is produced).
    pub variation_hint: Option<String>,
}

/// Assessment of whether the user declared a specific action.
///
/// `action_declared` and `action` move together: synthesis sets
/// `action_declared` to true exactly when `action` is `Some`.
#[derive(Debug, Clone, Serialize)]
pub struct DeclaredActionState {
    /// Whether the user declared a physical/concrete action (an action verb
    /// followed by a non-empty target).
    pub action_declared: bool,
    /// The declared action details (if any).
    pub action: Option<DeclaredAction>,
    /// Whether the action has significant consequences that should be surfaced.
    /// Determined solely by the verb belonging to a fixed high-consequence list.
    pub high_consequence: bool,
}

/// Full introspection-driven operating state for a turn.
///
/// Built by [`synthesize`] from a [`TaskStateInput`]. This struct captures
/// the agent's assessment of its own readiness — memory quality, tool
/// availability, roster fit, runtime constraints, and behavioral history.
#[derive(Debug, Clone, Serialize)]
pub struct TaskOperatingState {
    /// Task vs. conversation classification of the turn.
    pub classification: TaskClassification,
    /// Retrieval quality and recall-gap assessment.
    pub memory_confidence: MemoryConfidence,
    /// Token budget, provider breaker, and inference-mode constraints.
    pub runtime_constraints: RuntimeConstraints,
    /// Tool availability after search/pruning.
    pub tool_fit: ToolFit,
    /// Subagent roster fit.
    pub roster_fit: RosterFit,
    /// Skill registry fit.
    pub skill_fit: SkillFit,
    /// Repetition, echo, and engagement signals from recent turns.
    pub behavioral_history: BehavioralHistory,
    /// Declared-action detection result for the current user message.
    pub declared_action: DeclaredActionState,
}

// ── Synthesis: TaskStateInput -> TaskOperatingState ──────────────────

/// Task-like intent names. These intents signal goal-directed work
/// rather than casual conversation.
///
/// Entries must stay lowercase ASCII: `classify_turn` ASCII-lowercases the
/// supplied intent names before comparing them with this list, so an entry
/// containing uppercase letters would never match.
const TASK_INTENTS: &[&str] = &[
    "execution",
    "delegation",
    "cron",
    "filedistribution",
    "folderscan",
    "randomtooluse",
    "currentevents",
    "walletaddressscan",
    "imagecountscan",
    "markdowncountscan",
    "obsidianinsights",
    "emailtriage",
];

/// Build the [`TaskOperatingState`] for a turn from raw subsystem facts.
///
/// This is the **single source of truth** for operating state derivation:
/// the rules live here in `roboticus-agent`, not in `roboticus-api`.
/// Each facet of the state is produced by its own assessor, and every
/// assessor reads only from `input`, so the facets are independent of
/// one another.
pub fn synthesize(input: &TaskStateInput) -> TaskOperatingState {
    TaskOperatingState {
        classification: classify_turn(input),
        memory_confidence: assess_memory(input),
        runtime_constraints: assess_runtime(input),
        tool_fit: assess_tools(input),
        roster_fit: assess_roster(input),
        skill_fit: assess_skills(input),
        behavioral_history: assess_behavioral_history(input),
        declared_action: assess_declared_action(input),
    }
}

/// Classify the turn as task-oriented or conversational.
///
/// A turn is a task when the user explicitly asked for a specialist
/// workflow, or when any supplied intent name matches an entry of
/// `TASK_INTENTS` ignoring ASCII case. Everything else is conversation.
fn classify_turn(input: &TaskStateInput) -> TaskClassification {
    // TASK_INTENTS entries are lowercase ASCII, so an ASCII-case-insensitive
    // comparison is equivalent to lowercasing the intent first.
    let has_task_intent = || {
        input.intents.iter().any(|intent| {
            TASK_INTENTS
                .iter()
                .any(|task| intent.eq_ignore_ascii_case(task))
        })
    };

    if input.explicit_specialist_workflow || has_task_intent() {
        TaskClassification::Task
    } else {
        TaskClassification::Conversation
    }
}

/// Summarize retrieval quality for the turn.
///
/// Without retrieval metrics the result is all zeros with no recall gap.
/// Otherwise the numeric fields are copied from the metrics, every memory
/// tier with a zero count is listed by name, and a recall gap is reported
/// when at least one tier is empty while budget utilization is below 0.8.
fn assess_memory(input: &TaskStateInput) -> MemoryConfidence {
    let metrics = match input.retrieval_metrics.as_ref() {
        Some(metrics) => metrics,
        None => {
            return MemoryConfidence {
                avg_similarity: 0.0,
                budget_utilization: 0.0,
                retrieval_count: 0,
                recall_gap: false,
                empty_tiers: Vec::new(),
            };
        }
    };

    // Tier name paired with "returned nothing"; order fixes the output order.
    let tiers = &metrics.tiers;
    let tier_emptiness = [
        ("working", tiers.working == 0),
        ("episodic", tiers.episodic == 0),
        ("semantic", tiers.semantic == 0),
        ("procedural", tiers.procedural == 0),
        ("relationship", tiers.relationship == 0),
    ];
    let empty_tiers: Vec<String> = tier_emptiness
        .iter()
        .filter(|(_, is_empty)| *is_empty)
        .map(|(name, _)| (*name).to_string())
        .collect();

    MemoryConfidence {
        avg_similarity: metrics.avg_similarity,
        budget_utilization: metrics.budget_utilization,
        retrieval_count: metrics.retrieval_count,
        recall_gap: !empty_tiers.is_empty() && metrics.budget_utilization < 0.8,
        empty_tiers,
    }
}

/// Capture runtime constraints for the turn.
///
/// The budget counts as pressured when fewer than 2000 tokens remain; the
/// other fields are passed through from the input unchanged.
fn assess_runtime(input: &TaskStateInput) -> RuntimeConstraints {
    /// Remaining-token count below which the budget is considered pressured.
    const PRESSURE_FLOOR_TOKENS: usize = 2000;

    let remaining = input.remaining_budget_tokens;
    let inference_mode = input.inference_mode.clone();

    RuntimeConstraints {
        remaining_budget_tokens: remaining,
        budget_pressured: remaining < PRESSURE_FLOOR_TOKENS,
        provider_breaker_open: input.provider_breaker_open,
        inference_mode,
    }
}

/// Summarize tool availability for the turn.
///
/// With no search statistics all counts are zero. Otherwise the selected
/// candidate count becomes the available count, and the high-relevance
/// count is an estimate: the selected count capped at one third (integer
/// division) of the candidates considered. MCP availability is passed
/// through in both cases.
fn assess_tools(input: &TaskStateInput) -> ToolFit {
    let (available_count, high_relevance_count, token_savings) = input
        .tool_search_stats
        .as_ref()
        .map_or((0, 0, 0), |stats| {
            let relevance_cap = stats.candidates_considered / 3;
            (
                stats.candidates_selected,
                stats.candidates_selected.min(relevance_cap),
                stats.token_savings,
            )
        });

    ToolFit {
        available_count,
        high_relevance_count,
        token_savings,
        mcp_available: input.mcp_tools_available,
    }
}

fn assess_roster(input: &TaskStateInput) -> RosterFit {
    RosterFit {
        taskable_count: input.taskable_agent_count,
        fit_count: input.fit_agent_count,
        fit_names: input.fit_agent_names.clone(),
        explicit_workflow: input.explicit_specialist_workflow,
    }
}

fn assess_skills(input: &TaskStateInput) -> SkillFit {
    SkillFit {
        enabled_count: input.enabled_skill_count,
        matching_count: input.matching_skill_count,
        missing_skills: input.missing_skills.clone(),
    }
}

/// Derive the [`BehavioralHistory`] facet from recent-turn signals.
///
/// Three independent signals are computed:
/// - **Structural repetition** — the trailing run of identical response
///   skeletons is 3 or longer.
/// - **Declining engagement** — the last three user message lengths are
///   strictly decreasing and the latest is under 30 words.
/// - **Self-echo** — the share of supplied fragments that occur
///   (ASCII-case-insensitively) in the current user message.
///
/// At most one variation hint is produced, in the priority order
/// repetition → echo → engagement.
fn assess_behavioral_history(input: &TaskStateInput) -> BehavioralHistory {
    // Structural repetition. The streak is the real run length only once it
    // reaches 3; below that it is a sentinel (1 with enough history, 0 without).
    let history = &input.recent_response_skeletons;
    let (repetition_streak, repeated_pattern) = match history.last() {
        Some(latest) if history.len() >= 3 => {
            let run = history.iter().rev().take_while(|s| *s == latest).count();
            if run >= 3 {
                (run, Some(latest.clone()))
            } else {
                (1, None)
            }
        }
        _ => (0, None),
    };
    let structural_repetition = repetition_streak >= 3;

    // Declining engagement: only the three most recent lengths matter.
    let engagement_declining = match input.recent_user_message_lengths.as_slice() {
        [.., older, middle, newest] => older > middle && middle > newest && *newest < 30,
        _ => false,
    };

    // Self-echo: which fragments appear in the user's message, in input order.
    let haystack = input.user_content.to_ascii_lowercase();
    let fragments = &input.self_echo_fragments;
    let echoed: Vec<&String> = fragments
        .iter()
        .filter(|frag| haystack.contains(frag.to_ascii_lowercase().as_str()))
        .collect();
    let self_echo_risk = if fragments.is_empty() {
        0.0_f32
    } else {
        echoed.len() as f32 / fragments.len() as f32
    };
    let echo_fragment = echoed.first().copied().cloned();

    let variation_hint = if structural_repetition {
        Some(format!(
            "Your last {} responses followed the same structure ({}). \
             Vary your response format — try a different approach.",
            repetition_streak,
            repeated_pattern.as_deref().unwrap_or("unknown")
        ))
    } else if self_echo_risk > 0.0 {
        Some(format!(
            "The user's message contains a phrase you recently used: \"{}\". \
             Avoid repeating your own prior phrasing — use fresh language.",
            echo_fragment.as_deref().unwrap_or("")
        ))
    } else {
        engagement_declining.then(|| {
            "The user's messages are getting shorter and more directive. \
             This may indicate your responses aren't meeting their needs. \
             Consider changing your approach."
                .to_string()
        })
    };

    BehavioralHistory {
        structural_repetition,
        repetition_streak,
        repeated_pattern,
        engagement_declining,
        self_echo_risk,
        echo_fragment,
        variation_hint,
    }
}

/// Canonical list of physical/mechanical action verbs indicating the user is
/// declaring an action that must be resolved, not a conversational request.
/// Shared between `task_state::assess_declared_action` and the
/// `DeclaredActionGuard` in `guard_registry.rs`.
///
/// Entries are lowercase; order is grouped by category for readability only.
// Formatting is pinned so each category comment stays attached to its own row.
#[rustfmt::skip]
pub const ACTION_VERBS: &[&str] = &[
    // Combat
    "attack", "stab", "slash", "hit", "strike", "punch", "kick", "shoot", "throw",
    // Object manipulation
    "grab", "pull", "push", "break", "lock", "unlock",
    // Movement
    "climb", "jump", "swim", "fly",
    // Equipment / casting
    "cast", "equip", "draw", "sheathe",
    // Social (declarative, not conversational)
    "shout", "whisper",
    // Stealth / theft
    "hide", "sneak", "steal",
    // Defensive / evasive
    "dodge", "block", "flee", "charge",
    // Physical force
    "tackle", "wrestle", "drag", "cut", "smash", "crush", "burn", "freeze", "lift", "shove",
];

/// Detect a declared physical action ("I attack the goblin") in the user's
/// message.
///
/// The message is ASCII-lowercased and split on whitespace. The first word
/// that, stripped of surrounding non-alphabetic characters, appears in
/// [`ACTION_VERBS`] is taken as the verb; later verbs are not considered.
/// The target is built from up to six following words, stopping after the
/// first word that ends a sentence (`.`, `!`, `?`). That closing word is
/// kept with its sentence punctuation removed, so "I attack the goblin."
/// yields the target "the goblin" rather than losing the final word.
///
/// An action is reported only when both a verb and a non-empty target are
/// found. `high_consequence` is true when the verb is on a fixed list of
/// damaging verbs.
fn assess_declared_action(input: &TaskStateInput) -> DeclaredActionState {
    /// Verbs whose outcome should be surfaced as significant.
    /// "kill" is not in `ACTION_VERBS`, so it cannot currently match; it is
    /// kept so the list stays correct if the verb list grows.
    const HIGH_CONSEQUENCE_VERBS: &[&str] = &[
        "attack", "stab", "slash", "strike", "punch", "kick", "shoot", "kill", "cast", "burn",
        "crush", "smash",
    ];
    /// Maximum number of words after the verb considered for the target.
    const MAX_TARGET_WORDS: usize = 6;
    /// Characters that close a sentence and therefore end the target.
    const SENTENCE_END: &[char] = &['.', '!', '?'];

    let not_declared = || DeclaredActionState {
        action_declared: false,
        action: None,
        high_consequence: false,
    };

    let content_lower = input.user_content.to_ascii_lowercase();
    let words: Vec<&str> = content_lower.split_whitespace().collect();

    // Locate the first action verb, ignoring punctuation glued to the word.
    let mut found: Option<(usize, &str)> = None;
    for (i, word) in words.iter().enumerate() {
        let clean = word.trim_matches(|c: char| !c.is_alphabetic());
        if ACTION_VERBS.contains(&clean) {
            found = Some((i, clean));
            break;
        }
    }
    let (verb_idx, verb) = match found {
        Some(hit) => hit,
        None => return not_declared(),
    };

    // Collect the target up to and including the word that ends the sentence.
    let mut target_words: Vec<&str> = Vec::with_capacity(MAX_TARGET_WORDS);
    for word in words[verb_idx + 1..].iter().take(MAX_TARGET_WORDS) {
        let ends_sentence = word.ends_with(SENTENCE_END);
        let bare = word.trim_end_matches(SENTENCE_END);
        if !bare.is_empty() {
            target_words.push(bare);
        }
        if ends_sentence {
            break;
        }
    }
    if target_words.is_empty() {
        return not_declared();
    }

    DeclaredActionState {
        action_declared: true,
        // Simple heuristic: damaging verbs are treated as high-consequence.
        high_consequence: HIGH_CONSEQUENCE_VERBS.contains(&verb),
        action: Some(DeclaredAction {
            verb: verb.to_string(),
            target: target_words.join(" "),
        }),
    }
}

// ── Output history helpers (called by roboticus-api to build TaskStateInput) ──

/// Derive a compact structural skeleton label from an assistant response.
///
/// This is a best-effort heuristic fingerprint — the goal is to detect when
/// the same *pattern* keeps appearing (e.g., always ending with a question,
/// always using an options list), not to perfectly classify prose.
///
/// The label is a `+`-joined sequence built in this fixed order:
/// 1. `dialogue` if the text contains a straight or curly double quote,
///    otherwise `narrative` (exactly one of the two is always present);
/// 2. `list` if three or more lines start with `-` or `*` (after leading
///    whitespace), else `options` if the text contains `"1."`, `"- "` or
///    `"* "` anywhere;
/// 3. `question` if the text, ignoring trailing whitespace, ends with `?`;
/// 4. `pause` if the text contains `...` or `…`.
///
/// Because the first component is always present, the result is never empty;
/// plain prose (including the empty string) yields `"narrative"`.
pub fn response_skeleton(text: &str) -> String {
    let bullet_lines = text
        .lines()
        .map(str::trim_start)
        .filter(|line| line.starts_with('-') || line.starts_with('*'))
        .count();
    let has_list = bullet_lines >= 3;
    let has_options = text.contains("1.") || text.contains("- ") || text.contains("* ");
    let has_dialogue = text.contains('"') || text.contains('\u{201c}') || text.contains('\u{201d}');
    let has_question = text.trim_end().ends_with('?');
    let has_pause = text.contains("...") || text.contains('\u{2026}');

    let mut parts: Vec<&str> = Vec::with_capacity(4);
    parts.push(if has_dialogue { "dialogue" } else { "narrative" });
    if has_list {
        parts.push("list");
    } else if has_options {
        parts.push("options");
    }
    if has_question {
        parts.push("question");
    }
    if has_pause {
        parts.push("pause");
    }
    parts.join("+")
}

/// Extract notable phrases from assistant response text.
///
/// These are used as echo-detection fragments so the agent can avoid
/// reusing its own prior phrasing verbatim in subsequent turns.
///
/// The text is split into sentences on `.`, `!`, `?` and newlines. Each
/// sentence with at least 8 whitespace-separated words contributes one
/// fragment: a window of up to 12 words that starts a quarter of the way
/// into the sentence (skipping trivial openers) and is clipped at the
/// sentence end, so a fragment can be shorter than 8 words. Fragments are
/// single-space-joined and ASCII-lowercased. At most 5 fragments are
/// returned, in sentence order.
pub fn extract_echo_fragments(text: &str) -> Vec<String> {
    /// Sentences shorter than this contribute no fragment.
    const MIN_SENTENCE_WORDS: usize = 8;
    /// Upper bound on the number of words in one fragment.
    const MAX_WINDOW_WORDS: usize = 12;
    /// Cap on fragments per response to limit prompt bloat.
    const MAX_FRAGMENTS: usize = 5;

    let mut fragments: Vec<String> = Vec::new();
    for sentence in text.split(['.', '!', '?', '\n']) {
        if fragments.len() == MAX_FRAGMENTS {
            break;
        }
        let tokens: Vec<&str> = sentence.split_whitespace().collect();
        if tokens.len() < MIN_SENTENCE_WORDS {
            continue;
        }
        let from = tokens.len() / 4;
        let to = tokens.len().min(from + MAX_WINDOW_WORDS);
        fragments.push(tokens[from..to].join(" ").to_ascii_lowercase());
    }
    fragments
}

// ── Tests ────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    fn base_input() -> TaskStateInput {
        TaskStateInput {
            user_content: "test message".into(),
            intents: vec![],
            authority: "SelfGenerated".into(),
            retrieval_metrics: None,
            tool_search_stats: None,
            mcp_tools_available: false,
            taskable_agent_count: 0,
            fit_agent_count: 0,
            fit_agent_names: vec![],
            enabled_skill_count: 0,
            matching_skill_count: 0,
            missing_skills: vec![],
            remaining_budget_tokens: 8000,
            provider_breaker_open: false,
            inference_mode: "standard".into(),
            decomposition_proposal: None,
            explicit_specialist_workflow: false,
            named_tool_match: false,
            recent_response_skeletons: vec![],
            recent_user_message_lengths: vec![],
            self_echo_fragments: vec![],
            declared_action: None,
            previous_turn_had_protocol_issues: false,
            normalization_retry_streak: 0,
        }
    }

    #[test]
    fn conversation_classification_with_no_task_intents() {
        let input = base_input();
        let state = synthesize(&input);
        assert_eq!(state.classification, TaskClassification::Conversation);
    }

    #[test]
    fn structural_repetition_detected_after_3_identical_skeletons() {
        let mut input = base_input();
        input.recent_response_skeletons = vec![
            "narrative+question+options".into(),
            "narrative+question+options".into(),
            "narrative+question+options".into(),
        ];
        let state = synthesize(&input);
        assert!(state.behavioral_history.structural_repetition);
        assert_eq!(state.behavioral_history.repetition_streak, 3);
        assert!(state.behavioral_history.variation_hint.is_some());
    }

    #[test]
    fn no_repetition_with_varied_skeletons() {
        let mut input = base_input();
        input.recent_response_skeletons = vec![
            "narrative+question+options".into(),
            "dialogue+pause".into(),
            "narrative+question+options".into(),
        ];
        let state = synthesize(&input);
        assert!(!state.behavioral_history.structural_repetition);
    }

    #[test]
    fn engagement_declining_detected() {
        let mut input = base_input();
        input.recent_user_message_lengths = vec![50, 30, 10];
        let state = synthesize(&input);
        assert!(state.behavioral_history.engagement_declining);
    }

    #[test]
    fn self_echo_risk_detected_when_user_repeats_agent_phrase() {
        let mut input = base_input();
        input.user_content = "You said the ancient stone door is the only way forward".into();
        input.self_echo_fragments = vec!["the ancient stone door is the only way forward".into()];
        let state = synthesize(&input);
        assert!(state.behavioral_history.self_echo_risk > 0.0);
        assert!(state.behavioral_history.echo_fragment.is_some());
        assert!(state.behavioral_history.variation_hint.is_some());
    }

    #[test]
    fn self_echo_risk_not_detected_without_match() {
        let mut input = base_input();
        input.user_content = "What happens next?".into();
        input.self_echo_fragments = vec!["the ancient stone door is the only way forward".into()];
        let state = synthesize(&input);
        assert_eq!(state.behavioral_history.self_echo_risk, 0.0);
        assert!(state.behavioral_history.echo_fragment.is_none());
    }

    #[test]
    fn structural_repetition_takes_priority_over_echo_in_variation_hint() {
        let mut input = base_input();
        input.recent_response_skeletons = vec![
            "narrative+question+options".into(),
            "narrative+question+options".into(),
            "narrative+question+options".into(),
        ];
        input.user_content = "the ancient stone door is the only way forward right?".into();
        input.self_echo_fragments = vec!["the ancient stone door is the only way forward".into()];
        let state = synthesize(&input);
        // Both conditions true, but structural repetition hint wins
        assert!(state.behavioral_history.structural_repetition);
        assert!(state.behavioral_history.self_echo_risk > 0.0);
        let hint = state.behavioral_history.variation_hint.unwrap();
        assert!(hint.contains("structure"));
    }

    #[test]
    fn declared_action_detected_for_combat_verb() {
        let mut input = base_input();
        input.user_content = "I attack the goblin with my sword".into();
        let state = synthesize(&input);
        assert!(state.declared_action.action_declared);
        assert_eq!(
            state.declared_action.action.as_ref().unwrap().verb,
            "attack"
        );
        assert!(state.declared_action.high_consequence);
    }

    #[test]
    fn declared_action_not_detected_for_question() {
        let mut input = base_input();
        input.user_content = "What can I see in the room?".into();
        let state = synthesize(&input);
        assert!(!state.declared_action.action_declared);
    }

    #[test]
    fn task_classification_with_execution_intent() {
        let mut input = base_input();
        input.intents = vec!["Execution".into()];
        let state = synthesize(&input);
        assert_eq!(state.classification, TaskClassification::Task);
    }

    #[test]
    fn task_classification_with_explicit_workflow() {
        let mut input = base_input();
        input.explicit_specialist_workflow = true;
        let state = synthesize(&input);
        assert_eq!(state.classification, TaskClassification::Task);
    }

    #[test]
    fn memory_confidence_with_no_retrieval() {
        let input = base_input();
        let state = synthesize(&input);
        assert_eq!(state.memory_confidence.retrieval_count, 0);
        assert!(!state.memory_confidence.recall_gap);
    }

    #[test]
    fn memory_confidence_detects_recall_gap() {
        let mut input = base_input();
        input.retrieval_metrics = Some(RetrievalMetrics {
            retrieval_count: 2,
            retrieval_hit: true,
            avg_similarity: 0.4,
            budget_utilization: 0.3,
            tiers: crate::retrieval::MemoryTierBreakdown {
                working: 1,
                episodic: 1,
                semantic: 0,
                procedural: 0,
                relationship: 0,
            },
        });
        let state = synthesize(&input);
        assert!(state.memory_confidence.recall_gap);
        assert_eq!(state.memory_confidence.empty_tiers.len(), 3);
    }

    #[test]
    fn runtime_pressure_at_low_budget() {
        let mut input = base_input();
        input.remaining_budget_tokens = 1500;
        let state = synthesize(&input);
        assert!(state.runtime_constraints.budget_pressured);
    }

    #[test]
    fn runtime_no_pressure_at_normal_budget() {
        let input = base_input();
        let state = synthesize(&input);
        assert!(!state.runtime_constraints.budget_pressured);
    }

    #[test]
    fn tool_fit_from_search_stats() {
        let mut input = base_input();
        input.tool_search_stats = Some(ToolSearchStats {
            candidates_considered: 30,
            candidates_selected: 12,
            candidates_pruned: 18,
            token_savings: 4500,
            top_scores: vec![],
            embedding_status: "ok".into(),
        });
        input.mcp_tools_available = true;
        let state = synthesize(&input);
        assert_eq!(state.tool_fit.available_count, 12);
        assert_eq!(state.tool_fit.token_savings, 4500);
        assert!(state.tool_fit.mcp_available);
    }

    #[test]
    fn roster_fit_reflects_input() {
        let mut input = base_input();
        input.taskable_agent_count = 5;
        input.fit_agent_count = 2;
        input.fit_agent_names = vec!["research-specialist".into(), "code-specialist".into()];
        input.explicit_specialist_workflow = true;
        let state = synthesize(&input);
        assert_eq!(state.roster_fit.taskable_count, 5);
        assert_eq!(state.roster_fit.fit_count, 2);
        assert!(state.roster_fit.explicit_workflow);
    }

    #[test]
    fn skill_fit_tracks_missing() {
        let mut input = base_input();
        input.enabled_skill_count = 10;
        input.matching_skill_count = 3;
        input.missing_skills = vec!["dnd-rules".into(), "combat-tracker".into()];
        let state = synthesize(&input);
        assert_eq!(state.skill_fit.missing_skills.len(), 2);
    }

    // ── Skeleton extraction tests ─────────────────────────────────────

    #[test]
    fn response_skeleton_prose_only() {
        let text = "The adventurer walks down the long corridor.";
        assert_eq!(response_skeleton(text), "narrative");
    }

    #[test]
    fn response_skeleton_with_question() {
        let text = "What do you want to do next?";
        assert_eq!(response_skeleton(text), "narrative+question");
    }

    #[test]
    fn response_skeleton_dialogue_with_pause() {
        let text = "\"You shall not pass,\" the wizard said... stepping forward.";
        assert_eq!(response_skeleton(text), "dialogue+pause");
    }

    /// An intro line, three bullet lines, and a closing question are expected
    /// to yield "narrative+list+question".
    #[test]
    fn response_skeleton_narrative_list_question() {
        let skeleton = response_skeleton(
            "You see three paths:\n- North road\n- East gate\n- West forest\nWhich do you take?",
        );
        assert_eq!(skeleton, "narrative+list+question");
    }

    /// With just two bullet lines the expected skeleton says "options", not "list".
    #[test]
    fn response_skeleton_options_without_enough_bullets() {
        let skeleton = response_skeleton("You can:\n- Fight the guard\n- Sneak past them");
        assert_eq!(skeleton, "narrative+options");
    }

    /// A two-word input is expected to produce no echo fragments at all.
    #[test]
    fn extract_echo_fragments_returns_empty_for_short_text() {
        assert!(extract_echo_fragments("Short text.").is_empty());
    }

    /// A long sentence is expected to produce at least one fragment, and every
    /// fragment must already be in ASCII lowercase.
    #[test]
    fn extract_echo_fragments_yields_window_from_long_sentence() {
        let fragments = extract_echo_fragments(
            "The ancient stone door stood resolute at the end of the corridor blocking all passage forward.",
        );
        assert!(!fragments.is_empty());

        // Lowercasing a fragment must be a no-op.
        for fragment in &fragments {
            let lowered = fragment.to_ascii_lowercase();
            assert_eq!(lowered, *fragment);
        }
    }

    /// Even when fed twenty long sentences, a single call is expected to
    /// return no more than five fragments.
    #[test]
    fn extract_echo_fragments_caps_at_five_per_call() {
        let sentence =
            "The warrior raised his sword and struck the enemy down with great force and fury";
        // Twenty copies of the same sentence, separated by ". ".
        let text = vec![sentence; 20].join(". ");

        let fragment_count = extract_echo_fragments(&text).len();
        assert!(fragment_count <= 5);
    }

    // ── Pattern detection tests ───────────────────────────────────────

    /// Two identical skeletons are expected to leave structural repetition
    /// unset and the streak at zero.
    #[test]
    fn pattern_locked_false_when_fewer_than_3_skeletons() {
        let mut raw = base_input();
        raw.recent_response_skeletons = vec!["narrative+question".into(); 2];

        let state = synthesize(&raw);
        let history = &state.behavioral_history;

        assert!(!history.structural_repetition);
        assert_eq!(history.repetition_streak, 0);
    }

    /// Four identical skeletons are expected to flag structural repetition,
    /// report a streak of four, and name the repeated skeleton.
    #[test]
    fn pattern_locked_true_with_4_identical_skeletons() {
        let mut raw = base_input();
        raw.recent_response_skeletons = vec!["dialogue+pause".into(); 4];

        let state = synthesize(&raw);
        let history = &state.behavioral_history;

        assert!(history.structural_repetition);
        assert_eq!(history.repetition_streak, 4);
        assert_eq!(history.repeated_pattern.as_deref(), Some("dialogue+pause"));
    }

    /// Three skeletons whose final entry differs from the two before it are
    /// expected to leave structural repetition unset.
    #[test]
    fn pattern_locked_false_when_last_two_differ() {
        let mut raw = base_input();
        raw.recent_response_skeletons = vec!["narrative+question".into(); 2];
        // The last skeleton differs from its predecessors.
        raw.recent_response_skeletons.push("dialogue+pause".into());

        let state = synthesize(&raw);

        assert!(!state.behavioral_history.structural_repetition);
    }

    /// With two fragments of which only the first occurs in the user message,
    /// the expected self-echo risk is 0.5 (within a 0.001 tolerance).
    #[test]
    fn self_echo_risk_proportion_computed_correctly() {
        let mut raw = base_input();
        raw.user_content =
            "You said the ancient stone door is the only way forward right?".into();
        raw.self_echo_fragments = vec![
            // Present in the user message above.
            "the ancient stone door is the only way forward".into(),
            // Absent from the user message.
            "a completely different phrase that is not in message".into(),
        ];

        let state = synthesize(&raw);
        let risk = state.behavioral_history.self_echo_risk;

        let deviation = (risk - 0.5).abs();
        assert!(deviation < 0.001);
    }

    /// With an empty fragment list the expected self-echo risk is exactly 0.0.
    #[test]
    fn self_echo_risk_zero_when_no_fragments() {
        let mut raw = base_input();
        raw.user_content = "What should I do?".into();
        raw.self_echo_fragments = Vec::new();

        let state = synthesize(&raw);
        let risk = state.behavioral_history.self_echo_risk;

        assert_eq!(risk, 0.0);
    }

    /// A length series that is not strictly falling is expected to leave
    /// `engagement_declining` unset.
    #[test]
    fn user_engagement_declining_false_when_not_monotonic() {
        let mut raw = base_input();
        // 50 → 30 → 40 → 20: the step from 30 up to 40 breaks the downward run.
        raw.recent_user_message_lengths = vec![50, 30, 40, 20];

        let state = synthesize(&raw);
        let declining = state.behavioral_history.engagement_declining;

        assert!(!declining);
    }
}