// roboticus-agent 0.11.4

//! Deterministic action planner for task-oriented turns.
//!
//! Maps a [`TaskOperatingState`] + [`TaskStateInput`] to a ranked list of
//! [`ActionCandidate`]s and selects the best one. No LLM call — this is
//! pure heuristic scoring.
//!
//! ## Invariants
//!
//! - All task-oriented next-action selection flows through [`plan`].
//! - Direct execution, memory inspection, and blocker surfacing are
//!   equally first-class — not afterthoughts to delegation.
//! - The decomposition gate is a scored input, not an authority.

use serde::Serialize;

use crate::task_state::{TaskClassification, TaskOperatingState, TaskStateInput};

/// The set of valid next actions the planner can select.
///
/// Every variant is a plain unit variant, so the type is `Copy` and can be
/// compared with `==`. It derives `Serialize` so a selected action can be
/// emitted alongside the rest of a [`TaskExecutionPlan`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum PlannedAction {
    /// Answer directly — conversational turn, no task routing.
    ///
    /// [`plan`] returns this immediately when the turn is classified as
    /// conversation; no other rule is evaluated in that case.
    AnswerDirectly,
    /// Proceed with centralized (non-delegated) inference.
    ///
    /// Also the fallback when no other candidate is produced.
    ContinueCentralized,
    /// Inspect memory deeper — recall gap detected, deeper retrieval warranted.
    InspectMemory,
    /// Compose a new skill to fill a capability gap.
    ComposeSkill,
    /// Compose a new specialist subagent.
    ComposeSubagent,
    /// Delegate to an existing specialist with matching capabilities.
    DelegateToSpecialist,
    /// Cannot proceed — surface a real blocker to the user.
    ReturnBlocker,
    /// Malformed tool output detected — retry with preserved state.
    NormalizationRetry,
}

/// A candidate next action with its confidence score and rationale.
///
/// [`plan`] builds one of these for every rule that matches; several
/// candidates may share the same [`PlannedAction`] with different rationales.
#[derive(Debug, Clone, Serialize)]
pub struct ActionCandidate {
    /// The action this candidate proposes.
    pub action: PlannedAction,
    /// Confidence score (0.0–1.0). Higher = more confident this is the right action.
    ///
    /// The range is a convention of the planner's rules; the struct itself
    /// performs no validation.
    pub confidence: f64,
    /// Human-readable explanation of WHY this action was chosen.
    pub rationale: String,
}

/// The planner's output: ranked candidates and the selected action.
///
/// `selected` and `selected_rationale` are copies of the first entry of
/// `candidates`, kept as separate fields so callers do not have to index
/// into the list.
#[derive(Debug, Clone, Serialize)]
pub struct TaskExecutionPlan {
    /// All considered candidates, sorted by confidence descending.
    pub candidates: Vec<ActionCandidate>,
    /// The selected action (first candidate).
    ///
    /// When `candidates` is empty this is [`PlannedAction::ContinueCentralized`].
    pub selected: PlannedAction,
    /// Rationale for the selected action.
    ///
    /// When `candidates` is empty this is `"No candidates generated"`.
    pub selected_rationale: String,
}

/// Plan the next action for a turn based on the operating state.
///
/// This is the **single authority** on task-oriented next-action selection.
/// No shortcut, guard, or pipeline stage may independently choose
/// delegation, composition, or memory inspection for task turns.
///
/// The planner is deterministic — same inputs always produce the same output.
pub fn plan(state: &TaskOperatingState, input: &TaskStateInput) -> TaskExecutionPlan {
    let mut candidates = Vec::new();

    // ── Rule 1: Conversation => AnswerDirectly (short-circuit) ──
    if state.classification == TaskClassification::Conversation {
        candidates.push(ActionCandidate {
            action: PlannedAction::AnswerDirectly,
            confidence: 0.95,
            rationale: "Turn classified as conversation, not task".into(),
        });
        return finalize(candidates);
    }

    // ── Rule 2: Provider breaker open => ReturnBlocker ──
    if state.runtime_constraints.provider_breaker_open {
        candidates.push(ActionCandidate {
            action: PlannedAction::ReturnBlocker,
            confidence: 0.8,
            rationale: "Provider circuit breaker open; cannot proceed with inference".into(),
        });
    }

    // ── Rule 3: Explicit workflow + matching roster => Delegate ──
    // Only fires when the user EXPLICITLY requested delegation (semantic match at
    // 0.80 threshold). Conversational turns with incidental specialist fit do NOT
    // trigger auto-delegation — the agent responds directly and can delegate via
    // tool call during inference if it decides to.
    if state.roster_fit.explicit_workflow && state.roster_fit.fit_count > 0 {
        candidates.push(ActionCandidate {
            action: PlannedAction::DelegateToSpecialist,
            confidence: 0.9,
            rationale: format!(
                "Explicit delegation requested; {} specialist(s) fit: {}",
                state.roster_fit.fit_count,
                state.roster_fit.fit_names.join(", ")
            ),
        });
    }

    // ── Rule 3b: Explicit workflow + named plugin tool match => ContinueCentralized ──
    // When the user says "relay to Claude Code" (or any named plugin tool), and
    // that tool exists in the registry, the agent should use it during inference
    // rather than creating a new specialist subagent.
    // Confidence 0.88: beats ComposeSubagent (0.85) but defers to a fitting
    // specialist (0.9) when one exists — the specialist is the more specific match.
    if state.roster_fit.explicit_workflow
        && input.named_tool_match
        && state.roster_fit.fit_count == 0
    {
        candidates.push(ActionCandidate {
            action: PlannedAction::ContinueCentralized,
            confidence: 0.88,
            rationale: "Explicit delegation requested for a named plugin tool that exists in the tool registry; routing to centralized inference for tool-call dispatch".into(),
        });
    }

    // ── Rule 4: Explicit workflow + empty roster + creator => Compose ──
    // Only fires when no named tool match was found (Rule 3b above).
    if state.roster_fit.explicit_workflow
        && state.roster_fit.taskable_count == 0
        && !input.named_tool_match
        && is_creator_authority(&input.authority)
    {
        candidates.push(ActionCandidate {
            action: PlannedAction::ComposeSubagent,
            confidence: 0.85,
            rationale: "Explicit delegation requested but roster empty and no matching tool/plugin; composing specialist"
                .into(),
        });
    }

    // ── Rule 5: Decomposition gate recommends delegation + fit exists ──
    // Only fires when BOTH the gate recommends delegation AND the user's turn
    // is NOT a direct conversational address. If the user is talking TO the agent
    // (not requesting task dispatch), the agent should respond directly — delegation
    // happens during inference via tool calls, not pre-inference routing.
    if let Some(ref proposal) = input.decomposition_proposal
        && proposal.should_delegate
        && state.roster_fit.fit_count > 0
        && state.roster_fit.explicit_workflow
    {
        candidates.push(ActionCandidate {
            action: PlannedAction::DelegateToSpecialist,
            confidence: 0.75,
            rationale: format!(
                "Decomposition gate recommends delegation (utility margin {:.2}); {} specialist(s) fit",
                proposal.utility_margin, state.roster_fit.fit_count
            ),
        });
    }

    // ── Rule 6: Memory recall gap + low similarity => InspectMemory ──
    // Only for task turns — conversational turns get passive memory.
    if state.memory_confidence.recall_gap
        && state.memory_confidence.avg_similarity < 0.5
        && !state.runtime_constraints.budget_pressured
    {
        candidates.push(ActionCandidate {
            action: PlannedAction::InspectMemory,
            confidence: 0.7,
            rationale: format!(
                "Memory recall gap detected ({} empty tier(s), avg similarity {:.2}); deeper inspection warranted",
                state.memory_confidence.empty_tiers.len(),
                state.memory_confidence.avg_similarity
            ),
        });
    }

    // ── Rule 7: Missing skills + creator authority => ComposeSkill ──
    if !state.skill_fit.missing_skills.is_empty() && is_creator_authority(&input.authority) {
        candidates.push(ActionCandidate {
            action: PlannedAction::ComposeSkill,
            confidence: 0.65,
            rationale: format!(
                "Missing skills: {}",
                state.skill_fit.missing_skills.join(", ")
            ),
        });
    }

    // ── Rule 8: Previous turn had protocol issues => NormalizationRetry ──
    // When the LLM produced malformed tool protocol or narrated tool calls
    // instead of executing them, inject a correction instruction before
    // the next inference so the model doesn't repeat the pattern.
    // Confidence escalates with streak length — persistent failures are
    // a stronger signal that the model needs explicit correction.
    if input.previous_turn_had_protocol_issues {
        let streak_boost = (input.normalization_retry_streak as f64 * 0.02).min(0.1);
        candidates.push(ActionCandidate {
            action: PlannedAction::NormalizationRetry,
            confidence: 0.75 + streak_boost,
            rationale: format!(
                "Previous turn contained malformed tool protocol (streak: {}); \
                 injecting correction instruction",
                input.normalization_retry_streak
            ),
        });
    }

    // ── Rule 9: Structural repetition detected => inject variation hint ──
    // When the agent has produced structurally identical responses 3+ times in a
    // row, the planner annotates ContinueCentralized with a variation directive so
    // the inference layer knows to break the pattern. This does NOT change the
    // selected action — it enriches the rationale that is surfaced to the prompt.
    if state.behavioral_history.structural_repetition {
        let pattern = state
            .behavioral_history
            .repeated_pattern
            .as_deref()
            .unwrap_or("unknown");
        candidates.push(ActionCandidate {
            action: PlannedAction::ContinueCentralized,
            confidence: 0.55,
            rationale: format!(
                "Pattern-locked: {} consecutive responses with skeleton \"{}\". \
                 Vary response structure before proceeding.",
                state.behavioral_history.repetition_streak, pattern
            ),
        });
    }

    // ── Rule 10: User engagement declining => flag strategy change ──
    // When user message lengths are monotonically decreasing over 3+ turns and
    // the most recent message is short, the planner signals that the current
    // approach may not be meeting the user's needs.
    if state.behavioral_history.engagement_declining {
        candidates.push(ActionCandidate {
            action: PlannedAction::ContinueCentralized,
            confidence: 0.5,
            rationale: "User engagement declining: messages are getting shorter and more \
                        directive. Consider changing strategy or asking a focused question."
                .into(),
        });
    }

    // ── Fallback: ContinueCentralized ──
    if candidates.is_empty() || candidates.iter().all(|c| c.confidence < 0.5) {
        candidates.push(ActionCandidate {
            action: PlannedAction::ContinueCentralized,
            confidence: 0.6,
            rationale:
                "No strong delegation/composition signal; proceeding with centralized inference"
                    .into(),
        });
    }

    finalize(candidates)
}

/// Rank candidates by confidence, highest first, and select the winner.
///
/// The sort is stable: candidates with equal confidence keep the order in
/// which they were pushed, so the earlier rule wins a tie.
///
/// A `NaN` confidence is ranked below every real score. Comparing with
/// `partial_cmp(..).unwrap_or(Equal)` would make `NaN` "equal" to everything,
/// which is not a total order — the resulting order is unspecified (and the
/// standard library is allowed to panic on it), and a `NaN` candidate that
/// happened to be pushed first would be selected.
///
/// If `candidates` is empty the plan falls back to
/// [`PlannedAction::ContinueCentralized`] with the rationale
/// `"No candidates generated"`.
fn finalize(mut candidates: Vec<ActionCandidate>) -> TaskExecutionPlan {
    // Map NaN to the lowest possible key so the comparator below is total.
    fn rank_key(confidence: f64) -> f64 {
        if confidence.is_nan() {
            f64::NEG_INFINITY
        } else {
            confidence
        }
    }

    // Arguments are swapped (b vs a) to obtain descending order.
    candidates.sort_by(|a, b| rank_key(b.confidence).total_cmp(&rank_key(a.confidence)));

    let (selected, selected_rationale) = match candidates.first() {
        Some(best) => (best.action, best.rationale.clone()),
        None => (
            PlannedAction::ContinueCentralized,
            String::from("No candidates generated"),
        ),
    };

    TaskExecutionPlan {
        candidates,
        selected,
        selected_rationale,
    }
}

/// Report whether `authority` is privileged enough for composition actions.
///
/// The check is a case-insensitive (ASCII) substring match: the authority
/// qualifies if it contains any of `creator`, `selfgenerated`,
/// `self_generated`, or `admin` anywhere in the string.
fn is_creator_authority(authority: &str) -> bool {
    const PRIVILEGED_MARKERS: [&str; 4] = ["creator", "selfgenerated", "self_generated", "admin"];

    let normalized = authority.to_ascii_lowercase();
    PRIVILEGED_MARKERS
        .iter()
        .any(|&marker| normalized.contains(marker))
}

// ── Tests ────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    // Unit tests for the heuristic planner. Each test starts from the neutral
    // fixtures below (`base_input` + `task_state`), flips only the fields the
    // rule under test reads, and then checks either the selected action or
    // the presence of a specific candidate.
    use super::*;
    use crate::task_state::{MemoryConfidence, RosterFit, RuntimeConstraints, SkillFit, ToolFit};

    /// Neutral planner input: creator authority, empty roster, no missing
    /// skills, no decomposition proposal, no named tool match, and no
    /// protocol issues on the previous turn.
    fn base_input() -> TaskStateInput {
        TaskStateInput {
            user_content: "do something".into(),
            intents: vec!["Execution".into()],
            authority: "Creator".into(),
            retrieval_metrics: None,
            tool_search_stats: None,
            mcp_tools_available: false,
            taskable_agent_count: 0,
            fit_agent_count: 0,
            fit_agent_names: vec![],
            enabled_skill_count: 5,
            matching_skill_count: 0,
            missing_skills: vec![],
            remaining_budget_tokens: 8000,
            provider_breaker_open: false,
            inference_mode: "standard".into(),
            decomposition_proposal: None,
            explicit_specialist_workflow: false,
            named_tool_match: false,
            recent_response_skeletons: vec![],
            recent_user_message_lengths: vec![],
            self_echo_fragments: vec![],
            declared_action: None,
            previous_turn_had_protocol_issues: false,
            normalization_retry_streak: 0,
        }
    }

    /// Neutral operating state with the given classification: no recall gap
    /// (avg similarity 0.7), no budget pressure, provider breaker closed,
    /// empty roster without an explicit workflow, no missing skills, and no
    /// behavioral-history flags. With `TaskClassification::Task` and
    /// `base_input()` none of the planner's rules fire, so only the fallback
    /// candidate is produced.
    fn task_state(classification: TaskClassification) -> TaskOperatingState {
        TaskOperatingState {
            classification,
            memory_confidence: MemoryConfidence {
                avg_similarity: 0.7,
                budget_utilization: 0.5,
                retrieval_count: 5,
                recall_gap: false,
                empty_tiers: vec![],
            },
            runtime_constraints: RuntimeConstraints {
                remaining_budget_tokens: 8000,
                budget_pressured: false,
                provider_breaker_open: false,
                inference_mode: "standard".into(),
            },
            tool_fit: ToolFit {
                available_count: 10,
                high_relevance_count: 3,
                token_savings: 2000,
                mcp_available: false,
            },
            roster_fit: RosterFit {
                taskable_count: 0,
                fit_count: 0,
                fit_names: vec![],
                explicit_workflow: false,
            },
            skill_fit: SkillFit {
                enabled_count: 5,
                matching_count: 0,
                missing_skills: vec![],
            },
            behavioral_history: crate::task_state::BehavioralHistory {
                structural_repetition: false,
                repetition_streak: 0,
                repeated_pattern: None,
                engagement_declining: false,
                self_echo_risk: 0.0,
                echo_fragment: None,
                variation_hint: None,
            },
            declared_action: crate::task_state::DeclaredActionState {
                action_declared: false,
                action: None,
                high_consequence: false,
            },
        }
    }

    // Rule 1: a conversational turn yields exactly one candidate.
    #[test]
    fn conversation_short_circuits_to_answer_directly() {
        let state = task_state(TaskClassification::Conversation);
        let input = base_input();
        let plan = plan(&state, &input);
        assert_eq!(plan.selected, PlannedAction::AnswerDirectly);
        assert_eq!(plan.candidates.len(), 1);
        assert!(plan.candidates[0].confidence >= 0.9);
    }

    // Rule 2: with nothing else firing, the open breaker is the top candidate.
    #[test]
    fn provider_breaker_open_returns_blocker() {
        let mut state = task_state(TaskClassification::Task);
        state.runtime_constraints.provider_breaker_open = true;
        let input = base_input();
        let plan = plan(&state, &input);
        assert_eq!(plan.selected, PlannedAction::ReturnBlocker);
    }

    // Rule 3: explicit workflow plus a fitting specialist delegates.
    #[test]
    fn explicit_workflow_with_fit_delegates() {
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit.explicit_workflow = true;
        state.roster_fit.fit_count = 2;
        state.roster_fit.fit_names = vec!["research-specialist".into()];
        let mut input = base_input();
        input.explicit_specialist_workflow = true;
        let plan = plan(&state, &input);
        assert_eq!(plan.selected, PlannedAction::DelegateToSpecialist);
    }

    // Rule 4: explicit workflow, empty roster, creator authority composes.
    #[test]
    fn explicit_workflow_empty_roster_composes() {
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit.explicit_workflow = true;
        state.roster_fit.taskable_count = 0;
        let mut input = base_input();
        input.explicit_specialist_workflow = true;
        let plan = plan(&state, &input);
        assert_eq!(plan.selected, PlannedAction::ComposeSubagent);
    }

    // Rule 6: recall gap with similarity below 0.5 proposes memory inspection.
    #[test]
    fn memory_gap_triggers_inspect() {
        let mut state = task_state(TaskClassification::Task);
        state.memory_confidence.recall_gap = true;
        state.memory_confidence.avg_similarity = 0.3;
        state.memory_confidence.empty_tiers = vec!["semantic".into(), "procedural".into()];
        let input = base_input();
        let plan = plan(&state, &input);
        // InspectMemory should be a candidate
        assert!(
            plan.candidates
                .iter()
                .any(|c| c.action == PlannedAction::InspectMemory)
        );
    }

    // Rule 7: missing skills with creator authority proposes skill composition.
    #[test]
    fn missing_skills_triggers_compose_skill() {
        let mut state = task_state(TaskClassification::Task);
        state.skill_fit.missing_skills = vec!["dnd-rules".into()];
        let input = base_input();
        let plan = plan(&state, &input);
        assert!(
            plan.candidates
                .iter()
                .any(|c| c.action == PlannedAction::ComposeSkill)
        );
    }

    // Fallback: the neutral fixtures trigger no rule.
    #[test]
    fn fallback_is_continue_centralized() {
        let state = task_state(TaskClassification::Task);
        let input = base_input();
        let plan = plan(&state, &input);
        assert_eq!(plan.selected, PlannedAction::ContinueCentralized);
    }

    // Rule 4 authority gate: "Peer" contains none of the privileged markers.
    #[test]
    fn non_creator_cannot_compose() {
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit.explicit_workflow = true;
        state.roster_fit.taskable_count = 0;
        let mut input = base_input();
        input.authority = "Peer".into();
        input.explicit_specialist_workflow = true;
        let plan = plan(&state, &input);
        // Should NOT propose ComposeSubagent for non-creator
        assert!(
            !plan
                .candidates
                .iter()
                .any(|c| c.action == PlannedAction::ComposeSubagent)
        );
    }

    // Ranking: several rules fire at once; output must be in descending order.
    #[test]
    fn candidates_sorted_by_confidence() {
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit.explicit_workflow = true;
        state.roster_fit.fit_count = 1;
        state.roster_fit.fit_names = vec!["specialist".into()];
        state.memory_confidence.recall_gap = true;
        state.memory_confidence.avg_similarity = 0.3;
        state.memory_confidence.empty_tiers = vec!["semantic".into()];
        let mut input = base_input();
        input.explicit_specialist_workflow = true;
        let plan = plan(&state, &input);
        // Verify candidates are sorted descending
        for w in plan.candidates.windows(2) {
            assert!(w[0].confidence >= w[1].confidence);
        }
    }

    // Rule 5: the gate's recommendation shows up as a delegation candidate.
    #[test]
    fn decomposition_gate_as_scored_input() {
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit.fit_count = 1;
        state.roster_fit.fit_names = vec!["specialist".into()];
        state.roster_fit.explicit_workflow = true; // Rule 5 now requires explicit workflow
        let mut input = base_input();
        input.decomposition_proposal = Some(crate::task_state::DecompositionProposal {
            should_delegate: true,
            rationale: "task complexity warrants delegation".into(),
            utility_margin: 0.7,
        });
        let plan = plan(&state, &input);
        // Delegation should appear as a candidate from the gate signal
        assert!(
            plan.candidates
                .iter()
                .any(|c| c.action == PlannedAction::DelegateToSpecialist)
        );
    }

    // Rule 9: the repetition hint is carried in the candidate's rationale.
    #[test]
    fn pattern_locked_injects_variation_hint_into_candidates() {
        let mut state = task_state(TaskClassification::Task);
        state.behavioral_history.structural_repetition = true;
        state.behavioral_history.repetition_streak = 3;
        state.behavioral_history.repeated_pattern = Some("narrative+question+options".into());
        let input = base_input();
        let plan = plan(&state, &input);
        // ContinueCentralized should be present with a variation-directive rationale
        let variation_candidate = plan
            .candidates
            .iter()
            .find(|c| c.action == PlannedAction::ContinueCentralized);
        assert!(
            variation_candidate.is_some(),
            "expected ContinueCentralized candidate for pattern-locked state"
        );
        let rationale = &variation_candidate.unwrap().rationale;
        assert!(
            rationale.contains("Pattern-locked"),
            "rationale should contain 'Pattern-locked': {rationale}"
        );
        assert!(
            rationale.contains("narrative+question+options"),
            "rationale should name the repeated pattern: {rationale}"
        );
    }

    // Rule 10: the engagement hint is carried in the candidate's rationale.
    #[test]
    fn user_engagement_declining_injects_strategy_change_hint() {
        let mut state = task_state(TaskClassification::Task);
        state.behavioral_history.engagement_declining = true;
        let input = base_input();
        let plan = plan(&state, &input);
        // ContinueCentralized should be present with an engagement-declining rationale
        let engagement_candidate = plan
            .candidates
            .iter()
            .find(|c| c.action == PlannedAction::ContinueCentralized);
        assert!(
            engagement_candidate.is_some(),
            "expected ContinueCentralized candidate for engagement-declining state"
        );
        let rationale = &engagement_candidate.unwrap().rationale;
        assert!(
            rationale.contains("engagement declining"),
            "rationale should mention engagement: {rationale}"
        );
    }

    #[test]
    fn pattern_locked_does_not_override_higher_priority_actions() {
        // Even with pattern_locked, a provider breaker open should win (confidence 0.8 > 0.55)
        let mut state = task_state(TaskClassification::Task);
        state.behavioral_history.structural_repetition = true;
        state.behavioral_history.repetition_streak = 3;
        state.behavioral_history.repeated_pattern = Some("narrative+question".into());
        state.runtime_constraints.provider_breaker_open = true;
        let input = base_input();
        let plan = plan(&state, &input);
        assert_eq!(
            plan.selected,
            PlannedAction::ReturnBlocker,
            "ReturnBlocker (conf 0.8) must win over pattern-locked ContinueCentralized (conf 0.55)"
        );
    }

    // ── Named tool match: delegation routing ─────────────────────

    #[test]
    fn named_tool_match_prevents_compose_subagent() {
        // When the user says "relay to Claude Code" and the tool exists in
        // the registry, the planner should route to ContinueCentralized (so
        // the tool is invoked during inference) — NOT ComposeSubagent.
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit = RosterFit {
            taskable_count: 0,
            fit_count: 0,
            fit_names: vec![],
            explicit_workflow: true,
        };
        let mut input = base_input();
        input.user_content = "relay that question to the claude code instance".into();
        input.explicit_specialist_workflow = true;
        input.named_tool_match = true;

        let plan = plan(&state, &input);
        assert_eq!(
            plan.selected,
            PlannedAction::ContinueCentralized,
            "Named tool match must route to ContinueCentralized, not ComposeSubagent"
        );
    }

    #[test]
    fn explicit_delegation_without_tool_match_composes_specialist() {
        // When the user explicitly requests delegation but names NO existing
        // tool and the roster is empty, ComposeSubagent is correct.
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit = RosterFit {
            taskable_count: 0,
            fit_count: 0,
            fit_names: vec![],
            explicit_workflow: true,
        };
        let mut input = base_input();
        input.user_content = "compose a specialist for this analysis".into();
        input.explicit_specialist_workflow = true;
        input.named_tool_match = false;

        let plan = plan(&state, &input);
        assert_eq!(
            plan.selected,
            PlannedAction::ComposeSubagent,
            "Without tool match, explicit workflow + empty roster should compose specialist"
        );
    }

    #[test]
    fn named_tool_match_outranks_compose_subagent_confidence() {
        // Rule 3b (named plugin tool, conf 0.88) must beat Rule 4 (compose,
        // conf 0.85). Rule 4 is also gated by !named_tool_match so it won't
        // fire, but the confidence ordering is still verified.
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit = RosterFit {
            taskable_count: 0,
            fit_count: 0,
            fit_names: vec![],
            explicit_workflow: true,
        };
        let mut input = base_input();
        input.explicit_specialist_workflow = true;
        input.named_tool_match = true;

        let plan = plan(&state, &input);
        assert_eq!(
            plan.selected,
            PlannedAction::ContinueCentralized,
            "Named plugin tool match must win over ComposeSubagent"
        );
        let centralized = plan
            .candidates
            .iter()
            .find(|c| c.action == PlannedAction::ContinueCentralized)
            .expect("ContinueCentralized candidate must exist");
        assert!(
            (centralized.confidence - 0.88).abs() < 0.01,
            "ContinueCentralized confidence should be 0.88, got {}",
            centralized.confidence
        );
    }

    #[test]
    fn existing_specialist_fit_delegates_despite_tool_match() {
        // When both a named plugin tool AND a fit specialist exist, the
        // specialist wins (Rule 3 at 0.9) because it's the more specific
        // match. Rule 3b only fires when fit_count == 0.
        let mut state = task_state(TaskClassification::Task);
        state.roster_fit = RosterFit {
            taskable_count: 1,
            fit_count: 1,
            fit_names: vec!["code-analyst".into()],
            explicit_workflow: true,
        };
        let mut input = base_input();
        input.explicit_specialist_workflow = true;
        input.named_tool_match = true;

        let plan = plan(&state, &input);
        assert_eq!(
            plan.selected,
            PlannedAction::DelegateToSpecialist,
            "Fitting specialist (0.9) must win over named tool match when both exist"
        );
    }
}