bamboo-engine 2026.6.18

use std::collections::BTreeSet;
use std::path::PathBuf;
use std::sync::Arc;

use bamboo_agent_core::composition::CompositionExecutor;
use bamboo_agent_core::storage::AttachmentReader;
use bamboo_agent_core::storage::Storage;
use bamboo_agent_core::tools::ToolSchema;
use bamboo_agent_core::GoldConfidence;
use bamboo_compression::TokenBudget;
use bamboo_config::MemoryConfig;
use bamboo_config::PermissionMode;
use bamboo_domain::ReasoningEffort;
use bamboo_domain::RuntimeSessionPersistence;
use bamboo_llm::LLMProvider;
use bamboo_metrics::MetricsCollector;
use bamboo_skills::SkillManager;
use bamboo_tools::ToolRegistry;
use serde::{Deserialize, Serialize};

/// Settings for the auxiliary (non-main) models: the fast, background,
/// planning, search and summarization helpers.
///
/// Each helper has an optional model name; the fast, background and
/// summarization helpers additionally accept an optional provider override.
/// The derived `Default` leaves every field `None`.
#[derive(Clone, Default)]
pub struct AuxiliaryModelConfig {
    /// Model name for lightweight fast-model calls.
    pub fast_model_name: Option<String>,
    /// Provider override for fast-model calls.
    pub fast_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Model name for memory/background tasks.
    pub background_model_name: Option<String>,
    /// Model name for planning/coordination tasks.
    pub planning_model_name: Option<String>,
    /// Model name for search/navigation tasks.
    pub search_model_name: Option<String>,
    /// Model name for summarization.
    pub summarization_model_name: Option<String>,
    /// Provider override for memory/background model calls.
    pub background_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Provider override for summarization calls.
    pub summarization_model_provider: Option<Arc<dyn LLMProvider>>,
}

/// Serde default for [`GoldConfig::max_output_tokens`].
fn default_gold_max_output_tokens() -> u32 {
    const DEFAULT_MAX_OUTPUT_TOKENS: u32 = 1024;
    DEFAULT_MAX_OUTPUT_TOKENS
}

/// Serde default for [`GoldConfig::max_auto_continuations`].
fn default_gold_max_auto_continuations() -> u32 {
    const DEFAULT_MAX_AUTO_CONTINUATIONS: u32 = 3;
    DEFAULT_MAX_AUTO_CONTINUATIONS
}

/// Serde default for [`GoldConfig::min_auto_continue_confidence`]:
/// [`GoldConfidence::Medium`].
fn default_gold_min_confidence() -> GoldConfidence {
    GoldConfidence::Medium
}

/// Configuration for the Gold evaluator and its opt-in automation switches
/// (auto-answer and auto-continue).
///
/// All three switches default to `false` and all optional text fields to
/// `None`. Thanks to `#[serde(default)]`, fields missing from serialized input
/// are filled with their defaults instead of causing a deserialization error.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct GoldConfig {
    /// Master switch for Gold observe-only evaluation.
    #[serde(default)]
    pub enabled: bool,
    /// Independent switch for Phase 2 low-risk auto-answer.
    ///
    /// Kept separate from `enabled` so Phase 1 observe-only users do not
    /// implicitly opt into automatic clarification responses.
    #[serde(default)]
    pub auto_answer_enabled: bool,
    /// Independent switch for Phase 3 server-side auto-continue.
    ///
    /// Kept separate from both `enabled` and `auto_answer_enabled` so users can
    /// opt into terminal auto-resume explicitly without enabling other Gold
    /// automation behaviors.
    #[serde(default)]
    pub auto_continue_enabled: bool,
    /// Optional dedicated model for Gold evaluation. Falls back to fast model,
    /// then the main chat model when absent.
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_name: Option<String>,
    /// The user's goal for this session.
    ///
    /// Unlike `evaluation_prompt` (which only tunes the *judge*), the goal is
    /// surfaced to the *main* executing agent as a persistent system-prompt
    /// block so it actively works toward it. The Gold evaluator also measures
    /// progress against this text.
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub goal: Option<String>,
    /// Optional custom prompt suffix appended to the built-in Gold evaluator
    /// prompt. This tunes the judge only; it does not set the goal.
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub evaluation_prompt: Option<String>,
    /// Output token limit for the Gold evaluator call. Defaults to 1024.
    #[serde(default = "default_gold_max_output_tokens")]
    pub max_output_tokens: u32,
    /// Maximum number of automatic Gold continuations allowed per session.
    /// Defaults to 3.
    #[serde(default = "default_gold_max_auto_continuations")]
    pub max_auto_continuations: u32,
    /// Minimum evaluator confidence required before Gold auto-continues or
    /// auto-answers. Defaults to `medium` so the loop fires on reasonably
    /// confident verdicts rather than only `high`.
    #[serde(default = "default_gold_min_confidence")]
    pub min_auto_continue_confidence: GoldConfidence,
}

impl Default for GoldConfig {
    /// Builds the all-off configuration: every switch disabled, no model, goal
    /// or judge prompt, and the limits taken from the same helper functions the
    /// serde field defaults use, so both paths agree.
    fn default() -> Self {
        // Resolve the tunable limits first; they share their source with the
        // `#[serde(default = "...")]` attributes on the struct.
        let max_output_tokens = default_gold_max_output_tokens();
        let max_auto_continuations = default_gold_max_auto_continuations();
        let min_auto_continue_confidence = default_gold_min_confidence();

        Self {
            // Switches: everything is opt-in.
            enabled: false,
            auto_answer_enabled: false,
            auto_continue_enabled: false,
            // Optional text settings start unset.
            model_name: None,
            goal: None,
            evaluation_prompt: None,
            // Limits resolved above.
            max_output_tokens,
            max_auto_continuations,
            min_auto_continue_confidence,
        }
    }
}

impl GoldConfig {
    /// The session goal text, falling back to the legacy `evaluation_prompt`
    /// for sessions created before the dedicated `goal` field existed.
    ///
    /// Each candidate is trimmed and discarded when blank *before* the
    /// fallback is considered, so an empty or whitespace-only `goal` does not
    /// hide a usable `evaluation_prompt`.
    ///
    /// Returns the trimmed text, or `None` when neither field holds non-empty
    /// text.
    pub fn effective_goal(&self) -> Option<&str> {
        /// Trims `text` and drops it when nothing but whitespace remains.
        fn non_blank(text: Option<&str>) -> Option<&str> {
            text.map(str::trim).filter(|value| !value.is_empty())
        }

        non_blank(self.goal.as_deref()).or_else(|| non_blank(self.evaluation_prompt.as_deref()))
    }
}

/// Serde default for [`GuardianConfig::max_reviews`].
fn default_guardian_max_reviews() -> u32 {
    const DEFAULT_MAX_REVIEWS: u32 = 2;
    DEFAULT_MAX_REVIEWS
}

/// Configuration for the guardian adversarial-review terminal gate.
///
/// Mirrors [`GoldConfig`]: a plain, serde-defaulting struct surfaced per run.
/// When `enabled` is false (the default) the guardian gate is inactive and the
/// terminal completion path is unchanged.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(default)]
pub struct GuardianConfig {
    /// Master switch for the guardian review gate. Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Optional dedicated reviewer model. Falls back to the run's main model.
    ///
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub model_name: Option<String>,
    /// Maximum guardian review passes per run (budget; mirrors
    /// [`GoldConfig::max_auto_continuations`]). Defaults to 2.
    #[serde(default = "default_guardian_max_reviews")]
    pub max_reviews: u32,
}

impl Default for GuardianConfig {
    /// Gate disabled, no dedicated reviewer model, and the review budget taken
    /// from the same helper the serde field default uses.
    fn default() -> Self {
        let max_reviews = default_guardian_max_reviews();

        Self {
            max_reviews,
            model_name: None,
            enabled: false,
        }
    }
}

/// Late-bound spawner for the guardian reviewer child.
///
/// The runner cannot construct a child directly: the `SpawnScheduler` is built
/// *after* the `Agent` that drives the runner (a construction-order cycle), so
/// the terminal gate spawns the reviewer through this trait object, injected
/// per-request on [`AgentLoopConfig`] exactly like `auxiliary_model_resolver`.
/// The implementation lives in the server (it captures the already-built
/// scheduler + child-session adapter); the engine holds only the trait, keeping
/// the engine free of any dependency on server/AppState types.
///
/// Implementors must be `Send + Sync`; the config stores the spawner as
/// `Arc<dyn GuardianSpawner>`.
#[async_trait::async_trait]
pub trait GuardianSpawner: Send + Sync {
    /// Create a read-only reviewer child for `parent_session`, seeded with
    /// `review_prompt`, enqueue it to run, and return its session id so the
    /// caller can register a wait on it.
    ///
    /// * `parent_session` - the session the review child is created for.
    /// * `review_prompt` - the prompt the reviewer child is seeded with.
    /// * `model` - presumably the model the reviewer should run on; confirm
    ///   against the server implementation.
    /// * `disabled_tools` - presumably tool names to withhold from the
    ///   reviewer, with `None` meaning no explicit set was supplied; confirm
    ///   against the server implementation.
    ///
    /// Returns `Ok` with the child's session id. The content of the `Err`
    /// string is defined by the implementation and is not visible from here.
    async fn spawn_guardian_review(
        &self,
        parent_session: &bamboo_agent_core::Session,
        review_prompt: String,
        model: String,
        disabled_tools: Option<BTreeSet<String>>,
    ) -> Result<String, String>;
}

/// A child sub-agent's request to have a gated tool approved by its parent.
///
/// A non-bypassed child cannot answer its own permission prompt (no human is
/// attached to a child session), so the request is delegated up to the parent.
///
/// Handed by value to [`ApprovalDelegate::delegate_child_approval`].
#[derive(Debug, Clone)]
pub struct ChildApprovalRequest {
    /// Session id of the child that hit the gated tool.
    pub child_session_id: String,
    /// Session id of the parent the request is delegated to.
    pub parent_session_id: String,
    /// The gated tool call on the child to re-execute once approved.
    pub child_tool_call_id: String,
    /// Name of the gated tool.
    pub tool_name: String,
    /// Permission type as a string (e.g. "WriteFile", "ExecuteCommand").
    pub permission_type: String,
    /// The concrete resource the permission applies to (path, command, …).
    pub resource: String,
    /// Human-facing approval question to surface on the parent.
    pub question: String,
    /// The raw `awaiting_permission_approval` payload the child's executor built,
    /// so the parent can reuse the existing grant-extraction path verbatim.
    pub approval_payload: serde_json::Value,
}

/// What the executor should do after delegating a child's approval upward.
///
/// Returned (inside `Ok`) by [`ApprovalDelegate::delegate_child_approval`].
/// A small `Copy` enum, so it can be passed around and compared freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChildApprovalOutcome {
    /// Registered on the parent; the child must SUSPEND and await the decision.
    Delegated,
    /// Parent policy auto-approved (bypass / existing grant); proceed to execute.
    AutoApproved,
    /// Parent policy auto-denied; the executor must deny the tool.
    AutoDenied,
}

/// Late-bound delegate that routes a child's approval request up to its parent.
///
/// Injected per-request on [`AgentLoopConfig`] exactly like [`GuardianSpawner`];
/// the trait lives in the engine, the implementation in the server (it owns the
/// parent session store + pending-question + notification machinery).
///
/// Implementors must be `Send + Sync`; the config stores the delegate as
/// `Arc<dyn ApprovalDelegate>`.
#[async_trait::async_trait]
pub trait ApprovalDelegate: Send + Sync {
    /// Register `request` on its parent (or auto-resolve by policy) and report
    /// what the child's executor should do next.
    ///
    /// Returns `Ok` with the [`ChildApprovalOutcome`] to act on. The content of
    /// the `Err` string is defined by the implementation and is not visible
    /// from here.
    async fn delegate_child_approval(
        &self,
        request: ChildApprovalRequest,
    ) -> Result<ChildApprovalOutcome, String>;
}

/// Strategy selected by [`ImageFallbackConfig::mode`] for degrading image parts
/// in LLM requests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImageFallbackMode {
    /// Presumably substitutes a textual placeholder for the image — confirm in
    /// the request-building code.
    Placeholder,
    /// Presumably fails the request instead of degrading the image — confirm
    /// in the request-building code.
    Error,
    /// Presumably replaces the image with OCR-extracted text — confirm in the
    /// request-building code.
    Ocr,
    /// Use a vision-capable LLM to describe the image, then replace the image
    /// with the textual description so that text-only models can understand
    /// the content.
    Vision,
}

/// Image fallback settings: which [`ImageFallbackMode`] to apply and, for
/// `Vision` mode, which model should describe the image.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ImageFallbackConfig {
    /// The fallback strategy to apply.
    pub mode: ImageFallbackMode,
    /// Vision model name for `Vision` mode. Falls back to the session's main model
    /// when `None`.
    pub vision_model: Option<String>,
}

/// Feature flags controlling prompt-time memory injection behavior.
///
/// Each flag mirrors the `MemoryConfig` field of the same name (see the
/// `From<&MemoryConfig>` impl). The per-flag behavior is implemented elsewhere.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PromptMemoryFlags {
    /// Mirrors `MemoryConfig::project_prompt_injection`; `true` by default.
    pub project_prompt_injection: bool,
    /// Mirrors `MemoryConfig::relevant_recall`; `true` by default.
    pub relevant_recall: bool,
    /// Mirrors `MemoryConfig::relevant_recall_rerank`; `false` by default.
    pub relevant_recall_rerank: bool,
    /// Mirrors `MemoryConfig::project_first_dream`; `true` by default.
    pub project_first_dream: bool,
}

impl Default for PromptMemoryFlags {
    /// Every flag is on by default except recall reranking, which is opt-in.
    fn default() -> Self {
        Self {
            // Opt-in.
            relevant_recall_rerank: false,
            // On unless explicitly turned off.
            project_prompt_injection: true,
            relevant_recall: true,
            project_first_dream: true,
        }
    }
}

impl From<&MemoryConfig> for PromptMemoryFlags {
    fn from(value: &MemoryConfig) -> Self {
        Self {
            project_prompt_injection: value.project_prompt_injection,
            relevant_recall: value.relevant_recall,
            relevant_recall_rerank: value.relevant_recall_rerank,
            project_first_dream: value.project_first_dream,
        }
    }
}

/// Configuration for the agent loop.
///
/// Marked `#[non_exhaustive]` with crate-visible fields. See the `Default`
/// impl for the value each field starts with.
#[non_exhaustive]
pub struct AgentLoopConfig {
    /// Round limit for the agent loop (default: 200).
    pub(crate) max_rounds: usize,
    /// Optional system prompt text for this execution (default: `None`).
    pub(crate) system_prompt: Option<String>,
    /// Skill IDs that are disabled globally for this execution.
    pub(crate) disabled_skill_ids: BTreeSet<String>,
    /// Optional explicit skill selection for this execution.
    /// When set, only these skill IDs are considered for skill context and allowlists.
    pub(crate) selected_skill_ids: Option<Vec<String>>,
    /// Optional active skill mode for this execution.
    ///
    /// When set, skill discovery prefers `skills-<mode>` directories over generic
    /// directories for the same skill id.
    pub(crate) selected_skill_mode: Option<String>,
    /// Extra tool schemas supplied for this execution (default: empty).
    /// NOTE(review): presumably offered alongside the registry's own tools —
    /// confirm where the schemas are assembled.
    pub(crate) additional_tool_schemas: Vec<ToolSchema>,
    /// Shared tool registry (default: a fresh `ToolRegistry::new()`).
    pub(crate) tool_registry: Arc<ToolRegistry>,
    /// Optional composition executor (default: `None`).
    pub(crate) composition_executor: Option<Arc<CompositionExecutor>>,
    /// Optional skill manager (default: `None`).
    pub(crate) skill_manager: Option<Arc<SkillManager>>,
    /// If true, skip appending the initial user message (already present in session).
    pub(crate) skip_initial_user_message: bool,
    /// Optional storage for persisting session changes
    pub(crate) storage: Option<Arc<dyn Storage>>,
    /// Optional runtime persistence for non-authoritative session saves.
    /// When set, engine save sites use this instead of `storage` for writes.
    pub(crate) persistence: Option<Arc<dyn RuntimeSessionPersistence>>,
    /// Optional attachment reader for resolving `bamboo-attachment://...` references
    /// into `data:` URLs for upstream providers. This must not mutate session storage.
    pub(crate) attachment_reader: Option<Arc<dyn AttachmentReader>>,
    /// Optional asynchronous metrics collector
    pub(crate) metrics_collector: Option<MetricsCollector>,
    /// Model name used for metrics attribution
    pub(crate) model_name: Option<String>,
    /// Fast/cheap model for lightweight tasks (task evaluation, search, etc.).
    ///
    /// Call sites may fall back to `model_name` when this is unset.
    pub(crate) fast_model_name: Option<String>,
    /// Optional provider override for lightweight fast-model LLM calls.
    pub(crate) fast_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Fast/cheap model for memory/background tasks.
    ///
    /// This must not silently fall back to the main interaction model.
    pub(crate) background_model_name: Option<String>,

    /// Model for planning/coordination tasks (task decomposition, architecture).
    /// Falls back to `model_name` when unset.
    pub(crate) planning_model_name: Option<String>,
    /// Model for search/navigation tasks (grep, file listing, symbol resolution).
    /// Falls back to `fast_model_name` when unset.
    pub(crate) search_model_name: Option<String>,
    /// Custom instructions for conversation summarization, injected into the
    /// LLM summary prompt. Lets users control what the summary focuses on.
    ///
    /// Resolution order: session-level > config-level > built-in defaults.
    pub(crate) compression_instructions: Option<String>,
    /// Dedicated model for summarization. Falls back to `background_model_name`.
    pub(crate) summarization_model_name: Option<String>,
    /// Optional provider override for memory/background model LLM calls.
    ///
    /// When set, memory recall rerank and other memory/background tasks use this
    /// provider instead of the shared agent loop provider.
    pub(crate) background_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Optional provider override for summarization / context compression calls.
    ///
    /// When set, conversation/task summarization uses this provider instead of
    /// the shared agent loop provider.
    pub(crate) summarization_model_provider: Option<Arc<dyn LLMProvider>>,
    /// Provider routing key used for provider-specific request behavior.
    ///
    /// In multi-instance mode this may be the instance id.
    pub(crate) provider_name: Option<String>,
    /// Underlying provider type (for example `openai`, `anthropic`, `copilot`).
    ///
    /// This is distinct from `provider_name` so provider-specific behavior can
    /// remain correct when routing keys are instance ids.
    pub(crate) provider_type: Option<String>,
    /// Optional request-time reasoning effort override.
    pub(crate) reasoning_effort: Option<ReasoningEffort>,
    /// Bamboo application data directory (typically `~/.bamboo`).
    ///
    /// Used by runtime features that persist auxiliary artifacts outside the
    /// session store, such as durable plan mode files under `~/.bamboo/plan`.
    pub(crate) app_data_dir: Option<PathBuf>,
    /// Tool names that should be excluded from schemas sent to the LLM.
    pub(crate) disabled_tools: BTreeSet<String>,
    /// Token budget for context management (optional, defaults to model's limits)
    pub(crate) token_budget: Option<TokenBudget>,
    /// Optional image fallback behavior applied to *LLM requests only* (never persisted).
    ///
    /// This is intended for text-only provider paths where image parts must be degraded
    /// (placeholder / OCR / vision description / error) without leaking into stored
    /// session history or UI. See [`ImageFallbackMode`] for the available modes.
    pub(crate) image_fallback: Option<ImageFallbackConfig>,
    /// Feature flags controlling prompt-time memory injection behavior.
    pub(crate) prompt_memory_flags: PromptMemoryFlags,
    /// Maximum tool calls allowed per round (default: 80).
    pub(crate) max_tool_calls_per_round: usize,
    /// Maximum consecutive failures per tool before circuit breaker (default: 3).
    pub(crate) max_consecutive_failures_per_tool: usize,
    /// Tool names that require strict argument validation.
    pub(crate) strict_argument_tool_names: Vec<String>,
    /// Per-tool execution timeout in seconds (default: 120).
    pub(crate) per_tool_timeout_secs: u64,
    /// Parallel batch execution timeout in seconds (default: 300).
    pub(crate) parallel_batch_timeout_secs: u64,
    /// Permission mode for this execution (default: None = use PermissionConfig's mode).
    pub(crate) permission_mode: Option<PermissionMode>,
    /// Optional Gold observe-only evaluator configuration.
    ///
    /// When `None` or `enabled == false`, Gold evaluation is disabled and the
    /// existing execute/respond/resume loop remains unchanged.
    pub(crate) gold_config: Option<GoldConfig>,
    /// Optional guardian adversarial-review gate configuration. When `None` or
    /// `enabled == false`, the guardian terminal gate is inactive.
    pub(crate) guardian_config: Option<GuardianConfig>,
    /// Late-bound spawner for the guardian reviewer child. `None` (the default)
    /// leaves the guardian gate inert even when `guardian_config.enabled` is set,
    /// since the runner cannot create a child without it. Wired by the server.
    pub(crate) guardian_spawner: Option<Arc<dyn GuardianSpawner>>,
    /// Late-bound delegate that routes a child's gated-tool approval request up
    /// to its parent (Phase 2). `None` (the default) leaves child gating on its
    /// legacy path. Wired by the server.
    pub(crate) approval_delegate: Option<Arc<dyn ApprovalDelegate>>,
    /// Enable dynamic per-round model routing based on task complexity.
    /// When true, the pipeline classifies complexity at each round end and
    /// stores the result in session metadata.
    pub(crate) features_dynamic_model_routing: bool,
    /// Optional per-round resolver for auxiliary model settings that should
    /// follow live global config rather than stay frozen for the whole run.
    ///
    /// The main chat model remains session/request scoped; this hook is only
    /// for fast/background/planning/search/summarization helpers.
    pub(crate) auxiliary_model_resolver:
        Option<Arc<dyn Fn() -> AuxiliaryModelConfig + Send + Sync>>,
    /// Server-level usage guidance contributed by the run's tool executor —
    /// chiefly the `instructions` connected MCP servers return from `initialize`.
    /// Captured once at config construction (from `ToolExecutor::tool_guidance`)
    /// and appended to the tool-guide section of the system prompt, so a server's
    /// own how-to-use notes appear only while that server is loaded for the run.
    pub(crate) mcp_tool_guidance: Option<String>,
}

impl Default for AgentLoopConfig {
    /// Builds the baseline configuration: an empty tool registry, no optional
    /// collaborators (storage, providers, Gold, guardian, delegate, …), the
    /// standard numeric limits, and the built-in strict-argument tool list.
    fn default() -> Self {
        // Numeric limits, named so the literals are self-describing.
        const MAX_ROUNDS: usize = 200;
        const MAX_TOOL_CALLS_PER_ROUND: usize = 80;
        const MAX_CONSECUTIVE_FAILURES_PER_TOOL: usize = 3;
        const PER_TOOL_TIMEOUT_SECS: u64 = 120;
        const PARALLEL_BATCH_TIMEOUT_SECS: u64 = 300;

        // Tools whose arguments are validated strictly by default.
        const STRICT_ARGUMENT_TOOLS: [&str; 11] = [
            "Write",
            "Edit",
            "NotebookEdit",
            "apply_patch",
            "Bash",
            "Task",
            "SubAgent",
            "scheduler",
            "sub_session_manager",
            "session_note",
            "memory_note",
        ];

        let strict_argument_tool_names: Vec<String> = STRICT_ARGUMENT_TOOLS
            .iter()
            .map(|name| String::from(*name))
            .collect();

        Self {
            max_rounds: MAX_ROUNDS,
            system_prompt: None,
            disabled_skill_ids: BTreeSet::new(),
            selected_skill_ids: None,
            selected_skill_mode: None,
            additional_tool_schemas: Vec::new(),
            tool_registry: Arc::new(ToolRegistry::new()),
            composition_executor: None,
            skill_manager: None,
            skip_initial_user_message: false,
            storage: None,
            persistence: None,
            attachment_reader: None,
            metrics_collector: None,
            model_name: None,
            fast_model_name: None,
            fast_model_provider: None,
            background_model_name: None,
            planning_model_name: None,
            search_model_name: None,
            compression_instructions: None,
            summarization_model_name: None,
            background_model_provider: None,
            summarization_model_provider: None,
            provider_name: None,
            provider_type: None,
            reasoning_effort: None,
            app_data_dir: None,
            disabled_tools: BTreeSet::new(),
            token_budget: None,
            image_fallback: None,
            prompt_memory_flags: PromptMemoryFlags::default(),
            max_tool_calls_per_round: MAX_TOOL_CALLS_PER_ROUND,
            max_consecutive_failures_per_tool: MAX_CONSECUTIVE_FAILURES_PER_TOOL,
            strict_argument_tool_names,
            per_tool_timeout_secs: PER_TOOL_TIMEOUT_SECS,
            parallel_batch_timeout_secs: PARALLEL_BATCH_TIMEOUT_SECS,
            permission_mode: None,
            gold_config: None,
            guardian_config: None,
            guardian_spawner: None,
            approval_delegate: None,
            features_dynamic_model_routing: false,
            auxiliary_model_resolver: None,
            mcp_tool_guidance: None,
        }
    }
}

impl AgentLoopConfig {
    /// The active session goal to surface to the main agent, or `None` when
    /// Gold is disabled or no goal is set. Falls back to the legacy
    /// `evaluation_prompt` for back-compat via [`GoldConfig::effective_goal`].
    pub fn active_goal(&self) -> Option<&str> {
        self.gold_config
            .as_ref()
            .filter(|cfg| cfg.enabled)
            .and_then(GoldConfig::effective_goal)
    }

    /// Whether the Codex-style autonomous goal loop is active for this run.
    ///
    /// This requires Gold to be enabled, a goal to be set, AND auto-continue to
    /// be on. Only then is the `update_goal` self-report tool surfaced to the
    /// model and the terminal double-check allowed to veto a premature stop.
    /// When Gold is enabled without auto-continue, the evaluator stays purely
    /// observational (legacy behavior).
    pub fn goal_loop_active(&self) -> bool {
        self.gold_config.as_ref().is_some_and(|cfg| {
            cfg.enabled && cfg.auto_continue_enabled && cfg.effective_goal().is_some()
        })
    }

    /// Whether the guardian review gate is active for this run: a spawner is
    /// wired (so the runner can actually create the reviewer child) AND the
    /// config is present and enabled.
    pub fn guardian_active(&self) -> bool {
        self.guardian_spawner.is_some()
            && self.guardian_config.as_ref().is_some_and(|cfg| cfg.enabled)
    }

    /// Maximum guardian review passes for this run (the budget). `0` when no
    /// guardian config is set.
    pub fn guardian_max_reviews(&self) -> u32 {
        self.guardian_config
            .as_ref()
            .map_or(0, |cfg| cfg.max_reviews)
    }

    /// The reviewer model override, if a guardian config sets one.
    pub fn guardian_model(&self) -> Option<&str> {
        self.guardian_config
            .as_ref()
            .and_then(|cfg| cfg.model_name.as_deref())
    }

    /// Whether child→parent approval delegation is wired for this run.
    pub fn delegation_active(&self) -> bool {
        self.approval_delegate.is_some()
    }
}

#[cfg(test)]
mod tests;