zeph-config 0.18.1

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::defaults::default_true;

pub use zeph_mcp::{McpTrustLevel, tool::ToolSecurityMeta};

/// Serde fallback for `SlackConfig::port` when the key is absent.
fn default_slack_port() -> u16 {
    const PORT: u16 = 3000;
    PORT
}

/// Serde fallback for `SlackConfig::webhook_host`: the IPv4 loopback address.
fn default_slack_webhook_host() -> String {
    String::from("127.0.0.1")
}

/// Serde fallback for `A2aServerConfig::host`: the IPv4 unspecified address.
///
/// NOTE(review): if this value is used as a bind address it would listen on every
/// interface — confirm against the server start-up code.
fn default_a2a_host() -> String {
    String::from("0.0.0.0")
}

/// Serde fallback for `A2aServerConfig::port`.
fn default_a2a_port() -> u16 {
    const PORT: u16 = 8080;
    PORT
}

/// Serde fallback for `A2aServerConfig::rate_limit`.
///
/// The unit (e.g. requests per minute) is not defined in this file.
fn default_a2a_rate_limit() -> u32 {
    const LIMIT: u32 = 60;
    LIMIT
}

/// Serde fallback for `A2aServerConfig::max_body_size`: 1024 × 1024 (1 048 576).
fn default_a2a_max_body() -> usize {
    const KIB: usize = 1024;
    KIB * KIB
}

/// Serde fallback for `A2aServerConfig::drain_timeout_ms`: 30 000 ms.
fn default_drain_timeout_ms() -> u64 {
    const MILLIS_PER_SECOND: u64 = 1_000;
    30 * MILLIS_PER_SECOND
}

/// Serde fallback for `McpConfig::max_dynamic_servers`.
fn default_max_dynamic_servers() -> usize {
    const MAX_SERVERS: usize = 10;
    MAX_SERVERS
}

/// Serde fallback for `McpServerConfig::timeout`.
///
/// The unit is not stated in this file (presumably seconds — confirm at the call site).
fn default_mcp_timeout() -> u64 {
    const TIMEOUT: u64 = 30;
    TIMEOUT
}

/// Serde fallback for `McpOAuthConfig::callback_port` (the fixed default port).
fn default_oauth_callback_port() -> u16 {
    const PORT: u16 = 18766;
    PORT
}

/// Serde fallback for `McpOAuthConfig::client_name`.
fn default_oauth_client_name() -> String {
    String::from("Zeph")
}

/// Telegram channel settings.
///
/// `Debug` is implemented by hand for this type so that the token value is
/// never printed.
#[derive(Clone, Deserialize, Serialize)]
pub struct TelegramConfig {
    /// Secret token for the Telegram integration (presumably the bot API token —
    /// confirm against the channel implementation). Rendered as `[REDACTED]` by `Debug`.
    pub token: Option<String>,
    /// Allowed users. Falls back to an empty list when the key is omitted.
    ///
    /// NOTE(review): whether entries are usernames or numeric IDs, and what an empty
    /// list means for access control, is not visible in this file.
    #[serde(default)]
    pub allowed_users: Vec<String>,
}

/// Hand-written `Debug` that masks the token value in `{:?}` output.
impl std::fmt::Debug for TelegramConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the `Some`/`None` shape visible while hiding the actual secret.
        let token = self.token.is_some().then_some("[REDACTED]");

        let mut out = f.debug_struct("TelegramConfig");
        out.field("token", &token);
        out.field("allowed_users", &self.allowed_users);
        out.finish()
    }
}

/// Discord channel settings.
///
/// `Debug` is implemented by hand for this type so that the token value is
/// never printed.
#[derive(Clone, Deserialize, Serialize)]
pub struct DiscordConfig {
    /// Secret token for the Discord integration. Rendered as `[REDACTED]` by `Debug`.
    pub token: Option<String>,
    /// Optional application identifier; printed as-is by `Debug`.
    pub application_id: Option<String>,
    /// Allowed user IDs. Empty when the key is omitted.
    #[serde(default)]
    pub allowed_user_ids: Vec<String>,
    /// Allowed role IDs. Empty when the key is omitted.
    #[serde(default)]
    pub allowed_role_ids: Vec<String>,
    /// Allowed channel IDs. Empty when the key is omitted.
    ///
    /// NOTE(review): how the three allow-lists combine, and what an empty list means,
    /// is decided by the consumer of this config and is not visible here.
    #[serde(default)]
    pub allowed_channel_ids: Vec<String>,
}

/// Hand-written `Debug` that masks the token value in `{:?}` output.
impl std::fmt::Debug for DiscordConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only the presence of the token is revealed, never its contents.
        let token = match self.token {
            Some(_) => Some("[REDACTED]"),
            None => None,
        };

        let mut out = f.debug_struct("DiscordConfig");
        out.field("token", &token);
        out.field("application_id", &self.application_id);
        out.field("allowed_user_ids", &self.allowed_user_ids);
        out.field("allowed_role_ids", &self.allowed_role_ids);
        out.field("allowed_channel_ids", &self.allowed_channel_ids);
        out.finish()
    }
}

/// Slack channel settings.
///
/// `Debug` is implemented by hand for this type so that `bot_token` and
/// `signing_secret` are never printed.
#[derive(Clone, Deserialize, Serialize)]
pub struct SlackConfig {
    /// Secret bot token. Rendered as `[REDACTED]` by `Debug`.
    pub bot_token: Option<String>,
    /// Secret signing key. Rendered as `[REDACTED]` by `Debug`.
    pub signing_secret: Option<String>,
    /// Host for the webhook endpoint. Default: `"127.0.0.1"`.
    #[serde(default = "default_slack_webhook_host")]
    pub webhook_host: String,
    /// Port for the webhook endpoint. Default: `3000`.
    #[serde(default = "default_slack_port")]
    pub port: u16,
    /// Allowed user IDs. Empty when the key is omitted.
    #[serde(default)]
    pub allowed_user_ids: Vec<String>,
    /// Allowed channel IDs. Empty when the key is omitted.
    #[serde(default)]
    pub allowed_channel_ids: Vec<String>,
}

/// Hand-written `Debug` that masks both Slack secrets in `{:?}` output.
impl std::fmt::Debug for SlackConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only the presence of each secret is revealed, never its contents.
        let bot_token = self.bot_token.is_some().then_some("[REDACTED]");

        let mut out = f.debug_struct("SlackConfig");
        out.field("bot_token", &bot_token);
        // The static-analysis suppression must stay on the same line as the expression.
        out.field(
            "signing_secret",
            &self.signing_secret.as_ref().map(|_| "[REDACTED]"), // lgtm[rust/cleartext-logging]
        );
        out.field("webhook_host", &self.webhook_host);
        out.field("port", &self.port);
        out.field("allowed_user_ids", &self.allowed_user_ids);
        out.field("allowed_channel_ids", &self.allowed_channel_ids);
        out.finish()
    }
}

/// A2A server settings.
///
/// Every field has a serde fallback, so an empty table deserializes successfully.
/// `Debug` is implemented by hand so that `auth_token` is never printed.
///
/// Derives `Clone` like every other config type in this module; all fields are
/// plain owned values, so cloning is a straightforward deep copy.
#[derive(Clone, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)] // config struct — boolean flags are idiomatic here
pub struct A2aServerConfig {
    /// Whether the A2A server is enabled. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Server host. Default: `"0.0.0.0"`.
    #[serde(default = "default_a2a_host")]
    pub host: String,
    /// Server port. Default: `8080`.
    #[serde(default = "default_a2a_port")]
    pub port: u16,
    /// Public URL of the server. Default: empty string.
    #[serde(default)]
    pub public_url: String,
    /// Optional secret token. Rendered as `[REDACTED]` by `Debug`. Default: `None`.
    #[serde(default)]
    pub auth_token: Option<String>,
    /// Rate limit. Default: `60`. The unit is not defined in this file.
    #[serde(default = "default_a2a_rate_limit")]
    pub rate_limit: u32,
    /// TLS requirement flag. `true` in the `Default` impl; the serde fallback
    /// `default_true` is defined in `crate::defaults`.
    #[serde(default = "default_true")]
    pub require_tls: bool,
    /// SSRF protection flag. `true` in the `Default` impl; the serde fallback
    /// `default_true` is defined in `crate::defaults`.
    #[serde(default = "default_true")]
    pub ssrf_protection: bool,
    /// Maximum body size. Default: `1_048_576` (presumably bytes — confirm at the call site).
    #[serde(default = "default_a2a_max_body")]
    pub max_body_size: usize,
    /// Drain timeout in milliseconds. Default: `30_000`.
    #[serde(default = "default_drain_timeout_ms")]
    pub drain_timeout_ms: u64,
    /// When `true`, all requests are rejected with 401 if no `auth_token` is configured.
    /// Default `false` for backward compatibility — existing deployments without a token
    /// continue to operate. Set to `true` in production when authentication is mandatory.
    #[serde(default)]
    pub require_auth: bool,
}

/// Hand-written `Debug` that masks `auth_token` in `{:?}` output.
impl std::fmt::Debug for A2aServerConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only the presence of the token is revealed, never its contents.
        let auth_token = self.auth_token.is_some().then_some("[REDACTED]");

        let mut out = f.debug_struct("A2aServerConfig");
        out.field("enabled", &self.enabled);
        out.field("host", &self.host);
        out.field("port", &self.port);
        out.field("public_url", &self.public_url);
        out.field("auth_token", &auth_token);
        out.field("rate_limit", &self.rate_limit);
        out.field("require_tls", &self.require_tls);
        out.field("ssrf_protection", &self.ssrf_protection);
        out.field("max_body_size", &self.max_body_size);
        out.field("drain_timeout_ms", &self.drain_timeout_ms);
        out.field("require_auth", &self.require_auth);
        out.finish()
    }
}

/// Programmatic defaults; the helper-backed fields share their values with the
/// serde fallbacks declared on the struct.
impl Default for A2aServerConfig {
    fn default() -> Self {
        // Values shared with the `#[serde(default = "...")]` attributes.
        let host = default_a2a_host();
        let port = default_a2a_port();
        let rate_limit = default_a2a_rate_limit();
        let max_body_size = default_a2a_max_body();
        let drain_timeout_ms = default_drain_timeout_ms();

        Self {
            enabled: false,
            host,
            port,
            public_url: String::new(),
            auth_token: None,
            rate_limit,
            require_tls: true,
            ssrf_protection: true,
            max_body_size,
            drain_timeout_ms,
            require_auth: false,
        }
    }
}

/// Dynamic MCP tool context pruning configuration (#2204).
///
/// When enabled, an LLM call evaluates which MCP tools are relevant to the current task
/// before sending tool schemas to the main LLM, reducing context usage and improving
/// tool selection accuracy for servers with many tools.
///
/// The container-level `#[serde(default)]` means any field missing from the input is
/// taken from this type's `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ToolPruningConfig {
    /// Enable dynamic tool pruning. Default: `false` (opt-in).
    pub enabled: bool,
    /// Maximum number of MCP tools to include after pruning. Default: `15`.
    pub max_tools: usize,
    /// Provider name from `[[llm.providers]]` for the pruning LLM call.
    /// Should be a fast/cheap model. Empty string = use the default provider.
    /// Default: empty.
    pub pruning_provider: String,
    /// Minimum number of MCP tools below which pruning is skipped. Default: `10`.
    pub min_tools_to_prune: usize,
    /// Tool names that are never pruned (always included in the result). Default: empty.
    pub always_include: Vec<String>,
}

/// Defaults: pruning disabled, at most 15 tools kept, skipped below 10 tools.
impl Default for ToolPruningConfig {
    fn default() -> Self {
        const MAX_TOOLS: usize = 15;
        const MIN_TOOLS_TO_PRUNE: usize = 10;

        Self {
            enabled: false,
            max_tools: MAX_TOOLS,
            pruning_provider: String::default(),
            min_tools_to_prune: MIN_TOOLS_TO_PRUNE,
            always_include: vec![],
        }
    }
}

/// MCP tool discovery strategy (config-side representation).
///
/// Converted to `zeph_mcp::ToolDiscoveryStrategy` in `zeph-core` to avoid a
/// circular crate dependency (`zeph-config` → `zeph-mcp`).
///
/// Variants are (de)serialized by their lowercase names: `"embedding"`, `"llm"`, `"none"`.
#[derive(Debug, Clone, Copy, Default, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ToolDiscoveryStrategyConfig {
    /// Embedding-based cosine similarity retrieval.  Fast, no LLM call per turn.
    Embedding,
    /// LLM-based pruning via `prune_tools_cached`.  Existing behavior.
    Llm,
    /// No filtering — all tools are passed through.  This is the default.
    #[default]
    None,
}

/// MCP tool discovery configuration (#2321).
///
/// Nested under `[mcp.tool_discovery]`.  When `strategy = "embedding"`, the
/// `mcp.pruning` section is ignored for this session — the embedding path
/// supersedes LLM pruning entirely.
///
/// The container-level `#[serde(default)]` means any field missing from the input is
/// taken from this type's `Default` impl.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(default)]
pub struct ToolDiscoveryConfig {
    /// Discovery strategy.  Default: `none` (all tools, safe default).
    pub strategy: ToolDiscoveryStrategyConfig,
    /// Number of top-scoring tools to include per turn (embedding strategy only).
    /// Default: `10`.
    pub top_k: usize,
    /// Minimum cosine similarity for a tool to be included (embedding strategy only).
    /// Default: `0.2`.
    pub min_similarity: f32,
    /// Provider name from `[[llm.providers]]` for embedding computation.
    /// Should reference a fast/cheap embedding model.  Empty = use the agent's
    /// default embedding provider.  Default: empty.
    pub embedding_provider: String,
    /// Tool names always included regardless of similarity score.  Default: empty.
    pub always_include: Vec<String>,
    /// Minimum tool count below which discovery is skipped (all tools passed through).
    /// Default: `10`.
    pub min_tools_to_filter: usize,
    /// When `true`, treat any embedding failure as a hard error instead of silently
    /// falling back to all tools.  Default: `false` (soft fallback).
    pub strict: bool,
}

/// Defaults: no filtering, top 10 tools, similarity floor 0.2, soft fallback.
impl Default for ToolDiscoveryConfig {
    fn default() -> Self {
        const TOP_K: usize = 10;
        const MIN_SIMILARITY: f32 = 0.2;
        const MIN_TOOLS_TO_FILTER: usize = 10;

        Self {
            strategy: ToolDiscoveryStrategyConfig::None,
            top_k: TOP_K,
            min_similarity: MIN_SIMILARITY,
            embedding_provider: String::default(),
            always_include: vec![],
            min_tools_to_filter: MIN_TOOLS_TO_FILTER,
            strict: false,
        }
    }
}

/// Trust calibration configuration, nested under `[mcp.trust_calibration]`.
///
/// Each field carries its own serde fallback, so any subset of keys may be omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[allow(clippy::struct_excessive_bools)]
pub struct TrustCalibrationConfig {
    /// Enable trust calibration (default: false — opt-in).
    #[serde(default)]
    pub enabled: bool,
    /// Run pre-invocation probe on connect (Phase 1).
    #[serde(default = "default_true")]
    pub probe_on_connect: bool,
    /// Monitor invocations for trust score updates (Phase 2).
    #[serde(default = "default_true")]
    pub monitor_invocations: bool,
    /// Persist trust scores to `SQLite` (Phase 3).
    #[serde(default = "default_true")]
    pub persist_scores: bool,
    /// Per-day decay rate applied to trust scores above 0.5. Default: `0.01`.
    #[serde(default = "default_decay_rate")]
    pub decay_rate_per_day: f64,
    /// Score penalty applied when injection is detected. Default: `0.25`.
    #[serde(default = "default_injection_penalty")]
    pub injection_penalty: f64,
    /// Optional LLM provider for trust verification. Empty = disabled.
    #[serde(default)]
    pub verifier_provider: String,
}

/// Serde fallback for `TrustCalibrationConfig::decay_rate_per_day`.
fn default_decay_rate() -> f64 {
    const RATE: f64 = 0.01;
    RATE
}

/// Serde fallback for `TrustCalibrationConfig::injection_penalty`.
fn default_injection_penalty() -> f64 {
    const PENALTY: f64 = 0.25;
    PENALTY
}

/// Defaults: calibration disabled, every phase flag on, no verifier provider.
impl Default for TrustCalibrationConfig {
    fn default() -> Self {
        // Values shared with the `#[serde(default = "...")]` attributes.
        let decay_rate_per_day = default_decay_rate();
        let injection_penalty = default_injection_penalty();

        Self {
            enabled: false,
            probe_on_connect: true,
            monitor_invocations: true,
            persist_scores: true,
            decay_rate_per_day,
            injection_penalty,
            verifier_provider: String::default(),
        }
    }
}

/// Serde fallback for `McpConfig::max_description_bytes`.
fn default_max_description_bytes() -> usize {
    const LIMIT: usize = 2048;
    LIMIT
}

/// Serde fallback for `McpConfig::max_instructions_bytes`.
fn default_max_instructions_bytes() -> usize {
    const LIMIT: usize = 2048;
    LIMIT
}

/// Top-level MCP configuration.
///
/// Every field has a serde fallback, so an empty table deserializes successfully.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct McpConfig {
    /// Configured MCP servers. Default: empty.
    #[serde(default)]
    pub servers: Vec<McpServerConfig>,
    /// Allowed commands. Default: empty.
    ///
    /// NOTE(review): presumably restricts which stdio `command` values may be spawned;
    /// the enforcement logic is not in this file.
    #[serde(default)]
    pub allowed_commands: Vec<String>,
    /// Upper bound on dynamic servers. Default: `10`.
    #[serde(default = "default_max_dynamic_servers")]
    pub max_dynamic_servers: usize,
    /// Dynamic tool pruning for context optimization.
    #[serde(default)]
    pub pruning: ToolPruningConfig,
    /// Trust calibration settings (opt-in, disabled by default).
    #[serde(default)]
    pub trust_calibration: TrustCalibrationConfig,
    /// Embedding-based tool discovery (#2321).
    #[serde(default)]
    pub tool_discovery: ToolDiscoveryConfig,
    /// Maximum byte length for MCP tool descriptions. Truncated with "..." if exceeded. Default: 2048.
    #[serde(default = "default_max_description_bytes")]
    pub max_description_bytes: usize,
    /// Maximum byte length for MCP server instructions. Truncated with "..." if exceeded. Default: 2048.
    #[serde(default = "default_max_instructions_bytes")]
    pub max_instructions_bytes: usize,
}

/// Defaults: no servers, no allowed commands, nested sections at their own defaults.
impl Default for McpConfig {
    fn default() -> Self {
        // Values shared with the `#[serde(default = "...")]` attributes.
        let max_dynamic_servers = default_max_dynamic_servers();
        let max_description_bytes = default_max_description_bytes();
        let max_instructions_bytes = default_max_instructions_bytes();

        Self {
            servers: vec![],
            allowed_commands: vec![],
            max_dynamic_servers,
            pruning: Default::default(),
            trust_calibration: Default::default(),
            tool_discovery: Default::default(),
            max_description_bytes,
            max_instructions_bytes,
        }
    }
}

/// Configuration for a single MCP server.
///
/// `Debug` is implemented by hand for this type: the values of `env` and `headers`
/// are replaced with `[REDACTED]`, and only the keys of `tool_metadata` are printed.
#[derive(Clone, Deserialize, Serialize)]
pub struct McpServerConfig {
    /// Identifier of this server entry. Required (no serde fallback).
    pub id: String,
    /// Stdio transport: command to spawn.
    pub command: Option<String>,
    /// Arguments for `command`. Default: empty.
    /// NOTE(review): printed verbatim by `Debug`, so secrets should not be passed here.
    #[serde(default)]
    pub args: Vec<String>,
    /// Environment variables (presumably for the spawned command — confirm at the call
    /// site). Values are redacted in `Debug` output. Default: empty.
    #[serde(default)]
    pub env: HashMap<String, String>,
    /// HTTP transport: remote MCP server URL.
    pub url: Option<String>,
    /// Timeout. Default: `30`. The unit is not stated in this file (presumably seconds).
    #[serde(default = "default_mcp_timeout")]
    pub timeout: u64,
    /// Optional declarative policy for this server (allowlist, denylist, rate limit).
    #[serde(default)]
    pub policy: zeph_mcp::McpPolicy,
    /// Static HTTP headers for the transport (e.g. `Authorization: Bearer <token>`).
    /// Values support vault references: `${VAULT_KEY}`.
    #[serde(default)]
    pub headers: HashMap<String, String>,
    /// OAuth 2.1 configuration for this server.
    #[serde(default)]
    pub oauth: Option<McpOAuthConfig>,
    /// Trust level for this server. Default: Untrusted.
    #[serde(default)]
    pub trust_level: McpTrustLevel,
    /// Tool allowlist. `None` means no override (inherit defaults).
    /// `Some(vec![])` is an explicit empty list (deny all for Untrusted/Sandboxed).
    /// `Some(vec!["a", "b"])` allows only listed tools.
    #[serde(default)]
    pub tool_allowlist: Option<Vec<String>>,
    /// Expected tool names for attestation. Supplements `tool_allowlist`.
    ///
    /// When non-empty: tools not in this list are filtered out (Untrusted/Sandboxed)
    /// or warned about (Trusted). Schema drift is logged when fingerprints change
    /// between connections.
    #[serde(default)]
    pub expected_tools: Vec<String>,
    /// Filesystem roots exposed to this MCP server via `roots/list`.
    /// Each entry is a `{uri, name?}` pair. URI must use `file://` scheme.
    /// When empty, the server receives an empty roots list.
    #[serde(default)]
    pub roots: Vec<McpRootEntry>,
    /// Per-tool security metadata overrides. Keys are tool names.
    /// When absent for a tool, metadata is inferred from the tool name via heuristics.
    #[serde(default)]
    pub tool_metadata: HashMap<String, ToolSecurityMeta>,
}

/// A filesystem root exposed to an MCP server via `roots/list`.
///
/// This type stores the URI as a plain `String`; it performs no scheme validation
/// itself, so any `file://` check has to happen where the entry is consumed.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct McpRootEntry {
    /// URI of the root directory. Must use `file://` scheme.
    pub uri: String,
    /// Optional human-readable name for this root. `None` when omitted.
    #[serde(default)]
    pub name: Option<String>,
}

/// OAuth 2.1 configuration for an MCP server.
///
/// Each field carries its own serde fallback, so any subset of keys may be omitted.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct McpOAuthConfig {
    /// Enable OAuth 2.1 for this server. Default: `false`.
    #[serde(default)]
    pub enabled: bool,
    /// Token storage backend. Default: `vault`.
    #[serde(default)]
    pub token_storage: OAuthTokenStorage,
    /// OAuth scopes to request. Empty = server default.
    #[serde(default)]
    pub scopes: Vec<String>,
    /// Port for the local callback server. `0` = auto-assign, `18766` = default fixed port.
    #[serde(default = "default_oauth_callback_port")]
    pub callback_port: u16,
    /// Client name sent during dynamic registration. Default: `"Zeph"`.
    #[serde(default = "default_oauth_client_name")]
    pub client_name: String,
}

/// Defaults: OAuth disabled, default token storage, no scopes, fixed callback port.
impl Default for McpOAuthConfig {
    fn default() -> Self {
        // Values shared with the `#[serde(default = "...")]` attributes.
        let callback_port = default_oauth_callback_port();
        let client_name = default_oauth_client_name();

        Self {
            enabled: false,
            token_storage: Default::default(),
            scopes: vec![],
            callback_port,
            client_name,
        }
    }
}

/// Where OAuth tokens are stored.
///
/// Variants are (de)serialized by their lowercase names: `"vault"`, `"memory"`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum OAuthTokenStorage {
    /// Persisted in the age vault (default).
    #[default]
    Vault,
    /// In-memory only — tokens lost on restart.
    Memory,
}

impl std::fmt::Debug for McpServerConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let redacted_env: HashMap<&str, &str> = self
            .env
            .keys()
            .map(|k| (k.as_str(), "[REDACTED]"))
            .collect();
        // Redact header values to avoid leaking tokens in logs.
        let redacted_headers: HashMap<&str, &str> = self
            .headers
            .keys()
            .map(|k| (k.as_str(), "[REDACTED]"))
            .collect();
        f.debug_struct("McpServerConfig")
            .field("id", &self.id)
            .field("command", &self.command)
            .field("args", &self.args)
            .field("env", &redacted_env)
            .field("url", &self.url)
            .field("timeout", &self.timeout)
            .field("policy", &self.policy)
            .field("headers", &redacted_headers)
            .field("oauth", &self.oauth)
            .field("trust_level", &self.trust_level)
            .field("tool_allowlist", &self.tool_allowlist)
            .field("expected_tools", &self.expected_tools)
            .field("roots", &self.roots)
            .field(
                "tool_metadata_keys",
                &self.tool_metadata.keys().collect::<Vec<_>>(),
            )
            .finish()
    }
}