meerkat-core 0.5.2

//! Core error types for Meerkat

use crate::hooks::{HookPoint, HookReasonCode};
use crate::types::SessionId;

/// Typed classification of why an LLM call failed.
///
/// Carried in the `reason` field of `AgentError::Llm`. `AgentError::is_recoverable`
/// treats `RateLimited`, `NetworkTimeout`, and `CallTimeout` as recoverable, and
/// `ProviderError` as recoverable only when its payload has `"retryable": true`;
/// every other reason is reported as not recoverable.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum LlmFailureReason {
    /// The request was rate limited.
    RateLimited {
        /// Optional wait hint; handed back to callers by
        /// `AgentError::retry_after_hint`.
        retry_after: Option<std::time::Duration>,
    },
    /// The request did not fit within the context limit.
    ///
    /// NOTE(review): `max` and `requested` are presumably token counts —
    /// confirm against the client layer that constructs this variant.
    ContextExceeded {
        max: u32,
        requested: u32,
    },
    /// Authentication with the provider failed.
    AuthError,
    /// The model was rejected as invalid. The string is presumably the model
    /// identifier — confirm against the constructing call site.
    InvalidModel(String),
    /// Provider-specific failure carried as raw JSON. A top-level boolean
    /// `retryable` key is the only field this module inspects.
    ProviderError(serde_json::Value),
    /// Provider/client-native network timeout (owned by client layer)
    NetworkTimeout {
        /// Duration associated with the timeout, in milliseconds.
        duration_ms: u64,
    },
    /// Agent-loop hard call timeout (owned by agent loop policy)
    CallTimeout {
        /// Duration associated with the timeout, in milliseconds.
        duration_ms: u64,
    },
}

/// Errors that can occur during tool validation
///
/// The two variants and their display messages match the same-named
/// variants of `ToolError` in this module.
#[derive(Debug, Clone, thiserror::Error, PartialEq)]
pub enum ToolValidationError {
    /// The requested tool was not found
    ///
    /// `name` is the tool name interpolated into the message.
    #[error("Tool not found: {name}")]
    NotFound { name: String },
    /// The tool arguments failed validation
    ///
    /// `name` is the tool name and `reason` the explanation; both are
    /// interpolated into the message.
    #[error("Invalid arguments for tool '{name}': {reason}")]
    InvalidArguments { name: String, reason: String },
}

impl ToolValidationError {
    /// Builds a [`ToolValidationError::NotFound`] for the tool called `name`.
    pub fn not_found(name: impl Into<String>) -> Self {
        let name = name.into();
        Self::NotFound { name }
    }

    /// Builds a [`ToolValidationError::InvalidArguments`] for the tool called
    /// `name`, with `reason` explaining why the arguments were rejected.
    pub fn invalid_arguments(name: impl Into<String>, reason: impl Into<String>) -> Self {
        let name = name.into();
        let reason = reason.into();
        Self::InvalidArguments { name, reason }
    }
}

/// Error returned by tool dispatch operations.
///
/// `ToolError::error_code` maps every variant to a static string code, and
/// `ToolError::to_error_payload` pairs that code with the display message.
/// Bare `String` / `&str` values convert into [`ToolError::Other`] via `From`.
#[derive(Debug, Clone, thiserror::Error)]
pub enum ToolError {
    /// The requested tool was not found
    ///
    /// Error code: `"tool_not_found"`.
    #[error("Tool not found: {name}")]
    NotFound { name: String },

    /// The tool exists but is currently unavailable
    ///
    /// Error code: `"tool_unavailable"`. `reason` is appended to the message.
    #[error("Tool '{name}' is currently unavailable: {reason}")]
    Unavailable { name: String, reason: String },

    /// The tool arguments failed validation
    ///
    /// Error code: `"invalid_arguments"`.
    #[error("Invalid arguments for tool '{name}': {reason}")]
    InvalidArguments { name: String, reason: String },

    /// The tool execution failed
    ///
    /// Error code: `"execution_failed"`. Unlike most variants, no tool name
    /// is carried; only `message` appears in the display text.
    #[error("Tool execution failed: {message}")]
    ExecutionFailed { message: String },

    /// The tool execution timed out
    ///
    /// Error code: `"timeout"`. `timeout_ms` is rendered with an `ms` suffix.
    #[error("Tool '{name}' timed out after {timeout_ms}ms")]
    Timeout { name: String, timeout_ms: u64 },

    /// Tool access was denied by policy
    ///
    /// Error code: `"access_denied"`.
    #[error("Tool '{name}' is not allowed by policy")]
    AccessDenied { name: String },

    /// A generic tool error with a message
    ///
    /// Error code: `"tool_error"`. The display text is the message verbatim.
    #[error("{0}")]
    Other(String),

    /// Tool call must be routed externally (callback pending)
    ///
    /// This variant signals that a tool call cannot be handled internally
    /// and must be routed to an external handler. The payload contains
    /// serialized information about the pending tool call.
    ///
    /// Error code: `"callback_pending"`. Only `tool_name` appears in the
    /// display text; `args` is available through
    /// `ToolError::as_callback_pending`.
    #[error("Callback pending for tool '{tool_name}'")]
    CallbackPending {
        tool_name: String,
        args: serde_json::Value,
    },
}

impl ToolError {
    pub fn error_code(&self) -> &'static str {
        match self {
            Self::NotFound { .. } => "tool_not_found",
            Self::Unavailable { .. } => "tool_unavailable",
            Self::InvalidArguments { .. } => "invalid_arguments",
            Self::ExecutionFailed { .. } => "execution_failed",
            Self::Timeout { .. } => "timeout",
            Self::AccessDenied { .. } => "access_denied",
            Self::Other(_) => "tool_error",
            Self::CallbackPending { .. } => "callback_pending",
        }
    }

    pub fn to_error_payload(&self) -> serde_json::Value {
        serde_json::json!({
            "error": self.error_code(),
            "message": self.to_string(),
        })
    }

    pub fn not_found(name: impl Into<String>) -> Self {
        Self::NotFound { name: name.into() }
    }
    pub fn unavailable(name: impl Into<String>, reason: impl Into<String>) -> Self {
        Self::Unavailable {
            name: name.into(),
            reason: reason.into(),
        }
    }
    pub fn invalid_arguments(name: impl Into<String>, reason: impl Into<String>) -> Self {
        Self::InvalidArguments {
            name: name.into(),
            reason: reason.into(),
        }
    }
    pub fn execution_failed(message: impl Into<String>) -> Self {
        Self::ExecutionFailed {
            message: message.into(),
        }
    }
    pub fn timeout(name: impl Into<String>, timeout_ms: u64) -> Self {
        Self::Timeout {
            name: name.into(),
            timeout_ms,
        }
    }
    pub fn access_denied(name: impl Into<String>) -> Self {
        Self::AccessDenied { name: name.into() }
    }
    pub fn other(message: impl Into<String>) -> Self {
        Self::Other(message.into())
    }

    /// Create a callback pending error for external tool routing
    pub fn callback_pending(tool_name: impl Into<String>, args: serde_json::Value) -> Self {
        Self::CallbackPending {
            tool_name: tool_name.into(),
            args,
        }
    }

    /// Check if this is a callback pending error
    pub fn is_callback_pending(&self) -> bool {
        matches!(self, Self::CallbackPending { .. })
    }

    /// Extract callback pending info if this is a CallbackPending error
    pub fn as_callback_pending(&self) -> Option<(&str, &serde_json::Value)> {
        match self {
            Self::CallbackPending { tool_name, args } => Some((tool_name, args)),
            _ => None,
        }
    }
}

impl From<String> for ToolError {
    fn from(s: String) -> Self {
        Self::Other(s)
    }
}
impl From<&str> for ToolError {
    fn from(s: &str) -> Self {
        Self::Other(s.to_string())
    }
}

/// Errors that can occur during agent execution
///
/// Two helper predicates classify variants: `AgentError::is_graceful` is true
/// only for the three budget-exceeded variants and `MaxTurnsReached`, and
/// `AgentError::is_recoverable` is true only for certain `Llm` failure reasons.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum AgentError {
    /// An LLM call failed. `reason` carries the typed classification; only
    /// `provider` and `message` appear in the display text.
    #[error("LLM error ({provider}): {message}")]
    Llm {
        provider: &'static str,
        reason: LlmFailureReason,
        message: String,
    },
    /// Storage failure carried as a message. Also produced by the
    /// `store_error` and `invalid_session_id` helpers in this module.
    #[error("Storage error: {0}")]
    StoreError(String),
    /// Tool failure carried as a message.
    #[error("Tool error: {0}")]
    ToolError(String),
    /// MCP failure carried as a message.
    #[error("MCP error: {0}")]
    McpError(String),
    /// No session exists for the given id.
    #[error("Session not found: {0}")]
    SessionNotFound(SessionId),
    /// Token budget exhausted. Counted as graceful by `is_graceful`.
    #[error("Token budget exceeded: used {used}, limit {limit}")]
    TokenBudgetExceeded { used: u64, limit: u64 },
    /// Time budget exhausted; both fields are in seconds. Counted as graceful
    /// by `is_graceful`.
    #[error("Time budget exceeded: {elapsed_secs}s > {limit_secs}s")]
    TimeBudgetExceeded { elapsed_secs: u64, limit_secs: u64 },
    /// Tool-call budget exhausted. Counted as graceful by `is_graceful`.
    #[error("Tool call budget exceeded: {count} calls > {limit} limit")]
    ToolCallBudgetExceeded { count: usize, limit: usize },
    /// Max tokens reached on a turn. `partial` is included verbatim in the
    /// display text.
    #[error("Max tokens reached on turn {turn}, partial output: {partial}")]
    MaxTokensReached { turn: u32, partial: String },
    /// Content was filtered on the given turn.
    #[error("Content filtered on turn {turn}")]
    ContentFiltered { turn: u32 },
    /// The turn limit was reached. Counted as graceful by `is_graceful`.
    #[error("Max turns reached: {turns}")]
    MaxTurnsReached { turns: u32 },
    /// The run was cancelled. Not counted as graceful by `is_graceful`.
    #[error("Run was cancelled")]
    Cancelled,
    /// A state transition from `from` to `to` was rejected.
    #[error("Invalid state transition: {from} -> {to}")]
    InvalidStateTransition { from: String, to: String },
    /// No operation matches the given identifier string.
    #[error("Operation not found: {0}")]
    OperationNotFound(String),
    /// `depth` exceeded the allowed `max`.
    #[error("Depth limit exceeded: {depth} > {max}")]
    DepthLimitExceeded { depth: u32, max: u32 },
    /// A concurrency limit was exceeded; no further detail is carried.
    #[error("Concurrency limit exceeded")]
    ConcurrencyLimitExceeded,
    /// Configuration problem carried as a message.
    #[error("Configuration error: {0}")]
    ConfigError(String),
    /// An access policy named a tool that is not valid.
    #[error("Invalid tool in access policy: {tool}")]
    InvalidToolAccess { tool: String },
    /// Internal failure carried as a message.
    #[error("Internal error: {0}")]
    InternalError(String),

    /// Agent construction failed (e.g. missing API key, unknown provider).
    #[error("Build error: {0}")]
    BuildError(String),

    /// A tool call must be routed externally (callback pending)
    ///
    /// Same shape and display text as `ToolError::CallbackPending`; `args` is
    /// not part of the display text.
    #[error("Callback pending for tool '{tool_name}'")]
    CallbackPending {
        tool_name: String,
        args: serde_json::Value,
    },

    /// Structured output validation failed after retries
    ///
    /// `last_output` is carried but not shown in the display text; it is
    /// presumably the final rejected output — confirm against the producer.
    #[error("Structured output validation failed after {attempts} attempts: {reason}")]
    StructuredOutputValidationFailed {
        attempts: u32,
        reason: String,
        last_output: String,
    },

    /// Invalid output schema provided
    #[error("Invalid output schema: {0}")]
    InvalidOutputSchema(String),

    /// A hook denied the operation. `point` and `reason_code` are rendered
    /// with `Debug`; `payload` is carried but not shown in the display text.
    #[error("Hook denied at {point:?}: {reason_code:?} - {message}")]
    HookDenied {
        point: HookPoint,
        reason_code: HookReasonCode,
        message: String,
        payload: Option<serde_json::Value>,
    },

    /// The hook identified by `hook_id` timed out; `timeout_ms` is in milliseconds.
    #[error("Hook '{hook_id}' timed out after {timeout_ms}ms")]
    HookTimeout { hook_id: String, timeout_ms: u64 },

    /// The hook identified by `hook_id` failed while executing.
    #[error("Hook execution failed for '{hook_id}': {reason}")]
    HookExecutionFailed { hook_id: String, reason: String },

    /// Hook configuration was rejected as invalid.
    #[error("Hook configuration invalid: {reason}")]
    HookConfigInvalid { reason: String },

    /// Turn execution reached a terminal outcome classified as HardFailure.
    ///
    /// The outcome is kept as the typed enum and rendered with `Debug`.
    #[error("Terminal failure: {outcome:?}")]
    TerminalFailure {
        outcome: crate::turn_execution_authority::TurnTerminalOutcome,
    },

    /// The session has no pending user/tool-results boundary for `run_pending`.
    ///
    /// Returned when `RuntimeExecutionKind::ResumePending` is requested but the
    /// session's last message is not `User` or `ToolResults`. The caller should
    /// treat this as a successful no-op (no turn ran, no output produced).
    #[error("no pending boundary for resume")]
    NoPendingBoundary,
}

impl AgentError {
    pub fn llm(
        provider: &'static str,
        reason: LlmFailureReason,
        message: impl Into<String>,
    ) -> Self {
        Self::Llm {
            provider,
            reason,
            message: message.into(),
        }
    }
    pub fn is_graceful(&self) -> bool {
        matches!(
            self,
            Self::TokenBudgetExceeded { .. }
                | Self::TimeBudgetExceeded { .. }
                | Self::ToolCallBudgetExceeded { .. }
                | Self::MaxTurnsReached { .. }
        )
    }
    pub fn is_rate_limited(&self) -> bool {
        matches!(
            self,
            Self::Llm {
                reason: LlmFailureReason::RateLimited { .. },
                ..
            }
        )
    }

    pub fn retry_after_hint(&self) -> Option<std::time::Duration> {
        match self {
            Self::Llm {
                reason: LlmFailureReason::RateLimited { retry_after },
                ..
            } => *retry_after,
            _ => None,
        }
    }

    pub fn is_recoverable(&self) -> bool {
        match self {
            Self::Llm { reason, .. } => match reason {
                LlmFailureReason::RateLimited { .. } => true,
                LlmFailureReason::NetworkTimeout { .. } => true,
                LlmFailureReason::CallTimeout { .. } => true,
                LlmFailureReason::ProviderError(value) => {
                    value.get("retryable").and_then(serde_json::Value::as_bool) == Some(true)
                }
                _ => false,
            },
            _ => false,
        }
    }
}

/// Wraps any displayable failure in [`AgentError::StoreError`], using the
/// text produced by [`store_error_message`].
pub fn store_error(err: impl std::fmt::Display) -> AgentError {
    let message = store_error_message(err);
    AgentError::StoreError(message)
}
/// Wraps a session-id failure in [`AgentError::StoreError`], using the text
/// produced by [`invalid_session_id_message`].
pub fn invalid_session_id(err: impl std::fmt::Display) -> AgentError {
    let message = invalid_session_id_message(err);
    AgentError::StoreError(message)
}
/// Renders a storage failure as its `Display` text, unchanged.
pub fn store_error_message(err: impl std::fmt::Display) -> String {
    ToString::to_string(&err)
}
/// Renders a session-id failure as `"Invalid session ID: "` followed by the
/// failure's `Display` text.
pub fn invalid_session_id_message(err: impl std::fmt::Display) -> String {
    let mut message = String::from("Invalid session ID: ");
    message.push_str(&err.to_string());
    message
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
mod tests {
    use super::*;
    use crate::turn_execution_authority::TurnTerminalOutcome;
    use std::time::Duration;

    /// Shorthand for an `AgentError::Llm` attributed to the "anthropic" provider.
    fn anthropic_error(reason: LlmFailureReason, message: &str) -> AgentError {
        AgentError::llm("anthropic", reason, message)
    }

    /// Shorthand for a rate-limited LLM error with a retry hint of `secs` seconds.
    fn rate_limited_error(secs: u64) -> AgentError {
        let reason = LlmFailureReason::RateLimited {
            retry_after: Some(Duration::from_secs(secs)),
        };
        anthropic_error(reason, "rate limited")
    }

    /// Shorthand for a `BuildError` carrying `message`.
    fn build_error(message: &str) -> AgentError {
        AgentError::BuildError(message.to_string())
    }

    // -- Timeout classification --

    /// Network timeouts must be retryable.
    #[test]
    fn test_network_timeout_is_recoverable() {
        let reason = LlmFailureReason::NetworkTimeout { duration_ms: 30000 };
        let err = anthropic_error(reason, "network timeout after 30s");
        assert!(err.is_recoverable());
    }

    /// Agent-loop call timeouts must be retryable.
    #[test]
    fn test_call_timeout_is_recoverable() {
        let reason = LlmFailureReason::CallTimeout { duration_ms: 45000 };
        let err = anthropic_error(reason, "call timeout after 45s");
        assert!(err.is_recoverable());
    }

    /// `NetworkTimeout` keeps its duration payload.
    #[test]
    fn test_network_timeout_typed_mapping() {
        let reason = LlmFailureReason::NetworkTimeout { duration_ms: 5000 };
        if let LlmFailureReason::NetworkTimeout { duration_ms } = reason {
            assert_eq!(duration_ms, 5000);
        } else {
            panic!("expected NetworkTimeout");
        }
    }

    /// `CallTimeout` keeps its duration payload.
    #[test]
    fn test_call_timeout_typed_mapping() {
        let reason = LlmFailureReason::CallTimeout { duration_ms: 60000 };
        if let LlmFailureReason::CallTimeout { duration_ms } = reason {
            assert_eq!(duration_ms, 60000);
        } else {
            panic!("expected CallTimeout");
        }
    }

    /// The two timeout kinds never compare equal, even with the same duration.
    #[test]
    fn test_timeout_variants_are_distinct() {
        assert_ne!(
            LlmFailureReason::NetworkTimeout { duration_ms: 1000 },
            LlmFailureReason::CallTimeout { duration_ms: 1000 }
        );
    }

    /// Authentication failures are not retryable.
    #[test]
    fn test_auth_error_not_recoverable() {
        let err = anthropic_error(LlmFailureReason::AuthError, "bad key");
        assert!(!err.is_recoverable());
    }

    // -- Rate-limit helper tests (PR #156 port) --

    #[test]
    fn test_is_rate_limited_true_for_rate_limit_error() {
        assert!(rate_limited_error(30).is_rate_limited());
    }

    #[test]
    fn test_is_rate_limited_false_for_other_errors() {
        let timeout = anthropic_error(
            LlmFailureReason::NetworkTimeout { duration_ms: 5000 },
            "timeout",
        );
        assert!(!timeout.is_rate_limited());

        let auth = anthropic_error(LlmFailureReason::AuthError, "bad key");
        assert!(!auth.is_rate_limited());
    }

    #[test]
    fn test_retry_after_hint_returns_duration_for_rate_limit() {
        let hint = rate_limited_error(60).retry_after_hint();
        assert_eq!(hint, Some(Duration::from_secs(60)));
    }

    #[test]
    fn test_retry_after_hint_returns_none_for_non_rate_limit() {
        let timeout = anthropic_error(
            LlmFailureReason::NetworkTimeout { duration_ms: 5000 },
            "timeout",
        );
        assert_eq!(timeout.retry_after_hint(), None);
    }

    /// Neither timeout kind counts as a graceful termination.
    #[test]
    fn test_timeout_variants_not_graceful() {
        let timeouts = [
            LlmFailureReason::NetworkTimeout { duration_ms: 1000 },
            LlmFailureReason::CallTimeout { duration_ms: 1000 },
        ];
        for reason in timeouts {
            assert!(!anthropic_error(reason, "timeout").is_graceful());
        }
    }

    // -- P2-6: Typed BuildError variant --

    #[test]
    fn test_build_error_variant_exists_and_carries_message() {
        let err = build_error("Missing API key for provider 'anthropic'");
        if let AgentError::BuildError(msg) = &err {
            assert!(
                msg.contains("API key"),
                "message should contain source text"
            );
        } else {
            panic!("expected BuildError, got: {err}");
        }
    }

    #[test]
    fn test_build_error_is_not_recoverable() {
        let err = build_error("Unknown provider for model 'llama-3'");
        assert!(!err.is_recoverable(), "build errors are not recoverable");
    }

    #[test]
    fn test_build_error_is_not_graceful() {
        let err = build_error("Missing API key");
        assert!(!err.is_graceful(), "build errors are not graceful");
    }

    #[test]
    fn test_build_error_display() {
        let display = build_error("Missing API key for provider 'anthropic'").to_string();
        // Any one of these fragments is accepted as evidence of the build error.
        let accepted = ["Build error", "build error", "Missing API key"];
        let mentions_build_error = accepted.iter().any(|needle| display.contains(*needle));
        assert!(
            mentions_build_error,
            "display should mention the build error: {display}"
        );
    }

    // -- P2-7: Typed TerminalFailure outcome --

    #[test]
    fn test_terminal_failure_carries_typed_outcome() {
        // TerminalFailure must carry the typed enum, not a Debug-formatted string.
        let err = AgentError::TerminalFailure {
            outcome: TurnTerminalOutcome::Failed,
        };
        if let AgentError::TerminalFailure { outcome } = &err {
            // If this compiles, outcome is TurnTerminalOutcome, not String.
            assert_eq!(*outcome, TurnTerminalOutcome::Failed);
        } else {
            panic!("expected TerminalFailure, got: {err}");
        }
    }

    #[test]
    fn test_terminal_failure_display_includes_outcome() {
        let display = AgentError::TerminalFailure {
            outcome: TurnTerminalOutcome::TimeBudgetExceeded,
        }
        .to_string();
        assert!(
            display.contains("TimeBudgetExceeded"),
            "display should include the outcome variant name: {display}"
        );
    }

    #[test]
    fn test_terminal_failure_all_hard_failure_outcomes() {
        // Both hard-failure outcomes should be representable.
        let hard_failures = [
            TurnTerminalOutcome::Failed,
            TurnTerminalOutcome::TimeBudgetExceeded,
        ];
        for outcome in hard_failures {
            let err = AgentError::TerminalFailure { outcome };
            assert!(
                !err.is_graceful(),
                "TerminalFailure({outcome:?}) should not be graceful"
            );
        }
    }
}