klieo-core 0.2.0

Core traits + runtime for the klieo agent framework.
Documentation
//! Error types used across `klieo-core`.
//!
//! The public API of every foundation crate surfaces only `Error`. Each
//! domain has its own sub-error type (e.g. `LlmError`) that converts into
//! it via `#[from]`. This keeps the public surface narrow while preserving
//! cause chains.
//!
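//! Sub-errors that can be transient (`LlmError`, `ToolError`, `BusError`)
//! classify themselves via `retryable()` so the runtime can apply
//! exponential backoff to transient failures only. `MemoryError` and
//! `ConfigError` expose no such method; `Error::retryable` always treats
//! them as permanent.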

use thiserror::Error;

/// Top-level error returned by `klieo-core` runtime calls.
///
/// Domain failures are wrapped one-to-one: every variant carrying a
/// `#[from]` field gets a `From<SubError>` conversion, so `?` or `.into()`
/// lifts an [`LlmError`], [`ToolError`], [`MemoryError`], [`BusError`] or
/// [`ConfigError`] into `Error`. The remaining variants carry their own
/// payload and have no sub-error type.
///
/// Use [`Error::retryable`] to find out whether a failure is transient.
///
/// Marked `#[non_exhaustive]` so additional variants can be introduced
/// without a major-version bump on impl crates that match on the enum.
///
/// ```
/// use klieo_core::error::{Error, LlmError};
/// let e: Error = LlmError::Timeout.into();
/// assert!(matches!(e, Error::Llm(_)));
/// ```
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum Error {
    /// Underlying LLM provider failure.
    ///
    /// Retry classification is delegated to [`LlmError::retryable`].
    #[error("LLM error: {0}")]
    Llm(#[from] LlmError),
    /// Tool invocation failure.
    ///
    /// Retry classification is delegated to [`ToolError::retryable`].
    #[error("Tool error: {0}")]
    Tool(#[from] ToolError),
    /// Memory persistence failure.
    ///
    /// Always reported as non-retryable by [`Error::retryable`].
    #[error("Memory error: {0}")]
    Memory(#[from] MemoryError),
    /// Inter-agent bus failure.
    ///
    /// Retry classification is delegated to [`BusError::retryable`].
    #[error("Bus error: {0}")]
    Bus(#[from] BusError),
    /// Runtime ran the maximum allowed number of LLM/tool steps.
    ///
    /// Always reported as non-retryable by [`Error::retryable`].
    #[error("max steps exceeded ({steps})")]
    MaxStepsExceeded {
        /// Step count that was exceeded.
        steps: u32,
    },
    /// Cooperatively cancelled.
    ///
    /// Always reported as non-retryable by [`Error::retryable`].
    #[error("cancelled")]
    Cancelled,
    /// Configuration validation failure.
    ///
    /// Always reported as non-retryable by [`Error::retryable`].
    #[error("config error: {0}")]
    Config(#[from] ConfigError),
    /// LLM reply could not be parsed into the requested typed shape.
    ///
    /// Surfaced from the structured-output parser when the raw text
    /// content fails JSON deserialization. Always permanent — retrying
    /// the same reply will fail identically.
    ///
    /// The payload is the message rendered after `bad response: `.
    #[error("bad response: {0}")]
    BadResponse(String),
    /// Caller-installed guardrail refused the LLM call.
    ///
    /// Always reported as non-retryable by [`Error::retryable`].
    #[error("guardrail refused: {reason}")]
    Refused {
        /// Human-readable reason supplied by the guardrail.
        reason: String,
    },
    /// Caller-installed guardrail requested a handoff to another agent.
    ///
    /// Always reported as non-retryable by [`Error::retryable`].
    #[error("guardrail handoff to {agent}: {reason}")]
    Handoff {
        /// Name of the agent the guardrail requested.
        agent: String,
        /// Human-readable reason for the handoff.
        reason: String,
    },
}

impl Error {
    /// Reports whether repeating the failed operation with the same inputs
    /// could succeed.
    ///
    /// LLM, tool and bus failures defer to the wrapped sub-error's own
    /// `retryable()` classification. Every other variant is permanent and
    /// yields `false`.
    pub fn retryable(&self) -> bool {
        match self {
            // Delegating variants: the sub-error decides whether it is transient.
            Self::Llm(cause) => cause.retryable(),
            Self::Bus(cause) => cause.retryable(),
            Self::Tool(cause) => cause.retryable(),

            // Sub-errors that expose no retry classification of their own.
            Self::Config(_) | Self::Memory(_) => false,

            // Cancellation or an exhausted step budget.
            Self::Cancelled | Self::MaxStepsExceeded { .. } => false,

            // An unparseable reply fails identically on every retry.
            Self::BadResponse(_) => false,

            // Guardrail decisions.
            Self::Handoff { .. } | Self::Refused { .. } => false,
        }
    }
}

/// LLM provider errors.
///
/// Converts into [`Error::Llm`] through the `#[from]` conversion declared
/// on the top-level [`Error`] enum. [`LlmError::retryable`] separates the
/// transient variants (`Network`, `Timeout`, `RateLimit`, `Server`) from
/// the permanent ones.
///
/// Marked `#[non_exhaustive]` so additional variants can be introduced
/// without a major-version bump on impl crates that match on the enum.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum LlmError {
    /// Network transport failure (connection refused, DNS, etc).
    ///
    /// Classified as retryable.
    #[error("network error: {0}")]
    Network(String),
    /// Request exceeded its deadline.
    ///
    /// Classified as retryable.
    #[error("timeout")]
    Timeout,
    /// Provider returned a 429-equivalent rate-limit signal.
    ///
    /// Classified as retryable.
    #[error("rate limited (retry after {retry_after_secs}s)")]
    RateLimit {
        /// Server-suggested retry delay, in seconds.
        retry_after_secs: u32,
    },
    /// Authentication or authorisation failure.
    ///
    /// Classified as permanent.
    #[error("unauthorized")]
    Unauthorized,
    /// Provider rejected the request shape.
    ///
    /// Classified as permanent.
    #[error("bad request: {0}")]
    BadRequest(String),
    /// Provider returned a 5xx-equivalent.
    ///
    /// Classified as retryable.
    #[error("server error: {0}")]
    Server(String),
    /// Response payload could not be decoded.
    ///
    /// Classified as permanent.
    #[error("decoding error: {0}")]
    Decoding(String),
    /// Capability declared unsupported by the client.
    ///
    /// Classified as permanent.
    #[error("unsupported capability: {0}")]
    Unsupported(String),
    /// Operation was cooperatively cancelled. Surfaced from the
    /// streaming runtime when `ctx.cancel` fires mid-stream so consumers
    /// can distinguish a cooperative cancel from a provider failure.
    ///
    /// Classified as permanent.
    #[error("operation cancelled")]
    Cancelled,
}

impl LlmError {
    /// Tells the runtime whether this failure is transient.
    ///
    /// Transport failures, deadline overruns, rate limiting and
    /// provider-side errors are worth another attempt; everything else
    /// would fail the same way again.
    pub fn retryable(&self) -> bool {
        match self {
            Self::Network(_) | Self::Timeout | Self::RateLimit { .. } | Self::Server(_) => true,
            Self::Unauthorized
            | Self::BadRequest(_)
            | Self::Decoding(_)
            | Self::Unsupported(_)
            | Self::Cancelled => false,
        }
    }
}

/// Tool invocation errors.
///
/// Converts into [`Error::Tool`] through the `#[from]` conversion declared
/// on the top-level [`Error`] enum. [`ToolError::retryable`] reports
/// `Retryable` and `Timeout` as transient; the other variants are
/// permanent.
// NOTE(review): unlike `Error` and `LlmError`, this enum is not
// `#[non_exhaustive]`, so adding a variant breaks downstream crates that
// match on it exhaustively — confirm whether that is intentional.
#[derive(Debug, Error)]
pub enum ToolError {
    /// Tool name not registered with the invoker.
    ///
    /// Classified as permanent.
    #[error("unknown tool: {0}")]
    UnknownTool(String),
    /// Arguments did not match the declared JSON-schema.
    ///
    /// Classified as permanent.
    #[error("invalid args: {0}")]
    InvalidArgs(String),
    /// Tool returned a transient failure; runtime may retry.
    #[error("retryable: {message} (retry after {retry_after_secs}s)")]
    Retryable {
        /// Human-readable reason.
        message: String,
        /// Suggested delay before retry, in seconds.
        retry_after_secs: u32,
    },
    /// Tool returned a permanent failure.
    #[error("permanent: {0}")]
    Permanent(String),
    /// Tool execution exceeded its timeout.
    ///
    /// Classified as retryable.
    #[error("timeout")]
    Timeout,
}

impl ToolError {
    /// Tells the runtime whether the tool call is worth repeating.
    ///
    /// Only failures the tool itself flagged as transient, and timeouts,
    /// qualify; unknown tools, invalid arguments and permanent failures
    /// do not.
    pub fn retryable(&self) -> bool {
        match self {
            Self::Retryable { .. } | Self::Timeout => true,
            Self::UnknownTool(_) | Self::InvalidArgs(_) | Self::Permanent(_) => false,
        }
    }
}

/// Memory persistence errors.
///
/// Converts into [`Error::Memory`] through the `#[from]` conversion
/// declared on the top-level [`Error`] enum. This type has no
/// `retryable()` method of its own: [`Error::retryable`] reports every
/// memory error as permanent.
// NOTE(review): unlike `Error` and `LlmError`, this enum is not
// `#[non_exhaustive]` — confirm whether that is intentional.
#[derive(Debug, Error)]
pub enum MemoryError {
    /// Underlying store rejected the operation.
    ///
    /// The payload is the message rendered after `store error: `.
    #[error("store error: {0}")]
    Store(String),
    /// Requested resource not present.
    #[error("not found")]
    NotFound,
    /// Embedding generation failed.
    ///
    /// The payload is the message rendered after `embedding failed: `.
    #[error("embedding failed: {0}")]
    Embedding(String),
    /// Serialization failure.
    ///
    /// The payload is the message rendered after `serialization: `.
    #[error("serialization: {0}")]
    Serialization(String),
}

/// Inter-agent bus errors.
///
/// Converts into [`Error::Bus`] through the `#[from]` conversion declared
/// on the top-level [`Error`] enum. [`BusError::retryable`] reports
/// `Connection`, `Timeout` and `Retryable` as transient; the other
/// variants are permanent.
// NOTE(review): unlike `Error` and `LlmError`, this enum is not
// `#[non_exhaustive]` — confirm whether that is intentional.
#[derive(Debug, Error)]
pub enum BusError {
    /// Network or broker connectivity failure.
    ///
    /// Classified as retryable.
    #[error("connection error: {0}")]
    Connection(String),
    /// Subject / queue / bucket not found.
    ///
    /// Classified as permanent.
    #[error("not found: {0}")]
    NotFound(String),
    /// CAS revision mismatch.
    ///
    /// Classified as permanent by [`BusError::retryable`].
    #[error("cas conflict (expected {expected}, got {actual})")]
    CasConflict {
        /// Caller's expected revision.
        expected: u64,
        /// Actual current revision.
        actual: u64,
    },
    /// Operation exceeded its deadline.
    ///
    /// Classified as retryable.
    #[error("timeout")]
    Timeout,
    /// Generic transient failure flagged by impl as retryable.
    #[error("retryable: {0}")]
    Retryable(String),
    /// Generic permanent failure.
    #[error("permanent: {0}")]
    Permanent(String),
}

impl BusError {
    /// Tells the runtime whether the bus operation is worth repeating.
    ///
    /// Connectivity failures, timeouts and failures explicitly flagged as
    /// transient qualify; missing resources, CAS conflicts and permanent
    /// failures do not.
    pub fn retryable(&self) -> bool {
        match self {
            Self::Connection(_) | Self::Timeout | Self::Retryable(_) => true,
            Self::NotFound(_) | Self::CasConflict { .. } | Self::Permanent(_) => false,
        }
    }
}

/// Configuration validation errors.
///
/// Converts into [`Error::Config`] through the `#[from]` conversion
/// declared on the top-level [`Error`] enum. This type has no
/// `retryable()` method of its own: [`Error::retryable`] reports every
/// configuration error as permanent.
// NOTE(review): unlike `Error` and `LlmError`, this enum is not
// `#[non_exhaustive]` — confirm whether that is intentional.
#[derive(Debug, Error)]
pub enum ConfigError {
    /// Required configuration key absent.
    ///
    /// The payload is the name of the missing key.
    #[error("missing required key: {0}")]
    MissingKey(String),
    /// Configuration value failed validation.
    #[error("invalid value for {key}: {reason}")]
    InvalidValue {
        /// Key that was invalid.
        key: String,
        /// Reason it was invalid.
        reason: String,
    },
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Lifts `err` into the top-level [`Error`] and returns its retry
    /// classification.
    fn is_retryable(err: impl Into<Error>) -> bool {
        err.into().retryable()
    }

    #[test]
    fn llm_timeout_is_retryable() {
        assert!(is_retryable(LlmError::Timeout));
    }

    #[test]
    fn llm_unauthorized_is_not_retryable() {
        assert!(!is_retryable(LlmError::Unauthorized));
    }

    #[test]
    fn tool_invalid_args_is_not_retryable() {
        assert!(!is_retryable(ToolError::InvalidArgs(String::from(
            "bad json"
        ))));
    }

    #[test]
    fn config_error_is_not_retryable() {
        assert!(!is_retryable(ConfigError::MissingKey(String::from("x"))));
    }

    #[test]
    fn refused_is_not_retryable() {
        let refusal = Error::Refused {
            reason: String::from("policy"),
        };
        assert!(!is_retryable(refusal));
    }

    #[test]
    fn handoff_is_not_retryable() {
        let handoff = Error::Handoff {
            agent: String::from("specialist"),
            reason: String::from("out of scope"),
        };
        assert!(!is_retryable(handoff));
    }
}