Skip to main content

cognis_core/
error.rs

1//! Errors for the cognis framework. Operational metadata (`is_retryable`,
2//! `retry_delay`, `category`) lets retry/fallback middleware consume errors
3//! without sniffing strings.
4
5use std::time::Duration;
6
/// Convenience alias for `std::result::Result` with the error type fixed to
/// [`CognisError`]; used throughout cognis.
pub type Result<T> = std::result::Result<T, CognisError>;
9
/// Position of a graph interrupt relative to a node's execute call.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InterruptKind {
    /// The interrupt fires ahead of the node's execute invocation.
    Before,
    /// The interrupt fires once the node's execute has completed (state
    /// already updated).
    After,
}

/// Renders the kind as a lowercase word: `"before"` or `"after"`.
impl std::fmt::Display for InterruptKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the literal first, then emit it in a single write.
        let label = match *self {
            InterruptKind::Before => "before",
            InterruptKind::After => "after",
        };
        f.write_str(label)
    }
}
27
28/// All errors produced by cognis-core and downstream v2 crates.
29#[derive(Debug, thiserror::Error)]
30pub enum CognisError {
31    /// Provider call failed (network, HTTP, parse, etc.).
32    #[error("provider `{provider}` error: {message}")]
33    Provider {
34        /// Provider identifier (e.g. "openai", "ollama").
35        provider: String,
36        /// Human-readable error message.
37        message: String,
38    },
39
40    /// LLM provider rate-limited the request.
41    #[error("rate limited; retry after {retry_after_ms}ms")]
42    RateLimited {
43        /// Suggested retry delay in milliseconds.
44        retry_after_ms: u64,
45    },
46
47    /// Authentication failed (bad API key, expired token, etc.).
48    #[error("authentication failed: {0}")]
49    AuthenticationFailed(String),
50
51    /// Tool dispatch or execution failed.
52    #[error("tool `{name}` failed: {reason}")]
53    Tool {
54        /// Tool name.
55        name: String,
56        /// Failure reason.
57        reason: String,
58    },
59
60    /// Tool argument failed validation.
61    #[error("tool validation: {0}")]
62    ToolValidationError(String),
63
64    /// Configuration is invalid or incomplete.
65    #[error("configuration: {0}")]
66    Configuration(String),
67
68    /// Network / transport error.
69    #[error("network error{}: {message}", status_code.map(|c| format!(" (status {c})")).unwrap_or_default())]
70    Network {
71        /// Optional HTTP status code.
72        status_code: Option<u16>,
73        /// Human-readable error message.
74        message: String,
75    },
76
77    /// Operation timed out.
78    #[error("`{operation}` timed out after {timeout_ms}ms")]
79    Timeout {
80        /// Operation name.
81        operation: String,
82        /// Timeout duration in milliseconds.
83        timeout_ms: u64,
84    },
85
86    /// Operation was cancelled via `RunnableConfig::cancel_token`.
87    #[error("operation cancelled")]
88    Cancelled,
89
90    /// Graph engine ran past its `recursion_limit`.
91    #[error("graph recursion limit ({limit}) exceeded")]
92    RecursionLimit {
93        /// The configured limit that was hit.
94        limit: u32,
95    },
96
97    /// Graph paused at a configured interrupt boundary. State is in the
98    /// configured checkpointer; resume via `CompiledGraph::resume`.
99    #[error("graph interrupted {kind} node `{node}` at step {step} (run_id {run_id})")]
100    GraphInterrupted {
101        /// Run correlation ID. Pass to `Checkpointer::load` to recover state.
102        run_id: uuid::Uuid,
103        /// Superstep at which the interrupt fired.
104        step: u64,
105        /// Node name that triggered the interrupt.
106        node: String,
107        /// Whether the interrupt fired before or after the node's execute.
108        kind: InterruptKind,
109    },
110
111    /// Serialization or deserialization failed.
112    #[error("serialization error: {0}")]
113    Serialization(String),
114
115    /// Catch-all for unexpected errors.
116    #[error("internal error: {0}")]
117    Internal(String),
118}
119
120impl CognisError {
121    /// Stable category string for telemetry / metrics filtering.
122    pub fn category(&self) -> &'static str {
123        match self {
124            Self::Provider { .. } => "provider",
125            Self::RateLimited { .. } => "rate_limit",
126            Self::AuthenticationFailed(_) => "auth",
127            Self::Tool { .. } => "tool",
128            Self::ToolValidationError(_) => "tool_validation",
129            Self::Configuration(_) => "config",
130            Self::Network { .. } => "network",
131            Self::Timeout { .. } => "timeout",
132            Self::Cancelled => "cancelled",
133            Self::RecursionLimit { .. } => "recursion_limit",
134            Self::GraphInterrupted { .. } => "graph_interrupted",
135            Self::Serialization(_) => "serialization",
136            Self::Internal(_) => "internal",
137        }
138    }
139
140    /// Whether retrying this error MAY succeed.
141    pub fn is_retryable(&self) -> bool {
142        matches!(
143            self,
144            Self::RateLimited { .. }
145                | Self::Network { .. }
146                | Self::Timeout { .. }
147                | Self::Provider { .. }
148        )
149    }
150
151    /// Suggested retry delay, if the error type carries one.
152    pub fn retry_delay(&self) -> Option<Duration> {
153        match self {
154            Self::RateLimited { retry_after_ms } => Some(Duration::from_millis(*retry_after_ms)),
155            Self::Timeout { timeout_ms, .. } => Some(Duration::from_millis(*timeout_ms / 2)),
156            _ => None,
157        }
158    }
159}
160
161impl From<serde_json::Error> for CognisError {
162    fn from(e: serde_json::Error) -> Self {
163        Self::Serialization(e.to_string())
164    }
165}