1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
//! Errors for the cognis framework. Operational metadata (`is_retryable`,
//! `retry_delay`, `category`) lets retry/fallback middleware consume errors
//! without sniffing strings.
use std::time::Duration;
/// Result alias used throughout cognis.
///
/// Pins the error type to [`CognisError`], so signatures only have to name the
/// success type `T`.
pub type Result<T> = std::result::Result<T, CognisError>;
/// When a graph interrupt fires relative to a node's execute call.
///
/// Carried in the `kind` field of `CognisError::GraphInterrupted`. Its
/// `Display` impl renders the lowercase words `before` / `after`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InterruptKind {
/// Before the node's execute is invoked.
Before,
/// After the node's execute completes (state already updated).
After,
}
impl std::fmt::Display for InterruptKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
InterruptKind::Before => write!(f, "before"),
InterruptKind::After => write!(f, "after"),
}
}
}
/// All errors produced by cognis-core and downstream v2 crates.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. Machine-readable metadata is exposed by the inherent methods
/// `category`, `is_retryable` and `retry_delay`.
#[derive(Debug, thiserror::Error)]
pub enum CognisError {
/// Provider call failed (network, HTTP, parse, etc.).
#[error("provider `{provider}` error: {message}")]
Provider {
/// Provider identifier (e.g. "openai", "ollama").
provider: String,
/// Human-readable error message.
message: String,
},
/// LLM provider rate-limited the request.
#[error("rate limited; retry after {retry_after_ms}ms")]
RateLimited {
/// Suggested retry delay in milliseconds. `retry_delay` returns this
/// value as a `Duration`.
retry_after_ms: u64,
},
/// Authentication failed (bad API key, expired token, etc.).
#[error("authentication failed: {0}")]
AuthenticationFailed(String),
/// Tool dispatch or execution failed.
#[error("tool `{name}` failed: {reason}")]
Tool {
/// Tool name.
name: String,
/// Failure reason.
reason: String,
},
/// Tool argument failed validation.
#[error("tool validation: {0}")]
ToolValidationError(String),
/// Configuration is invalid or incomplete.
#[error("configuration: {0}")]
Configuration(String),
/// Network / transport error.
///
/// The message reads `network error (status N): …` when `status_code` is
/// `Some(N)` and `network error: …` when it is `None`.
// The extra format argument yields " (status N)" or an empty string, which
// fills the bare `{}` placeholder.
#[error("network error{}: {message}", status_code.map(|c| format!(" (status {c})")).unwrap_or_default())]
Network {
/// Optional HTTP status code.
status_code: Option<u16>,
/// Human-readable error message.
message: String,
},
/// Operation timed out.
#[error("`{operation}` timed out after {timeout_ms}ms")]
Timeout {
/// Operation name.
operation: String,
/// Timeout duration in milliseconds. `retry_delay` suggests half of
/// this value.
timeout_ms: u64,
},
/// Operation was cancelled via `RunnableConfig::cancel_token`.
#[error("operation cancelled")]
Cancelled,
/// Graph engine ran past its `recursion_limit`.
#[error("graph recursion limit ({limit}) exceeded")]
RecursionLimit {
/// The configured limit that was hit.
limit: u32,
},
/// Graph paused at a configured interrupt boundary. State is in the
/// configured checkpointer; resume via `CompiledGraph::resume`.
#[error("graph interrupted {kind} node `{node}` at step {step} (run_id {run_id})")]
GraphInterrupted {
/// Run correlation ID. Pass to `Checkpointer::load` to recover state.
run_id: uuid::Uuid,
/// Superstep at which the interrupt fired.
step: u64,
/// Node name that triggered the interrupt.
node: String,
/// Whether the interrupt fired before or after the node's execute.
/// Rendered in the message through `InterruptKind`'s `Display` impl.
kind: InterruptKind,
},
/// Serialization or deserialization failed.
///
/// Also produced from `serde_json::Error` via the `From` impl, which
/// stores the error's `Display` text.
#[error("serialization error: {0}")]
Serialization(String),
/// Catch-all for unexpected errors.
#[error("internal error: {0}")]
Internal(String),
}
impl CognisError {
    /// Stable category string for telemetry / metrics filtering.
    ///
    /// Each variant maps to exactly one fixed `snake_case` label; payload
    /// values never influence the result.
    pub fn category(&self) -> &'static str {
        match self {
            // Upstream / transport failures.
            Self::Provider { .. } => "provider",
            Self::RateLimited { .. } => "rate_limit",
            Self::Network { .. } => "network",
            Self::Timeout { .. } => "timeout",
            // Caller-side problems.
            Self::AuthenticationFailed(_) => "auth",
            Self::Configuration(_) => "config",
            Self::Serialization(_) => "serialization",
            // Tooling.
            Self::Tool { .. } => "tool",
            Self::ToolValidationError(_) => "tool_validation",
            // Graph engine / control flow.
            Self::Cancelled => "cancelled",
            Self::RecursionLimit { .. } => "recursion_limit",
            Self::GraphInterrupted { .. } => "graph_interrupted",
            // Everything else.
            Self::Internal(_) => "internal",
        }
    }

    /// Whether retrying this error MAY succeed.
    ///
    /// Returns `true` for `Provider`, `RateLimited`, `Network` and `Timeout`,
    /// and `false` for every other variant.
    pub fn is_retryable(&self) -> bool {
        // Spelled out per variant so that adding a variant forces an explicit
        // decision here.
        match self {
            Self::Provider { .. }
            | Self::RateLimited { .. }
            | Self::Network { .. }
            | Self::Timeout { .. } => true,
            Self::AuthenticationFailed(_)
            | Self::Tool { .. }
            | Self::ToolValidationError(_)
            | Self::Configuration(_)
            | Self::Cancelled
            | Self::RecursionLimit { .. }
            | Self::GraphInterrupted { .. }
            | Self::Serialization(_)
            | Self::Internal(_) => false,
        }
    }

    /// Suggested retry delay, if the error type carries one.
    ///
    /// * `RateLimited` — the full `retry_after_ms`.
    /// * `Timeout` — half of `timeout_ms` (integer division).
    /// * any other variant — `None`.
    pub fn retry_delay(&self) -> Option<Duration> {
        let millis = match self {
            Self::RateLimited { retry_after_ms } => *retry_after_ms,
            Self::Timeout { timeout_ms, .. } => *timeout_ms / 2,
            Self::Provider { .. }
            | Self::AuthenticationFailed(_)
            | Self::Tool { .. }
            | Self::ToolValidationError(_)
            | Self::Configuration(_)
            | Self::Network { .. }
            | Self::Cancelled
            | Self::RecursionLimit { .. }
            | Self::GraphInterrupted { .. }
            | Self::Serialization(_)
            | Self::Internal(_) => return None,
        };
        Some(Duration::from_millis(millis))
    }
}
impl From<serde_json::Error> for CognisError {
fn from(e: serde_json::Error) -> Self {
Self::Serialization(e.to_string())
}
}