Skip to main content

actr_protocol/
error.rs

1//! Top-level error types for the Actor-RTC framework.
2//!
3//! ## Design
4//!
5//! Two layers only:
6//!
7//! ```text
8//! NetworkError   (transport-internal, never exposed to users)
9//!      ↓  From
10//! ActrError      (public, flat enum — what callers see)
11//! ```
12//!
13//! `RuntimeError` and `ProtocolError` have been removed.
14//!
15//! ## Error classification
16//!
17//! Every error belongs to one fault domain (`ErrorKind`):
18//!
19//! | Kind      | Meaning                        | Retry? | DLQ? |
20//! |-----------|--------------------------------|--------|------|
21//! | Transient | Environmental fluctuation      | yes    | no   |
22//! | Client    | Caller error (bad request)     | no     | no   |
23//! | Internal  | Framework bug / panic          | no     | no   |
24//! | Corrupt   | Data corruption                | no     | yes  |
25//!
26//! Use the `Classify` trait to query classification from any error type.
27
28use std::fmt;
29use thiserror::Error;
30
31// ── ConnectionNotReadyInfo ────────────────────────────────────────────────────
32
/// Details attached to a send that was rejected during preflight.
///
/// The rejection happens before the operation is handed to the transport, so
/// a caller may simply build a fresh operation and try again. Treat
/// `retry_after_ms` as advisory only: the readiness hook remains the
/// authoritative signal that sending may be attempted again.
#[derive(Clone, Debug)]
pub struct ConnectionNotReadyInfo {
    /// Suggested wait before retrying, in milliseconds; `None` when no hint
    /// is available.
    pub retry_after_ms: Option<u64>,
}

impl ConnectionNotReadyInfo {
    /// Builds the payload from an elapsed time and a timeout, both in
    /// milliseconds.
    ///
    /// The hint is the difference `timeout_ms - elapsed_ms`. When
    /// `elapsed_ms` is greater than `timeout_ms` the subtraction would
    /// underflow, so the hint is `None` instead.
    pub fn new(elapsed_ms: u64, timeout_ms: u64) -> Self {
        Self {
            retry_after_ms: timeout_ms.checked_sub(elapsed_ms),
        }
    }

    /// Builds a payload that carries no retry hint.
    pub fn without_retry_hint() -> Self {
        let retry_after_ms = None;
        Self { retry_after_ms }
    }
}

impl fmt::Display for ConnectionNotReadyInfo {
    /// Writes `retry_after_ms=` followed by the `Debug` rendering of the
    /// hint, e.g. `retry_after_ms=Some(4800)` or `retry_after_ms=None`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let hint = self.retry_after_ms;
        write!(f, "retry_after_ms={:?}", hint)
    }
}
62
63// ── ActrError ────────────────────────────────────────────────────────────────
64
65/// Top-level framework error, returned to all callers.
66///
67/// Flat enum — no nested error wrapping. Each variant is self-describing.
68#[derive(Error, Debug, Clone)]
69pub enum ActrError {
70    // ── Transient ──────────────────────────────────────────────────────────
71    /// Target temporarily unavailable: connection lost, overloaded, or reconnecting.
72    ///
73    /// `ErrorKind::Transient` — retry with backoff.
74    #[error("unavailable: {0}")]
75    Unavailable(String),
76
77    /// Connection is not ready to send this operation.
78    ///
79    /// `ErrorKind::Transient` — wait for readiness notification or retry with
80    /// backoff. The operation has not entered transport.
81    #[error("connection not ready: {0}")]
82    ConnectionNotReady(ConnectionNotReadyInfo),
83
84    /// Request deadline exceeded.
85    ///
86    /// `ErrorKind::Transient` — may retry with a fresh deadline.
87    #[error("timed out")]
88    TimedOut,
89
90    // ── Client ─────────────────────────────────────────────────────────────
91    /// Target actor not found.
92    ///
93    /// `ErrorKind::Client` — do not retry; check service discovery first.
94    #[error("not found: {0}")]
95    NotFound(String),
96
97    /// Permission denied by ACL.
98    ///
99    /// `ErrorKind::Client` — do not retry; fix authorization.
100    #[error("permission denied: {0}")]
101    PermissionDenied(String),
102
103    /// Invalid argument or malformed request.
104    ///
105    /// `ErrorKind::Client` — do not retry; fix the request.
106    #[error("invalid argument: {0}")]
107    InvalidArgument(String),
108
109    /// No handler registered for the given route key.
110    ///
111    /// `ErrorKind::Client` — do not retry; check service definition.
112    #[error("unknown route: {0}")]
113    UnknownRoute(String),
114
115    /// Required dependency not found in the lock file.
116    ///
117    /// `ErrorKind::Client` — do not retry; fix the manifest.
118    #[error("dependency '{service_name}' not found: {message}")]
119    DependencyNotFound {
120        service_name: String,
121        message: String,
122    },
123
124    // ── Corrupt ────────────────────────────────────────────────────────────
125    /// Protobuf decode failure — message data is corrupted.
126    ///
127    /// `ErrorKind::Corrupt` — route to Dead Letter Queue; do not retry.
128    #[error("decode failure: {0}")]
129    DecodeFailure(String),
130
131    // ── Internal ───────────────────────────────────────────────────────────
132    /// Feature not yet implemented.
133    ///
134    /// `ErrorKind::Internal` — do not retry.
135    #[error("not implemented: {0}")]
136    NotImplemented(String),
137
138    /// Internal framework error: bug, panic, or unrecoverable state.
139    ///
140    /// `ErrorKind::Internal` — do not retry; investigate logs.
141    #[error("internal error: {0}")]
142    Internal(String),
143}
144
145// ── ErrorKind ────────────────────────────────────────────────────────────────
146
/// The fault domain a framework error falls into.
///
/// Error types expose their domain through the [`Classify`] trait.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ErrorKind {
    /// The environment fluctuated; retry with exponential backoff.
    Transient,
    /// The caller is at fault (bad request or system state); do not retry.
    Client,
    /// The framework hit a bug or panic; do not retry, raise an alert.
    Internal,
    /// The data is corrupted; route to the Dead Letter Queue for manual
    /// intervention.
    Corrupt,
}
161
162// ── Classify trait ───────────────────────────────────────────────────────────
163
164/// Fault-domain classification for error types.
165///
166/// Implement `kind()` only; `is_retryable()` and `requires_dlq()` have
167/// correct default implementations derived from `kind()`.
168pub trait Classify {
169    /// Returns the fault domain this error belongs to.
170    fn kind(&self) -> ErrorKind;
171
172    /// Returns `true` if the operation may be retried.
173    ///
174    /// Only `ErrorKind::Transient` errors are retryable.
175    fn is_retryable(&self) -> bool {
176        matches!(self.kind(), ErrorKind::Transient)
177    }
178
179    /// Returns `true` if the message should be routed to the Dead Letter Queue.
180    ///
181    /// Only `ErrorKind::Corrupt` errors require DLQ routing.
182    fn requires_dlq(&self) -> bool {
183        matches!(self.kind(), ErrorKind::Corrupt)
184    }
185}
186
187impl Classify for ActrError {
188    fn kind(&self) -> ErrorKind {
189        match self {
190            ActrError::Unavailable(_) | ActrError::ConnectionNotReady(_) | ActrError::TimedOut => {
191                ErrorKind::Transient
192            }
193
194            ActrError::NotFound(_)
195            | ActrError::PermissionDenied(_)
196            | ActrError::InvalidArgument(_)
197            | ActrError::UnknownRoute(_)
198            | ActrError::DependencyNotFound { .. } => ErrorKind::Client,
199
200            ActrError::DecodeFailure(_) => ErrorKind::Corrupt,
201
202            ActrError::NotImplemented(_) | ActrError::Internal(_) => ErrorKind::Internal,
203        }
204    }
205}
206
207// ── Convenience type aliases ──────────────────────────────────────────────────
208
209/// Result type for actor RPC calls.
210pub type ActorResult<T> = Result<T, ActrError>;
211
#[cfg(test)]
mod tests {
    //! Unit tests for error classification, the `Classify` default methods,
    //! and the rendered messages.

    use super::*;

    /// One representative value for each `ActrError` variant.
    fn one_of_each() -> Vec<ActrError> {
        vec![
            ActrError::Unavailable("x".into()),
            ActrError::ConnectionNotReady(ConnectionNotReadyInfo::without_retry_hint()),
            ActrError::TimedOut,
            ActrError::NotFound("x".into()),
            ActrError::PermissionDenied("x".into()),
            ActrError::InvalidArgument("x".into()),
            ActrError::UnknownRoute("x".into()),
            ActrError::DependencyNotFound {
                service_name: "svc".into(),
                message: "not found".into(),
            },
            ActrError::DecodeFailure("x".into()),
            ActrError::NotImplemented("x".into()),
            ActrError::Internal("x".into()),
        ]
    }

    // ── ActrError::kind() classification ─────────────────────────────────────

    #[test]
    fn transient_variants_classify_correctly() {
        assert_eq!(
            ActrError::Unavailable("x".into()).kind(),
            ErrorKind::Transient
        );
        assert_eq!(ActrError::TimedOut.kind(), ErrorKind::Transient);
    }

    #[test]
    fn client_variants_classify_correctly() {
        assert_eq!(ActrError::NotFound("x".into()).kind(), ErrorKind::Client);
        assert_eq!(
            ActrError::PermissionDenied("x".into()).kind(),
            ErrorKind::Client
        );
        assert_eq!(
            ActrError::InvalidArgument("x".into()).kind(),
            ErrorKind::Client
        );
        assert_eq!(
            ActrError::UnknownRoute("x".into()).kind(),
            ErrorKind::Client
        );
        assert_eq!(
            ActrError::DependencyNotFound {
                service_name: "svc".into(),
                message: "not found".into(),
            }
            .kind(),
            ErrorKind::Client
        );
    }

    #[test]
    fn corrupt_variant_classifies_correctly() {
        assert_eq!(
            ActrError::DecodeFailure("x".into()).kind(),
            ErrorKind::Corrupt
        );
    }

    #[test]
    fn internal_variants_classify_correctly() {
        assert_eq!(
            ActrError::NotImplemented("x".into()).kind(),
            ErrorKind::Internal
        );
        assert_eq!(ActrError::Internal("x".into()).kind(), ErrorKind::Internal);
    }

    // ── Classify default impls ────────────────────────────────────────────────

    #[test]
    fn only_transient_is_retryable() {
        assert!(ActrError::Unavailable("x".into()).is_retryable());
        assert!(ActrError::TimedOut.is_retryable());

        assert!(!ActrError::NotFound("x".into()).is_retryable());
        assert!(!ActrError::DecodeFailure("x".into()).is_retryable());
        assert!(!ActrError::Internal("x".into()).is_retryable());
    }

    #[test]
    fn only_corrupt_requires_dlq() {
        assert!(ActrError::DecodeFailure("x".into()).requires_dlq());

        assert!(!ActrError::Unavailable("x".into()).requires_dlq());
        assert!(!ActrError::TimedOut.requires_dlq());
        assert!(!ActrError::NotFound("x".into()).requires_dlq());
        assert!(!ActrError::Internal("x".into()).requires_dlq());
    }

    /// Sweeps every variant so that a variant added to `one_of_each` is
    /// automatically held to the same policy rules.
    #[test]
    fn policy_helpers_follow_kind_for_every_variant() {
        for err in one_of_each() {
            let kind = err.kind();
            assert_eq!(err.is_retryable(), kind == ErrorKind::Transient, "{err}");
            assert_eq!(err.requires_dlq(), kind == ErrorKind::Corrupt, "{err}");
            // Retrying and dead-lettering are mutually exclusive outcomes.
            assert!(!(err.is_retryable() && err.requires_dlq()), "{err}");
        }
    }

    // ── Clone ─────────────────────────────────────────────────────────────────

    #[test]
    fn actr_error_is_clone() {
        let e = ActrError::InvalidArgument("bad".into());
        let cloned = e.clone();
        assert_eq!(format!("{cloned}"), "invalid argument: bad");
    }

    // ── ActrError Display ─────────────────────────────────────────────────────

    #[test]
    fn display_messages_match_variant_templates() {
        assert_eq!(
            ActrError::Unavailable("peer gone".into()).to_string(),
            "unavailable: peer gone"
        );
        assert_eq!(
            ActrError::ConnectionNotReady(ConnectionNotReadyInfo::new(1200, 6000)).to_string(),
            "connection not ready: retry_after_ms=Some(4800)"
        );
        assert_eq!(ActrError::TimedOut.to_string(), "timed out");
        assert_eq!(
            ActrError::NotFound("actor-1".into()).to_string(),
            "not found: actor-1"
        );
        assert_eq!(
            ActrError::PermissionDenied("acl".into()).to_string(),
            "permission denied: acl"
        );
        assert_eq!(
            ActrError::UnknownRoute("svc.Method".into()).to_string(),
            "unknown route: svc.Method"
        );
        assert_eq!(
            ActrError::DependencyNotFound {
                service_name: "svc".into(),
                message: "missing".into(),
            }
            .to_string(),
            "dependency 'svc' not found: missing"
        );
        assert_eq!(
            ActrError::DecodeFailure("truncated".into()).to_string(),
            "decode failure: truncated"
        );
        assert_eq!(
            ActrError::NotImplemented("streams".into()).to_string(),
            "not implemented: streams"
        );
        assert_eq!(
            ActrError::Internal("boom".into()).to_string(),
            "internal error: boom"
        );
    }

    // ── ConnectionNotReadyInfo Display ────────────────────────────────────

    #[test]
    fn connection_not_ready_info_display_includes_retry_hint() {
        let info = ConnectionNotReadyInfo::new(1200, 6000);
        let s = format!("{info}");
        assert!(s.contains("retry_after_ms=Some(4800)"));
    }

    #[test]
    fn connection_not_ready_info_without_retry_hint_display() {
        let info = ConnectionNotReadyInfo::without_retry_hint();
        let s = format!("{info}");
        assert!(s.contains("retry_after_ms=None"));
    }

    // ── ConnectionNotReadyInfo retry hint boundaries ──────────────────────

    #[test]
    fn connection_not_ready_info_hint_boundaries() {
        // Nothing elapsed: the whole timeout is the hint.
        assert_eq!(
            ConnectionNotReadyInfo::new(0, 6000).retry_after_ms,
            Some(6000)
        );
        // Elapsed equals the timeout: the subtraction still succeeds, giving zero.
        assert_eq!(
            ConnectionNotReadyInfo::new(6000, 6000).retry_after_ms,
            Some(0)
        );
        // Elapsed exceeds the timeout: the subtraction would underflow, so no hint.
        assert_eq!(ConnectionNotReadyInfo::new(6001, 6000).retry_after_ms, None);
        assert_eq!(
            ConnectionNotReadyInfo::new(u64::MAX, 0).retry_after_ms,
            None
        );
    }

    // ── ConnectionNotReady classification ────────────────────────────────

    #[test]
    fn connection_not_ready_classifies_as_transient() {
        let err = ActrError::ConnectionNotReady(ConnectionNotReadyInfo::new(0, 6000));
        assert_eq!(err.kind(), ErrorKind::Transient);
    }

    #[test]
    fn connection_not_ready_is_retryable() {
        let err = ActrError::ConnectionNotReady(ConnectionNotReadyInfo::without_retry_hint());
        assert!(err.is_retryable());
    }
}