Skip to main content

arcp_core/
error.rs

1//! Canonical error model (RFC §18).
2//!
3//! Two layered types:
4//!
5//! - [`ErrorCode`] — the wire-level taxonomy from §18.2 as a
6//!   `#[non_exhaustive]` enum. Exists so the runtime, the client, and
7//!   external code can pattern-match on a single source of truth.
8//! - [`ARCPError`] — the in-process `Result<_, _>` error returned from
9//!   library APIs. Each variant maps onto an `ErrorCode` via
10//!   [`ARCPError::code`] and carries enough context to reconstruct an
11//!   error envelope (§18.1) without a second lookup.
12//!
13//! The [`ARCPError::retryable`] method follows the RFC §18.3 default
14//! taxonomy. Callers MAY override per-call via the returned envelope's
15//! `retryable` field, but the in-process default is what `retryable()`
16//! reports.
17
18use std::fmt;
19
20use serde::{Deserialize, Serialize};
21
22use crate::ids::{IdParseError, LeaseId};
23
24/// Canonical wire-level error code (RFC §18.2).
25///
26/// `RATE_LIMITED` is an alias for `RESOURCE_EXHAUSTED` per §18.2 and is
27/// represented by the same variant; the alias survives only at the
28/// deserialise boundary.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
30#[non_exhaustive]
31#[allow(clippy::upper_case_acronyms)]
32pub enum ErrorCode {
33    /// `OK` — not an error; reserved.
34    #[serde(rename = "OK")]
35    Ok,
36    /// `CANCELLED`
37    #[serde(rename = "CANCELLED")]
38    Cancelled,
39    /// `UNKNOWN`
40    #[serde(rename = "UNKNOWN")]
41    Unknown,
42    /// `INVALID_ARGUMENT`
43    #[serde(rename = "INVALID_ARGUMENT")]
44    InvalidArgument,
45    /// `DEADLINE_EXCEEDED`
46    #[serde(rename = "DEADLINE_EXCEEDED")]
47    DeadlineExceeded,
48    /// `NOT_FOUND`
49    #[serde(rename = "NOT_FOUND")]
50    NotFound,
51    /// `ALREADY_EXISTS`
52    #[serde(rename = "ALREADY_EXISTS")]
53    AlreadyExists,
54    /// `PERMISSION_DENIED`
55    #[serde(rename = "PERMISSION_DENIED")]
56    PermissionDenied,
57    /// `RESOURCE_EXHAUSTED` (also serialised from the alias `RATE_LIMITED`).
58    #[serde(rename = "RESOURCE_EXHAUSTED", alias = "RATE_LIMITED")]
59    ResourceExhausted,
60    /// `FAILED_PRECONDITION`
61    #[serde(rename = "FAILED_PRECONDITION")]
62    FailedPrecondition,
63    /// `ABORTED`
64    #[serde(rename = "ABORTED")]
65    Aborted,
66    /// `OUT_OF_RANGE`
67    #[serde(rename = "OUT_OF_RANGE")]
68    OutOfRange,
69    /// `UNIMPLEMENTED`
70    #[serde(rename = "UNIMPLEMENTED")]
71    Unimplemented,
72    /// `INTERNAL`
73    #[serde(rename = "INTERNAL")]
74    Internal,
75    /// `UNAVAILABLE`
76    #[serde(rename = "UNAVAILABLE")]
77    Unavailable,
78    /// `DATA_LOSS`
79    #[serde(rename = "DATA_LOSS")]
80    DataLoss,
81    /// `UNAUTHENTICATED`
82    #[serde(rename = "UNAUTHENTICATED")]
83    Unauthenticated,
84    /// `HEARTBEAT_LOST` (RFC §10.3)
85    #[serde(rename = "HEARTBEAT_LOST")]
86    HeartbeatLost,
87    /// `LEASE_EXPIRED` (RFC §15.5)
88    #[serde(rename = "LEASE_EXPIRED")]
89    LeaseExpired,
90    /// `LEASE_REVOKED` (RFC §15.5)
91    #[serde(rename = "LEASE_REVOKED")]
92    LeaseRevoked,
93    /// `BACKPRESSURE_OVERFLOW`
94    #[serde(rename = "BACKPRESSURE_OVERFLOW")]
95    BackpressureOverflow,
96    /// `BUDGET_EXHAUSTED` (ARCP v1.1 §12; §9.6)
97    #[serde(rename = "BUDGET_EXHAUSTED")]
98    BudgetExhausted,
99    /// `LEASE_SUBSET_VIOLATION` (ARCP v1.1 §9.4)
100    #[serde(rename = "LEASE_SUBSET_VIOLATION")]
101    LeaseSubsetViolation,
102    /// `AGENT_VERSION_NOT_AVAILABLE` (ARCP v1.1 §12; §7.5)
103    #[serde(rename = "AGENT_VERSION_NOT_AVAILABLE")]
104    AgentVersionNotAvailable,
105}
106
107impl ErrorCode {
108    /// Default retryability per RFC §18.3.
109    ///
110    /// Errors flagged retryable here MAY still be rejected by application
111    /// policy; conversely, errors flagged non-retryable MAY be retried by
112    /// callers who know more than the protocol does. This method reports
113    /// only the protocol's default.
114    #[must_use]
115    pub const fn retryable(self) -> bool {
116        matches!(
117            self,
118            Self::ResourceExhausted
119                | Self::Unavailable
120                | Self::DeadlineExceeded
121                | Self::Internal
122                | Self::Aborted
123        )
124    }
125
126    /// Wire-level string spelling of the code (`"INVALID_ARGUMENT"`, etc.).
127    #[must_use]
128    pub const fn as_str(self) -> &'static str {
129        match self {
130            Self::Ok => "OK",
131            Self::Cancelled => "CANCELLED",
132            Self::Unknown => "UNKNOWN",
133            Self::InvalidArgument => "INVALID_ARGUMENT",
134            Self::DeadlineExceeded => "DEADLINE_EXCEEDED",
135            Self::NotFound => "NOT_FOUND",
136            Self::AlreadyExists => "ALREADY_EXISTS",
137            Self::PermissionDenied => "PERMISSION_DENIED",
138            Self::ResourceExhausted => "RESOURCE_EXHAUSTED",
139            Self::FailedPrecondition => "FAILED_PRECONDITION",
140            Self::Aborted => "ABORTED",
141            Self::OutOfRange => "OUT_OF_RANGE",
142            Self::Unimplemented => "UNIMPLEMENTED",
143            Self::Internal => "INTERNAL",
144            Self::Unavailable => "UNAVAILABLE",
145            Self::DataLoss => "DATA_LOSS",
146            Self::Unauthenticated => "UNAUTHENTICATED",
147            Self::HeartbeatLost => "HEARTBEAT_LOST",
148            Self::LeaseExpired => "LEASE_EXPIRED",
149            Self::LeaseRevoked => "LEASE_REVOKED",
150            Self::BackpressureOverflow => "BACKPRESSURE_OVERFLOW",
151            Self::BudgetExhausted => "BUDGET_EXHAUSTED",
152            Self::LeaseSubsetViolation => "LEASE_SUBSET_VIOLATION",
153            Self::AgentVersionNotAvailable => "AGENT_VERSION_NOT_AVAILABLE",
154        }
155    }
156}
157
158impl fmt::Display for ErrorCode {
159    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
160        f.write_str(self.as_str())
161    }
162}
163
164/// In-process error type returned from library APIs.
165///
166/// Maps 1:1 onto the canonical [`ErrorCode`] taxonomy, with extra context on
167/// each variant so call sites can build a structured error envelope (§18.1)
168/// directly. The variants are `#[non_exhaustive]` so the taxonomy can grow
169/// without a breaking change.
170#[derive(Debug, thiserror::Error)]
171#[non_exhaustive]
172#[allow(clippy::upper_case_acronyms)]
173pub enum ARCPError {
174    /// Operation was cancelled by the caller, the runtime, or by policy.
175    #[error("operation cancelled: {reason}")]
176    Cancelled {
177        /// Free-form reason for the cancellation.
178        reason: String,
179    },
180
181    /// Malformed or invalid argument.
182    #[error("invalid argument: {detail}")]
183    InvalidArgument {
184        /// Description of the violated constraint.
185        detail: String,
186    },
187
188    /// Operation timed out before completion.
189    #[error("operation timed out: {detail}")]
190    DeadlineExceeded {
191        /// Description of what timed out.
192        detail: String,
193    },
194
195    /// Referenced entity does not exist.
196    #[error("not found: {kind} (id={id})")]
197    NotFound {
198        /// Kind of entity (e.g. `"job"`, `"artifact"`).
199        kind: &'static str,
200        /// Lookup key as a string.
201        id: String,
202    },
203
204    /// Entity creation conflicted with an existing entity.
205    #[error("already exists: {kind} (id={id})")]
206    AlreadyExists {
207        /// Kind of entity that conflicted.
208        kind: &'static str,
209        /// Lookup key as a string.
210        id: String,
211    },
212
213    /// Caller lacks the required permission or lease.
214    #[error("permission denied: {detail}")]
215    PermissionDenied {
216        /// Description of the missing permission.
217        detail: String,
218    },
219
220    /// Quota or rate limit hit.
221    #[error("resource exhausted: {detail}")]
222    ResourceExhausted {
223        /// Description of the exhausted resource.
224        detail: String,
225        /// Floor for the next attempt, if known (§18.3).
226        retry_after_seconds: Option<u64>,
227    },
228
229    /// Required pre-condition unmet (e.g. job not in cancellable state).
230    #[error("failed precondition: {detail}")]
231    FailedPrecondition {
232        /// Description of the unmet pre-condition.
233        detail: String,
234    },
235
236    /// Concurrency conflict or hard termination.
237    #[error("operation aborted: {detail}")]
238    Aborted {
239        /// Description of the abort cause.
240        detail: String,
241    },
242
243    /// Argument outside the valid range.
244    #[error("argument out of range: {detail}")]
245    OutOfRange {
246        /// Description of the range violation.
247        detail: String,
248    },
249
250    /// Feature not supported by this runtime.
251    #[error("not implemented (RFC §{section}): {detail}")]
252    Unimplemented {
253        /// RFC section reference (e.g. `"10.6"`).
254        section: &'static str,
255        /// Description of the missing surface.
256        detail: String,
257    },
258
259    /// Internal runtime error. Should be rare and indicate a bug.
260    #[error("internal error: {detail}")]
261    Internal {
262        /// Description of the internal failure.
263        detail: String,
264    },
265
266    /// Transient unavailability; retry MAY succeed.
267    #[error("service unavailable: {detail}")]
268    Unavailable {
269        /// Description of the unavailable subsystem.
270        detail: String,
271    },
272
273    /// Unrecoverable data loss or corruption (e.g. retention expired).
274    #[error("data loss: {detail}")]
275    DataLoss {
276        /// Description of what was lost.
277        detail: String,
278    },
279
280    /// Missing or invalid credentials.
281    #[error("unauthenticated: {detail}")]
282    Unauthenticated {
283        /// Description of the auth failure.
284        detail: String,
285    },
286
287    /// Job missed required heartbeats (RFC §10.3).
288    #[error("heartbeat lost: missed_count={missed_count}")]
289    HeartbeatLost {
290        /// How many consecutive heartbeats were missed.
291        missed_count: u32,
292    },
293
294    /// Operation attempted with an expired lease (RFC §15.5).
295    #[error("lease expired: lease_id={lease_id}")]
296    LeaseExpired {
297        /// The expired lease.
298        lease_id: LeaseId,
299    },
300
301    /// Operation attempted with a revoked lease (RFC §15.5).
302    #[error("lease revoked: lease_id={lease_id} (reason={reason})")]
303    LeaseRevoked {
304        /// The revoked lease.
305        lease_id: LeaseId,
306        /// Reason supplied by the grantor.
307        reason: String,
308    },
309
310    /// Subscription or stream dropped due to backpressure overflow.
311    #[error("backpressure overflow: {detail}")]
312    BackpressureOverflow {
313        /// Description of the overflowing channel.
314        detail: String,
315    },
316
317    /// A `cost.budget` capability counter reached its maximum (ARCP v1.1 §9.6).
318    #[error("budget exhausted: {detail}")]
319    BudgetExhausted {
320        /// Description of the exhausted budget counter.
321        detail: String,
322    },
323
324    /// A delegated or child lease attempted to exceed its parent envelope.
325    #[error("lease subset violation: {detail}")]
326    LeaseSubsetViolation {
327        /// Description of the violated lease axis.
328        detail: String,
329    },
330
331    /// `job.submit` named an `agent@version` the runtime does not have (ARCP v1.1 §7.5).
332    #[error("agent version not available: {agent}@{version}")]
333    AgentVersionNotAvailable {
334        /// Agent name.
335        agent: String,
336        /// Requested version.
337        version: String,
338    },
339
340    /// Unknown error. Avoid in favour of a specific code.
341    #[error("unknown error: {detail}")]
342    Unknown {
343        /// Description of the failure.
344        detail: String,
345    },
346
347    /// JSON serialisation / deserialisation failure at the wire boundary.
348    #[error("serialisation error: {0}")]
349    Serialization(#[from] serde_json::Error),
350
351    /// Persistent storage failure (event log, artifact store). Carries a
352    /// stringified description of the underlying driver error so this crate
353    /// stays free of storage-backend dependencies.
354    #[error("storage error: {detail}")]
355    Storage {
356        /// Stringified underlying error.
357        detail: String,
358    },
359
360    /// Identifier failed to parse on a wire boundary.
361    #[error("id parse error: {0}")]
362    Id(#[from] IdParseError),
363}
364
365impl ARCPError {
366    /// Map this in-process error to its canonical [`ErrorCode`].
367    #[must_use]
368    pub const fn code(&self) -> ErrorCode {
369        match self {
370            Self::Cancelled { .. } => ErrorCode::Cancelled,
371            Self::InvalidArgument { .. } | Self::Id(_) => ErrorCode::InvalidArgument,
372            Self::DeadlineExceeded { .. } => ErrorCode::DeadlineExceeded,
373            Self::NotFound { .. } => ErrorCode::NotFound,
374            Self::AlreadyExists { .. } => ErrorCode::AlreadyExists,
375            Self::PermissionDenied { .. } => ErrorCode::PermissionDenied,
376            Self::ResourceExhausted { .. } => ErrorCode::ResourceExhausted,
377            Self::FailedPrecondition { .. } => ErrorCode::FailedPrecondition,
378            Self::Aborted { .. } => ErrorCode::Aborted,
379            Self::OutOfRange { .. } => ErrorCode::OutOfRange,
380            Self::Unimplemented { .. } => ErrorCode::Unimplemented,
381            Self::Internal { .. } | Self::Storage { .. } => ErrorCode::Internal,
382            Self::Unavailable { .. } => ErrorCode::Unavailable,
383            Self::DataLoss { .. } => ErrorCode::DataLoss,
384            Self::Unauthenticated { .. } => ErrorCode::Unauthenticated,
385            Self::HeartbeatLost { .. } => ErrorCode::HeartbeatLost,
386            Self::LeaseExpired { .. } => ErrorCode::LeaseExpired,
387            Self::LeaseRevoked { .. } => ErrorCode::LeaseRevoked,
388            Self::BackpressureOverflow { .. } => ErrorCode::BackpressureOverflow,
389            Self::BudgetExhausted { .. } => ErrorCode::BudgetExhausted,
390            Self::LeaseSubsetViolation { .. } => ErrorCode::LeaseSubsetViolation,
391            Self::AgentVersionNotAvailable { .. } => ErrorCode::AgentVersionNotAvailable,
392            Self::Unknown { .. } | Self::Serialization(_) => ErrorCode::Unknown,
393        }
394    }
395
396    /// Convenience: return the §18.3 default retryability for this error.
397    #[must_use]
398    pub const fn retryable(&self) -> bool {
399        self.code().retryable()
400    }
401}
402
#[cfg(test)]
#[allow(
    clippy::expect_used,
    clippy::unwrap_used,
    clippy::panic,
    clippy::missing_panics_doc
)]
mod tests {
    use super::*;

    /// Every code must survive a JSON round trip and serialise to exactly the
    /// string `as_str` reports.
    #[test]
    fn error_code_round_trips_through_serde() {
        let every_code = [
            ErrorCode::Ok,
            ErrorCode::Cancelled,
            ErrorCode::InvalidArgument,
            ErrorCode::DeadlineExceeded,
            ErrorCode::NotFound,
            ErrorCode::AlreadyExists,
            ErrorCode::PermissionDenied,
            ErrorCode::ResourceExhausted,
            ErrorCode::FailedPrecondition,
            ErrorCode::Aborted,
            ErrorCode::OutOfRange,
            ErrorCode::Unimplemented,
            ErrorCode::Internal,
            ErrorCode::Unavailable,
            ErrorCode::DataLoss,
            ErrorCode::Unauthenticated,
            ErrorCode::HeartbeatLost,
            ErrorCode::LeaseExpired,
            ErrorCode::LeaseRevoked,
            ErrorCode::BackpressureOverflow,
            ErrorCode::BudgetExhausted,
            ErrorCode::LeaseSubsetViolation,
            ErrorCode::AgentVersionNotAvailable,
            ErrorCode::Unknown,
        ];

        for code in every_code {
            let wire = serde_json::to_string(&code).expect("serialize");
            let decoded: ErrorCode = serde_json::from_str(&wire).expect("deserialize");
            assert_eq!(code, decoded, "round-trip for {code}");
            assert_eq!(wire.trim_matches('"'), code.as_str());
        }
    }

    /// The `RATE_LIMITED` alias is accepted on input.
    #[test]
    fn rate_limited_alias_decodes_to_resource_exhausted() {
        let decoded = serde_json::from_str::<ErrorCode>("\"RATE_LIMITED\"").expect("alias");
        assert_eq!(decoded, ErrorCode::ResourceExhausted);
    }

    /// Spot-checks the default retryability table, one row per code.
    #[test]
    fn retryability_matches_rfc_18_3() {
        let expectations = [
            // Retryable by default
            (ErrorCode::ResourceExhausted, true),
            (ErrorCode::Unavailable, true),
            (ErrorCode::DeadlineExceeded, true),
            (ErrorCode::Internal, true),
            (ErrorCode::Aborted, true),
            // Non-retryable by default
            (ErrorCode::InvalidArgument, false),
            (ErrorCode::NotFound, false),
            (ErrorCode::AlreadyExists, false),
            (ErrorCode::PermissionDenied, false),
            (ErrorCode::FailedPrecondition, false),
            (ErrorCode::Unimplemented, false),
            (ErrorCode::Unauthenticated, false),
            (ErrorCode::DataLoss, false),
            (ErrorCode::LeaseSubsetViolation, false),
        ];

        for (c, should_retry) in expectations {
            if should_retry {
                assert!(c.retryable(), "{c} should be retryable");
            } else {
                assert!(!c.retryable(), "{c} should NOT be retryable");
            }
        }
    }

    #[test]
    fn arcp_error_maps_to_canonical_code() {
        let denied = ARCPError::PermissionDenied {
            detail: String::from("missing lease"),
        };
        assert_eq!(denied.code(), ErrorCode::PermissionDenied);
        assert!(!denied.retryable());
    }

    /// `IdParseError` converts via `From` and is reported as `INVALID_ARGUMENT`.
    #[test]
    fn id_parse_error_propagates_via_from() {
        let parse_failure: IdParseError = "junk".parse::<crate::ids::SessionId>().unwrap_err();
        let converted = ARCPError::from(parse_failure);
        assert_eq!(converted.code(), ErrorCode::InvalidArgument);
    }

    #[test]
    fn v1_1_error_codes_serialize_to_wire_strings() {
        // `as_str` spelling.
        for (code, spelling) in [
            (ErrorCode::BudgetExhausted, "BUDGET_EXHAUSTED"),
            (ErrorCode::LeaseExpired, "LEASE_EXPIRED"),
            (ErrorCode::LeaseSubsetViolation, "LEASE_SUBSET_VIOLATION"),
            (
                ErrorCode::AgentVersionNotAvailable,
                "AGENT_VERSION_NOT_AVAILABLE",
            ),
        ] {
            assert_eq!(code.as_str(), spelling);
        }

        // JSON encoding.
        for (code, json) in [
            (ErrorCode::BudgetExhausted, "\"BUDGET_EXHAUSTED\""),
            (
                ErrorCode::AgentVersionNotAvailable,
                "\"AGENT_VERSION_NOT_AVAILABLE\"",
            ),
        ] {
            assert_eq!(serde_json::to_string(&code).expect("serialize"), json);
        }

        // JSON decoding.
        for (json, expected, failure_msg) in [
            (
                "\"BUDGET_EXHAUSTED\"",
                ErrorCode::BudgetExhausted,
                "deserialize budget",
            ),
            (
                "\"LEASE_SUBSET_VIOLATION\"",
                ErrorCode::LeaseSubsetViolation,
                "deserialize subset",
            ),
            (
                "\"AGENT_VERSION_NOT_AVAILABLE\"",
                ErrorCode::AgentVersionNotAvailable,
                "deserialize agent version",
            ),
        ] {
            let decoded: ErrorCode = serde_json::from_str(json).expect(failure_msg);
            assert_eq!(decoded, expected);
        }
    }

    #[test]
    fn v1_1_arcp_errors_map_to_canonical_codes() {
        let cases = [
            (
                ARCPError::BudgetExhausted {
                    detail: "cost.budget USD counter <= 0".into(),
                },
                ErrorCode::BudgetExhausted,
            ),
            (
                ARCPError::LeaseSubsetViolation {
                    detail: "model.use widened".into(),
                },
                ErrorCode::LeaseSubsetViolation,
            ),
            (
                ARCPError::AgentVersionNotAvailable {
                    agent: "summarizer".into(),
                    version: "2.3.0".into(),
                },
                ErrorCode::AgentVersionNotAvailable,
            ),
        ];

        for (err, expected_code) in cases {
            assert_eq!(err.code(), expected_code);
            assert!(!err.retryable());
        }
    }

    /// `serde_json::Error` converts via `From` and is reported as `UNKNOWN`.
    #[test]
    fn serde_error_propagates_via_from() {
        let json_failure = serde_json::from_str::<serde_json::Value>("not-json").unwrap_err();
        let converted = ARCPError::from(json_failure);
        assert_eq!(converted.code(), ErrorCode::Unknown);
    }
}