Skip to main content

ff_core/
engine_error.rs

//! Typed engine-error surface (issue #58.6).
//!
//! **RFC-012 Stage 1a:** moved from `ff-sdk::engine_error` to
//! `ff-core::engine_error` so it becomes nameable by the
//! `EngineBackend` trait (which lives in `ff-core::engine_backend`) without
//! forcing a public-surface dependency from ff-core on ff-script. The
//! [`ScriptError`]-aware helpers (`From<ScriptError>`, `valkey_kind`,
//! `transport_script`, `transport_script_ref`) live in ff-script as
//! free functions (see `ff_script::engine_error_ext`) — ff-core owns
//! the enum shapes; ff-script owns the transport-downcast plumbing.
//!
//! # Mapping shape
//!
//! `ScriptError` lives in the `ff-script` crate (transport-adjacent).
//! `EngineError` lives here in `ff-core` and is what public SDK calls
//! return via `ff_sdk::SdkError::Engine`. The bidirectional mapping:
//!
//! * `From<ScriptError> for EngineError` — every `ScriptError` variant
//!   is classified into `NotFound` / `Validation` / `Contention` /
//!   `Conflict` / `State` / `Bug` / `Transport`. `Parse` + `Valkey`
//!   flow through `Transport { source: Box<ScriptError> }` so the
//!   underlying `ferriskey::ErrorKind` / parse detail is preserved.
//! * `DependencyAlreadyExists` is special: per the #58.6 design the
//!   variant carries the pre-existing [`EdgeSnapshot`] inline.
//!   Populating that field requires an extra round-trip (the Lua
//!   script only knows the edge_id), so plain `From<ScriptError>`
//!   returns a `Transport` fallback for that code — callers in the
//!   `stage_dependency` path use `ff_sdk::engine_error::enrich_dependency_conflict`
//!   to perform the follow-up `describe_edge` and upgrade the error
//!   before returning.
//!
//! # Exhaustiveness
//!
//! The top-level [`EngineError`] and every sub-kind are
//! `#[non_exhaustive]`. FF can add new Lua error codes in minors
//! without a breaking change to this surface — consumers that
//! `match` on a sub-kind must include a `_` arm.
38
39use crate::error::ErrorClass;
40
41/// Typed engine-error surface. See module docs.
42#[derive(Debug, thiserror::Error)]
43#[non_exhaustive]
44pub enum EngineError {
45    /// A uniquely-identified resource did not exist. `entity` is a
46    /// stable label (e.g. `"execution"`, `"flow"`, `"attempt"`) that
47    /// consumers can match without re-parsing a message.
48    #[error("not found: {entity}")]
49    NotFound { entity: &'static str },
50
51    /// Caller supplied a malformed, out-of-range, or otherwise
52    /// rejected input. `detail` carries the Lua-side payload (field
53    /// name, offending value, or CSV of missing tokens, depending on
54    /// `kind`).
55    #[error("validation: {kind:?}: {detail}")]
56    Validation {
57        kind: ValidationKind,
58        detail: String,
59    },
60
61    /// Transient conflict with another worker or with the current
62    /// state of the execution/flow. Caller should retry per
63    /// RFC-010 §10.7.
64    #[error("contention: {0:?}")]
65    Contention(ContentionKind),
66
67    /// Permanent conflict — the requested mutation conflicts with
68    /// an existing record (e.g. duplicate edge, cycle, already-in-flow).
69    /// Caller must not blindly retry.
70    #[error("conflict: {0:?}")]
71    Conflict(ConflictKind),
72
73    /// Legal but surprising state — lease expired, already-suspended,
74    /// duplicate-signal, budget-exceeded, etc. Per-variant semantics
75    /// documented on [`StateKind`].
76    #[error("state: {0:?}")]
77    State(StateKind),
78
79    /// FF-internal invariant violation that should not be reachable
80    /// in a correctly-behaving deployment. Consumers typically log
81    /// and surface as a 5xx.
82    #[error("bug: {0:?}")]
83    Bug(BugKind),
84
85    /// Backend transport fault or response-parse failure (RFC-012 §4.2
86    /// round-4 shape). Broadened in Stage 0 to carry `Box<dyn Error>`
87    /// so non-Valkey backends (Postgres, future) can route their
88    /// native transport errors through this variant without going via
89    /// `ScriptError`.
90    ///
91    /// * `backend` — static diagnostic label (`"valkey"`, `"postgres"`,
92    ///   etc.). Kept `&'static str` to avoid heap alloc on construction.
93    /// * `source` — boxed error. For the Valkey backend this is
94    ///   `ff_script::error::ScriptError`; downcast with
95    ///   `source.downcast_ref::<ScriptError>()` to recover
96    ///   `ferriskey::ErrorKind` / parse detail. Helper lives in
97    ///   `ff_script::engine_error_ext::transport_script_ref`.
98    #[error("transport ({backend}): {source}")]
99    Transport {
100        backend: &'static str,
101        #[source]
102        source: Box<dyn std::error::Error + Send + Sync + 'static>,
103    },
104
105    /// Backend method not wired up yet (RFC-012 §4.2 K#7 holdover).
106    /// Returned by staged backend impls for methods that are known
107    /// types in the trait but not yet implemented. Graceful degradation
108    /// in place of `unimplemented!()` panics. Additive; does not
109    /// participate in the `From<ScriptError>` mapping.
110    #[error("unavailable: {op}")]
111    Unavailable { op: &'static str },
112
113    /// An inner [`EngineError`] wrapped with a call-site label so
114    /// operators triaging logs can see which op the error came from
115    /// without inferring from surrounding spans. Constructed via
116    /// [`backend_context`]; carries a lightweight string context
117    /// (e.g. `"renew: FCALL ff_renew_lease"`).
118    ///
119    /// Classification helpers (`ErrorClass`, `BackendErrorKind`,
120    /// etc.) transparently descend into `source` so a consumer that
121    /// matches on the wrapper arm keeps the same retry/terminal
122    /// semantics as the unwrapped inner error.
123    #[error("{context}: {source}")]
124    Contextual {
125        #[source]
126        source: Box<EngineError>,
127        context: String,
128    },
129}
130
131/// Wrap an [`EngineError`] with a call-site label when the error is
132/// a transport-family fault — `Transport` or `Unavailable`. Typed
133/// classifications (`NotFound`, `Validation`, `Contention`,
134/// `Conflict`, `State`, `Bug`) form the public contract boundary
135/// for consumers that `match` on the variant, so we return them
136/// unchanged. Repeated wraps on an already-`Contextual` error
137/// nest an additional layer; callers should wrap once per op
138/// boundary.
139///
140/// Promoted to ff-core so `ff-backend-valkey` can annotate its
141/// `EngineBackend` impls with the same context shape ff-sdk's
142/// snapshot helpers use (issue #154).
143pub fn backend_context(err: EngineError, context: impl Into<String>) -> EngineError {
144    match err {
145        EngineError::Transport { .. }
146        | EngineError::Unavailable { .. }
147        | EngineError::Contextual { .. } => EngineError::Contextual {
148            source: Box::new(err),
149            context: context.into(),
150        },
151        // Typed classifications are part of the public contract;
152        // wrapping them would break `match` call sites that inspect
153        // the inner variant (e.g. tests asserting
154        // `EngineError::Validation { kind: Corruption, .. }`).
155        other => other,
156    }
157}
158
/// Validation sub-kinds. 1:1 with the Lua validation codes.
///
/// Carried by [`EngineError::Validation`] together with a free-form
/// `detail` string. `#[non_exhaustive]`: consumers matching on this
/// enum must include a wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum ValidationKind {
    /// Generic caller-supplied input rejected (field-name detail).
    InvalidInput,
    /// Worker caps do not satisfy execution's required_capabilities.
    /// `detail` is the sorted-CSV of missing tokens.
    CapabilityMismatch,
    /// Malformed/oversized capability list.
    InvalidCapabilities,
    /// `policy_json` not valid JSON or structurally wrong.
    InvalidPolicyJson,
    /// Signal payload > 64KB.
    PayloadTooLarge,
    /// Max signals per execution reached.
    SignalLimitExceeded,
    /// MAC verification failed on waitpoint_key.
    InvalidWaitpointKey,
    /// Pending waitpoint has no HMAC token field.
    WaitpointNotTokenBound,
    /// Frame > 64KB.
    ///
    /// NOTE(review): the variant name suggests a retention cap rather
    /// than a frame-size cap — confirm the wording against the Lua
    /// code table.
    RetentionLimitExceeded,
    /// Lease/attempt binding mismatch on suspend.
    InvalidLeaseForSuspend,
    /// Dependency edge not found / invalid dependency ref.
    InvalidDependency,
    /// Waitpoint/execution binding mismatch.
    InvalidWaitpointForExecution,
    /// Unrecognized blocking reason.
    InvalidBlockingReason,
    /// Invalid stream ID offset.
    InvalidOffset,
    /// Auth failed.
    Unauthorized,
    /// Budget scope malformed.
    InvalidBudgetScope,
    /// Operator privileges required.
    BudgetOverrideNotAllowed,
    /// Malformed quota definition.
    InvalidQuotaSpec,
    /// Rotation kid must be non-empty and dot-free.
    InvalidKid,
    /// Rotation secret must be non-empty even-length hex.
    InvalidSecretHex,
    /// Rotation grace_ms must be a non-negative integer.
    InvalidGraceMs,
    /// Tag key violates reserved-namespace rule.
    InvalidTagKey,
    /// Unrecognized stream frame type.
    InvalidFrameType,
    /// On-disk corruption or protocol drift: an engine-owned hash /
    /// key returned a field shape the decoder could not parse (missing
    /// required field, malformed timestamp, unknown extra field,
    /// cross-field identity mismatch, etc.). `detail` carries the
    /// decoder's diagnostic string — the specific field name and/or
    /// offending value — in the form
    /// `"<context>: <field?>: <message>"` so operators can locate the
    /// bad key without reparsing.
    ///
    /// Classified as `Terminal`: a consumer retrying the read will
    /// see the same bytes. Surface to the operator; do not loop.
    Corruption,
}
223
/// Contention sub-kinds (retryable per RFC-010 §10.7). Caller should
/// re-dispatch or re-read and retry.
///
/// Carried by [`EngineError::Contention`]. `#[non_exhaustive]`:
/// consumers matching on this enum must include a wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum ContentionKind {
    /// Re-dispatch to `claim_resumed_execution`.
    UseClaimResumedExecution,
    /// Re-dispatch to `claim_execution`.
    NotAResumedExecution,
    /// State changed since grant. Request new grant.
    ExecutionNotLeaseable,
    /// Another worker holds lease. Request a different execution.
    LeaseConflict,
    /// Grant missing/mismatched. Request new grant.
    InvalidClaimGrant,
    /// Grant TTL elapsed. Request new grant.
    ClaimGrantExpired,
    /// No execution currently available.
    NoEligibleExecution,
    /// Waitpoint may not exist yet. Retry with backoff.
    WaitpointNotFound,
    /// Route to buffer_signal_for_pending_waitpoint.
    WaitpointPendingUseBufferScript,
    /// Graph revision changed. Re-read adjacency, retry.
    StaleGraphRevision,
    /// Execution is not in `active` state (lease superseded, etc.)
    /// Carries the Lua-side detail payload for replay reconciliation.
    /// All four fields are kept as the raw strings received; no
    /// parsing or validation happens in this type.
    ExecutionNotActive {
        terminal_outcome: String,
        lease_epoch: String,
        lifecycle_phase: String,
        attempt_id: String,
    },
    /// State changed. Scheduler skips.
    ExecutionNotEligible,
    /// Removed by another scheduler.
    ExecutionNotInEligibleSet,
    /// Already reclaimed/cancelled. Skip.
    ExecutionNotReclaimable,
    /// Target has no active lease (already revoked/expired/unowned).
    NoActiveLease,
    /// Window full; caller should backoff `retry_after_ms`.
    ///
    /// NOTE(review): this variant carries no payload, so the
    /// `retry_after_ms` value is presumably surfaced elsewhere —
    /// confirm where callers are expected to read it from.
    RateLimitExceeded,
    /// Concurrency cap hit.
    ConcurrencyLimitExceeded,
}
270
271/// Permanent conflict sub-kinds. Caller must reconcile rather than
272/// retry.
273#[derive(Debug, Clone, PartialEq, Eq)]
274#[non_exhaustive]
275pub enum ConflictKind {
276    /// Dependency edge already exists. Carries the pre-existing
277    /// [`EdgeSnapshot`] so callers implementing "409 on re-declare
278    /// with different kind/ref" don't need a follow-up read.
279    ///
280    /// Note: the plain `From<ScriptError> for EngineError` impl
281    /// cannot populate `existing` (that requires an async
282    /// `describe_edge` round trip), so it falls through to
283    /// `EngineError::Transport`. Callers on the `stage_dependency`
284    /// path use `ff_sdk::engine_error::enrich_dependency_conflict`
285    /// to perform the follow-up read and promote the error.
286    ///
287    /// [`EdgeSnapshot`]: crate::contracts::EdgeSnapshot
288    DependencyAlreadyExists {
289        existing: crate::contracts::EdgeSnapshot,
290    },
291    /// Edge would create a cycle.
292    CycleDetected,
293    /// Self-referencing edge (upstream == downstream).
294    SelfReferencingEdge,
295    /// Execution is already a member of another flow.
296    ExecutionAlreadyInFlow,
297    /// Waitpoint already exists (pending or active).
298    WaitpointAlreadyExists,
299    /// Budget already attached or conflicts.
300    BudgetAttachConflict,
301    /// Quota policy already attached.
302    QuotaAttachConflict,
303    /// Rotation: same kid already installed with a different secret.
304    /// String is the conflicting kid.
305    RotationConflict(String),
306    /// Invariant violation: active attempt already exists where one
307    /// was expected absent.
308    ActiveAttemptExists,
309}
310
/// Legal-but-surprising state sub-kinds. Per-variant semantics vary
/// (some are benign no-ops, some are terminal). Consult the RFC-010
/// §10.7 classification table.
///
/// Carried by [`EngineError::State`]. [`EngineError::class`] is the
/// authoritative mapping: `BudgetExceeded` is `Cooperative`, an
/// explicit list of benign variants is `Informational`, and every
/// other variant falls through to `Terminal`.
///
/// `#[non_exhaustive]`: consumers matching on this enum must include
/// a wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum StateKind {
    /// Lease superseded by reclaim.
    StaleLease,
    /// Lease TTL elapsed.
    LeaseExpired,
    /// Operator revoked lease.
    LeaseRevoked,
    /// Already resumed/cancelled. No-op.
    ExecutionNotSuspended,
    /// Open suspension already active. No-op.
    AlreadySuspended,
    /// Signal too late — waitpoint already closed.
    WaitpointClosed,
    /// Execution not suspended; no valid signal target.
    TargetNotSignalable,
    /// Signal already delivered (dedup).
    DuplicateSignal,
    /// Resume conditions not satisfied.
    ResumeConditionNotMet,
    /// Waitpoint not in pending state.
    WaitpointNotPending,
    /// Pending waitpoint aged out before suspension committed.
    PendingWaitpointExpired,
    /// Waitpoint is not in an open state.
    WaitpointNotOpen,
    /// Cannot replay non-terminal execution.
    ExecutionNotTerminal,
    /// Replay limit reached.
    MaxReplaysExhausted,
    /// Attempt terminal; no appends.
    StreamClosed,
    /// Lease mismatch on stream append.
    StaleOwnerCannotAppend,
    /// Grant already issued. Skip.
    GrantAlreadyExists,
    /// Execution not in specified flow.
    ExecutionNotInFlow,
    /// Flow already in terminal state.
    FlowAlreadyTerminal,
    /// Dependencies not yet satisfied.
    DepsNotSatisfied,
    /// Not blocked by dependencies.
    NotBlockedByDeps,
    /// Execution not runnable.
    NotRunnable,
    /// Execution already terminal.
    Terminal,
    /// Hard budget limit reached.
    BudgetExceeded,
    /// Soft budget limit reached (warning; continue).
    BudgetSoftExceeded,
    /// Usage seq already processed. No-op.
    OkAlreadyApplied,
    /// Attempt not in started state.
    AttemptNotStarted,
    /// Attempt already ended. No-op.
    AttemptAlreadyTerminal,
    /// Wrong state for new attempt.
    ExecutionNotEligibleForAttempt,
    /// Execution not terminal or replay limit reached.
    ReplayNotAllowed,
    /// Retry limit reached.
    MaxRetriesExhausted,
    /// Already closed. No-op.
    StreamAlreadyClosed,
    /// RFC-013 Stage 1d — strict `suspend` path refuses the
    /// early-satisfied branch. The underlying backend outcome is
    /// [`crate::contracts::SuspendOutcome::AlreadySatisfied`]; only the
    /// SDK's strict `ClaimedTask::suspend` wrapper maps it to this
    /// error. `ClaimedTask::try_suspend` returns the outcome directly.
    AlreadySatisfied,
}
388
/// FF-internal invariant-violation sub-kinds. Should not be reachable
/// in a correctly-behaving deployment.
///
/// Carried by [`EngineError::Bug`]. `#[non_exhaustive]`: consumers
/// matching on this enum must include a wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum BugKind {
    /// `attempt_not_in_created_state`: internal sequencing error.
    AttemptNotInCreatedState,
}
397
398/// Backend-agnostic transport error carried across public
399/// ff-sdk / ff-server error surfaces (#88).
400///
401/// The `Valkey` variant is the only one populated today; additional
402/// variants (e.g. `Postgres`) will be added additively as other
403/// backends land. The enum is `#[non_exhaustive]` so consumers must
404/// include a wildcard arm.
405///
406/// Construction from the Valkey-native `ferriskey::Error` lives in
407/// `ff_backend_valkey::backend_error_from_ferriskey` — keeping that
408/// conversion outside ff-core preserves ff-core's ferriskey-free
409/// public surface.
410#[derive(Debug, Clone, thiserror::Error)]
411#[non_exhaustive]
412pub enum BackendError {
413    /// Valkey-backend transport failure. Carries a backend-agnostic
414    /// classification plus the backend-rendered message so downstream
415    /// consumers can inspect without depending on ferriskey.
416    #[error("valkey backend: {kind:?}: {message}")]
417    Valkey {
418        kind: BackendErrorKind,
419        message: String,
420    },
421}
422
423impl BackendError {
424    /// Returns the classified backend kind if this error is a Valkey
425    /// transport fault. Forward-compatible with future backends:
426    /// non-Valkey variants return `None` on a call that names only the
427    /// Valkey kind; code that wants a backend-specific view should
428    /// match directly on [`BackendError`].
429    pub fn kind(&self) -> BackendErrorKind {
430        match self {
431            Self::Valkey { kind, .. } => *kind,
432        }
433    }
434
435    /// Return the backend-rendered message payload.
436    pub fn message(&self) -> &str {
437        match self {
438            Self::Valkey { message, .. } => message.as_str(),
439        }
440    }
441}
442
/// Classified backend transport errors, kept backend-agnostic on
/// purpose (#88). Each variant maps a family of native backend error
/// kinds into a stable, consumer-matchable shape.
///
/// Consumers requiring the exact native kind for a Valkey backend
/// must go through `ff_backend_valkey` explicitly; ff-sdk/ff-server's
/// public surface will only ever hand out [`BackendErrorKind`].
///
/// `#[non_exhaustive]`: consumers matching on this enum must include
/// a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum BackendErrorKind {
    /// Network / I/O failure: the request may or may not have been
    /// processed. Typically retryable with backoff.
    Transport,
    /// Backend rejected the request on protocol / parse grounds. Not
    /// retryable without a fix.
    Protocol,
    /// Backend timed out responding to the request. Retryable.
    Timeout,
    /// Authentication / authorization failure. Not retryable.
    Auth,
    /// Cluster topology churn (MOVED, ASK, CLUSTERDOWN, MasterDown,
    /// CrossSlot, ConnectionNotFoundForRoute, AllConnectionsUnavailable).
    /// Retryable after topology settles.
    Cluster,
    /// Backend is temporarily busy loading state (e.g. Valkey
    /// `LOADING`). Retryable.
    BusyLoading,
    /// Backend indicates the referenced script/function does not
    /// exist. Typically handled by the caller via re-load.
    ScriptNotLoaded,
    /// Any other classified error from the backend. Fallback bucket
    /// for native kinds outside the curated set above.
    Other,
}

impl BackendErrorKind {
    /// Stable, lowercase snake_case label suitable for log fields /
    /// HTTP `kind` body slots (multi-word kinds use `_`, e.g.
    /// `"busy_loading"`, `"script_not_loaded"`). Guaranteed not to
    /// change across releases for the existing variants.
    #[must_use]
    pub fn as_stable_str(&self) -> &'static str {
        match self {
            Self::Transport => "transport",
            Self::Protocol => "protocol",
            Self::Timeout => "timeout",
            Self::Auth => "auth",
            Self::Cluster => "cluster",
            Self::BusyLoading => "busy_loading",
            Self::ScriptNotLoaded => "script_not_loaded",
            Self::Other => "other",
        }
    }

    /// Whether a caller should consider this kind retryable with
    /// backoff.
    ///
    /// Conservative: only `Transport`, `Timeout`, `Cluster`, and
    /// `BusyLoading` return `true`. `Protocol`, `Auth`,
    /// `ScriptNotLoaded`, and `Other` all return `false` —
    /// `ScriptNotLoaded` is expected to be handled by a re-load
    /// rather than a blind retry.
    #[must_use]
    pub fn is_retryable(&self) -> bool {
        matches!(
            self,
            Self::Transport | Self::Timeout | Self::Cluster | Self::BusyLoading
        )
    }
}
504
505impl EngineError {
506    /// Classify an [`EngineError`] using the underlying
507    /// [`ErrorClass`] table.
508    ///
509    /// **Transport classification in ff-core:** the inner source is
510    /// `Box<dyn std::error::Error>` which ff-core cannot downcast
511    /// without naming `ScriptError`. ff-core returns `Terminal` for
512    /// every `Transport` variant by default. Callers needing the
513    /// Retryable-on-transient-Valkey-error classification use
514    /// `ff_script::engine_error_ext::class` which downcasts to
515    /// `ScriptError` and delegates to `ScriptError::class`. ff-sdk's
516    /// public `SdkError::is_retryable` / `backend_kind` methods wire
517    /// the ff-script helper in so consumers retain the Phase-1
518    /// behavior transparently. (`backend_kind` was renamed from
519    /// `valkey_kind` in #88.)
520    pub fn class(&self) -> ErrorClass {
521        match self {
522            Self::NotFound { .. } => ErrorClass::Terminal,
523            Self::Validation { .. } => ErrorClass::Terminal,
524            Self::Contention(_) => ErrorClass::Retryable,
525            Self::Conflict(_) => ErrorClass::Terminal,
526            Self::State(StateKind::BudgetExceeded) => ErrorClass::Cooperative,
527            Self::State(
528                StateKind::ExecutionNotSuspended
529                | StateKind::AlreadySuspended
530                | StateKind::AlreadySatisfied
531                | StateKind::WaitpointClosed
532                | StateKind::DuplicateSignal
533                | StateKind::GrantAlreadyExists
534                | StateKind::OkAlreadyApplied
535                | StateKind::AttemptAlreadyTerminal
536                | StateKind::StreamAlreadyClosed
537                | StateKind::BudgetSoftExceeded
538                | StateKind::WaitpointNotOpen
539                | StateKind::WaitpointNotPending
540                | StateKind::PendingWaitpointExpired
541                | StateKind::NotBlockedByDeps
542                | StateKind::DepsNotSatisfied,
543            ) => ErrorClass::Informational,
544            Self::State(_) => ErrorClass::Terminal,
545            Self::Bug(_) => ErrorClass::Bug,
546            // ff-core cannot name ScriptError. Safe default: Terminal.
547            // ff-script's engine_error_ext::class upgrades to
548            // ScriptError::class when the inner source is a
549            // ScriptError.
550            Self::Transport { .. } => ErrorClass::Terminal,
551            // Unavailable is terminal at the call site — the method is
552            // not implemented; the caller must either fall back to a
553            // different code path or surface to the user.
554            Self::Unavailable { .. } => ErrorClass::Terminal,
555            // Descend into the wrapped error — context is diagnostic;
556            // classification follows the inner cause.
557            Self::Contextual { source, .. } => source.class(),
558        }
559    }
560}
561
// Unit tests for the classification table, the `backend_context`
// wrapper, and the `BackendError` / `BackendErrorKind` accessors.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn class_contention_is_retryable() {
        let err = EngineError::Contention(ContentionKind::LeaseConflict);
        assert_eq!(err.class(), ErrorClass::Retryable);
    }

    #[test]
    fn class_budget_exceeded_is_cooperative() {
        let err = EngineError::State(StateKind::BudgetExceeded);
        assert_eq!(err.class(), ErrorClass::Cooperative);
    }

    #[test]
    fn class_duplicate_signal_is_informational() {
        let err = EngineError::State(StateKind::DuplicateSignal);
        assert_eq!(err.class(), ErrorClass::Informational);
    }

    #[test]
    fn class_bug_variant() {
        let err = EngineError::Bug(BugKind::AttemptNotInCreatedState);
        assert_eq!(err.class(), ErrorClass::Bug);
    }

    #[test]
    fn class_transport_defaults_terminal() {
        // ff-core has no ScriptError downcast; Transport is Terminal
        // until ff-script's engine_error_ext::class is called.
        let raw = std::io::Error::other("simulated transport error");
        let err = EngineError::Transport {
            backend: "test",
            source: Box::new(raw),
        };
        assert_eq!(err.class(), ErrorClass::Terminal);
    }

    #[test]
    fn unavailable_is_terminal() {
        assert_eq!(
            EngineError::Unavailable { op: "foo" }.class(),
            ErrorClass::Terminal
        );
    }

    #[test]
    fn class_descends_through_contextual() {
        // The wrapper is diagnostic only: classification must follow
        // the inner error, not default to Terminal.
        let wrapped = EngineError::Contextual {
            source: Box::new(EngineError::Contention(ContentionKind::LeaseConflict)),
            context: "ctx".into(),
        };
        assert_eq!(wrapped.class(), ErrorClass::Retryable);
    }

    #[test]
    fn backend_context_wraps_transport_and_preserves_typed() {
        // Transport gets wrapped with the call-site label (issue #154).
        let raw = std::io::Error::other("simulated transport error");
        let wrapped = backend_context(
            EngineError::Transport {
                backend: "valkey",
                source: Box::new(raw),
            },
            "renew: FCALL ff_renew_lease",
        );
        let rendered = format!("{wrapped}");
        assert!(
            rendered.starts_with("renew: FCALL ff_renew_lease: transport (valkey): "),
            "expected context prefix, got: {rendered}"
        );
        // Unavailable also wraps so callers can still filter on the op.
        let wrapped = backend_context(EngineError::Unavailable { op: "x" }, "ctx");
        assert!(matches!(wrapped, EngineError::Contextual { .. }));

        // Typed classifications pass through unchanged so existing
        // `match` call sites keep working.
        let inner = EngineError::Validation {
            kind: ValidationKind::Corruption,
            detail: "bad".into(),
        };
        let passthrough = backend_context(inner, "describe_edge: HGETALL edge");
        match passthrough {
            EngineError::Validation { kind, .. } => {
                assert_eq!(kind, ValidationKind::Corruption);
            }
            other => panic!("expected Validation, got {other:?}"),
        }
        let inner = EngineError::Contention(ContentionKind::LeaseConflict);
        assert_eq!(
            backend_context(inner, "renew: FCALL ff_renew_lease").class(),
            ErrorClass::Retryable
        );
    }

    #[test]
    fn backend_context_rewrap_nests_a_second_layer() {
        // Wrapping an already-Contextual error adds another layer; the
        // labels render outermost-first and class() still sees the
        // innermost cause.
        let once = backend_context(EngineError::Unavailable { op: "x" }, "inner");
        let twice = backend_context(once, "outer");
        assert_eq!(twice.to_string(), "outer: inner: unavailable: x");
        assert_eq!(twice.class(), ErrorClass::Terminal);
        match twice {
            EngineError::Contextual { source, context } => {
                assert_eq!(context, "outer");
                assert!(matches!(*source, EngineError::Contextual { .. }));
            }
            other => panic!("expected Contextual, got {other:?}"),
        }
    }

    #[test]
    fn backend_error_kind_round_trip() {
        let be = BackendError::Valkey {
            kind: BackendErrorKind::Transport,
            message: "connection reset".into(),
        };
        assert_eq!(be.kind(), BackendErrorKind::Transport);
        assert_eq!(be.message(), "connection reset");
    }

    #[test]
    fn backend_kind_stable_strings_fixed() {
        // Stability fence: these strings are part of the public
        // contract (log field values, HTTP body `kind` slots). Adding
        // a variant is additive; changing an existing string is a
        // break.
        assert_eq!(BackendErrorKind::Transport.as_stable_str(), "transport");
        assert_eq!(BackendErrorKind::Protocol.as_stable_str(), "protocol");
        assert_eq!(BackendErrorKind::Timeout.as_stable_str(), "timeout");
        assert_eq!(BackendErrorKind::Auth.as_stable_str(), "auth");
        assert_eq!(BackendErrorKind::Cluster.as_stable_str(), "cluster");
        assert_eq!(
            BackendErrorKind::BusyLoading.as_stable_str(),
            "busy_loading"
        );
        assert_eq!(
            BackendErrorKind::ScriptNotLoaded.as_stable_str(),
            "script_not_loaded"
        );
        assert_eq!(BackendErrorKind::Other.as_stable_str(), "other");
    }

    #[test]
    fn backend_kind_retryability() {
        for k in [
            BackendErrorKind::Transport,
            BackendErrorKind::Timeout,
            BackendErrorKind::Cluster,
            BackendErrorKind::BusyLoading,
        ] {
            assert!(k.is_retryable(), "{k:?} should be retryable");
        }
        for k in [
            BackendErrorKind::Protocol,
            BackendErrorKind::Auth,
            BackendErrorKind::ScriptNotLoaded,
            BackendErrorKind::Other,
        ] {
            assert!(!k.is_retryable(), "{k:?} should NOT be retryable");
        }
    }
}