Skip to main content

ff_core/
engine_backend.rs

//! The `EngineBackend` trait — abstracting FlowFabric's write surface.
//!
//! **RFC-012 Stage 1a:** this stage lands the trait itself. The
//! Valkey-backed impl lives in `ff-backend-valkey`; future backends
//! (Postgres) add a sibling crate with their own impl. ff-sdk's
//! `FlowFabricWorker` gains `connect_with(backend)` /
//! `backend(&self)` accessors so consumers that want to bring their
//! own backend (tests, future non-Valkey deployments) can hand one
//! in. The hot-path migration of `ClaimedTask` / `FlowFabricWorker`
//! to forward through the trait lands across Stages 1b-1d.
//!
//! # Object safety
//!
//! `EngineBackend` is object-safe: all methods are `async fn` behind
//! `#[async_trait]` and take `&self`. Consumers can hold
//! `Arc<dyn EngineBackend>` for heterogeneous-backend deployments.
//! The trait is `Send + Sync + 'static` per RFC-012 §4.1; every impl
//! must honour that bound.
//!
//! # Error surface
//!
//! Every method returns [`Result<_, EngineError>`]. `EngineError`'s
//! `Transport` variant carries a boxed `dyn Error + Send + Sync`;
//! Valkey-backed transport faults box a
//! `ff_script::error::ScriptError` (downcast via
//! `ff_script::engine_error_ext::transport_script_ref`). Other
//! backends box their native error type and set the `backend` tag
//! accordingly.
//!
//! # Atomicity contract
//!
//! Per-op state transitions MUST be atomic (RFC-012 §3.4). On Valkey
//! this is the single-FCALL-per-op property; on Postgres it is the
//! per-transaction property. A backend that cannot honour atomicity
//! for a given op either MUST NOT implement `EngineBackend` or MUST
//! return `EngineError::Unavailable { op }` for the affected method.
//!
//! # Replay semantics
//!
//! `complete`, `fail`, `cancel`, `suspend`, `delay`, `wait_children`
//! are idempotent under replay — calling twice with the same handle
//! and args yields a well-defined outcome: success on the first call,
//! and a typed `State` / `Contention` error on subsequent calls where
//! the fence triple no longer matches a live lease.
45
46use std::time::Duration;
47
48use async_trait::async_trait;
49
50use crate::backend::{
51    AdmissionDecision, CancelFlowPolicy, CancelFlowWait, CapabilitySet, ClaimPolicy,
52    FailOutcome, FailureClass, FailureReason, Frame, Handle, LeaseRenewal, ReclaimToken,
53    ResumeSignal, WaitpointSpec,
54};
55use crate::contracts::{CancelFlowResult, ExecutionSnapshot, FlowSnapshot};
56use crate::engine_error::EngineError;
57use crate::types::{BudgetId, ExecutionId, FlowId, LaneId, TimestampMs};
58
59/// The engine write surface — a single trait a backend implementation
60/// honours to serve a `FlowFabricWorker`.
61///
62/// See RFC-012 §3.1 for the inventory rationale and §3.3 for the
63/// type-level shape. 15 methods.
64///
65/// # Note on `complete` payload shape
66///
67/// The RFC §3.3 sketch uses `Option<Bytes>`; the Stage 1a trait uses
68/// `Option<Vec<u8>>` to match the existing
69/// `ff_sdk::ClaimedTask::complete` signature and avoid adding a
70/// `bytes` public-type dep for zero consumer benefit. Round-4 §7.17
71/// resolved the payload container debate to `Box<[u8]>` in the
72/// public type (see `HandleOpaque`); `Option<Vec<u8>>` is the
73/// zero-churn choice consistent with today's code. Consumers that
74/// need `&[u8]` can borrow via `.as_deref()` on the Option.
75#[async_trait]
76pub trait EngineBackend: Send + Sync + 'static {
77    // ── Claim + lifecycle ──
78
79    /// Fresh-work claim. Returns `Ok(None)` when no work is currently
80    /// available; `Err` only on transport or input-validation faults.
81    async fn claim(
82        &self,
83        lane: &LaneId,
84        capabilities: &CapabilitySet,
85        policy: ClaimPolicy,
86    ) -> Result<Option<Handle>, EngineError>;
87
88    /// Renew a held lease. Returns the updated expiry + epoch on
89    /// success; typed `State::StaleLease` / `State::LeaseExpired`
90    /// when the lease has been stolen or timed out.
91    async fn renew(&self, handle: &Handle) -> Result<LeaseRenewal, EngineError>;
92
93    /// Numeric-progress heartbeat.
94    async fn progress(
95        &self,
96        handle: &Handle,
97        percent: Option<u8>,
98        message: Option<String>,
99    ) -> Result<(), EngineError>;
100
101    /// Append one stream frame. Distinct from [`progress`](Self::progress)
102    /// per RFC-012 §3.1.1 K#6.
103    async fn append_frame(&self, handle: &Handle, frame: Frame) -> Result<(), EngineError>;
104
105    /// Terminal success. Borrows `handle` (round-4 M-D2) so callers
106    /// can retry under `EngineError::Transport` without losing the
107    /// cookie. Payload is `Option<Vec<u8>>` per the note above.
108    async fn complete(
109        &self,
110        handle: &Handle,
111        payload: Option<Vec<u8>>,
112    ) -> Result<(), EngineError>;
113
114    /// Terminal failure with classification. Returns [`FailOutcome`]
115    /// so the caller learns whether a retry was scheduled.
116    async fn fail(
117        &self,
118        handle: &Handle,
119        reason: FailureReason,
120        classification: FailureClass,
121    ) -> Result<FailOutcome, EngineError>;
122
123    /// Cooperative cancel by the worker holding the lease.
124    async fn cancel(&self, handle: &Handle, reason: &str) -> Result<(), EngineError>;
125
126    /// Suspend the execution awaiting one or more waitpoints. Returns
127    /// a fresh `Handle` whose `HandleKind::Suspended` supersedes the
128    /// caller's pre-suspend handle.
129    async fn suspend(
130        &self,
131        handle: &Handle,
132        waitpoints: Vec<WaitpointSpec>,
133        timeout: Option<Duration>,
134    ) -> Result<Handle, EngineError>;
135
136    /// Non-mutating observation of signals that satisfied the handle's
137    /// resume condition.
138    async fn observe_signals(&self, handle: &Handle)
139        -> Result<Vec<ResumeSignal>, EngineError>;
140
141    /// Consume a reclaim grant to mint a resumed-kind handle. Returns
142    /// `Ok(None)` when the grant's target execution is no longer
143    /// resumable (already reclaimed, terminal, etc.).
144    async fn claim_from_reclaim(
145        &self,
146        token: ReclaimToken,
147    ) -> Result<Option<Handle>, EngineError>;
148
149    // Round-5 amendment: lease-releasing peers of `suspend`.
150
151    /// Park the execution until `delay_until`, releasing the lease.
152    async fn delay(
153        &self,
154        handle: &Handle,
155        delay_until: TimestampMs,
156    ) -> Result<(), EngineError>;
157
158    /// Mark the execution as waiting for its child flow to complete,
159    /// releasing the lease.
160    async fn wait_children(&self, handle: &Handle) -> Result<(), EngineError>;
161
162    // ── Read / admin ──
163
164    /// Snapshot an execution by id. `Ok(None)` ⇒ no such execution.
165    async fn describe_execution(
166        &self,
167        id: &ExecutionId,
168    ) -> Result<Option<ExecutionSnapshot>, EngineError>;
169
170    /// Snapshot a flow by id. `Ok(None)` ⇒ no such flow.
171    async fn describe_flow(
172        &self,
173        id: &FlowId,
174    ) -> Result<Option<FlowSnapshot>, EngineError>;
175
176    /// Operator-initiated cancellation of a flow and (optionally) its
177    /// member executions. See RFC-012 §3.1.1 for the policy /wait
178    /// matrix.
179    async fn cancel_flow(
180        &self,
181        id: &FlowId,
182        policy: CancelFlowPolicy,
183        wait: CancelFlowWait,
184    ) -> Result<CancelFlowResult, EngineError>;
185
186    // ── Budget ──
187
188    /// Report usage and check admission. Returns the admission
189    /// decision; backends enforce idempotency via the caller-supplied
190    /// dedup key embedded in `UsageDimensions::custom` (today's
191    /// `ff_report_usage_and_check` contract).
192    async fn report_usage(
193        &self,
194        handle: &Handle,
195        budget: &BudgetId,
196        dimensions: crate::backend::UsageDimensions,
197    ) -> Result<AdmissionDecision, EngineError>;
198}
199
200/// Object-safety assertion: `dyn EngineBackend` compiles iff every
201/// method is dyn-compatible. Kept as a compile-time guard so a future
202/// trait change that accidentally breaks dyn-safety fails the build
203/// at this site rather than at every downstream `Arc<dyn
204/// EngineBackend>` use.
205#[allow(dead_code)]
206fn _assert_dyn_compatible(_: &dyn EngineBackend) {}
207