ff_core/engine_backend.rs
1//! The `EngineBackend` trait — abstracting FlowFabric's write surface.
2//!
3//! **RFC-012 Stage 1a:** this is the trait landing. The
4//! Valkey-backed impl lives in `ff-backend-valkey`; future backends
5//! (Postgres) add a sibling crate with their own impl. ff-sdk's
6//! `FlowFabricWorker` gains `connect_with(backend)` /
7//! `backend(&self)` accessors so consumers that want to bring their
8//! own backend (tests, future non-Valkey deployments) can hand one
9//! in. The hot-path migration of `ClaimedTask` / `FlowFabricWorker`
10//! to forward through the trait lands across Stages 1b-1d.
11//!
12//! # Object safety
13//!
14//! `EngineBackend` is object-safe: all methods are `async fn` behind
15//! `#[async_trait]` and take `&self`. Consumers can hold
//! `Arc<dyn EngineBackend>` for heterogeneous-backend deployments.
17//! The trait is `Send + Sync + 'static` per RFC-012 §4.1; every impl
18//! must honour that bound.
19//!
20//! # Error surface
21//!
22//! Every method returns [`Result<_, EngineError>`]. `EngineError`'s
23//! `Transport` variant carries a boxed `dyn Error + Send + Sync`;
24//! Valkey-backed transport faults box a
25//! `ff_script::error::ScriptError` (downcast via
26//! `ff_script::engine_error_ext::transport_script_ref`). Other
27//! backends box their native error type and set the `backend` tag
28//! accordingly.
29//!
30//! # Atomicity contract
31//!
32//! Per-op state transitions MUST be atomic (RFC-012 §3.4). On Valkey
33//! this is the single-FCALL-per-op property; on Postgres it is the
34//! per-transaction property. A backend that cannot honour atomicity
35//! for a given op either MUST NOT implement `EngineBackend` or MUST
36//! return `EngineError::Unavailable { op }` for the affected method.
37//!
38//! # Replay semantics
39//!
//! `complete`, `fail`, `cancel`, `suspend`, `delay`, `wait_children`
//! are idempotent under replay — calling twice with the same handle
//! and args applies the transition only once: the first call
//! succeeds, and subsequent calls return typed `State` / `Contention`
//! where the fence triple no longer matches a live lease.
45
46use std::time::Duration;
47
48use async_trait::async_trait;
49
50use crate::backend::{
51 AdmissionDecision, CancelFlowPolicy, CancelFlowWait, CapabilitySet, ClaimPolicy,
52 FailOutcome, FailureClass, FailureReason, Frame, Handle, LeaseRenewal, ReclaimToken,
53 ResumeSignal, WaitpointSpec,
54};
55use crate::contracts::{CancelFlowResult, ExecutionSnapshot, FlowSnapshot};
56use crate::engine_error::EngineError;
57use crate::types::{BudgetId, ExecutionId, FlowId, LaneId, TimestampMs};
58
59/// The engine write surface — a single trait a backend implementation
60/// honours to serve a `FlowFabricWorker`.
61///
62/// See RFC-012 §3.1 for the inventory rationale and §3.3 for the
63/// type-level shape. 15 methods.
64///
65/// # Note on `complete` payload shape
66///
67/// The RFC §3.3 sketch uses `Option<Bytes>`; the Stage 1a trait uses
68/// `Option<Vec<u8>>` to match the existing
69/// `ff_sdk::ClaimedTask::complete` signature and avoid adding a
70/// `bytes` public-type dep for zero consumer benefit. Round-4 §7.17
71/// resolved the payload container debate to `Box<[u8]>` in the
72/// public type (see `HandleOpaque`); `Option<Vec<u8>>` is the
73/// zero-churn choice consistent with today's code. Consumers that
74/// need `&[u8]` can borrow via `.as_deref()` on the Option.
75#[async_trait]
76pub trait EngineBackend: Send + Sync + 'static {
77 // ── Claim + lifecycle ──
78
79 /// Fresh-work claim. Returns `Ok(None)` when no work is currently
80 /// available; `Err` only on transport or input-validation faults.
81 async fn claim(
82 &self,
83 lane: &LaneId,
84 capabilities: &CapabilitySet,
85 policy: ClaimPolicy,
86 ) -> Result<Option<Handle>, EngineError>;
87
88 /// Renew a held lease. Returns the updated expiry + epoch on
89 /// success; typed `State::StaleLease` / `State::LeaseExpired`
90 /// when the lease has been stolen or timed out.
91 async fn renew(&self, handle: &Handle) -> Result<LeaseRenewal, EngineError>;
92
93 /// Numeric-progress heartbeat.
94 async fn progress(
95 &self,
96 handle: &Handle,
97 percent: Option<u8>,
98 message: Option<String>,
99 ) -> Result<(), EngineError>;
100
101 /// Append one stream frame. Distinct from [`progress`](Self::progress)
102 /// per RFC-012 §3.1.1 K#6.
103 async fn append_frame(&self, handle: &Handle, frame: Frame) -> Result<(), EngineError>;
104
105 /// Terminal success. Borrows `handle` (round-4 M-D2) so callers
106 /// can retry under `EngineError::Transport` without losing the
107 /// cookie. Payload is `Option<Vec<u8>>` per the note above.
108 async fn complete(
109 &self,
110 handle: &Handle,
111 payload: Option<Vec<u8>>,
112 ) -> Result<(), EngineError>;
113
114 /// Terminal failure with classification. Returns [`FailOutcome`]
115 /// so the caller learns whether a retry was scheduled.
116 async fn fail(
117 &self,
118 handle: &Handle,
119 reason: FailureReason,
120 classification: FailureClass,
121 ) -> Result<FailOutcome, EngineError>;
122
123 /// Cooperative cancel by the worker holding the lease.
124 async fn cancel(&self, handle: &Handle, reason: &str) -> Result<(), EngineError>;
125
126 /// Suspend the execution awaiting one or more waitpoints. Returns
127 /// a fresh `Handle` whose `HandleKind::Suspended` supersedes the
128 /// caller's pre-suspend handle.
129 async fn suspend(
130 &self,
131 handle: &Handle,
132 waitpoints: Vec<WaitpointSpec>,
133 timeout: Option<Duration>,
134 ) -> Result<Handle, EngineError>;
135
136 /// Non-mutating observation of signals that satisfied the handle's
137 /// resume condition.
138 async fn observe_signals(&self, handle: &Handle)
139 -> Result<Vec<ResumeSignal>, EngineError>;
140
141 /// Consume a reclaim grant to mint a resumed-kind handle. Returns
142 /// `Ok(None)` when the grant's target execution is no longer
143 /// resumable (already reclaimed, terminal, etc.).
144 async fn claim_from_reclaim(
145 &self,
146 token: ReclaimToken,
147 ) -> Result<Option<Handle>, EngineError>;
148
149 // Round-5 amendment: lease-releasing peers of `suspend`.
150
151 /// Park the execution until `delay_until`, releasing the lease.
152 async fn delay(
153 &self,
154 handle: &Handle,
155 delay_until: TimestampMs,
156 ) -> Result<(), EngineError>;
157
158 /// Mark the execution as waiting for its child flow to complete,
159 /// releasing the lease.
160 async fn wait_children(&self, handle: &Handle) -> Result<(), EngineError>;
161
162 // ── Read / admin ──
163
164 /// Snapshot an execution by id. `Ok(None)` ⇒ no such execution.
165 async fn describe_execution(
166 &self,
167 id: &ExecutionId,
168 ) -> Result<Option<ExecutionSnapshot>, EngineError>;
169
170 /// Snapshot a flow by id. `Ok(None)` ⇒ no such flow.
171 async fn describe_flow(
172 &self,
173 id: &FlowId,
174 ) -> Result<Option<FlowSnapshot>, EngineError>;
175
176 /// Operator-initiated cancellation of a flow and (optionally) its
177 /// member executions. See RFC-012 §3.1.1 for the policy /wait
178 /// matrix.
179 async fn cancel_flow(
180 &self,
181 id: &FlowId,
182 policy: CancelFlowPolicy,
183 wait: CancelFlowWait,
184 ) -> Result<CancelFlowResult, EngineError>;
185
186 // ── Budget ──
187
188 /// Report usage and check admission. Returns the admission
189 /// decision; backends enforce idempotency via the caller-supplied
190 /// dedup key embedded in `UsageDimensions::custom` (today's
191 /// `ff_report_usage_and_check` contract).
192 async fn report_usage(
193 &self,
194 handle: &Handle,
195 budget: &BudgetId,
196 dimensions: crate::backend::UsageDimensions,
197 ) -> Result<AdmissionDecision, EngineError>;
198}
199
200/// Object-safety assertion: `dyn EngineBackend` compiles iff every
201/// method is dyn-compatible. Kept as a compile-time guard so a future
202/// trait change that accidentally breaks dyn-safety fails the build
203/// at this site rather than at every downstream `Arc<dyn
204/// EngineBackend>` use.
205#[allow(dead_code)]
206fn _assert_dyn_compatible(_: &dyn EngineBackend) {}
207