Skip to main content

bb_runtime/snapshot/
transient.rs

//! `TransientSnapshot` — the runtime ephemeral state surfaced for
//! `Node::snapshot` / `Node::restore`.
//!
//! Stable framework state (counters, lifecycle phases, address book,
//! peer governor, backoff table) round-trips today via the populated
//! `framework` + `bus` fields. The remaining fields (`frontier`,
//! `slot_table`, `pending_async`, `execution_state`, `ingress`,
//! `wire_states`, `pending_completions`) exist on the struct so the
//! shape matches the future in-flight execution snapshot but are not
//! yet populated by `Node::snapshot`; restored Nodes start from a
//! fresh frontier.
12
13use std::collections::HashMap;
14
15use serde::{Deserialize, Serialize};
16
17/// Runtime ephemeral state per ENGINE.md §15.1.
18#[derive(Clone, Debug, Default, Serialize, Deserialize)]
19pub struct TransientSnapshot {
20    /// In-cycle DAG-walking queue.
21    pub frontier: Vec<(u64, u64)>,
22    /// `(NodeSiteId, ExecId) → Option<serialized slot bytes>`.
23    /// `None` means "slot allocated but empty".
24    pub slot_table: HashMap<(u64, u64), Option<Vec<u8>>>,
25    /// Suspended Ops awaiting CommandId completion.
26    pub pending_async: HashMap<u64, PendingAsyncSnapshot>,
27    /// Per-execution liveness state.
28    pub execution_state: HashMap<u64, ExecutionStateSnapshot>,
29    /// Framework-primitive state (counters, backoff_table,
30    /// inbound_dedup, etc.) snapshotted at quiesce. Per
31    /// ENGINE.md §15.1 line 1402.
32    pub framework: FrameworkSnapshot,
33    /// Typed-bus state - subscription table + any queued events
34    /// that survive the cycle boundary. Per ENGINE.md §15.1 line
35    /// 1403.
36    pub bus: TypedBusSnapshot,
37    /// In-flight ingress events.
38    pub ingress: Vec<IngressEventSnapshot>,
39    /// Per-component wire-state. Currently empty; macro
40    /// populates as components grow per-wire state.
41    pub wire_states: HashMap<u32, Vec<u8>>,
42    /// Mid-cycle pending completions surfaced by `ProtocolRuntime`
43    /// hooks via `ctx.complete_command(...)`. Phase 5 drains these
44    /// post-dispatch per ENGINE.md §15.1 line 1406.
45    pub pending_completions: Vec<PendingCompletionSnapshot>,
46}
47
48/// Serializable view of `FrameworkComponents` per ENGINE.md §16.
49/// Captures counters, queued lifecycle phases, the multiaddr-keyed
50/// peer registries per `docs/ADDRESSING.md`, plus the
51/// `PeerGovernor` + `BackoffTable` policy/health state introduced
52/// in  - so a restored Node remembers
53/// blocklisted peers, allowlist policy, and in-flight backoff
54/// cooldowns across restarts (no thundering herd on cold start).
55///
56/// Other framework primitives (peer_gate inflight counts) stay
57/// transient - they're meaningful only within a single poll cycle.
58#[derive(Clone, Debug, Default, Serialize, Deserialize)]
59pub struct FrameworkSnapshot {
60    /// Counters from `Engine.counters` keyed by metric name.
61    pub counters: HashMap<String, u64>,
62    /// Lifecycle phases queued for next `poll()` per
63    /// `Engine.fired_phases`.
64    pub fired_phases: Vec<String>,
65    /// `AddressBook` entries - ordered address list + reference
66    /// count per peer (see `AddressBookEntrySnapshot`).
67    pub address_book: Vec<AddressBookEntrySnapshot>,
68    /// `PeerGovernor` policy + health.
69    #[serde(default)]
70    pub peer_governor: PeerGovernorSnapshot,
71    /// Per-peer `BackoffTable` state.
72    #[serde(default)]
73    pub backoff_table: Vec<BackoffEntry>,
74    /// Pending outbound envelopes that didn't make it to a Phase 8
75    /// drain before snapshotting. Each entry carries the
76    /// `redelivered` flag so the host's transport adapter can
77    /// decide whether to retry or drop after restore.
78    #[serde(default)]
79    pub pending_outbound: Vec<PendingOutboundEntry>,
80    /// Canonical multihash bytes for the Node's `PeerId`. Restore
81    /// reconstructs the PeerId via `PeerId::from_bytes(&peer_id_bytes)`,
82    /// round-tripping every multihash code (identity, sha2-256,
83    /// blake2b, ...) without information loss.
84    #[serde(default)]
85    pub peer_id_bytes: Vec<u8>,
86    /// Engine ID counter persistence. The previous
87    /// snapshot dropped `next_command_id` / `next_exec_id`, so a
88    /// restored Node would mint ID 0 again — colliding with any
89    /// in-flight command/exec the pre-snapshot Node had issued.
90    #[serde(default)]
91    pub next_command_id: u64,
92    /// Same for ExecIds.
93    #[serde(default)]
94    pub next_exec_id: u64,
95    /// Snapshot schema version (incarnation distinct).
96    /// Bumped when the FrameworkSnapshot shape changes in a way
97    /// older code cannot soundly restore from.
98    #[serde(default = "default_spec_version")]
99    pub spec_version: u32,
100}
101
102fn default_spec_version() -> u32 {
103    CURRENT_SNAPSHOT_SPEC_VERSION
104}
105
/// The snapshot spec version that this build is able to restore
/// soundly.
///
/// Increment it whenever the `FrameworkSnapshot` shape changes such
/// that older code can no longer replay it (for example a change of
/// field encoding, or a removed invariant). Restore refuses any
/// snapshot stamped with a different version.
pub const CURRENT_SNAPSHOT_SPEC_VERSION: u32 = 1;
112
113/// One peer's `AddressBook` entry: ordered list of `Address`
114/// byte vectors + reference count. Preserves the multi-address +
115/// ref-counted shape across snapshot/restore.
116#[derive(Clone, Debug, Default, Serialize, Deserialize)]
117pub struct AddressBookEntrySnapshot {
118    /// Canonical multihash bytes — `PeerId::to_bytes()`.
119    pub peer_id: Vec<u8>,
120    /// Ordered address list - each entry is `Address::to_bytes()`.
121    pub addresses: Vec<Vec<u8>>,
122    /// Reference count owned by overlay protocols / transport /
123    /// the application. Preserved across restore so peers stay
124    /// alive at their proper grip count.
125    pub ref_count: u64,
126}
127
128/// Serializable view of `PeerGovernor`.
129#[derive(Clone, Debug, Default, Serialize, Deserialize)]
130pub struct PeerGovernorSnapshot {
131    /// Blocklist entries, each `PeerId::to_bytes()`.
132    pub blocklist: Vec<Vec<u8>>,
133    /// `None` ⇒ open policy. `Some(vec)` ⇒ only the listed peers
134    /// (`PeerId::to_bytes()`) may communicate.
135    pub allowlist: Option<Vec<Vec<u8>>>,
136    /// `(PeerId::to_bytes(), consecutive_failures, last_event_ns,
137    /// down)` per peer.
138    pub health: Vec<(Vec<u8>, u32, u64, bool)>,
139    /// Failure threshold (consecutive failures to mark a peer
140    /// down).
141    pub failure_threshold: u32,
142}
143
144/// Serializable view of one peer's `BackoffState`.
145#[derive(Clone, Debug, Serialize, Deserialize)]
146pub struct BackoffEntry {
147    /// Canonical multihash bytes — `PeerId::to_bytes()`.
148    pub peer: Vec<u8>,
149    /// Consecutive failures.
150    pub attempts: u32,
151    /// `now_ns` at most recent failure.
152    pub last_attempt_ns: u64,
153    /// Earliest `now_ns` at which a retry is permitted.
154    pub next_retry_ns: u64,
155}
156
157/// One outbound envelope that hadn't been shipped yet when the
158/// snapshot was taken. The `redelivered` flag tells the transport
159/// adapter "I've seen this before, decide whether to ship again."
160#[derive(Clone, Debug, Serialize, Deserialize)]
161pub struct PendingOutboundEntry {
162    /// Prost-serialized `WireEnvelope` bytes.
163    pub envelope_bytes: Vec<u8>,
164    /// `true` once a previous snapshot/restore cycle already
165    /// surfaced this envelope.
166    pub redelivered: bool,
167}
168
169/// Serializable view of `TypedBus` per ENGINE.md §16. Captures the
170/// subscription table - `(event_kind → Vec<NodeSiteId.0>)` matching
171/// the multiaddr-routed delivery model in `docs/ADDRESSING.md`.
172#[derive(Clone, Debug, Default, Serialize, Deserialize)]
173pub struct TypedBusSnapshot {
174    /// Event-kind → subscriber `NodeSiteId.0` map. Mirrors
175    /// `Engine.event_subscriptions` keyed by string discriminator.
176    pub event_subscriptions: HashMap<String, Vec<u64>>,
177}
178
179/// Serializable view of `PendingCompletion` per ENGINE.md §10.2.
180/// The opaque `results` payload is serialized via the same wire
181/// path as ordinary slot values - SlotValue implementors carry
182/// proto-mirroring; non-tensor `WireValue`s round-trip as raw bytes
183/// here.
184#[derive(Clone, Debug, Serialize, Deserialize)]
185pub struct PendingCompletionSnapshot {
186    /// `CommandId.0` being fulfilled.
187    pub cmd_id: u64,
188    /// `(slot-name, serialized payload)` pairs to write to the
189    /// suspended Op's output sites.
190    pub results: Vec<(String, Vec<u8>)>,
191}
192
193/// Serializable view of `PendingAsync`.
194#[derive(Clone, Debug, Serialize, Deserialize)]
195pub struct PendingAsyncSnapshot {
196    /// `OpRef.0` of the suspended Op.
197    pub op_ref: u64,
198    /// `ExecId.0` of the suspended execution.
199    pub exec_id: u64,
200    /// Captured output sites as `NodeSiteId.0` values.
201    pub output_sites: Vec<u64>,
202    /// Engine-side deadline (`scheduler.now_ns()` clock) past
203    /// which the suspension expires. `None` ⇒ no engine deadline.
204    /// .
205    #[serde(default)]
206    pub deadline_ns: Option<u64>,
207}
208
209/// Serializable view of `ExecutionState`.
210#[derive(Clone, Debug, Default, Serialize, Deserialize)]
211pub struct ExecutionStateSnapshot {
212    /// Number of outputs written so far.
213    pub outputs_written: u32,
214}
215
216/// Serializable view of `IngressEvent`. Only the variants that can
217/// realistically survive a snapshot boundary are recorded; Waker /
218/// Control variants are dropped on snapshot.
219#[derive(Clone, Debug, Serialize, Deserialize)]
220pub enum IngressEventSnapshot {
221    /// Inbound envelope (encoded as raw bytes - the routing table
222    /// re-routes on restore).
223    Envelope(Vec<u8>),
224    /// App-event delivery.
225    AppEvent {
226        /// Module name.
227        module_name: String,
228        /// Input port name.
229        input_name: String,
230        /// Encoded payload bytes.
231        value_bytes: Vec<u8>,
232    },
233    /// Module invocation.
234    Invoke {
235        /// Module name.
236        module_name: String,
237        /// Input port + value-bytes pairs.
238        inputs: Vec<(String, Vec<u8>)>,
239    },
240    /// Timer maturity signal.
241    TimerMatured {
242        /// Maturity timestamp (nanoseconds).
243        at_ns: u64,
244    },
245    /// Async completion landing back at the engine.
246    Completion {
247        /// `CommandId.0` being fulfilled.
248        cmd_id: u64,
249        /// Encoded result payloads.
250        results: Vec<Vec<u8>>,
251    },
252}
253