bb_runtime/snapshot/transient.rs
1//! `TransientSnapshot` — the runtime ephemeral state surfaced for
2//! `Node::snapshot` / `Node::restore`.
3//!
4//! Stable framework state (counters, lifecycle phases, address book,
5//! peer governor, backoff table) round-trips today via the populated
6//! `framework` + `bus` fields. The remaining fields (`frontier`,
7//! `slot_table`, `pending_async`, `execution_state`, `ingress`,
8//! `wire_states`, `pending_completions`) exist on the struct so the
9//! shape matches the future in-flight execution snapshot but are not
10//! yet populated by `Node::snapshot`; restored Nodes start from a
11//! fresh frontier.
12
13use std::collections::HashMap;
14
15use serde::{Deserialize, Serialize};
16
17/// Runtime ephemeral state per ENGINE.md §15.1.
18#[derive(Clone, Debug, Default, Serialize, Deserialize)]
19pub struct TransientSnapshot {
20 /// In-cycle DAG-walking queue.
21 pub frontier: Vec<(u64, u64)>,
22 /// `(NodeSiteId, ExecId) → Option<serialized slot bytes>`.
23 /// `None` means "slot allocated but empty".
24 pub slot_table: HashMap<(u64, u64), Option<Vec<u8>>>,
25 /// Suspended Ops awaiting CommandId completion.
26 pub pending_async: HashMap<u64, PendingAsyncSnapshot>,
27 /// Per-execution liveness state.
28 pub execution_state: HashMap<u64, ExecutionStateSnapshot>,
29 /// Framework-primitive state (counters, backoff_table,
30 /// inbound_dedup, etc.) snapshotted at quiesce. Per
31 /// ENGINE.md §15.1 line 1402.
32 pub framework: FrameworkSnapshot,
33 /// Typed-bus state - subscription table + any queued events
34 /// that survive the cycle boundary. Per ENGINE.md §15.1 line
35 /// 1403.
36 pub bus: TypedBusSnapshot,
37 /// In-flight ingress events.
38 pub ingress: Vec<IngressEventSnapshot>,
39 /// Per-component wire-state. Currently empty; macro
40 /// populates as components grow per-wire state.
41 pub wire_states: HashMap<u32, Vec<u8>>,
42 /// Mid-cycle pending completions surfaced by `ProtocolRuntime`
43 /// hooks via `ctx.complete_command(...)`. Phase 5 drains these
44 /// post-dispatch per ENGINE.md §15.1 line 1406.
45 pub pending_completions: Vec<PendingCompletionSnapshot>,
46}
47
48/// Serializable view of `FrameworkComponents` per ENGINE.md §16.
49/// Captures counters, queued lifecycle phases, the multiaddr-keyed
50/// peer registries per `docs/ADDRESSING.md`, plus the
51/// `PeerGovernor` + `BackoffTable` policy/health state introduced
52/// in - so a restored Node remembers
53/// blocklisted peers, allowlist policy, and in-flight backoff
54/// cooldowns across restarts (no thundering herd on cold start).
55///
56/// Other framework primitives (peer_gate inflight counts) stay
57/// transient - they're meaningful only within a single poll cycle.
58#[derive(Clone, Debug, Default, Serialize, Deserialize)]
59pub struct FrameworkSnapshot {
60 /// Counters from `Engine.counters` keyed by metric name.
61 pub counters: HashMap<String, u64>,
62 /// Lifecycle phases queued for next `poll()` per
63 /// `Engine.fired_phases`.
64 pub fired_phases: Vec<String>,
65 /// `AddressBook` entries - ordered address list + reference
66 /// count per peer (see `AddressBookEntrySnapshot`).
67 pub address_book: Vec<AddressBookEntrySnapshot>,
68 /// `PeerGovernor` policy + health.
69 #[serde(default)]
70 pub peer_governor: PeerGovernorSnapshot,
71 /// Per-peer `BackoffTable` state.
72 #[serde(default)]
73 pub backoff_table: Vec<BackoffEntry>,
74 /// Pending outbound envelopes that didn't make it to a Phase 8
75 /// drain before snapshotting. Each entry carries the
76 /// `redelivered` flag so the host's transport adapter can
77 /// decide whether to retry or drop after restore.
78 #[serde(default)]
79 pub pending_outbound: Vec<PendingOutboundEntry>,
80 /// Canonical multihash bytes for the Node's `PeerId`. Restore
81 /// reconstructs the PeerId via `PeerId::from_bytes(&peer_id_bytes)`,
82 /// round-tripping every multihash code (identity, sha2-256,
83 /// blake2b, ...) without information loss.
84 #[serde(default)]
85 pub peer_id_bytes: Vec<u8>,
86 /// Engine ID counter persistence. The previous
87 /// snapshot dropped `next_command_id` / `next_exec_id`, so a
88 /// restored Node would mint ID 0 again — colliding with any
89 /// in-flight command/exec the pre-snapshot Node had issued.
90 #[serde(default)]
91 pub next_command_id: u64,
92 /// Same for ExecIds.
93 #[serde(default)]
94 pub next_exec_id: u64,
95 /// Snapshot schema version (incarnation distinct).
96 /// Bumped when the FrameworkSnapshot shape changes in a way
97 /// older code cannot soundly restore from.
98 #[serde(default = "default_spec_version")]
99 pub spec_version: u32,
100}
101
102fn default_spec_version() -> u32 {
103 CURRENT_SNAPSHOT_SPEC_VERSION
104}
105
/// The snapshot spec version that this build is able to restore
/// soundly.
///
/// Increment it whenever the `FrameworkSnapshot` shape changes such
/// that older code can no longer replay it (for example a changed
/// field encoding or a removed invariant). Restore refuses any
/// snapshot stamped with a different version.
pub const CURRENT_SNAPSHOT_SPEC_VERSION: u32 = 1;
112
113/// One peer's `AddressBook` entry: ordered list of `Address`
114/// byte vectors + reference count. Preserves the multi-address +
115/// ref-counted shape across snapshot/restore.
116#[derive(Clone, Debug, Default, Serialize, Deserialize)]
117pub struct AddressBookEntrySnapshot {
118 /// Canonical multihash bytes — `PeerId::to_bytes()`.
119 pub peer_id: Vec<u8>,
120 /// Ordered address list - each entry is `Address::to_bytes()`.
121 pub addresses: Vec<Vec<u8>>,
122 /// Reference count owned by overlay protocols / transport /
123 /// the application. Preserved across restore so peers stay
124 /// alive at their proper grip count.
125 pub ref_count: u64,
126}
127
128/// Serializable view of `PeerGovernor`.
129#[derive(Clone, Debug, Default, Serialize, Deserialize)]
130pub struct PeerGovernorSnapshot {
131 /// Blocklist entries, each `PeerId::to_bytes()`.
132 pub blocklist: Vec<Vec<u8>>,
133 /// `None` ⇒ open policy. `Some(vec)` ⇒ only the listed peers
134 /// (`PeerId::to_bytes()`) may communicate.
135 pub allowlist: Option<Vec<Vec<u8>>>,
136 /// `(PeerId::to_bytes(), consecutive_failures, last_event_ns,
137 /// down)` per peer.
138 pub health: Vec<(Vec<u8>, u32, u64, bool)>,
139 /// Failure threshold (consecutive failures to mark a peer
140 /// down).
141 pub failure_threshold: u32,
142}
143
144/// Serializable view of one peer's `BackoffState`.
145#[derive(Clone, Debug, Serialize, Deserialize)]
146pub struct BackoffEntry {
147 /// Canonical multihash bytes — `PeerId::to_bytes()`.
148 pub peer: Vec<u8>,
149 /// Consecutive failures.
150 pub attempts: u32,
151 /// `now_ns` at most recent failure.
152 pub last_attempt_ns: u64,
153 /// Earliest `now_ns` at which a retry is permitted.
154 pub next_retry_ns: u64,
155}
156
157/// One outbound envelope that hadn't been shipped yet when the
158/// snapshot was taken. The `redelivered` flag tells the transport
159/// adapter "I've seen this before, decide whether to ship again."
160#[derive(Clone, Debug, Serialize, Deserialize)]
161pub struct PendingOutboundEntry {
162 /// Prost-serialized `WireEnvelope` bytes.
163 pub envelope_bytes: Vec<u8>,
164 /// `true` once a previous snapshot/restore cycle already
165 /// surfaced this envelope.
166 pub redelivered: bool,
167}
168
169/// Serializable view of `TypedBus` per ENGINE.md §16. Captures the
170/// subscription table - `(event_kind → Vec<NodeSiteId.0>)` matching
171/// the multiaddr-routed delivery model in `docs/ADDRESSING.md`.
172#[derive(Clone, Debug, Default, Serialize, Deserialize)]
173pub struct TypedBusSnapshot {
174 /// Event-kind → subscriber `NodeSiteId.0` map. Mirrors
175 /// `Engine.event_subscriptions` keyed by string discriminator.
176 pub event_subscriptions: HashMap<String, Vec<u64>>,
177}
178
179/// Serializable view of `PendingCompletion` per ENGINE.md §10.2.
180/// The opaque `results` payload is serialized via the same wire
181/// path as ordinary slot values - SlotValue implementors carry
182/// proto-mirroring; non-tensor `WireValue`s round-trip as raw bytes
183/// here.
184#[derive(Clone, Debug, Serialize, Deserialize)]
185pub struct PendingCompletionSnapshot {
186 /// `CommandId.0` being fulfilled.
187 pub cmd_id: u64,
188 /// `(slot-name, serialized payload)` pairs to write to the
189 /// suspended Op's output sites.
190 pub results: Vec<(String, Vec<u8>)>,
191}
192
193/// Serializable view of `PendingAsync`.
194#[derive(Clone, Debug, Serialize, Deserialize)]
195pub struct PendingAsyncSnapshot {
196 /// `OpRef.0` of the suspended Op.
197 pub op_ref: u64,
198 /// `ExecId.0` of the suspended execution.
199 pub exec_id: u64,
200 /// Captured output sites as `NodeSiteId.0` values.
201 pub output_sites: Vec<u64>,
202 /// Engine-side deadline (`scheduler.now_ns()` clock) past
203 /// which the suspension expires. `None` ⇒ no engine deadline.
204 /// .
205 #[serde(default)]
206 pub deadline_ns: Option<u64>,
207}
208
209/// Serializable view of `ExecutionState`.
210#[derive(Clone, Debug, Default, Serialize, Deserialize)]
211pub struct ExecutionStateSnapshot {
212 /// Number of outputs written so far.
213 pub outputs_written: u32,
214}
215
216/// Serializable view of `IngressEvent`. Only the variants that can
217/// realistically survive a snapshot boundary are recorded; Waker /
218/// Control variants are dropped on snapshot.
219#[derive(Clone, Debug, Serialize, Deserialize)]
220pub enum IngressEventSnapshot {
221 /// Inbound envelope (encoded as raw bytes - the routing table
222 /// re-routes on restore).
223 Envelope(Vec<u8>),
224 /// App-event delivery.
225 AppEvent {
226 /// Module name.
227 module_name: String,
228 /// Input port name.
229 input_name: String,
230 /// Encoded payload bytes.
231 value_bytes: Vec<u8>,
232 },
233 /// Module invocation.
234 Invoke {
235 /// Module name.
236 module_name: String,
237 /// Input port + value-bytes pairs.
238 inputs: Vec<(String, Vec<u8>)>,
239 },
240 /// Timer maturity signal.
241 TimerMatured {
242 /// Maturity timestamp (nanoseconds).
243 at_ns: u64,
244 },
245 /// Async completion landing back at the engine.
246 Completion {
247 /// `CommandId.0` being fulfilled.
248 cmd_id: u64,
249 /// Encoded result payloads.
250 results: Vec<Vec<u8>>,
251 },
252}
253