// dig_slashing/orchestration.rs
//! Epoch-boundary orchestration.
//!
//! Traces to: [SPEC §10](../docs/resources/SPEC.md).
//!
//! # Role
//!
//! [`run_epoch_boundary`] is the single public entry point an
//! embedder calls once per epoch boundary to drive every
//! per-epoch piece of slashing state forward in a FIXED,
//! spec-mandated order. Each downstream step depends on the
//! state produced by earlier steps; reordering is a protocol
//! error and is pinned by DSL-127's order tests.
//!
//! Spec-mandated step order:
//!
//! 1. Compute flag deltas over `participation`'s previous-epoch
//!    flags.
//! 2. Update inactivity scores over the same previous-epoch
//!    flags.
//! 3. Compute inactivity-leak penalties for the ending epoch.
//! 4. Finalise expired slashes (correlation penalty + reporter-
//!    bond release + exit lock).
//! 5. Rotate `ParticipationTracker` to `current_epoch_ending + 1`.
//! 6. Advance `SlashingManager` epoch.
//! 7. Resize trackers if `validator_count` changed.
//! 8. Prune old processed evidence + correlation-window
//!    entries.
//!
//! # Why this order
//!
//! - **1 before 2** — `update_for_epoch` reads the same
//!   previous-epoch flags the flag-delta computation reads.
//!   Running the update first would rotate the tracker before
//!   the delta pass, losing the previous-epoch data permanently.
//! - **3 before 4** — finalise uses correlation data that must
//!   reflect the most recent inactivity update; if penalties
//!   were computed after finalise, the cohort would use stale
//!   scores.
//! - **4 before 5** — `finalise_expired_slashes` reads
//!   `correlation_window` entries keyed by the CURRENT epoch;
//!   rotating the participation tracker first would confuse
//!   other consumers into believing the new epoch is active
//!   while the manager is still mid-finalise.
//! - **8 last** — pruning drops evidence and correlation rows
//!   that would otherwise be needed by earlier steps.
46
47use std::collections::BTreeMap;
48
49use dig_epoch::CORRELATION_WINDOW_EPOCHS;
50
51use dig_protocol::Bytes32;
52use serde::{Deserialize, Serialize};
53
54use crate::bonds::BondEscrow;
55use crate::error::SlashingError;
56use crate::inactivity::{InactivityScoreTracker, in_finality_stall};
57use crate::manager::{FinalisationResult, SlashingManager};
58use crate::participation::{FlagDelta, ParticipationTracker, compute_flag_deltas};
59use crate::protection::SlashingProtection;
60use crate::traits::{CollateralSlasher, EffectiveBalanceView, RewardPayout, ValidatorView};
61
62/// Per-epoch finality view. Returns the epoch of the most
63/// recently FINALIZED Casper-FFG checkpoint. DSL-127 consults
64/// this to derive [`in_finality_stall`]; the orchestrator does
65/// not require a full Casper view, only the finalized-epoch
66/// height.
67///
68/// Implemented by the embedder's consensus integration (DSL-143
69/// full surface). Shipped here early because DSL-127 is the
70/// first caller.
71pub trait JustificationView {
72 /// Epoch of the most recent finalized checkpoint. `0` at
73 /// genesis before any checkpoint has finalized. DSL-127
74 /// derives `in_finality_stall` from this.
75 fn latest_finalized_epoch(&self) -> u64;
76
77 /// Most recently justified checkpoint in the current epoch.
78 /// DSL-075 source-justified appeal check consumer. Default:
79 /// zero checkpoint so DSL-127 fixtures that only care about
80 /// the stall flag don't have to implement the full surface.
81 fn current_justified_checkpoint(&self) -> crate::evidence::Checkpoint {
82 crate::evidence::Checkpoint {
83 epoch: 0,
84 root: dig_protocol::Bytes32::new([0u8; 32]),
85 }
86 }
87
88 /// Checkpoint justified in the previous epoch. DSL-075
89 /// consumer. Default: zero checkpoint.
90 fn previous_justified_checkpoint(&self) -> crate::evidence::Checkpoint {
91 crate::evidence::Checkpoint {
92 epoch: 0,
93 root: dig_protocol::Bytes32::new([0u8; 32]),
94 }
95 }
96
97 /// Most recently finalized checkpoint. Default: zero-root
98 /// at [`latest_finalized_epoch`] so the epoch leg matches
99 /// DSL-127's minimum contract even when the root is
100 /// uninitialised. DSL-076 consumer.
101 fn finalized_checkpoint(&self) -> crate::evidence::Checkpoint {
102 crate::evidence::Checkpoint {
103 epoch: self.latest_finalized_epoch(),
104 root: dig_protocol::Bytes32::new([0u8; 32]),
105 }
106 }
107
108 /// Canonical block root at `slot`, or `None` for
109 /// uncommitted / future slots. DSL-076/077 head check
110 /// consumer. Default: always `None`.
111 fn canonical_block_root_at_slot(&self, _slot: u64) -> Option<dig_protocol::Bytes32> {
112 None
113 }
114
115 /// Canonical target root for `epoch` (start-of-epoch
116 /// block root), or `None` past chain tip. DSL-076 target-root
117 /// consumer. Default: `None`.
118 fn canonical_target_root_for_epoch(&self, _epoch: u64) -> Option<dig_protocol::Bytes32> {
119 None
120 }
121}
122
/// Summary produced by [`run_epoch_boundary`]. Carries every
/// side-effect the caller needs to route downstream (logging,
/// reward payouts, state snapshots).
///
/// The struct intentionally contains vectors rather than
/// callback channels — the orchestrator is infallible by
/// construction and produces a complete report in one pass.
///
/// NOTE: field order is part of the serialized layout for
/// positional serde formats; do not reorder fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct EpochBoundaryReport {
    /// Per-validator reward/penalty deltas from DSL-082/083
    /// (step 1). Non-zero rewards were already routed through
    /// `RewardPayout` in step 1b; reported here for logging.
    pub flag_deltas: Vec<FlagDelta>,
    /// Per-validator `(validator_index, penalty_mojos)` pairs
    /// from DSL-091/092 (step 3). Empty outside a finality
    /// stall. Already debited from stakes in step 3b.
    pub inactivity_penalties: Vec<(u32, u64)>,
    /// Slashes finalised this epoch (DSL-029..033, step 4).
    pub finalisations: Vec<FinalisationResult>,
    /// Whether a finality stall was in effect at the start of
    /// the epoch boundary. Drives inactivity-leak branches.
    pub in_finality_stall: bool,
    /// Number of stale processed-evidence entries pruned
    /// (step 8). Observability only.
    pub pruned_entries: usize,
}
146
147/// Drive one epoch-boundary pass. See module docs for order.
148///
149/// # Signatures
150///
151/// Every trait argument is `&mut dyn` / `&dyn` so the embedder
152/// can inject concrete state views without committing to
153/// generics on the slashing crate. The `usize` +`u64` scalars
154/// are snapshot values measured at block N-1 (the block that
155/// closes the epoch).
156///
157/// # Invariants
158///
159/// - After the call:
160/// - `participation.current_epoch_number() == current_epoch_ending + 1`
161/// - `manager.current_epoch() == current_epoch_ending + 1`
162/// - `inactivity.validator_count() == validator_count`
163#[allow(clippy::too_many_arguments)]
164pub fn run_epoch_boundary(
165 manager: &mut SlashingManager,
166 participation: &mut ParticipationTracker,
167 inactivity: &mut InactivityScoreTracker,
168 validator_set: &mut dyn ValidatorView,
169 effective_balances: &dyn EffectiveBalanceView,
170 bond_escrow: &mut dyn BondEscrow,
171 reward_payout: &mut dyn RewardPayout,
172 justification: &dyn JustificationView,
173 current_epoch_ending: u64,
174 validator_count: usize,
175 total_active_balance: u64,
176) -> EpochBoundaryReport {
177 // Derive finality-stall state ONCE up front. Both the
178 // inactivity-score update (step 2) and the penalty
179 // computation (step 3) branch on it; deriving here keeps
180 // them consistent even if `justification` is a racing
181 // reference (should not happen under the chain lock, but
182 // defensive).
183 let finalized_epoch = justification.latest_finalized_epoch();
184 let stall = in_finality_stall(current_epoch_ending, finalized_epoch);
185
186 // ── Step 1: flag deltas over previous-epoch flags ─────
187 let flag_deltas = compute_flag_deltas(
188 participation,
189 effective_balances,
190 total_active_balance,
191 stall,
192 );
193
194 // ── Step 1b (DSL-169): route flag-delta rewards through
195 // RewardPayout. For every delta with reward > 0, call
196 // `reward_payout.pay(puzzle_hash, reward)`. Zero-reward
197 // deltas are filtered so no-op payments do not spam the
198 // embedder's accumulator. Validators missing from the view
199 // are silently skipped (defensive tolerance — the view may
200 // drift between DSL-082 computation and here if a parallel
201 // mutation is in flight, though under the chain lock this
202 // should not happen).
203 for fd in &flag_deltas {
204 if fd.reward == 0 {
205 continue;
206 }
207 if let Some(entry) = validator_set.get(fd.validator_index) {
208 reward_payout.pay(entry.puzzle_hash(), fd.reward);
209 }
210 }
211
212 // ── Step 2: inactivity-score update (reads same flags) ─
213 inactivity.update_for_epoch(participation, stall);
214
215 // ── Step 3: inactivity-leak penalties for ending epoch ─
216 let inactivity_penalties = inactivity.epoch_penalties(effective_balances, stall);
217
218 // ── Step 3b (DSL-169): apply inactivity-leak penalties to
219 // validator stakes via `ValidatorEntry::slash_absolute`.
220 // DSL-091/092 computes the per-validator penalty_mojos; the
221 // wiring here actually debits the stake. Missing validators
222 // skipped (same rationale as step 1b).
223 for &(idx, penalty_mojos) in &inactivity_penalties {
224 if let Some(entry) = validator_set.get_mut(idx) {
225 entry.slash_absolute(penalty_mojos, current_epoch_ending);
226 }
227 }
228
229 // ── Step 4: finalise expired slashes ─────────────────
230 let finalisations = manager.finalise_expired_slashes(
231 validator_set,
232 effective_balances,
233 bond_escrow,
234 total_active_balance,
235 );
236
237 // ── Step 5: rotate participation tracker ──────────────
238 participation.rotate_epoch(current_epoch_ending + 1, validator_count);
239
240 // ── Step 6: advance SlashingManager epoch ─────────────
241 manager.set_epoch(current_epoch_ending + 1);
242
243 // ── Step 7: resize trackers if validator count changed ─
244 if inactivity.validator_count() != validator_count {
245 inactivity.resize_for(validator_count);
246 }
247
248 // ── Step 8: prune old processed evidence + corr-window ─
249 // Cutoff = current_epoch_ending.saturating_sub(CORRELATION_WINDOW_EPOCHS).
250 // Keeps everything within the correlation window reachable
251 // by future DSL-030 cohort-sum computations.
252 let cutoff = current_epoch_ending.saturating_sub(u64::from(CORRELATION_WINDOW_EPOCHS));
253 let pruned_entries = manager.prune_processed_older_than(cutoff);
254
255 EpochBoundaryReport {
256 flag_deltas,
257 inactivity_penalties,
258 finalisations,
259 in_finality_stall: stall,
260 pruned_entries,
261 }
262}
263
// Keep the `BTreeMap` import alive: it is reserved for a
// Visualiser-friendly diff structure once this module grows,
// and routing it through a dead type alias avoids
// unused-import churn in the meantime.
#[allow(dead_code)]
type _KeepBTreeMap<A, B> = BTreeMap<A, B>;
269
/// Summary produced by [`rewind_all_on_reorg`]. Carries per-
/// subsystem rewind outcomes so the caller (a chain-shell
/// orchestrator) can log or emit metrics without re-deriving
/// the rewind scope from internal tracker state.
///
/// NOTE: field order is part of the serialized layout for
/// positional serde formats; do not reorder fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReorgReport {
    /// Evidence hashes rewound by
    /// [`SlashingManager::rewind_on_reorg`] (DSL-129, step 1).
    pub rewound_pending_slashes: Vec<Bytes32>,
    /// Epochs dropped from the participation tracker (= reorg
    /// depth at the moment the tracker was rewound, step 2).
    pub participation_epochs_dropped: u64,
    /// Epochs dropped from the inactivity tracker (same depth —
    /// the inactivity tracker does not carry an epoch counter,
    /// so the caller's computed depth is carried through for
    /// uniform metric reporting, step 3).
    pub inactivity_epochs_dropped: u64,
    /// Whether `SlashingProtection::reconcile_with_chain_tip`
    /// was called. `true` in every successful rewind; exposed
    /// as a field for symmetry / future branching.
    pub protection_rewound: bool,
}
292
293/// Global reorg orchestrator. Rewinds every slashing-state
294/// subsystem in a fixed order.
295///
296/// Implements [DSL-130](../docs/requirements/domains/orchestration/specs/DSL-130.md).
297/// Traces to SPEC §13.
298///
299/// # Step order
300///
301/// 1. [`SlashingManager::rewind_on_reorg`] (DSL-129) — must
302/// run FIRST because it reads validator-set state that the
303/// other rewinds do not touch; running it after a
304/// participation rewind would confuse the `is_slashed`
305/// check inside `credit_stake` / `restore_status`.
306/// 2. [`ParticipationTracker::rewind_on_reorg`] — zero-fills
307/// both flag vectors and anchors current_epoch at the
308/// new tip.
309/// 3. [`InactivityScoreTracker::rewind_on_reorg`] — zero-
310/// fills every score.
311/// 4. [`SlashingProtection::reconcile_with_chain_tip`]
312/// (DSL-099) — caps proposal + attestation watermarks at
313/// the new tip and clears the attested-block hash binding.
314///
315/// After success, `manager.current_epoch()` is reset to
316/// `new_tip_epoch` so the orchestration state carries the
317/// post-reorg epoch forward.
318///
319/// # Depth limit
320///
321/// `current - new_tip_epoch > CORRELATION_WINDOW_EPOCHS` ⇒
322/// `SlashingError::ReorgTooDeep`. The correlation window is
323/// the deepest state we can reconstruct — older `slashed_in_window`
324/// rows have been pruned (DSL-127 step 8) and no subsystem
325/// retains snapshots further back.
326///
327/// # Errors
328///
329/// - [`SlashingError::ReorgTooDeep`] — reorg depth exceeds
330/// retention. No state is mutated; caller must recover via a
331/// longer-range reconciliation path (checkpoint restore /
332/// full resync).
333#[allow(clippy::too_many_arguments)]
334pub fn rewind_all_on_reorg(
335 manager: &mut SlashingManager,
336 participation: &mut ParticipationTracker,
337 inactivity: &mut InactivityScoreTracker,
338 protection: &mut SlashingProtection,
339 validator_set: &mut dyn ValidatorView,
340 collateral: Option<&mut dyn CollateralSlasher>,
341 bond_escrow: &mut dyn BondEscrow,
342 new_tip_epoch: u64,
343 new_tip_slot: u64,
344 validator_count: usize,
345) -> Result<ReorgReport, SlashingError> {
346 let current_epoch = manager.current_epoch();
347 let depth = current_epoch.saturating_sub(new_tip_epoch);
348 let limit = u64::from(CORRELATION_WINDOW_EPOCHS);
349 if depth > limit {
350 return Err(SlashingError::ReorgTooDeep { depth, limit });
351 }
352
353 // ── Step 1: manager rewind ────────────────────────────
354 let rewound_pending_slashes =
355 manager.rewind_on_reorg(new_tip_epoch, validator_set, collateral, bond_escrow);
356
357 // ── Step 2: participation rewind ──────────────────────
358 let participation_epochs_dropped =
359 participation.rewind_on_reorg(new_tip_epoch, validator_count);
360
361 // ── Step 3: inactivity rewind ─────────────────────────
362 let inactivity_epochs_dropped = inactivity.rewind_on_reorg(depth);
363
364 // ── Step 4: protection reconcile ──────────────────────
365 protection.reconcile_with_chain_tip(new_tip_slot, new_tip_epoch);
366
367 // Anchor the manager's epoch at the new tip so future
368 // epoch-boundary passes compute correctly.
369 manager.set_epoch(new_tip_epoch);
370
371 Ok(ReorgReport {
372 rewound_pending_slashes,
373 participation_epochs_dropped,
374 inactivity_epochs_dropped,
375 protection_rewound: true,
376 })
377}